/* * Create a shared memory segment of the given size and initialize. Also, * register an on_shmem_exit callback to release the storage. */ void * pool_shared_memory_create(size_t size) { int shmid; void *memAddress; /* Try to create new segment */ shmid = shmget(IPC_PRIVATE, size, IPC_CREAT | IPC_EXCL | IPCProtection); if (shmid < 0) { pool_error("could not create shared memory segment: %s", strerror(errno)); return NULL; } /* Register on-exit routine to delete the new segment */ on_shmem_exit(IpcMemoryDelete, shmid); /* OK, should be able to attach to the segment */ memAddress = shmat(shmid, NULL, PG_SHMAT_FLAGS); if (memAddress == (void *) -1) { pool_error("shmat(id=%d) failed: %s", shmid, strerror(errno)); return NULL; } /* Register on-exit routine to detach new segment before deleting */ on_shmem_exit(IpcMemoryDetach, (Datum) memAddress); return memAddress; }
/* * ProcSignalInit * Register the current process in the procsignal array * * The passed index should be my BackendId if the process has one, * or MaxBackends + aux process type if not. */ void ProcSignalInit(int pss_idx) { volatile ProcSignalSlot *slot; Assert(pss_idx >= 1 && pss_idx <= NumProcSignalSlots); slot = &ProcSignalSlots[pss_idx - 1]; /* sanity check */ if (slot->pss_pid != 0) elog(LOG, "process %d taking over ProcSignal slot %d, but it's not empty", MyProcPid, pss_idx); /* Clear out any leftover signal reasons */ MemSet(slot->pss_signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t)); /* Mark slot with my PID */ slot->pss_pid = MyProcPid; /* Remember slot location for CheckProcSignal */ MyProcSignalSlot = slot; /* Set up to release the slot on process exit */ on_shmem_exit(CleanupProcSignalState, Int32GetDatum(pss_idx)); }
/* * Create a semaphore set and initialize. */ void pool_semaphore_create(int numSems) { int i; /* Try to create new semaphore set */ semId = semget(IPC_PRIVATE, numSems, IPC_CREAT | IPC_EXCL | IPCProtection); if (semId < 0) ereport(FATAL, (errmsg("Unable to create semaphores:%d error:\"%s\"",numSems,strerror(errno)))); on_shmem_exit(IpcSemaphoreKill, semId); /* Initialize it to count 1 */ for (i = 0; i < numSems; i++) { union semun semun; semun.val = 1; if (semctl(semId, i, SETVAL, semun) < 0) ereport(FATAL, (errmsg("Unable to create semaphores:%d error:\"%s\"",numSems,strerror(errno)), errdetail("semctl(%d, %d, SETVAL, %d) failed",semId,i,semun.val))); } }
/* * SIBackendInit * Initialize a new backend to operate on the sinval buffer * * Returns: * >0 A-OK * 0 Failed to find a free procState slot (ie, MaxBackends exceeded) * <0 Some other failure (not currently used) * * NB: this routine, and all following ones, must be executed with the * SInvalLock lock held, since there may be multiple backends trying * to access the buffer. */ int SIBackendInit(SISeg *segP) { int index; ProcState *stateP = NULL; /* Look for a free entry in the procState array */ for (index = 0; index < segP->lastBackend; index++) { if (segP->procState[index].nextMsgNum < 0) /* inactive slot? */ { stateP = &segP->procState[index]; break; } } if (stateP == NULL) { if (segP->lastBackend < segP->maxBackends) { stateP = &segP->procState[segP->lastBackend]; Assert(stateP->nextMsgNum < 0); segP->lastBackend++; } else { /* out of procState slots */ MyBackendId = InvalidBackendId; return 0; } } MyBackendId = (stateP - &segP->procState[0]) + 1; #ifdef INVALIDDEBUG elog(DEBUG2, "my backend id is %d", MyBackendId); #endif /* INVALIDDEBUG */ /* Advertise assigned backend ID in MyProc */ MyProc->backendId = MyBackendId; /* Reduce free slot count */ segP->freeBackends--; /* Fetch next local transaction ID into local memory */ nextLocalTransactionId = segP->nextLXID[MyBackendId - 1]; /* mark myself active, with all extant messages already read */ stateP->nextMsgNum = segP->maxMsgNum; stateP->resetState = false; /* register exit routine to mark my entry inactive at exit */ on_shmem_exit(CleanupInvalidationState, PointerGetDatum(segP)); return 1; }
/* * PGReserveSemaphores --- initialize semaphore support * * In the Win32 implementation, we acquire semaphores on-demand; the * maxSemas parameter is just used to size the array that keeps track of * acquired semas for subsequent releasing. We use anonymous semaphores * so the semaphores are automatically freed when the last referencing * process exits. */ void PGReserveSemaphores(int maxSemas, int port) { mySemSet = (HANDLE *) malloc(maxSemas * sizeof(HANDLE)); if (mySemSet == NULL) elog(PANIC, "out of memory"); numSems = 0; maxSems = maxSemas; on_shmem_exit(ReleaseSemaphores, 0); }
/* Initialize a per-walsender data structure for this walsender process */ static void InitWalSnd(void) { int i; /* * WalSndCtl should be set up already (we inherit this by fork() or * EXEC_BACKEND mechanism from the postmaster). */ Assert(WalSndCtl != NULL); Assert(MyWalSnd == NULL); /* * Find a free walsender slot and reserve it. If this fails, we must be * out of WalSnd structures. */ for (i = 0; i < max_wal_senders; i++) { /* use volatile pointer to prevent code rearrangement */ volatile WalSnd *walsnd = &WalSndCtl->walsnds[i]; SpinLockAcquire(&walsnd->mutex); if (walsnd->pid != 0) { SpinLockRelease(&walsnd->mutex); continue; } else { /* * Found a free slot. Reserve it for us. */ walsnd->pid = MyProcPid; MemSet(&walsnd->sentPtr, 0, sizeof(XLogRecPtr)); walsnd->state = WALSNDSTATE_STARTUP; SpinLockRelease(&walsnd->mutex); /* don't need the lock anymore */ OwnLatch((Latch *) &walsnd->latch); MyWalSnd = (WalSnd *) walsnd; break; } } if (MyWalSnd == NULL) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("number of requested standby connections " "exceeds max_wal_senders (currently %d)", max_wal_senders))); /* Arrange to clean up at walsender exit */ on_shmem_exit(WalSndKill, 0); }
/* * PGReserveSemaphores --- initialize semaphore support * * This is called during postmaster start or shared memory reinitialization. * It should do whatever is needed to be able to support up to maxSemas * subsequent PGSemaphoreCreate calls. Also, if any system resources * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit * callback to release them. * * The port number is passed for possible use as a key (for Posix, we use * it to generate the starting semaphore name). In a standalone backend, * zero will be passed. * * In the Posix implementation, we acquire semaphores on-demand; the * maxSemas parameter is just used to size the array that keeps track of * acquired semas for subsequent releasing. */ void PGReserveSemaphores(int maxSemas, int port) { mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *)); if (mySemPointers == NULL) elog(PANIC, "out of memory"); numSems = 0; maxSems = maxSemas; nextSemKey = port * 1000; on_shmem_exit(ReleaseSemaphores, 0); }
static void init_lwlock_stats(void) { int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); int numLocks = LWLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); ex_acquire_counts = calloc(numLocks, sizeof(int)); spin_delay_counts = calloc(numLocks, sizeof(int)); block_counts = calloc(numLocks, sizeof(int)); counts_for_pid = MyProcPid; on_shmem_exit(print_lwlock_stats, 0); }
/* * PGReserveSemaphores --- initialize semaphore support * * This is called during postmaster start or shared memory reinitialization. * It should do whatever is needed to be able to support up to maxSemas * subsequent PGSemaphoreCreate calls. Also, if any system resources * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit * callback to release them. * * The port number is passed for possible use as a key (for SysV, we use * it to generate the starting semaphore key). In a standalone backend, * zero will be passed. * * In the SysV implementation, we acquire semaphore sets on-demand; the * maxSemas parameter is just used to size the array that keeps track of * acquired sets for subsequent releasing. */ void PGReserveSemaphores(int maxSemas, int port) { maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET; mySemaSets = (IpcSemaphoreId *) malloc(maxSemaSets * sizeof(IpcSemaphoreId)); if (mySemaSets == NULL) elog(PANIC, "out of memory"); numSemaSets = 0; nextSemaKey = port * 1000; nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */ on_shmem_exit(ReleaseSemaphores, 0); }
/* * Establish an AuxProcessResourceOwner for the current process. */ void CreateAuxProcessResourceOwner(void) { Assert(AuxProcessResourceOwner == NULL); Assert(CurrentResourceOwner == NULL); AuxProcessResourceOwner = ResourceOwnerCreate(NULL, "AuxiliaryProcess"); CurrentResourceOwner = AuxProcessResourceOwner; /* * Register a shmem-exit callback for cleanup of aux-process resource * owner. (This needs to run after, e.g., ShutdownXLOG.) */ on_shmem_exit(ReleaseAuxProcessResourcesCallback, 0); }
/* * InitProcessPhase2 -- make MyProc visible in the shared ProcArray. * * This is separate from InitProcess because we can't acquire LWLocks until * we've created a PGPROC, but in the EXEC_BACKEND case ProcArrayAdd won't * work until after we've done CreateSharedMemoryAndSemaphores. */ void InitProcessPhase2(void) { Assert(MyProc != NULL); /* * Add our PGPROC to the PGPROC array in shared memory. */ ProcArrayAdd(MyProc); /* * Arrange to clean that up at backend exit. */ on_shmem_exit(RemoveProcFromArray, 0); }
/* * shmem_startup_hook */ static void pgcl_shmem_startup(void) { if (prev_shmem_startup_hook) prev_shmem_startup_hook(); /* * If we're in the postmaster (or a standalone backend...), * set up a shmem exit hook to call command_at_shutdown. */ if (!IsUnderPostmaster) on_shmem_exit(pgcl_shmem_shutdown, (Datum) 0); if (command_at_startup) ExecuteCommand(command_at_startup, "command_at_startup"); }
/* * PGReserveSemaphores --- initialize semaphore support * * This is called during postmaster start or shared memory reinitialization. * It should do whatever is needed to be able to support up to maxSemas * subsequent PGSemaphoreCreate calls. Also, if any system resources * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit * callback to release them. * * The port number is passed for possible use as a key (for SysV, we use * it to generate the starting semaphore key). In a standalone backend, * zero will be passed. * * In the SysV implementation, we acquire semaphore sets on-demand; the * maxSemas parameter is just used to size the array that keeps track of * acquired sets for subsequent releasing. */ void PGReserveSemaphores(int maxSemas, int port) { maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET; mySemaSets = (IpcSemaphoreId *) malloc(maxSemaSets * sizeof(IpcSemaphoreId)); if (mySemaSets == NULL) elog(PANIC, "out of memory"); numSemaSets = 0; nextSemaKey = port * 1000; nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */ elog((Debug_print_semaphore_detail ? LOG : DEBUG5), "maxSemaSets %d, nextSemaKey %d, nextSemaNumber %d", maxSemaSets, nextSemaKey, nextSemaNumber); on_shmem_exit(ReleaseSemaphores, 0); }
void PersistentDatabase_DirIterateInit(void) { READ_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; Assert(persistentDatabaseSharedData != NULL); PersistentDatabase_VerifyInitScan(); PersistentDatabase_ReleaseDirIterator(); READ_PERSISTENT_STATE_ORDERED_LOCK; if (!iterateOnShmemExitArmed) { on_shmem_exit(AtProcExit_PersistentDatabase, 0); iterateOnShmemExitArmed = true; } dirIterateDatabaseDirEntry = NULL; READ_PERSISTENT_STATE_ORDERED_UNLOCK; }
static void init_lwlock_stats(void) { HASHCTL ctl; static MemoryContext lwlock_stats_cxt = NULL; static bool exit_registered = false; if (lwlock_stats_cxt != NULL) MemoryContextDelete(lwlock_stats_cxt); /* * The LWLock stats will be updated within a critical section, which * requires allocating new hash entries. Allocations within a critical * section are normally not allowed because running out of memory would * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally * turned on in production, so that's an acceptable risk. The hash entries * are small, so the risk of running out of memory is minimal in practice. */ lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext, "LWLock stats", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true); MemSet(&ctl, 0, sizeof(ctl)); ctl.keysize = sizeof(lwlock_stats_key); ctl.entrysize = sizeof(lwlock_stats); ctl.hash = tag_hash; ctl.hcxt = lwlock_stats_cxt; lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl, HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); if (!exit_registered) { on_shmem_exit(print_lwlock_stats, 0); exit_registered = true; } }
/* * InitProcessPhase2 -- make MyProc visible in the shared ProcArray. * * This is separate from InitProcess because we can't acquire LWLocks until * we've created a PGPROC, but in the EXEC_BACKEND case there is a good deal * of stuff to be done before this step that will require LWLock access. */ void InitProcessPhase2(void) { Assert(MyProc != NULL); /* * We should now know what database we're in, so advertise that. (We need * not do any locking here, since no other backend can yet see our * PGPROC.) */ Assert(OidIsValid(MyDatabaseId)); MyProc->databaseId = MyDatabaseId; /* * Add our PGPROC to the PGPROC array in shared memory. */ ProcArrayAdd(MyProc); /* * Arrange to clean that up at backend exit. */ on_shmem_exit(RemoveProcFromArray, 0); }
/** * Initialize global sate of backoff scheduler. This is called during creation * of shared memory and semaphores. */ void BackoffStateInit() { bool found = false; /* Create or attach to the shared array */ backoffSingleton = (BackoffState *) ShmemInitStruct("Backoff Global State", sizeof(BackoffState), &found); if (!found) { bool ret = false; /* * We're the first - initialize. */ MemSet(backoffSingleton, 0, sizeof(BackoffState)); backoffSingleton->numEntries = MaxBackends; backoffSingleton->backendEntries = (BackoffBackendSharedEntry *) ShmemInitStruct("Backoff Backend Entries", mul_size(sizeof(BackoffBackendSharedEntry), backoffSingleton->numEntries), &ret); backoffSingleton->sweeperInProgress = false; Assert(!ret); } on_shmem_exit(BackoffStateAtExit, 0); }
/* * PGReserveSemaphores --- initialize semaphore support * * This is called during postmaster start or shared memory reinitialization. * It should do whatever is needed to be able to support up to maxSemas * subsequent PGSemaphoreCreate calls. Also, if any system resources * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit * callback to release them. * * The port number is passed for possible use as a key (for Posix, we use * it to generate the starting semaphore name). In a standalone backend, * zero will be passed. * * In the Posix implementation, we acquire semaphores on-demand; the * maxSemas parameter is just used to size the arrays. For unnamed * semaphores, there is an array of PGSemaphoreData structs in shared memory. * For named semaphores, we keep a postmaster-local array of sem_t pointers, * which we use for releasing the semphores when done. * (This design minimizes the dependency of postmaster shutdown on the * contents of shared memory, which a failed backend might have clobbered. * We can't do much about the possibility of sem_destroy() crashing, but * we don't have to expose the counters to other processes.) */ void PGReserveSemaphores(int maxSemas, int port) { #ifdef USE_NAMED_POSIX_SEMAPHORES mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *)); if (mySemPointers == NULL) elog(PANIC, "out of memory"); #else /* * We must use ShmemAllocUnlocked(), since the spinlock protecting * ShmemAlloc() won't be ready yet. (This ordering is necessary when we * are emulating spinlocks with semaphores.) */ sharedSemas = (PGSemaphore) ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas)); #endif numSems = 0; maxSems = maxSemas; nextSemKey = port * 1000; on_shmem_exit(ReleaseSemaphores, 0); }
/* * PGReserveSemaphores --- initialize semaphore support * * This is called during postmaster start or shared memory reinitialization. * It should do whatever is needed to be able to support up to maxSemas * subsequent PGSemaphoreCreate calls. Also, if any system resources * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit * callback to release them. * * The port number is passed for possible use as a key (for SysV, we use * it to generate the starting semaphore key). In a standalone backend, * zero will be passed. * * In the SysV implementation, we acquire semaphore sets on-demand; the * maxSemas parameter is just used to size the arrays. There is an array * of PGSemaphoreData structs in shared memory, and a postmaster-local array * with one entry per SysV semaphore set, which we use for releasing the * semaphore sets when done. (This design ensures that postmaster shutdown * doesn't rely on the contents of shared memory, which a failed backend might * have clobbered.) */ void PGReserveSemaphores(int maxSemas, int port) { /* * We must use ShmemAllocUnlocked(), since the spinlock protecting * ShmemAlloc() won't be ready yet. (This ordering is necessary when we * are emulating spinlocks with semaphores.) */ sharedSemas = (PGSemaphore) ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas)); numSharedSemas = 0; maxSharedSemas = maxSemas; maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET; mySemaSets = (IpcSemaphoreId *) malloc(maxSemaSets * sizeof(IpcSemaphoreId)); if (mySemaSets == NULL) elog(PANIC, "out of memory"); numSemaSets = 0; nextSemaKey = port * 1000; nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */ on_shmem_exit(ReleaseSemaphores, 0); }
/* * SharedInvalBackendInit * Initialize a new backend to operate on the sinval buffer */ void SharedInvalBackendInit(bool sendOnly) { int index; ProcState *stateP = NULL; SISeg *segP = shmInvalBuffer; /* * This can run in parallel with read operations, and for that matter with * write operations; but not in parallel with additions and removals of * backends, nor in parallel with SICleanupQueue. It doesn't seem worth * having a third lock, so we choose to use SInvalWriteLock to serialize * additions/removals. */ LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE); /* Look for a free entry in the procState array */ for (index = 0; index < segP->lastBackend; index++) { if (segP->procState[index].procPid == 0) /* inactive slot? */ { stateP = &segP->procState[index]; break; } } if (stateP == NULL) { if (segP->lastBackend < segP->maxBackends) { stateP = &segP->procState[segP->lastBackend]; Assert(stateP->procPid == 0); segP->lastBackend++; } else { /* * out of procState slots: MaxBackends exceeded -- report normally */ MyBackendId = InvalidBackendId; LWLockRelease(SInvalWriteLock); ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); } } MyBackendId = (stateP - &segP->procState[0]) + 1; /* Advertise assigned backend ID in MyProc */ MyProc->backendId = MyBackendId; /* Fetch next local transaction ID into local memory */ nextLocalTransactionId = stateP->nextLXID; /* mark myself active, with all extant messages already read */ stateP->procPid = MyProcPid; stateP->nextMsgNum = segP->maxMsgNum; stateP->resetState = false; stateP->signaled = false; stateP->sendOnly = sendOnly; LWLockRelease(SInvalWriteLock); /* register exit routine to mark my entry inactive at exit */ on_shmem_exit(CleanupInvalidationState, PointerGetDatum(segP)); elog(DEBUG4, "my backend id is %d", MyBackendId); }
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * In bootstrap mode no parameters are used. The autovacuum launcher process * doesn't use any parameters either, because it only goes far enough to be * able to read pg_database; it doesn't connect to any particular database. * In walsender mode only username is used. * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not completely filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, char *out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool am_superuser; char *fullpath; char dbname[NAMEDATALEN]; elog(DEBUG3, "InitPostgres"); /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend ID: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * Also set up timeout handlers needed for backend operation. We need * these in every case except bootstrap. */ if (!bootstrap) { RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLock); RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler); RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler); } /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG. */ if (IsUnderPostmaster) { /* * The postmaster already started the XLOG machinery, but we need to * call InitXLOGAccess(), if the system isn't in hot-standby mode. * This is handled by calling RecoveryInProgress and ignoring the * result. */ (void) RecoveryInProgress(); } else { /* * We are either a bootstrap process or a standalone backend. Either * way, start up the XLOG machinery, and register to have it closed * down at exit. */ StartupXLOG(); on_shmem_exit(ShutdownXLOG, 0); } /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); InitPlanCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least entries for pg_database and catalogs used for authentication. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This is the * first before_shmem_exit callback we register; thus, this will be the * last thing we do before low-level modules like the buffer manager begin * to close down. We need to have this in place before we begin our first * transaction --- if we fail during the initialization transaction, as is * entirely possible, we need the AbortTransaction call to clean up. */ before_shmem_exit(ShutdownPostgres, 0); /* The autovacuum launcher is done here */ if (IsAutoVacuumLauncherProcess()) return; /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. (This * is critical for anything that reads heap pages, because HOT may decide * to prune them even if the process doesn't attempt to modify any * tuples.) */ if (!bootstrap) { /* statement_timestamp must be set for timeouts to work correctly */ SetCurrentStatementStartTimestamp(); StartTransactionCommand(); /* * transaction_isolation will have been set to the default by the * above. If the default is "serializable", and we are in hot * standby, we will fail if we don't change it to something lower. * Fortunately, "read committed" is plenty good enough. */ XactIsoLevel = XACT_READ_COMMITTED; (void) GetTransactionSnapshot(); } /* * Perform client authentication if necessary, then figure out our * postgres user ID, and see if we are a superuser. * * In standalone mode and in autovacuum worker processes, we use a fixed * ID, otherwise we figure it out from the authenticated user name. */ if (bootstrap || IsAutoVacuumWorkerProcess()) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.", username))); } else if (IsBackgroundWorker) { if (username == NULL) { InitializeSessionUserIdStandalone(); am_superuser = true; } else { InitializeSessionUserId(username); am_superuser = superuser(); } } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username); am_superuser = superuser(); } /* * If we're trying to shut down, only superusers can connect, and new * replication connections are not allowed. */ if ((!am_superuser || am_walsender) && MyProcPort != NULL && MyProcPort->canAcceptConnections == CAC_WAITBACKUP) { if (am_walsender) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("new replication connections are not allowed during database shutdown"))); else ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect during database shutdown"))); } /* * Binary upgrades only allowed super-user connections */ if (IsBinaryUpgrade && !am_superuser) { ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect in binary upgrade mode"))); } /* * The last few connections slots are reserved for superusers. Although * replication connections currently require superuser privileges, we * don't allow them to consume the reserved slots, which are intended for * interactive use. */ if ((!am_superuser || am_walsender) && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("remaining connection slots are reserved for non-replication superuser connections"))); /* Check replication permissions needed for walsender processes. */ if (am_walsender) { Assert(!bootstrap); if (!superuser() && !has_rolreplication(GetUserId())) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to start walsender"))); } /* * If this is a plain walsender only supporting physical replication, we * don't want to connect to any particular database. Just finish the * backend startup by processing any options from the startup packet, and * we're done. */ if (am_walsender && !am_db_walsender) { /* process any options passed in the startup packet */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ pgstat_bestart(); /* close the transaction we started above */ CommitTransactionCommand(); return; } /* * Set up the global variables holding database id and default tablespace. * But note we won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap case, otherwise we have to look up * the db's entry in pg_database. */ if (bootstrap) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); } else if (OidIsValid(dboid)) { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); } else { /* * If this is a background worker not bound to any particular * database, we're done now. Everything that follows only makes * sense if we are bound to a specific database. We do need to * close the transaction we started before returning. */ if (!bootstrap) CommitTransactionCommand(); return; } /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we will advertise our use of the * database in the ProcArray before dropping the lock (in fact, that's the * next thing to do). Anyone trying a DROP DATABASE after this point will * see us in the array once they have the lock. Ordering is important for * this because we don't want to advertise ourselves as being in this * database until we have the lock; otherwise we create what amounts to a * deadlock with CountOtherDBBackends(). * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Now we can mark our PGPROC entry with the database ID. * * We assume this is an atomic store so no lock is needed; though actually * things would work fine even if it weren't atomic. Anyone searching the * ProcArray for this database's ID should hold the database lock, so they * would not be executing concurrently with this store. A process looking * for another database's ID could in theory see a chance match if it read * a partially-updated databaseId value; but as long as all such searches * wait and retry, as in CountOtherDBBackends(), they will certainly see * the correct value on their next try. */ MyProc->databaseId = MyDatabaseId; /* * We established a catalog snapshot while reading pg_authid and/or * pg_database; but until we have set up MyDatabaseId, we won't react to * incoming sinval messages for unshared catalogs, so we won't realize it * if the snapshot has been invalidated. Assume it's no good anymore. */ InvalidateCatalogSnapshot(); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != HeapTupleGetOid(tuple) || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } /* * Now we should be able to access the database directory safely. Verify * it's there and looks reasonable. */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase3(); /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap) CheckMyDatabase(dbname, am_superuser); /* * Now process any command-line switches and any additional GUC variable * settings passed in the startup packet. We couldn't do this before * because we didn't know if client is a superuser. */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Process pg_db_role_setting options */ process_settings(MyDatabaseId, GetSessionUserId()); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* * Initialize various default states that can't be set up until we've * selected the active user and gotten the right GUC settings. */ /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); }
/* * LWLockAcquire - acquire a lightweight lock in the specified mode * * If the lock is not available, sleep until it is. * * Side effect: cancel/die interrupts are held off until lock release. */ void LWLockAcquire(LWLockId lockid, LWLockMode mode) { volatile LWLock *lock = &(LWLockArray[lockid].lock); PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; PRINT_LWDEBUG("LWLockAcquire", lockid, lock); #ifdef LWLOCK_STATS /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) { int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); int numLocks = LWLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); ex_acquire_counts = calloc(numLocks, sizeof(int)); block_counts = calloc(numLocks, sizeof(int)); counts_for_pid = MyProcPid; on_shmem_exit(print_lwlock_stats, 0); } /* Count lock acquisition attempts */ if (mode == LW_EXCLUSIVE) ex_acquire_counts[lockid]++; else sh_acquire_counts[lockid]++; #endif /* LWLOCK_STATS */ /* * We can't wait if we haven't got a PGPROC. This should only occur * during bootstrap or shared memory initialization. Put an Assert here * to catch unsafe coding practices. */ Assert(!(proc == NULL && IsUnderPostmaster)); /* Ensure we will have room to remember the lock */ if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) elog(ERROR, "too many LWLocks taken"); /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with * manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); /* * Loop here to try to acquire lock after each time we are signaled by * LWLockRelease. * * NOTE: it might seem better to have LWLockRelease actually grant us the * lock, rather than retrying and possibly having to go back to sleep. But * in practice that is no good because it means a process swap for every * lock acquisition when two or more processes are contending for the same * lock. Since LWLocks are normally used to protect not-very-long * sections of computation, a process needs to be able to acquire and * release the same lock many times during a single CPU time slice, even * in the presence of contention. The efficiency of being able to do that * outweighs the inefficiency of sometimes wasting a process dispatch * cycle because the lock is not free when a released waiter finally gets * to run. See pgsql-hackers archives for 29-Dec-01. */ for (;;) { bool mustwait; /* Acquire mutex. Time spent holding mutex should be short! */ SpinLockAcquire(&lock->mutex); /* If retrying, allow LWLockRelease to release waiters again */ if (retry) lock->releaseOK = true; /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) { if (lock->exclusive == 0 && lock->shared == 0) { lock->exclusive++; mustwait = false; } else mustwait = true; } else { if (lock->exclusive == 0) { lock->shared++; mustwait = false; } else mustwait = true; } if (!mustwait) break; /* got the lock */ /* * Add myself to wait queue. * * If we don't have a PGPROC structure, there's no way to wait. This * should never occur, since MyProc should only be null during shared * memory initialization. */ if (proc == NULL) elog(PANIC, "cannot wait without a PGPROC structure"); proc->lwWaiting = true; proc->lwExclusive = (mode == LW_EXCLUSIVE); proc->lwWaitLink = NULL; if (lock->head == NULL) lock->head = proc; else lock->tail->lwWaitLink = proc; lock->tail = proc; /* Can release the mutex now */ SpinLockRelease(&lock->mutex); /* * Wait until awakened. * * Since we share the process wait semaphore with the regular lock * manager and ProcWaitForSignal, and we may need to acquire an LWLock * while one of those is pending, it is possible that we get awakened * for a reason other than being signaled by LWLockRelease. If so, * loop back and wait again. Once we've gotten the LWLock, * re-increment the sema by the number of additional signals received, * so that the lock manager or signal manager will see the received * signal when it next waits. */ LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); #ifdef LWLOCK_STATS block_counts[lockid]++; #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; } TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode); LOG_LWDEBUG("LWLockAcquire", lockid, "awakened"); /* Now loop back and try to acquire lock again. */ retry = true; } /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode); /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks++] = lockid; /* * Fix the process wait semaphore's count for any absorbed wakeups. */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); }
/* * LWLockAcquire - acquire a lightweight lock in the specified mode * * If the lock is not available, sleep until it is. * * Side effect: cancel/die interrupts are held off until lock release. */ void LWLockAcquire(LWLockId lockid, LWLockMode mode) { volatile LWLock *lock = &(LWLockArray[lockid].lock); #if LWLOCK_LOCK_PARTS > 1 volatile LWLockPart *part = LWLOCK_PART(lock, lockid, MyBackendId); #endif PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; PRINT_LWDEBUG("LWLockAcquire", lockid, lock); #ifdef LWLOCK_STATS /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) { int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); int numLocks = LWLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); ex_acquire_counts = calloc(numLocks, sizeof(int)); block_counts = calloc(numLocks, sizeof(int)); counts_for_pid = MyProcPid; on_shmem_exit(print_lwlock_stats, 0); } /* Count lock acquisition attempts */ if (mode == LW_EXCLUSIVE) ex_acquire_counts[lockid]++; else sh_acquire_counts[lockid]++; #endif /* LWLOCK_STATS */ /* * We can't wait if we haven't got a PGPROC. This should only occur * during bootstrap or shared memory initialization. Put an Assert here * to catch unsafe coding practices. */ Assert(!(proc == NULL && IsUnderPostmaster)); /* Ensure we will have room to remember the lock */ if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) elog(ERROR, "too many LWLocks taken"); /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with * manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); /* * Loop here to try to acquire lock after each time we are signaled by * LWLockRelease. * * NOTE: it might seem better to have LWLockRelease actually grant us the * lock, rather than retrying and possibly having to go back to sleep. But * in practice that is no good because it means a process swap for every * lock acquisition when two or more processes are contending for the same * lock. Since LWLocks are normally used to protect not-very-long * sections of computation, a process needs to be able to acquire and * release the same lock many times during a single CPU time slice, even * in the presence of contention. The efficiency of being able to do that * outweighs the inefficiency of sometimes wasting a process dispatch * cycle because the lock is not free when a released waiter finally gets * to run. See pgsql-hackers archives for 29-Dec-01. */ for (;;) { bool mustwait; if (mode == LW_SHARED) { #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC /* Increment shared counter partition. If there's no contention, * this is sufficient to take the lock */ LWLOCK_PART_SHARED_POSTINC_ATOMIC(lock, lockid, part, MyBackendId); LWLOCK_PART_SHARED_FENCE(); /* A concurrent exclusive locking attempt does the following * three steps * 1) Acquire mutex * 2) Check shared counter partitions for readers. * 3a) If found add proc to wait queue, block, restart at (1) * 3b) If not found, set exclusive flag, continue with (4) * 4) Enter protected section * The fence after the atomic add above ensures that no further * such attempt can proceed to (3b) or beyond. There may be * pre-existing exclusive locking attempts at step (3b) or beyond, * but we can recognize those by either the mutex being taken, or * the exclusive flag being set. Conversely, if we see neither, we * may proceed and enter the protected section. * * FIXME: This doesn't work if slock_t is a struct or doesn't * use 0 for state "unlocked". */ if ((lock->mutex == 0) && (lock->exclusive == 0)) { /* If retrying, allow LWLockRelease to release waiters again. * Usually this happens after we acquired the mutex, but if * we skip that, we still need to set releaseOK. * * Acquiring the mutex here is not really an option - if many * reader are awoken simultaneously by an exclusive unlock, * that would be a source of considerable contention. * * Fotunately, this is safe even without the mutex. First, * there actually cannot be any non-fast path unlocking * attempt in progress, because we'd then either still see * the exclusive flag set or the mutex being taken. And * even if there was, and such an attempt cleared the flag * immediately after we set it, it'd also wake up some waiter * who'd then re-set the flag. * * The only reason to do this here, and not directly * after returning from PGSemaphoreLock(), is that it seems * benefical to make SpinLockAcquire() the first thing to * touch the lock if possible, in case we acquire the spin * lock at all. That way, the cache line doesn't go through * a possible shared state, but instead directly to exclusive. * On Opterons at least, there seems to be a difference, c.f. * the comment above tas() for x86_64 in s_lock.h */ if (retry && !lock->releaseOK) lock->releaseOK = true; goto lock_acquired; } /* At this point, we don't know if the concurrent exclusive locker * has proceeded to (3b) or blocked. We must take the mutex and * re-check */ #endif /* LWLOCK_PART_SHARED_OPS_ATOMIC */ /* Acquire mutex. Time spent holding mutex should be short! */ SpinLockAcquire(&lock->mutex); if (lock->exclusive == 0) { #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC /* Already incremented the shared counter partition above */ #else lock->shared++; #endif mustwait = false; } else { #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC /* Must undo shared counter partition increment. Note that * we *need* to do that while holding the mutex. Otherwise, * the exclusive lock could be released and attempted to be * re-acquired before we undo the increment. That attempt * would then block, even though there'd be no lock holder * left */ LWLOCK_PART_SHARED_POSTDEC_ATOMIC(lock, lockid, part, MyBackendId); #endif mustwait = true; } } else { /* Step (1). Acquire mutex. Time spent holding mutex should be * short! */ SpinLockAcquire(&lock->mutex); if (lock->exclusive == 0) { /* Step (2). Check for shared lockers. This surely happens * after (1), otherwise SpinLockAcquire() is broken. Lock * acquire semantics demand that no load must be re-ordered * from after a lock acquisition to before, for obvious * reasons. */ LWLOCK_IS_SHARED(mustwait, lock, lockid); if (!mustwait) { /* Step (3a). Set exclusive flag. This surely happens * after (2) because it depends on the result of (2), * no matter how much reordering is going on here. */ lock->exclusive++; } } else mustwait = true; } /* If retrying, allow LWLockRelease to release waiters again. * This is also separately done in the LW_SHARED early exit case * above, and in contrast to there we don't hold the mutex there. * See the comment there for why this is safe */ if (retry) lock->releaseOK = true; if (!mustwait) break; /* got the lock */ /* * Step (3b). Add myself to wait queue. * * If we don't have a PGPROC structure, there's no way to wait. This * should never occur, since MyProc should only be null during shared * memory initialization. */ if (proc == NULL) elog(PANIC, "cannot wait without a PGPROC structure"); proc->lwWaiting = true; proc->lwExclusive = (mode == LW_EXCLUSIVE); proc->lwWaitLink = NULL; if (lock->head == NULL) lock->head = proc; else lock->tail->lwWaitLink = proc; lock->tail = proc; /* Can release the mutex now */ SpinLockRelease(&lock->mutex); /* * Wait until awakened. * * Since we share the process wait semaphore with the regular lock * manager and ProcWaitForSignal, and we may need to acquire an LWLock * while one of those is pending, it is possible that we get awakened * for a reason other than being signaled by LWLockRelease. If so, * loop back and wait again. Once we've gotten the LWLock, * re-increment the sema by the number of additional signals received, * so that the lock manager or signal manager will see the received * signal when it next waits. */ LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); #ifdef LWLOCK_STATS block_counts[lockid]++; #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; } TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode); LOG_LWDEBUG("LWLockAcquire", lockid, "awakened"); /* Now loop back and try to acquire lock again. */ retry = true; } /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); /* Step 4. Enter protected section. This surely happens after (3), * this time because lock release semantics demand that no store * must be moved from before a lock release to after the release, * again for obvious reasons */ #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC lock_acquired: #endif TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode); /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks] = lockid; held_lwlocks_mode[num_held_lwlocks] = mode; ++num_held_lwlocks; /* * Fix the process wait semaphore's count for any absorbed wakeups. */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); }
/* * shmem_startup hook: allocate or attach to shared memory, * then load any pre-existing statistics from file. */ static void pgss_shmem_startup(void) { bool found; HASHCTL info; FILE *file; uint32 header; int32 num; int32 i; int query_size; int buffer_size; char *buffer = NULL; if (prev_shmem_startup_hook) prev_shmem_startup_hook(); /* reset in case this is a restart within the postmaster */ pgss = NULL; pgss_hash = NULL; /* * Create or attach to the shared memory state, including hash table */ LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); pgss = ShmemInitStruct("pg_stat_statements", sizeof(pgssSharedState), &found); if (!found) { /* First time through ... */ pgss->lock = LWLockAssign(); pgss->query_size = pgstat_track_activity_query_size; } /* Be sure everyone agrees on the hash table entry size */ query_size = pgss->query_size; memset(&info, 0, sizeof(info)); info.keysize = sizeof(pgssHashKey); info.entrysize = offsetof(pgssEntry, query) +query_size; info.hash = pgss_hash_fn; info.match = pgss_match_fn; pgss_hash = ShmemInitHash("pg_stat_statements hash", pgss_max, pgss_max, &info, HASH_ELEM | HASH_FUNCTION | HASH_COMPARE); LWLockRelease(AddinShmemInitLock); /* * If we're in the postmaster (or a standalone backend...), set up a shmem * exit hook to dump the statistics to disk. */ if (!IsUnderPostmaster) on_shmem_exit(pgss_shmem_shutdown, (Datum) 0); /* * Attempt to load old statistics from the dump file, if this is the first * time through and we weren't told not to. */ if (found || !pgss_save) return; /* * Note: we don't bother with locks here, because there should be no other * processes running when this code is reached. */ file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R); if (file == NULL) { if (errno == ENOENT) return; /* ignore not-found error */ goto error; } buffer_size = query_size; buffer = (char *) palloc(buffer_size); if (fread(&header, sizeof(uint32), 1, file) != 1 || header != PGSS_FILE_HEADER || fread(&num, sizeof(int32), 1, file) != 1) goto error; for (i = 0; i < num; i++) { pgssEntry temp; pgssEntry *entry; if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1) goto error; /* Encoding is the only field we can easily sanity-check */ if (!PG_VALID_BE_ENCODING(temp.key.encoding)) goto error; /* Previous incarnation might have had a larger query_size */ if (temp.key.query_len >= buffer_size) { buffer = (char *) repalloc(buffer, temp.key.query_len + 1); buffer_size = temp.key.query_len + 1; } if (fread(buffer, 1, temp.key.query_len, file) != temp.key.query_len) goto error; buffer[temp.key.query_len] = '\0'; /* Clip to available length if needed */ if (temp.key.query_len >= query_size) temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding, buffer, temp.key.query_len, query_size - 1); temp.key.query_ptr = buffer; /* make the hashtable entry (discards old entries if too many) */ entry = entry_alloc(&temp.key); /* copy in the actual stats */ entry->counters = temp.counters; } pfree(buffer); FreeFile(file); return; error: ereport(LOG, (errcode_for_file_access(), errmsg("could not read pg_stat_statement file \"%s\": %m", PGSS_DUMP_FILE))); if (buffer) pfree(buffer); if (file) FreeFile(file); /* If possible, throw away the bogus file; ignore any error */ unlink(PGSS_DUMP_FILE); }
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * In bootstrap mode no parameters are used. The autovacuum launcher process * doesn't use any parameters either, because it only goes far enough to be * able to read pg_database; it doesn't connect to any particular database. * In walsender mode only username is used. * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not completely filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, char *out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool am_superuser; GucContext gucctx; char *fullpath; char dbname[NAMEDATALEN]; elog(DEBUG3, "InitPostgres"); /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG, if appropriate. In * bootstrap case we skip this since StartupXLOG() was run instead. */ if (!bootstrap) (void) RecoveryInProgress(); /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); InitPlanCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least an entry for pg_database. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This has to * be after we've initialized all the low-level modules like the buffer * manager, because during shutdown this has to run before the low-level * modules start to close down. On the other hand, we want it in place * before we begin our first transaction --- if we fail during the * initialization transaction, as is entirely possible, we need the * AbortTransaction call to clean up. */ on_shmem_exit(ShutdownPostgres, 0); /* The autovacuum launcher is done here */ if (IsAutoVacuumLauncherProcess()) return; /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. (This * is critical for anything that reads heap pages, because HOT may decide * to prune them even if the process doesn't attempt to modify any * tuples.) */ if (!bootstrap) { StartTransactionCommand(); (void) GetTransactionSnapshot(); } /* * Set up the global variables holding database id and default tablespace. * But note we won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap and walsender case, otherwise we * have to look up the db's entry in pg_database. */ if (bootstrap || am_walsender) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); } else { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); } /* Now we can mark our PGPROC entry with the database ID */ /* (We assume this is an atomic store so no lock is needed) */ MyProc->databaseId = MyDatabaseId; /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we are already advertising our use of * the database in the PGPROC array; anyone trying a DROP DATABASE after * this point will see us there. * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap && !am_walsender) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap && !am_walsender) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != HeapTupleGetOid(tuple) || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } /* * Now we should be able to access the database directory safely. Verify * it's there and looks reasonable. */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap && !am_walsender) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase3(); /* * Perform client authentication if necessary, then figure out our * postgres user ID, and see if we are a superuser. * * In standalone mode and in autovacuum worker processes, we use a fixed * ID, otherwise we figure it out from the authenticated user name. */ if (bootstrap || IsAutoVacuumWorkerProcess()) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.", username))); } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username); am_superuser = superuser(); } /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* Process pg_db_role_setting options */ process_settings(MyDatabaseId, GetSessionUserId()); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap && !am_walsender) CheckMyDatabase(dbname, am_superuser); /* * If we're trying to shut down, only superusers can connect. */ if (!am_superuser && MyProcPort != NULL && MyProcPort->canAcceptConnections == CAC_WAITBACKUP) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect during database shutdown"))); /* * Check a normal user hasn't connected to a superuser reserved slot. */ if (!am_superuser && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("connection limit exceeded for non-superusers"))); /* * Now process any command-line switches that were included in the startup * packet, if we are in a regular backend. We couldn't do this before * because we didn't know if client is a superuser. */ gucctx = am_superuser ? PGC_SUSET : PGC_BACKEND; if (MyProcPort != NULL && MyProcPort->cmdline_options != NULL) { /* * The maximum possible number of commandline arguments that could * come from MyProcPort->cmdline_options is (strlen + 1) / 2; see * pg_split_opts(). */ char **av; int maxac; int ac; maxac = 2 + (strlen(MyProcPort->cmdline_options) + 1) / 2; av = (char **) palloc(maxac * sizeof(char *)); ac = 0; av[ac++] = "postgres"; /* Note this mangles MyProcPort->cmdline_options */ pg_split_opts(av, &ac, MyProcPort->cmdline_options); av[ac] = NULL; Assert(ac < maxac); (void) process_postgres_switches(ac, av, gucctx); } /* * Process any additional GUC variable settings passed in startup packet. * These are handled exactly like command-line variables. */ if (MyProcPort != NULL) { ListCell *gucopts = list_head(MyProcPort->guc_options); while (gucopts) { char *name; char *value; name = lfirst(gucopts); gucopts = lnext(gucopts); value = lfirst(gucopts); gucopts = lnext(gucopts); SetConfigOption(name, value, gucctx, PGC_S_CLIENT); } } /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* * Initialize various default states that can't be set up until we've * selected the active user and gotten the right GUC settings. */ /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* reset the database for walsender */ if (am_walsender) MyProc->databaseId = MyDatabaseId = InvalidOid; /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); }
/* * PGSharedMemoryCreate * * Create a shared memory segment of the given size and initialize its * standard header. Also, register an on_shmem_exit callback to release * the storage. * * Dead Postgres segments are recycled if found, but we do not fail upon * collision with non-Postgres shmem segments. The idea here is to detect and * re-use keys that may have been assigned by a crashed postmaster or backend. * * makePrivate means to always create a new segment, rather than attach to * or recycle any existing segment. * * The port number is passed for possible use as a key (for SysV, we use * it to generate the starting shmem key). In a standalone backend, * zero will be passed. */ PGShmemHeader * PGSharedMemoryCreate(Size size, bool makePrivate, int port, PGShmemHeader **shim) { IpcMemoryKey NextShmemSegID; void *memAddress; PGShmemHeader *hdr; IpcMemoryId shmid; struct stat statbuf; Size sysvsize; /* Complain if hugepages demanded but we can't possibly support them */ #if !defined(USE_ANONYMOUS_SHMEM) || !defined(MAP_HUGETLB) if (huge_pages == HUGE_PAGES_ON) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("huge pages not supported on this platform"))); #endif /* Room for a header? */ Assert(size > MAXALIGN(sizeof(PGShmemHeader))); #ifdef USE_ANONYMOUS_SHMEM AnonymousShmem = CreateAnonymousSegment(&size); AnonymousShmemSize = size; /* Register on-exit routine to unmap the anonymous segment */ on_shmem_exit(AnonymousShmemDetach, (Datum) 0); /* Now we need only allocate a minimal-sized SysV shmem block. */ sysvsize = sizeof(PGShmemHeader); #else sysvsize = size; #endif /* Make sure PGSharedMemoryAttach doesn't fail without need */ UsedShmemSegAddr = NULL; /* Loop till we find a free IPC key */ NextShmemSegID = port * 1000; for (NextShmemSegID++;; NextShmemSegID++) { /* Try to create new segment */ memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize); if (memAddress) break; /* successful create and attach */ /* Check shared memory and possibly remove and recreate */ if (makePrivate) /* a standalone backend shouldn't do this */ continue; if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL) continue; /* can't attach, not one of mine */ /* * If I am not the creator and it belongs to an extant process, * continue. */ hdr = (PGShmemHeader *) memAddress; if (hdr->creatorPID != getpid()) { if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH) { shmdt(memAddress); continue; /* segment belongs to a live process */ } } /* * The segment appears to be from a dead Postgres process, or from a * previous cycle of life in this same process. Zap it, if possible, * and any associated dynamic shared memory segments, as well. This * probably shouldn't fail, but if it does, assume the segment belongs * to someone else after all, and continue quietly. */ if (hdr->dsm_control != 0) dsm_cleanup_using_control_segment(hdr->dsm_control); shmdt(memAddress); if (shmctl(shmid, IPC_RMID, NULL) < 0) continue; /* * Now try again to create the segment. */ memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize); if (memAddress) break; /* successful create and attach */ /* * Can only get here if some other process managed to create the same * shmem key before we did. Let him have that one, loop around to try * next key. */ } /* * OK, we created a new segment. Mark it as created by this process. The * order of assignments here is critical so that another Postgres process * can't see the header as valid but belonging to an invalid PID! */ hdr = (PGShmemHeader *) memAddress; hdr->creatorPID = getpid(); hdr->magic = PGShmemMagic; hdr->dsm_control = 0; /* Fill in the data directory ID info, too */ if (stat(DataDir, &statbuf) < 0) ereport(FATAL, (errcode_for_file_access(), errmsg("could not stat data directory \"%s\": %m", DataDir))); hdr->device = statbuf.st_dev; hdr->inode = statbuf.st_ino; /* * Initialize space allocation status for segment. */ hdr->totalsize = size; hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); *shim = hdr; /* Save info for possible future use */ UsedShmemSegAddr = memAddress; UsedShmemSegID = (unsigned long) NextShmemSegID; /* * If AnonymousShmem is NULL here, then we're not using anonymous shared * memory, and should return a pointer to the System V shared memory * block. Otherwise, the System V shared memory block is only a shim, and * we must return a pointer to the real block. */ #ifdef USE_ANONYMOUS_SHMEM if (AnonymousShmem == NULL) return hdr; memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader)); return (PGShmemHeader *) AnonymousShmem; #else return hdr; #endif }
/* * InitAuxiliaryProcess -- create a per-auxiliary-process data structure * * This is called by bgwriter and similar processes so that they will have a * MyProc value that's real enough to let them wait for LWLocks. The PGPROC * and sema that are assigned are one of the extra ones created during * InitProcGlobal. * * Auxiliary processes are presently not expected to wait for real (lockmgr) * locks, so we need not set up the deadlock checker. They are never added * to the ProcArray or the sinval messaging mechanism, either. They also * don't get a VXID assigned, since this is only useful when we actually * hold lockmgr locks. * * Startup process however uses locks but never waits for them in the * normal backend sense. Startup process also takes part in sinval messaging * as a sendOnly process, so never reads messages from sinval queue. So * Startup process does have a VXID and does show up in pg_locks. */ void InitAuxiliaryProcess(void) { PGPROC *auxproc; int proctype; /* * ProcGlobal should be set up already (if we are a backend, we inherit * this by fork() or EXEC_BACKEND mechanism from the postmaster). */ if (ProcGlobal == NULL || AuxiliaryProcs == NULL) elog(PANIC, "proc header uninitialized"); if (MyProc != NULL) elog(ERROR, "you already exist"); /* * We use the ProcStructLock to protect assignment and releasing of * AuxiliaryProcs entries. * * While we are holding the ProcStructLock, also copy the current shared * estimate of spins_per_delay to local storage. */ SpinLockAcquire(ProcStructLock); set_spins_per_delay(ProcGlobal->spins_per_delay); /* * Find a free auxproc ... *big* trouble if there isn't one ... */ for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++) { auxproc = &AuxiliaryProcs[proctype]; if (auxproc->pid == 0) break; } if (proctype >= NUM_AUXILIARY_PROCS) { SpinLockRelease(ProcStructLock); elog(FATAL, "all AuxiliaryProcs are in use"); } /* Mark auxiliary proc as in use by me */ /* use volatile pointer to prevent code rearrangement */ ((volatile PGPROC *) auxproc)->pid = MyProcPid; MyProc = auxproc; MyPgXact = &ProcGlobal->allPgXact[auxproc->pgprocno]; SpinLockRelease(ProcStructLock); /* * Initialize all fields of MyProc, except for those previously * initialized by InitProcGlobal. */ SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->lxid = InvalidLocalTransactionId; MyProc->fpVXIDLock = false; MyProc->fpLocalTransactionId = InvalidLocalTransactionId; MyPgXact->xid = InvalidTransactionId; MyPgXact->xmin = InvalidTransactionId; MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyPgXact->delayChkpt = false; MyPgXact->vacuumFlags = 0; MyProc->lwWaiting = false; MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; #ifdef USE_ASSERT_CHECKING { int i; /* Last process should have released all locks. */ for (i = 0; i < NUM_LOCK_PARTITIONS; i++) Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i]))); } #endif /* * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch * on it. That allows us to repoint the process latch, which so far * points to process local one, to the shared one. */ OwnLatch(&MyProc->procLatch); SwitchToSharedLatch(); /* * We might be reusing a semaphore that belonged to a failed process. So * be careful and reinitialize its value here. (This is not strictly * necessary anymore, but seems like a good idea for cleanliness.) */ PGSemaphoreReset(&MyProc->sem); /* * Arrange to clean up at process exit. */ on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype)); }
/* * InitProcess -- initialize a per-process data structure for this backend */ void InitProcess(void) { /* use volatile pointer to prevent code rearrangement */ volatile PROC_HDR *procglobal = ProcGlobal; /* * ProcGlobal should be set up already (if we are a backend, we inherit * this by fork() or EXEC_BACKEND mechanism from the postmaster). */ if (procglobal == NULL) elog(PANIC, "proc header uninitialized"); if (MyProc != NULL) elog(ERROR, "you already exist"); /* * Try to get a proc struct from the free list. If this fails, we must be * out of PGPROC structures (not to mention semaphores). * * While we are holding the ProcStructLock, also copy the current shared * estimate of spins_per_delay to local storage. */ SpinLockAcquire(ProcStructLock); set_spins_per_delay(procglobal->spins_per_delay); if (IsAnyAutoVacuumProcess()) MyProc = procglobal->autovacFreeProcs; else if (IsBackgroundWorker) MyProc = procglobal->bgworkerFreeProcs; else MyProc = procglobal->freeProcs; if (MyProc != NULL) { if (IsAnyAutoVacuumProcess()) procglobal->autovacFreeProcs = (PGPROC *) MyProc->links.next; else if (IsBackgroundWorker) procglobal->bgworkerFreeProcs = (PGPROC *) MyProc->links.next; else procglobal->freeProcs = (PGPROC *) MyProc->links.next; SpinLockRelease(ProcStructLock); } else { /* * If we reach here, all the PGPROCs are in use. This is one of the * possible places to detect "too many backends", so give the standard * error message. XXX do we need to give a different failure message * in the autovacuum case? */ SpinLockRelease(ProcStructLock); ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); } MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno]; /* * Now that we have a PGPROC, mark ourselves as an active postmaster * child; this is so that the postmaster can detect it if we exit without * cleaning up. (XXX autovac launcher currently doesn't participate in * this; it probably should.) */ if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess()) MarkPostmasterChildActive(); /* * Initialize all fields of MyProc, except for those previously * initialized by InitProcGlobal. */ SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->lxid = InvalidLocalTransactionId; MyProc->fpVXIDLock = false; MyProc->fpLocalTransactionId = InvalidLocalTransactionId; MyPgXact->xid = InvalidTransactionId; MyPgXact->xmin = InvalidTransactionId; MyProc->pid = MyProcPid; /* backendId, databaseId and roleId will be filled in later */ MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyPgXact->delayChkpt = false; MyPgXact->vacuumFlags = 0; /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ if (IsAutoVacuumWorkerProcess()) MyPgXact->vacuumFlags |= PROC_IS_AUTOVACUUM; MyProc->lwWaiting = false; MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; #ifdef USE_ASSERT_CHECKING { int i; /* Last process should have released all locks. */ for (i = 0; i < NUM_LOCK_PARTITIONS; i++) Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i]))); } #endif MyProc->recoveryConflictPending = false; /* Initialize fields for sync rep */ MyProc->waitLSN = 0; MyProc->syncRepState = SYNC_REP_NOT_WAITING; SHMQueueElemInit(&(MyProc->syncRepLinks)); /* * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch * on it. That allows us to repoint the process latch, which so far * points to process local one, to the shared one. */ OwnLatch(&MyProc->procLatch); SwitchToSharedLatch(); /* * We might be reusing a semaphore that belonged to a failed process. So * be careful and reinitialize its value here. (This is not strictly * necessary anymore, but seems like a good idea for cleanliness.) */ PGSemaphoreReset(&MyProc->sem); /* * Arrange to clean up at backend exit. */ on_shmem_exit(ProcKill, 0); /* * Now that we have a PGPROC, we could try to acquire locks, so initialize * local state needed for LWLocks, and the deadlock checker. */ InitLWLockAccess(); InitDeadLockChecking(); }
/* * InternalIpcMemoryCreate(memKey, size) * * Attempt to create a new shared memory segment with the specified key. * Will fail (return NULL) if such a segment already exists. If successful, * attach the segment to the current process and return its attached address. * On success, callbacks are registered with on_shmem_exit to detach and * delete the segment when on_shmem_exit is called. * * If we fail with a failure code other than collision-with-existing-segment, * print out an error and abort. Other types of errors are not recoverable. */ static void * InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size) { IpcMemoryId shmid; void *memAddress; shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection); if (shmid < 0) { int shmget_errno = errno; /* * Fail quietly if error indicates a collision with existing segment. * One would expect EEXIST, given that we said IPC_EXCL, but perhaps * we could get a permission violation instead? Also, EIDRM might * occur if an old seg is slated for destruction but not gone yet. */ if (shmget_errno == EEXIST || shmget_errno == EACCES #ifdef EIDRM || shmget_errno == EIDRM #endif ) return NULL; /* * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if * there is an existing segment but it's smaller than "size" (this is * a result of poorly-thought-out ordering of error tests). To * distinguish between collision and invalid size in such cases, we * make a second try with size = 0. These kernels do not test size * against SHMMIN in the preexisting-segment case, so we will not get * EINVAL a second time if there is such a segment. */ if (shmget_errno == EINVAL) { shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection); if (shmid < 0) { /* As above, fail quietly if we verify a collision */ if (errno == EEXIST || errno == EACCES #ifdef EIDRM || errno == EIDRM #endif ) return NULL; /* Otherwise, fall through to report the original error */ } else { /* * On most platforms we cannot get here because SHMMIN is * greater than zero. However, if we do succeed in creating a * zero-size segment, free it and then fall through to report * the original error. */ if (shmctl(shmid, IPC_RMID, NULL) < 0) elog(LOG, "shmctl(%d, %d, 0) failed: %m", (int) shmid, IPC_RMID); } } /* * Else complain and abort. * * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX * is violated. SHMALL violation might be reported as either ENOMEM * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which * it should be. SHMMNI violation is ENOSPC, per spec. Just plain * not-enough-RAM is ENOMEM. */ errno = shmget_errno; ereport(FATAL, (errmsg("could not create shared memory segment: %m"), errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).", (unsigned long) memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection), (shmget_errno == EINVAL) ? errhint("This error usually means that PostgreSQL's request for a shared memory " "segment exceeded your kernel's SHMMAX parameter, or possibly that " "it is less than " "your kernel's SHMMIN parameter.\n" "The PostgreSQL documentation contains more information about shared " "memory configuration.") : 0, (shmget_errno == ENOMEM) ? errhint("This error usually means that PostgreSQL's request for a shared " "memory segment exceeded your kernel's SHMALL parameter. You might need " "to reconfigure the kernel with larger SHMALL.\n" "The PostgreSQL documentation contains more information about shared " "memory configuration.") : 0, (shmget_errno == ENOSPC) ? errhint("This error does *not* mean that you have run out of disk space. " "It occurs either if all available shared memory IDs have been taken, " "in which case you need to raise the SHMMNI parameter in your kernel, " "or because the system's overall limit for shared memory has been " "reached.\n" "The PostgreSQL documentation contains more information about shared " "memory configuration.") : 0)); } /* Register on-exit routine to delete the new segment */ on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid)); /* OK, should be able to attach to the segment */ memAddress = shmat(shmid, NULL, PG_SHMAT_FLAGS); if (memAddress == (void *) -1) elog(FATAL, "shmat(id=%d) failed: %m", shmid); /* Register on-exit routine to detach new segment before deleting */ on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress)); /* * Store shmem key and ID in data directory lockfile. Format to try to * keep it the same length always (trailing junk in the lockfile won't * hurt, but might confuse humans). */ { char line[64]; sprintf(line, "%9lu %9lu", (unsigned long) memKey, (unsigned long) shmid); AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line); } return memAddress; }
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * In bootstrap mode no parameters are used. * * The return value indicates whether the userID is a superuser. (That * can only be tested inside a transaction, so we want to do it during * the startup transaction rather than doing a separate one in postgres.c.) * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, char *out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool autovacuum = IsAutoVacuumProcess(); bool am_superuser; char *fullpath; char dbname[NAMEDATALEN]; /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* Initialize SessionState entry */ SessionState_Init(); /* Initialize memory protection */ GPMemoryProtect_Init(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG. In bootstrap case we may * skip this since StartupXLOG() was run instead. */ if (!bootstrap) InitXLOGAccess(); /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least entries for pg_database and catalogs used for authentication. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This has to * be after we've initialized all the low-level modules like the buffer * manager, because during shutdown this has to run before the low-level * modules start to close down. On the other hand, we want it in place * before we begin our first transaction --- if we fail during the * initialization transaction, as is entirely possible, we need the * AbortTransaction call to clean up. */ on_shmem_exit(ShutdownPostgres, 0); /* TODO: autovacuum launcher should be done here? */ /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. */ if (!bootstrap) { StartTransactionCommand(); (void) GetTransactionSnapshot(); } /* * Figure out our postgres user id, and see if we are a superuser. * * In standalone mode and in the autovacuum process, we use a fixed id, * otherwise we figure it out from the authenticated user name. */ if (bootstrap || autovacuum) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" CREATEUSER;.", username))); } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username); am_superuser = superuser(); } /* * Check a normal user hasn't connected to a superuser reserved slot. */ if (!am_superuser && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("connection limit exceeded for non-superusers"), errSendAlert(true))); /* * If walsender, we don't want to connect to any particular database. Just * finish the backend startup by processing any options from the startup * packet, and we're done. */ if (am_walsender) { Assert(!bootstrap); /* * We don't have replication role, which existed in postgres. */ if (!superuser()) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser role to start walsender"))); /* process any options passed in the startup packet */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ pgstat_bestart(); /* close the transaction we started above */ CommitTransactionCommand(); return; } /* * Set up the global variables holding database id and path. But note we * won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap case, otherwise we have to look up * the db name in pg_database. */ if (bootstrap) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); pfree(tuple); } else { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); pfree(tuple); } /* Now we can mark our PGPROC entry with the database ID */ /* (We assume this is an atomic store so no lock is needed) */ MyProc->databaseId = MyDatabaseId; /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we are already advertising our use of * the database in the PGPROC array; anyone trying a DROP DATABASE after * this point will see us there. * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != HeapTupleGetOid(tuple) || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase3(); /* * Now we have full access to catalog including toast tables, * we can process pg_authid.rolconfig. This ought to come before * processing startup options so that it can override the settings. */ if (!bootstrap) ProcessRoleGUC(); /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap) CheckMyDatabase(dbname, am_superuser); /* * Now process any command-line switches and any additional GUC variable * settings passed in the startup packet. We couldn't do this before * because we didn't know if client is a superuser. */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* * Maintenance Mode: allow superuser to connect when * gp_maintenance_conn GUC is set. We cannot check it until * process_startup_options parses the GUC. */ if (gp_maintenance_mode && Gp_role == GP_ROLE_DISPATCH && !(superuser() && gp_maintenance_conn)) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("maintenance mode: connected by superuser only"), errSendAlert(false))); /* * MPP: If we were started in utility mode then we only want to allow * incoming sessions that specify gp_session_role=utility as well. This * lets the bash scripts start the QD in utility mode and connect in but * protect ourselves from normal clients who might be trying to connect to * the system while we startup. */ if ((Gp_role == GP_ROLE_UTILITY) && (Gp_session_role != GP_ROLE_UTILITY)) { ereport(FATAL, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("System was started in master-only utility mode - only utility mode connections are allowed"))); } /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* * MPP package setup * * Primary function is to establish connctions to the qExecs. * This is SKIPPED when the database is in bootstrap mode or * Is not UnderPostmaster. */ if (!bootstrap && IsUnderPostmaster) { cdb_setup(); on_proc_exit( cdb_cleanup, 0 ); } /* * MPP SharedSnapshot Setup */ if (Gp_role == GP_ROLE_DISPATCH) { addSharedSnapshot("Query Dispatcher", gp_session_id); } else if (Gp_role == GP_ROLE_DISPATCHAGENT) { SharedLocalSnapshotSlot = NULL; } else if (Gp_segment == -1 && Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer) { /* * Entry db singleton QE is a user of the shared snapshot -- not a creator. * The lookup will occur once the distributed snapshot has been received. */ lookupSharedSnapshot("Entry DB Singleton", "Query Dispatcher", gp_session_id); } else if (Gp_role == GP_ROLE_EXECUTE) { if (Gp_is_writer) { addSharedSnapshot("Writer qExec", gp_session_id); } else { /* * NOTE: This assumes that the Slot has already been * allocated by the writer. Need to make sure we * always allocate the writer qExec first. */ lookupSharedSnapshot("Reader qExec", "Writer qExec", gp_session_id); } } /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); return; }