/*
 * StrategyInitialize -- initialize the buffer cache replacement
 *		strategy.
 *
 * Assumes: All of the buffers are already built into a linked list.
 *		Only called by postmaster and only during initialization.
 */
void
StrategyInitialize(bool init)
{
	bool		found;

	/*
	 * Initialize the shared buffer lookup hashtable.
	 *
	 * Since we can't tolerate running out of lookup table entries, we must
	 * be sure to specify an adequate table size here.  The maximum
	 * steady-state usage is of course NBuffers entries, but BufferAlloc()
	 * tries to insert a new entry before deleting the old.  In principle
	 * this could be happening in each partition concurrently, so we could
	 * need as many as NBuffers + NUM_BUFFER_PARTITIONS entries.
	 */
	InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);

	/*
	 * Get or create the shared strategy control block
	 */
	StrategyControl = (BufferStrategyControl *)
		ShmemInitStruct("Buffer Strategy Status",
						sizeof(BufferStrategyControl),
						&found);

	if (!found)
	{
		/*
		 * Only done once, usually in postmaster
		 */
		Assert(init);

		SpinLockInit(&StrategyControl->buffer_strategy_lock);

		/*
		 * Grab the whole linked list of free buffers for our strategy. We
		 * assume it was previously set up by InitBufferPool().
		 */
		StrategyControl->firstFreeBuffer = 0;
		StrategyControl->lastFreeBuffer = NBuffers - 1;

		/* Initialize the clock sweep pointer */
		pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);

		/* Clear statistics */
		StrategyControl->completePasses = 0;
		pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);

		/* No pending notification */
		StrategyControl->bgwprocno = -1;
	}
	else
		Assert(!init);
}
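/*
 * Illustrative sketch, not part of the original file: one plausible way the
 * clock sweep pointer initialized above gets advanced.  The real logic lives
 * in StrategyGetBuffer()/ClockSweepTick() in freelist.c, which additionally
 * maintains completePasses when the counter wraps; this simplified version
 * (the function name is made up) shows only the lock-free tick itself.
 */
static inline uint32
clock_sweep_tick_sketch(void)
{
	/* Atomically claim the next tick; reduce modulo NBuffers for lookup. */
	uint32		victim =
		pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

	return victim % NBuffers;
}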
/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
	bool		foundBufs,
				foundDescs,
				foundIOLocks,
				foundBufCkpt;

	/* Align descriptors to a cacheline boundary. */
	BufferDescriptors = (BufferDescPadded *)
		ShmemInitStruct("Buffer Descriptors",
						NBuffers * sizeof(BufferDescPadded),
						&foundDescs);

	BufferBlocks = (char *)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * (Size) BLCKSZ, &foundBufs);

	/* Align lwlocks to cacheline boundary */
	BufferIOLWLockArray = (LWLockMinimallyPadded *)
		ShmemInitStruct("Buffer IO Locks",
						NBuffers * (Size) sizeof(LWLockMinimallyPadded),
						&foundIOLocks);

	LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io");
	LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content");

	/*
	 * The array used to sort to-be-checkpointed buffer ids is located in
	 * shared memory, to avoid having to allocate significant amounts of
	 * memory at runtime.  As that'd be in the middle of a checkpoint, or
	 * when the checkpointer is restarted, memory allocation failures would
	 * be painful.
	 */
	CkptBufferIds = (CkptSortItem *)
		ShmemInitStruct("Checkpoint BufferIds",
						NBuffers * sizeof(CkptSortItem), &foundBufCkpt);

	if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
	{
		/* should find all of these, or none of them */
		Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);
		/* note: this path is only taken in EXEC_BACKEND case */
	}
	else
	{
		int			i;

		/*
		 * Initialize all the buffer headers.
		 */
		for (i = 0; i < NBuffers; i++)
		{
			BufferDesc *buf = GetBufferDescriptor(i);

			CLEAR_BUFFERTAG(buf->tag);

			pg_atomic_init_u32(&buf->state, 0);
			buf->wait_backend_pid = 0;

			buf->buf_id = i;

			/*
			 * Initially link all the buffers together as unused. Subsequent
			 * management of this list is done by freelist.c.
			 */
			buf->freeNext = i + 1;

			LWLockInitialize(BufferDescriptorGetContentLock(buf),
							 LWTRANCHE_BUFFER_CONTENT);

			LWLockInitialize(BufferDescriptorGetIOLock(buf),
							 LWTRANCHE_BUFFER_IO_IN_PROGRESS);
		}

		/* Correct last entry of linked list */
		GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
	}

	/* Init other shared buffer-management stuff */
	StrategyInitialize(!foundDescs);

	/* Initialize per-backend file flush context */
	WritebackContextInit(&BackendWritebackContext,
						 &backend_flush_after);
}
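/*
 * Illustrative sketch, not part of the original file: how freelist.c
 * consumes the freeNext chain that InitBufferPool() builds above.  The real
 * code in StrategyGetBuffer() also rechecks the popped buffer's state before
 * using it; this simplified version (the function name is made up) shows
 * only the list manipulation, done under buffer_strategy_lock.
 */
static BufferDesc *
freelist_pop_sketch(void)
{
	BufferDesc *buf = NULL;

	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
	if (StrategyControl->firstFreeBuffer >= 0)
	{
		buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);

		/* Unlink the buffer; FREENEXT_NOT_IN_LIST marks it as removed. */
		StrategyControl->firstFreeBuffer = buf->freeNext;
		buf->freeNext = FREENEXT_NOT_IN_LIST;
	}
	SpinLockRelease(&StrategyControl->buffer_strategy_lock);

	return buf;
}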
static void
test_atomic_uint32(void)
{
	pg_atomic_uint32 var;
	uint32		expected;
	int			i;

	pg_atomic_init_u32(&var, 0);

	if (pg_atomic_read_u32(&var) != 0)
		elog(ERROR, "atomic_read_u32() #1 wrong");

	pg_atomic_write_u32(&var, 3);

	if (pg_atomic_read_u32(&var) != 3)
		elog(ERROR, "atomic_read_u32() #2 wrong");

	if (pg_atomic_fetch_add_u32(&var, 1) != 3)
		elog(ERROR, "atomic_fetch_add_u32() #1 wrong");

	if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
		elog(ERROR, "atomic_fetch_sub_u32() #1 wrong");

	if (pg_atomic_sub_fetch_u32(&var, 3) != 0)
		elog(ERROR, "atomic_sub_fetch_u32() #1 wrong");

	if (pg_atomic_add_fetch_u32(&var, 10) != 10)
		elog(ERROR, "atomic_add_fetch_u32() #1 wrong");

	if (pg_atomic_exchange_u32(&var, 5) != 10)
		elog(ERROR, "pg_atomic_exchange_u32() #1 wrong");

	if (pg_atomic_exchange_u32(&var, 0) != 5)
		elog(ERROR, "pg_atomic_exchange_u32() #2 wrong");

	/* test around numerical limits */
	if (pg_atomic_fetch_add_u32(&var, INT_MAX) != 0)
		elog(ERROR, "pg_atomic_fetch_add_u32() #2 wrong");

	if (pg_atomic_fetch_add_u32(&var, INT_MAX) != INT_MAX)
		elog(ERROR, "pg_atomic_fetch_add_u32() #3 wrong");

	pg_atomic_fetch_add_u32(&var, 1);	/* top up to UINT_MAX */

	if (pg_atomic_read_u32(&var) != UINT_MAX)
		elog(ERROR, "atomic_read_u32() #3 wrong");

	if (pg_atomic_fetch_sub_u32(&var, INT_MAX) != UINT_MAX)
		elog(ERROR, "pg_atomic_fetch_sub_u32() #2 wrong");

	if (pg_atomic_read_u32(&var) != (uint32) INT_MAX + 1)
		elog(ERROR, "atomic_read_u32() #4 wrong: %u",
			 pg_atomic_read_u32(&var));

	expected = pg_atomic_sub_fetch_u32(&var, INT_MAX);
	if (expected != 1)
		elog(ERROR, "pg_atomic_sub_fetch_u32() #3 wrong: %u", expected);

	pg_atomic_sub_fetch_u32(&var, 1);

	/* fail exchange because of old expected */
	expected = 10;
	if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
		elog(ERROR, "atomic_compare_exchange_u32() changed value spuriously");

	/* CAS is allowed to fail due to interrupts, try a couple of times */
	for (i = 0; i < 1000; i++)
	{
		expected = 0;
		if (!pg_atomic_compare_exchange_u32(&var, &expected, 1))
			break;
	}
	if (i == 1000)
		elog(ERROR, "atomic_compare_exchange_u32() never succeeded");
	if (pg_atomic_read_u32(&var) != 1)
		elog(ERROR, "atomic_compare_exchange_u32() didn't set value properly");

	pg_atomic_write_u32(&var, 0);

	/* try setting flagbits */
	if (pg_atomic_fetch_or_u32(&var, 1) & 1)
		elog(ERROR, "pg_atomic_fetch_or_u32() #1 wrong");

	if (!(pg_atomic_fetch_or_u32(&var, 2) & 1))
		elog(ERROR, "pg_atomic_fetch_or_u32() #2 wrong");

	if (pg_atomic_read_u32(&var) != 3)
		elog(ERROR, "invalid result after pg_atomic_fetch_or_u32()");

	/* try clearing flagbits */
	if ((pg_atomic_fetch_and_u32(&var, ~2) & 3) != 3)
		elog(ERROR, "pg_atomic_fetch_and_u32() #1 wrong");

	if (pg_atomic_fetch_and_u32(&var, ~1) != 1)
		elog(ERROR, "pg_atomic_fetch_and_u32() #2 wrong: is %u",
			 pg_atomic_read_u32(&var));
	/* no bits set anymore */
	if (pg_atomic_fetch_and_u32(&var, ~0) != 0)
		elog(ERROR, "pg_atomic_fetch_and_u32() #3 wrong");
}
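/*
 * Illustrative sketch, not part of the original file: the compare-and-swap
 * retry idiom that the loop above exercises, here applied to maintaining a
 * shared maximum.  pg_atomic_compare_exchange_u32() writes the current value
 * back into "expected" on failure, so the loop re-reads for free.  The
 * function name is made up for this example.
 */
static void
atomic_store_max_sketch(pg_atomic_uint32 *ptr, uint32 newval)
{
	uint32		old = pg_atomic_read_u32(ptr);

	/* Retry until we either install newval or observe a value >= newval. */
	while (old < newval)
	{
		if (pg_atomic_compare_exchange_u32(ptr, &old, newval))
			break;
		/* CAS failed: "old" now holds the freshly observed value. */
	}
}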
/*
 * InitProcess -- initialize a per-process data structure for this backend
 */
void
InitProcess(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;
	PGPROC * volatile * procgloballist;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (procglobal == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/* Decide which list should supply our PGPROC. */
	if (IsAnyAutoVacuumProcess())
		procgloballist = &procglobal->autovacFreeProcs;
	else if (IsBackgroundWorker)
		procgloballist = &procglobal->bgworkerFreeProcs;
	else
		procgloballist = &procglobal->freeProcs;

	/*
	 * Try to get a proc struct from the appropriate free list.  If this
	 * fails, we must be out of PGPROC structures (not to mention
	 * semaphores).
	 *
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(procglobal->spins_per_delay);

	MyProc = *procgloballist;

	if (MyProc != NULL)
	{
		*procgloballist = (PGPROC *) MyProc->links.next;
		SpinLockRelease(ProcStructLock);
	}
	else
	{
		/*
		 * If we reach here, all the PGPROCs are in use.  This is one of the
		 * possible places to detect "too many backends", so give the
		 * standard error message.  XXX do we need to give a different
		 * failure message in the autovacuum case?
		 */
		SpinLockRelease(ProcStructLock);
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("sorry, too many clients already")));
	}
	MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno];

	/*
	 * Cross-check that the PGPROC is of the type we expect; if this were
	 * not the case, it would get returned to the wrong list.
	 */
	Assert(MyProc->procgloballist == procgloballist);

	/*
	 * Now that we have a PGPROC, mark ourselves as an active postmaster
	 * child; this is so that the postmaster can detect it if we exit without
	 * cleaning up.  (XXX autovac launcher currently doesn't participate in
	 * this; it probably should.)
	 */
	if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess())
		MarkPostmasterChildActive();

	/*
	 * Initialize all fields of MyProc, except for those previously
	 * initialized by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->lxid = InvalidLocalTransactionId;
	MyProc->fpVXIDLock = false;
	MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
	MyPgXact->xid = InvalidTransactionId;
	MyPgXact->xmin = InvalidTransactionId;
	MyProc->pid = MyProcPid;
	/* backendId, databaseId and roleId will be filled in later */
	MyProc->backendId = InvalidBackendId;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyPgXact->delayChkpt = false;
	MyPgXact->vacuumFlags = 0;
	/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
	if (IsAutoVacuumWorkerProcess())
		MyPgXact->vacuumFlags |= PROC_IS_AUTOVACUUM;
	MyProc->lwWaiting = false;
	MyProc->lwWaitMode = 0;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif
	MyProc->recoveryConflictPending = false;

	/* Initialize fields for sync rep */
	MyProc->waitLSN = 0;
	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
	SHMQueueElemInit(&(MyProc->syncRepLinks));

	/* Initialize fields for group XID clearing. */
	MyProc->backendLatestXid = InvalidTransactionId;
	pg_atomic_init_u32(&MyProc->nextClearXidElem, INVALID_PGPROCNO);

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
	 * on it.  That allows us to repoint the process latch, which so far
	 * points to the process-local one, to the shared one.
	 */
	OwnLatch(&MyProc->procLatch);
	SwitchToSharedLatch();

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.  (This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	/*
	 * Arrange to clean up at backend exit.
	 */
	on_shmem_exit(ProcKill, 0);

	/*
	 * Now that we have a PGPROC, we could try to acquire locks, so
	 * initialize local state needed for LWLocks, and the deadlock checker.
	 */
	InitLWLockAccess();
	InitDeadLockChecking();
}
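/*
 * Illustrative sketch, not part of the original file: the inverse of the
 * free-list pop performed in InitProcess() above.  At backend exit,
 * ProcKill() returns the PGPROC to whichever list it came from, via the
 * procgloballist pointer cross-checked earlier; the real function does
 * considerably more cleanup before this step.  The function name is made up.
 */
static void
proc_freelist_push_sketch(PGPROC *proc)
{
	PGPROC	  **procgloballist = proc->procgloballist;

	SpinLockAcquire(ProcStructLock);

	/* Push the PGPROC onto the head of its owning free list. */
	proc->links.next = (SHM_QUEUE *) *procgloballist;
	*procgloballist = proc;

	SpinLockRelease(ProcStructLock);
}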
/*
 * InitProcGlobal -
 *	  Initialize the global process table during postmaster or standalone
 *	  backend startup.
 *
 * We also create all the per-process semaphores we will need to support
 * the requested number of backends.  We used to allocate semaphores
 * only when backends were actually started up, but that is bad because
 * it lets Postgres fail under load --- a lot of Unix systems are
 * (mis)configured with small limits on the number of semaphores, and
 * running out when trying to start another backend is a common failure.
 * So, now we grab enough semaphores to support the desired max number
 * of backends immediately at initialization --- if the sysadmin has set
 * MaxConnections, max_worker_processes, or autovacuum_max_workers higher
 * than his kernel will support, he'll find out sooner rather than later.
 *
 * Another reason for creating semaphores here is that the semaphore
 * implementation typically requires us to create semaphores in the
 * postmaster, not in backends.
 *
 * Note: this is NOT called by individual backends under a postmaster,
 * not even in the EXEC_BACKEND case.  The ProcGlobal and AuxiliaryProcs
 * pointers must be propagated specially for EXEC_BACKEND operation.
 */
void
InitProcGlobal(void)
{
	PGPROC	   *procs;
	PGXACT	   *pgxacts;
	int			i,
				j;
	bool		found;
	uint32		TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts;

	/* Create the ProcGlobal shared structure */
	ProcGlobal = (PROC_HDR *)
		ShmemInitStruct("Proc Header", sizeof(PROC_HDR), &found);
	Assert(!found);

	/*
	 * Initialize the data structures.
	 */
	ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
	ProcGlobal->freeProcs = NULL;
	ProcGlobal->autovacFreeProcs = NULL;
	ProcGlobal->bgworkerFreeProcs = NULL;
	ProcGlobal->startupProc = NULL;
	ProcGlobal->startupProcPid = 0;
	ProcGlobal->startupBufferPinWaitBufId = -1;
	ProcGlobal->walwriterLatch = NULL;
	ProcGlobal->checkpointerLatch = NULL;
	pg_atomic_init_u32(&ProcGlobal->nextClearXidElem, INVALID_PGPROCNO);

	/*
	 * Create and initialize all the PGPROC structures we'll need.  There are
	 * five separate consumers: (1) normal backends, (2) autovacuum workers
	 * and the autovacuum launcher, (3) background workers, (4) auxiliary
	 * processes, and (5) prepared transactions.  Each PGPROC structure is
	 * dedicated to exactly one of these purposes, and they do not move
	 * between groups.
	 */
	procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC));
	ProcGlobal->allProcs = procs;
	/* XXX allProcCount isn't really all of them; it excludes prepared xacts */
	ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS;
	if (!procs)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	MemSet(procs, 0, TotalProcs * sizeof(PGPROC));

	/*
	 * Also allocate a separate array of PGXACT structures.  This is separate
	 * from the main PGPROC array so that the most heavily accessed data is
	 * stored contiguously in memory in as few cache lines as possible.  This
	 * provides significant performance benefits, especially on a
	 * multiprocessor system.  There is one PGXACT structure for every PGPROC
	 * structure.
	 */
	pgxacts = (PGXACT *) ShmemAlloc(TotalProcs * sizeof(PGXACT));
	MemSet(pgxacts, 0, TotalProcs * sizeof(PGXACT));
	ProcGlobal->allPgXact = pgxacts;

	for (i = 0; i < TotalProcs; i++)
	{
		/* Common initialization for all PGPROCs, regardless of type. */

		/*
		 * Set up per-PGPROC semaphore, latch, and backendLock.  Prepared
		 * xact dummy PGPROCs don't need these though - they're never
		 * associated with a real process.
		 */
		if (i < MaxBackends + NUM_AUXILIARY_PROCS)
		{
			PGSemaphoreCreate(&(procs[i].sem));
			InitSharedLatch(&(procs[i].procLatch));
			procs[i].backendLock = LWLockAssign();
		}
		procs[i].pgprocno = i;

		/*
		 * Newly created PGPROCs for normal backends, autovacuum and
		 * bgworkers must be queued up on the appropriate free list.  Because
		 * there can only ever be a small, fixed number of auxiliary
		 * processes, no free list is used in that case; InitAuxiliaryProcess()
		 * instead uses a linear search.  PGPROCs for prepared transactions
		 * are added to a free list by TwoPhaseShmemInit().
		 */
		if (i < MaxConnections)
		{
			/* PGPROC for normal backend, add to freeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs;
			ProcGlobal->freeProcs = &procs[i];
			procs[i].procgloballist = &ProcGlobal->freeProcs;
		}
		else if (i < MaxConnections + autovacuum_max_workers + 1)
		{
			/* PGPROC for AV launcher/worker, add to autovacFreeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs;
			ProcGlobal->autovacFreeProcs = &procs[i];
			procs[i].procgloballist = &ProcGlobal->autovacFreeProcs;
		}
		else if (i < MaxBackends)
		{
			/* PGPROC for bgworker, add to bgworkerFreeProcs list */
			procs[i].links.next = (SHM_QUEUE *) ProcGlobal->bgworkerFreeProcs;
			ProcGlobal->bgworkerFreeProcs = &procs[i];
			procs[i].procgloballist = &ProcGlobal->bgworkerFreeProcs;
		}

		/* Initialize myProcLocks[] shared memory queues. */
		for (j = 0; j < NUM_LOCK_PARTITIONS; j++)
			SHMQueueInit(&(procs[i].myProcLocks[j]));
	}

	/*
	 * Save pointers to the blocks of PGPROC structures reserved for
	 * auxiliary processes and prepared transactions.
	 */
	AuxiliaryProcs = &procs[MaxBackends];
	PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS];

	/* Create ProcStructLock spinlock, too */
	ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
	SpinLockInit(ProcStructLock);
}
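/*
 * Illustrative sketch, not part of the original file: the fixed layout of
 * the allProcs array that the loop above establishes.  The helper name is
 * made up; it merely restates the index arithmetic used when the free lists
 * are built:
 *
 *   [0, MaxConnections)							normal backends
 *   [MaxConnections, MaxConnections + autovacuum_max_workers + 1)
 *													AV launcher + workers
 *   [..., MaxBackends)								background workers
 *   [MaxBackends, MaxBackends + NUM_AUXILIARY_PROCS)
 *													auxiliary processes
 *   [MaxBackends + NUM_AUXILIARY_PROCS, TotalProcs)
 *													prepared xact dummies
 */
static const char *
pgproc_slot_kind_sketch(int i)
{
	if (i < MaxConnections)
		return "normal backend";
	if (i < MaxConnections + autovacuum_max_workers + 1)
		return "autovacuum launcher/worker";
	if (i < MaxBackends)
		return "background worker";
	if (i < MaxBackends + NUM_AUXILIARY_PROCS)
		return "auxiliary process";
	return "prepared transaction dummy";
}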