Beispiel #1
0
/*
 * Create a shared memory segment of the given size and initialize.  Also,
 * register an on_shmem_exit callback to release the storage.
 */
void *
pool_shared_memory_create(size_t size)
{
	int			shmid;
	void	   *memAddress;

	/* Try to create new segment */
	shmid = shmget(IPC_PRIVATE, size, IPC_CREAT | IPC_EXCL | IPCProtection);

	if (shmid < 0)
	{
		pool_error("could not create shared memory segment: %s",
				   strerror(errno));
		return NULL;
	}

	/* Register on-exit routine to delete the new segment */
	on_shmem_exit(IpcMemoryDelete, shmid);

	/* OK, should be able to attach to the segment */
	memAddress = shmat(shmid, NULL, PG_SHMAT_FLAGS);

	if (memAddress == (void *) -1)
	{
		pool_error("shmat(id=%d) failed: %s", shmid, strerror(errno));
		return NULL;
	}

	/* Register on-exit routine to detach new segment before deleting */
	on_shmem_exit(IpcMemoryDetach, (Datum) memAddress);

	return memAddress;
}
/*
 * ProcSignalInit
 *		Register the current process in the procsignal array
 *
 * The passed index should be my BackendId if the process has one,
 * or MaxBackends + aux process type if not.
 */
void
ProcSignalInit(int pss_idx)
{
	volatile ProcSignalSlot *slot;

	Assert(pss_idx >= 1 && pss_idx <= NumProcSignalSlots);

	slot = &ProcSignalSlots[pss_idx - 1];

	/* sanity check */
	if (slot->pss_pid != 0)
		elog(LOG, "process %d taking over ProcSignal slot %d, but it's not empty",
			 MyProcPid, pss_idx);

	/* Clear out any leftover signal reasons */
	MemSet(slot->pss_signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t));

	/* Mark slot with my PID */
	slot->pss_pid = MyProcPid;

	/* Remember slot location for CheckProcSignal */
	MyProcSignalSlot = slot;

	/* Set up to release the slot on process exit */
	on_shmem_exit(CleanupProcSignalState, Int32GetDatum(pss_idx));
}
Beispiel #3
0
/*
 * Create a semaphore set and initialize.
 */
void
pool_semaphore_create(int numSems)
{
	int			i;

	/* Try to create new semaphore set */
	semId = semget(IPC_PRIVATE, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);

	if (semId < 0)
		ereport(FATAL,
			(errmsg("Unable to create semaphores:%d error:\"%s\"",numSems,strerror(errno))));

	on_shmem_exit(IpcSemaphoreKill, semId);

	/* Initialize it to count 1 */
	for (i = 0; i < numSems; i++)
	{
		union semun semun;

		semun.val = 1;
		if (semctl(semId, i, SETVAL, semun) < 0)
			ereport(FATAL,
				(errmsg("Unable to create semaphores:%d error:\"%s\"",numSems,strerror(errno)),
						errdetail("semctl(%d, %d, SETVAL, %d) failed",semId,i,semun.val)));
	}
}
Beispiel #4
0
/*
 * SIBackendInit
 *		Initialize a new backend to operate on the sinval buffer
 *
 * Returns:
 *	   >0	A-OK
 *		0	Failed to find a free procState slot (ie, MaxBackends exceeded)
 *	   <0	Some other failure (not currently used)
 *
 * NB: this routine, and all following ones, must be executed with the
 * SInvalLock lock held, since there may be multiple backends trying
 * to access the buffer.
 */
int
SIBackendInit(SISeg *segP)
{
	int			index;
	ProcState  *stateP = NULL;

	/* Look for a free entry in the procState array */
	for (index = 0; index < segP->lastBackend; index++)
	{
		if (segP->procState[index].nextMsgNum < 0)		/* inactive slot? */
		{
			stateP = &segP->procState[index];
			break;
		}
	}

	if (stateP == NULL)
	{
		if (segP->lastBackend < segP->maxBackends)
		{
			stateP = &segP->procState[segP->lastBackend];
			Assert(stateP->nextMsgNum < 0);
			segP->lastBackend++;
		}
		else
		{
			/* out of procState slots */
			MyBackendId = InvalidBackendId;
			return 0;
		}
	}

	MyBackendId = (stateP - &segP->procState[0]) + 1;

#ifdef	INVALIDDEBUG
	elog(DEBUG2, "my backend id is %d", MyBackendId);
#endif   /* INVALIDDEBUG */

	/* Advertise assigned backend ID in MyProc */
	MyProc->backendId = MyBackendId;

	/* Reduce free slot count */
	segP->freeBackends--;

	/* Fetch next local transaction ID into local memory */
	nextLocalTransactionId = segP->nextLXID[MyBackendId - 1];

	/* mark myself active, with all extant messages already read */
	stateP->nextMsgNum = segP->maxMsgNum;
	stateP->resetState = false;

	/* register exit routine to mark my entry inactive at exit */
	on_shmem_exit(CleanupInvalidationState, PointerGetDatum(segP));

	return 1;
}
Beispiel #5
0
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * In the Win32 implementation, we acquire semaphores on-demand; the
 * maxSemas parameter is just used to size the array that keeps track of
 * acquired semas for subsequent releasing.  We use anonymous semaphores
 * so the semaphores are automatically freed when the last referencing
 * process exits.
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
	mySemSet = (HANDLE *) malloc(maxSemas * sizeof(HANDLE));
	if (mySemSet == NULL)
		elog(PANIC, "out of memory");
	numSems = 0;
	maxSems = maxSemas;

	on_shmem_exit(ReleaseSemaphores, 0);
}
Beispiel #6
0
/* Initialize a per-walsender data structure for this walsender process */
static void
InitWalSnd(void)
{
	int			i;

	/*
	 * WalSndCtl should be set up already (we inherit this by fork() or
	 * EXEC_BACKEND mechanism from the postmaster).
	 */
	Assert(WalSndCtl != NULL);
	Assert(MyWalSnd == NULL);

	/*
	 * Find a free walsender slot and reserve it. If this fails, we must be
	 * out of WalSnd structures.
	 */
	for (i = 0; i < max_wal_senders; i++)
	{
		/* use volatile pointer to prevent code rearrangement */
		volatile WalSnd *walsnd = &WalSndCtl->walsnds[i];

		SpinLockAcquire(&walsnd->mutex);

		if (walsnd->pid != 0)
		{
			SpinLockRelease(&walsnd->mutex);
			continue;
		}
		else
		{
			/*
			 * Found a free slot. Reserve it for us.
			 */
			walsnd->pid = MyProcPid;
			MemSet(&walsnd->sentPtr, 0, sizeof(XLogRecPtr));
			walsnd->state = WALSNDSTATE_STARTUP;
			SpinLockRelease(&walsnd->mutex);
			/* don't need the lock anymore */
			OwnLatch((Latch *) &walsnd->latch);
			MyWalSnd = (WalSnd *) walsnd;

			break;
		}
	}
	if (MyWalSnd == NULL)
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("number of requested standby connections "
						"exceeds max_wal_senders (currently %d)",
						max_wal_senders)));

	/* Arrange to clean up at walsender exit */
	on_shmem_exit(WalSndKill, 0);
}
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * This is called during postmaster start or shared memory reinitialization.
 * It should do whatever is needed to be able to support up to maxSemas
 * subsequent PGSemaphoreCreate calls.  Also, if any system resources
 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
 * callback to release them.
 *
 * The port number is passed for possible use as a key (for Posix, we use
 * it to generate the starting semaphore name).  In a standalone backend,
 * zero will be passed.
 *
 * In the Posix implementation, we acquire semaphores on-demand; the
 * maxSemas parameter is just used to size the array that keeps track of
 * acquired semas for subsequent releasing.
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
	mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *));
	if (mySemPointers == NULL)
		elog(PANIC, "out of memory");
	numSems = 0;
	maxSems = maxSemas;
	nextSemKey = port * 1000;

	on_shmem_exit(ReleaseSemaphores, 0);
}
Beispiel #8
0
static void
init_lwlock_stats(void)
{
	int		   *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
	int			numLocks = LWLockCounter[1];

	sh_acquire_counts = calloc(numLocks, sizeof(int));
	ex_acquire_counts = calloc(numLocks, sizeof(int));
	spin_delay_counts = calloc(numLocks, sizeof(int));
	block_counts = calloc(numLocks, sizeof(int));
	counts_for_pid = MyProcPid;
	on_shmem_exit(print_lwlock_stats, 0);
}
Beispiel #9
0
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * This is called during postmaster start or shared memory reinitialization.
 * It should do whatever is needed to be able to support up to maxSemas
 * subsequent PGSemaphoreCreate calls.  Also, if any system resources
 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
 * callback to release them.
 *
 * The port number is passed for possible use as a key (for SysV, we use
 * it to generate the starting semaphore key).  In a standalone backend,
 * zero will be passed.
 *
 * In the SysV implementation, we acquire semaphore sets on-demand; the
 * maxSemas parameter is just used to size the array that keeps track of
 * acquired sets for subsequent releasing.
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
	maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
	mySemaSets = (IpcSemaphoreId *)
		malloc(maxSemaSets * sizeof(IpcSemaphoreId));
	if (mySemaSets == NULL)
		elog(PANIC, "out of memory");
	numSemaSets = 0;
	nextSemaKey = port * 1000;
	nextSemaNumber = SEMAS_PER_SET;		/* force sema set alloc on 1st call */

	on_shmem_exit(ReleaseSemaphores, 0);
}
Beispiel #10
0
/*
 * Establish an AuxProcessResourceOwner for the current process.
 */
void
CreateAuxProcessResourceOwner(void)
{
	Assert(AuxProcessResourceOwner == NULL);
	Assert(CurrentResourceOwner == NULL);
	AuxProcessResourceOwner = ResourceOwnerCreate(NULL, "AuxiliaryProcess");
	CurrentResourceOwner = AuxProcessResourceOwner;

	/*
	 * Register a shmem-exit callback for cleanup of aux-process resource
	 * owner.  (This needs to run after, e.g., ShutdownXLOG.)
	 */
	on_shmem_exit(ReleaseAuxProcessResourcesCallback, 0);

}
Beispiel #11
0
/*
 * InitProcessPhase2 -- make MyProc visible in the shared ProcArray.
 *
 * This is separate from InitProcess because we can't acquire LWLocks until
 * we've created a PGPROC, but in the EXEC_BACKEND case ProcArrayAdd won't
 * work until after we've done CreateSharedMemoryAndSemaphores.
 */
void
InitProcessPhase2(void)
{
	Assert(MyProc != NULL);

	/*
	 * Add our PGPROC to the PGPROC array in shared memory.
	 */
	ProcArrayAdd(MyProc);

	/*
	 * Arrange to clean that up at backend exit.
	 */
	on_shmem_exit(RemoveProcFromArray, 0);
}
/*
 * shmem_startup_hook
 */
static void
pgcl_shmem_startup(void)
{
	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	/*
	 * If we're in the postmaster (or a standalone backend...),
	 * set up a shmem exit hook to call command_at_shutdown.
	 */
	if (!IsUnderPostmaster)
		on_shmem_exit(pgcl_shmem_shutdown, (Datum) 0);

	if (command_at_startup)
		ExecuteCommand(command_at_startup, "command_at_startup");
}
Beispiel #13
0
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * This is called during postmaster start or shared memory reinitialization.
 * It should do whatever is needed to be able to support up to maxSemas
 * subsequent PGSemaphoreCreate calls.	Also, if any system resources
 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
 * callback to release them.
 *
 * The port number is passed for possible use as a key (for SysV, we use
 * it to generate the starting semaphore key).	In a standalone backend,
 * zero will be passed.
 *
 * In the SysV implementation, we acquire semaphore sets on-demand; the
 * maxSemas parameter is just used to size the array that keeps track of
 * acquired sets for subsequent releasing.
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
	maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
	mySemaSets = (IpcSemaphoreId *)
		malloc(maxSemaSets * sizeof(IpcSemaphoreId));
	if (mySemaSets == NULL)
		elog(PANIC, "out of memory");
	numSemaSets = 0;
	nextSemaKey = port * 1000;
	nextSemaNumber = SEMAS_PER_SET;		/* force sema set alloc on 1st call */

	elog((Debug_print_semaphore_detail ? LOG : DEBUG5),
		 "maxSemaSets %d, nextSemaKey %d, nextSemaNumber %d",
		 maxSemaSets, nextSemaKey, nextSemaNumber);

	on_shmem_exit(ReleaseSemaphores, 0);
}
void PersistentDatabase_DirIterateInit(void)
{
	READ_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	Assert(persistentDatabaseSharedData != NULL);
	
	PersistentDatabase_VerifyInitScan();

	PersistentDatabase_ReleaseDirIterator();

	READ_PERSISTENT_STATE_ORDERED_LOCK;

	if (!iterateOnShmemExitArmed)
	{
		on_shmem_exit(AtProcExit_PersistentDatabase, 0);
		iterateOnShmemExitArmed = true;
	}

	dirIterateDatabaseDirEntry = NULL;
	
	READ_PERSISTENT_STATE_ORDERED_UNLOCK;
}
Beispiel #15
0
static void
init_lwlock_stats(void)
{
	HASHCTL		ctl;
	static MemoryContext lwlock_stats_cxt = NULL;
	static bool exit_registered = false;

	if (lwlock_stats_cxt != NULL)
		MemoryContextDelete(lwlock_stats_cxt);

	/*
	 * The LWLock stats will be updated within a critical section, which
	 * requires allocating new hash entries. Allocations within a critical
	 * section are normally not allowed because running out of memory would
	 * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
	 * turned on in production, so that's an acceptable risk. The hash entries
	 * are small, so the risk of running out of memory is minimal in practice.
	 */
	lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
											 "LWLock stats",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);

	MemSet(&ctl, 0, sizeof(ctl));
	ctl.keysize = sizeof(lwlock_stats_key);
	ctl.entrysize = sizeof(lwlock_stats);
	ctl.hash = tag_hash;
	ctl.hcxt = lwlock_stats_cxt;
	lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
									HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
	if (!exit_registered)
	{
		on_shmem_exit(print_lwlock_stats, 0);
		exit_registered = true;
	}
}
Beispiel #16
0
/*
 * InitProcessPhase2 -- make MyProc visible in the shared ProcArray.
 *
 * This is separate from InitProcess because we can't acquire LWLocks until
 * we've created a PGPROC, but in the EXEC_BACKEND case there is a good deal
 * of stuff to be done before this step that will require LWLock access.
 */
void
InitProcessPhase2(void)
{
	Assert(MyProc != NULL);

	/*
	 * We should now know what database we're in, so advertise that.  (We need
	 * not do any locking here, since no other backend can yet see our
	 * PGPROC.)
	 */
	Assert(OidIsValid(MyDatabaseId));
	MyProc->databaseId = MyDatabaseId;

	/*
	 * Add our PGPROC to the PGPROC array in shared memory.
	 */
	ProcArrayAdd(MyProc);

	/*
	 * Arrange to clean that up at backend exit.
	 */
	on_shmem_exit(RemoveProcFromArray, 0);
}
Beispiel #17
0
/**
 * Initialize global sate of backoff scheduler. This is called during creation
 * of shared memory and semaphores.
 */
void
BackoffStateInit()
{
	bool		found = false;

	/* Create or attach to the shared array */
	backoffSingleton = (BackoffState *) ShmemInitStruct("Backoff Global State", sizeof(BackoffState), &found);

	if (!found)
	{
		bool		ret = false;

		/*
		 * We're the first - initialize.
		 */
		MemSet(backoffSingleton, 0, sizeof(BackoffState));
		backoffSingleton->numEntries = MaxBackends;
		backoffSingleton->backendEntries = (BackoffBackendSharedEntry *) ShmemInitStruct("Backoff Backend Entries", mul_size(sizeof(BackoffBackendSharedEntry), backoffSingleton->numEntries), &ret);
		backoffSingleton->sweeperInProgress = false;
		Assert(!ret);
	}

	on_shmem_exit(BackoffStateAtExit, 0);
}
Beispiel #18
0
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * This is called during postmaster start or shared memory reinitialization.
 * It should do whatever is needed to be able to support up to maxSemas
 * subsequent PGSemaphoreCreate calls.  Also, if any system resources
 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
 * callback to release them.
 *
 * The port number is passed for possible use as a key (for Posix, we use
 * it to generate the starting semaphore name).  In a standalone backend,
 * zero will be passed.
 *
 * In the Posix implementation, we acquire semaphores on-demand; the
 * maxSemas parameter is just used to size the arrays.  For unnamed
 * semaphores, there is an array of PGSemaphoreData structs in shared memory.
 * For named semaphores, we keep a postmaster-local array of sem_t pointers,
 * which we use for releasing the semphores when done.
 * (This design minimizes the dependency of postmaster shutdown on the
 * contents of shared memory, which a failed backend might have clobbered.
 * We can't do much about the possibility of sem_destroy() crashing, but
 * we don't have to expose the counters to other processes.)
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
#ifdef USE_NAMED_POSIX_SEMAPHORES
	mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *));
	if (mySemPointers == NULL)
		elog(PANIC, "out of memory");
#else

	/*
	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
	 * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
	 * are emulating spinlocks with semaphores.)
	 */
	sharedSemas = (PGSemaphore)
		ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
#endif

	numSems = 0;
	maxSems = maxSemas;
	nextSemKey = port * 1000;

	on_shmem_exit(ReleaseSemaphores, 0);
}
Beispiel #19
0
/*
 * PGReserveSemaphores --- initialize semaphore support
 *
 * This is called during postmaster start or shared memory reinitialization.
 * It should do whatever is needed to be able to support up to maxSemas
 * subsequent PGSemaphoreCreate calls.  Also, if any system resources
 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
 * callback to release them.
 *
 * The port number is passed for possible use as a key (for SysV, we use
 * it to generate the starting semaphore key).  In a standalone backend,
 * zero will be passed.
 *
 * In the SysV implementation, we acquire semaphore sets on-demand; the
 * maxSemas parameter is just used to size the arrays.  There is an array
 * of PGSemaphoreData structs in shared memory, and a postmaster-local array
 * with one entry per SysV semaphore set, which we use for releasing the
 * semaphore sets when done.  (This design ensures that postmaster shutdown
 * doesn't rely on the contents of shared memory, which a failed backend might
 * have clobbered.)
 */
void
PGReserveSemaphores(int maxSemas, int port)
{
	/*
	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
	 * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
	 * are emulating spinlocks with semaphores.)
	 */
	sharedSemas = (PGSemaphore)
		ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
	numSharedSemas = 0;
	maxSharedSemas = maxSemas;

	maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
	mySemaSets = (IpcSemaphoreId *)
		malloc(maxSemaSets * sizeof(IpcSemaphoreId));
	if (mySemaSets == NULL)
		elog(PANIC, "out of memory");
	numSemaSets = 0;
	nextSemaKey = port * 1000;
	nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */

	on_shmem_exit(ReleaseSemaphores, 0);
}
Beispiel #20
0
/*
 * SharedInvalBackendInit
 *		Initialize a new backend to operate on the sinval buffer
 */
void
SharedInvalBackendInit(bool sendOnly)
{
	int			index;
	ProcState  *stateP = NULL;
	SISeg	   *segP = shmInvalBuffer;

	/*
	 * This can run in parallel with read operations, and for that matter with
	 * write operations; but not in parallel with additions and removals of
	 * backends, nor in parallel with SICleanupQueue.  It doesn't seem worth
	 * having a third lock, so we choose to use SInvalWriteLock to serialize
	 * additions/removals.
	 */
	LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE);

	/* Look for a free entry in the procState array */
	for (index = 0; index < segP->lastBackend; index++)
	{
		if (segP->procState[index].procPid == 0)		/* inactive slot? */
		{
			stateP = &segP->procState[index];
			break;
		}
	}

	if (stateP == NULL)
	{
		if (segP->lastBackend < segP->maxBackends)
		{
			stateP = &segP->procState[segP->lastBackend];
			Assert(stateP->procPid == 0);
			segP->lastBackend++;
		}
		else
		{
			/*
			 * out of procState slots: MaxBackends exceeded -- report normally
			 */
			MyBackendId = InvalidBackendId;
			LWLockRelease(SInvalWriteLock);
			ereport(FATAL,
					(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
					 errmsg("sorry, too many clients already")));
		}
	}

	MyBackendId = (stateP - &segP->procState[0]) + 1;

	/* Advertise assigned backend ID in MyProc */
	MyProc->backendId = MyBackendId;

	/* Fetch next local transaction ID into local memory */
	nextLocalTransactionId = stateP->nextLXID;

	/* mark myself active, with all extant messages already read */
	stateP->procPid = MyProcPid;
	stateP->nextMsgNum = segP->maxMsgNum;
	stateP->resetState = false;
	stateP->signaled = false;
	stateP->sendOnly = sendOnly;

	LWLockRelease(SInvalWriteLock);

	/* register exit routine to mark my entry inactive at exit */
	on_shmem_exit(CleanupInvalidationState, PointerGetDatum(segP));

	elog(DEBUG4, "my backend id is %d", MyBackendId);
}
Beispiel #21
0
/* --------------------------------
 * InitPostgres
 *		Initialize POSTGRES.
 *
 * The database can be specified by name, using the in_dbname parameter, or by
 * OID, using the dboid parameter.  In the latter case, the actual database
 * name can be returned to the caller in out_dbname.  If out_dbname isn't
 * NULL, it must point to a buffer of size NAMEDATALEN.
 *
 * In bootstrap mode no parameters are used.  The autovacuum launcher process
 * doesn't use any parameters either, because it only goes far enough to be
 * able to read pg_database; it doesn't connect to any particular database.
 * In walsender mode only username is used.
 *
 * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
 * already have a PGPROC struct ... but it's not completely filled in yet.
 *
 * Note:
 *		Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
void
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
             char *out_dbname)
{
    bool		bootstrap = IsBootstrapProcessingMode();
    bool		am_superuser;
    char	   *fullpath;
    char		dbname[NAMEDATALEN];

    elog(DEBUG3, "InitPostgres");

    /*
     * Add my PGPROC struct to the ProcArray.
     *
     * Once I have done this, I am visible to other backends!
     */
    InitProcessPhase2();

    /*
     * Initialize my entry in the shared-invalidation manager's array of
     * per-backend data.
     *
     * Sets up MyBackendId, a unique backend identifier.
     */
    MyBackendId = InvalidBackendId;

    SharedInvalBackendInit(false);

    if (MyBackendId > MaxBackends || MyBackendId <= 0)
        elog(FATAL, "bad backend ID: %d", MyBackendId);

    /* Now that we have a BackendId, we can participate in ProcSignal */
    ProcSignalInit(MyBackendId);

    /*
     * Also set up timeout handlers needed for backend operation.  We need
     * these in every case except bootstrap.
     */
    if (!bootstrap)
    {
        RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLock);
        RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler);
        RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler);
    }

    /*
     * bufmgr needs another initialization call too
     */
    InitBufferPoolBackend();

    /*
     * Initialize local process's access to XLOG.
     */
    if (IsUnderPostmaster)
    {
        /*
         * The postmaster already started the XLOG machinery, but we need to
         * call InitXLOGAccess(), if the system isn't in hot-standby mode.
         * This is handled by calling RecoveryInProgress and ignoring the
         * result.
         */
        (void) RecoveryInProgress();
    }
    else
    {
        /*
         * We are either a bootstrap process or a standalone backend. Either
         * way, start up the XLOG machinery, and register to have it closed
         * down at exit.
         */
        StartupXLOG();
        on_shmem_exit(ShutdownXLOG, 0);
    }

    /*
     * Initialize the relation cache and the system catalog caches.  Note that
     * no catalog access happens here; we only set up the hashtable structure.
     * We must do this before starting a transaction because transaction abort
     * would try to touch these hashtables.
     */
    RelationCacheInitialize();
    InitCatalogCache();
    InitPlanCache();

    /* Initialize portal manager */
    EnablePortalManager();

    /* Initialize stats collection --- must happen before first xact */
    if (!bootstrap)
        pgstat_initialize();

    /*
     * Load relcache entries for the shared system catalogs.  This must create
     * at least entries for pg_database and catalogs used for authentication.
     */
    RelationCacheInitializePhase2();

    /*
     * Set up process-exit callback to do pre-shutdown cleanup.  This is the
     * first before_shmem_exit callback we register; thus, this will be the
     * last thing we do before low-level modules like the buffer manager begin
     * to close down.  We need to have this in place before we begin our first
     * transaction --- if we fail during the initialization transaction, as is
     * entirely possible, we need the AbortTransaction call to clean up.
     */
    before_shmem_exit(ShutdownPostgres, 0);

    /* The autovacuum launcher is done here */
    if (IsAutoVacuumLauncherProcess())
        return;

    /*
     * Start a new transaction here before first access to db, and get a
     * snapshot.  We don't have a use for the snapshot itself, but we're
     * interested in the secondary effect that it sets RecentGlobalXmin. (This
     * is critical for anything that reads heap pages, because HOT may decide
     * to prune them even if the process doesn't attempt to modify any
     * tuples.)
     */
    if (!bootstrap)
    {
        /* statement_timestamp must be set for timeouts to work correctly */
        SetCurrentStatementStartTimestamp();
        StartTransactionCommand();

        /*
         * transaction_isolation will have been set to the default by the
         * above.  If the default is "serializable", and we are in hot
         * standby, we will fail if we don't change it to something lower.
         * Fortunately, "read committed" is plenty good enough.
         */
        XactIsoLevel = XACT_READ_COMMITTED;

        (void) GetTransactionSnapshot();
    }

    /*
     * Perform client authentication if necessary, then figure out our
     * postgres user ID, and see if we are a superuser.
     *
     * In standalone mode and in autovacuum worker processes, we use a fixed
     * ID, otherwise we figure it out from the authenticated user name.
     */
    if (bootstrap || IsAutoVacuumWorkerProcess())
    {
        InitializeSessionUserIdStandalone();
        am_superuser = true;
    }
    else if (!IsUnderPostmaster)
    {
        InitializeSessionUserIdStandalone();
        am_superuser = true;
        if (!ThereIsAtLeastOneRole())
            ereport(WARNING,
                    (errcode(ERRCODE_UNDEFINED_OBJECT),
                     errmsg("no roles are defined in this database system"),
                     errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.",
                             username)));
    }
    else if (IsBackgroundWorker)
    {
        if (username == NULL)
        {
            InitializeSessionUserIdStandalone();
            am_superuser = true;
        }
        else
        {
            InitializeSessionUserId(username);
            am_superuser = superuser();
        }
    }
    else
    {
        /* normal multiuser case */
        Assert(MyProcPort != NULL);
        PerformAuthentication(MyProcPort);
        InitializeSessionUserId(username);
        am_superuser = superuser();
    }

    /*
     * If we're trying to shut down, only superusers can connect, and new
     * replication connections are not allowed.
     */
    if ((!am_superuser || am_walsender) &&
            MyProcPort != NULL &&
            MyProcPort->canAcceptConnections == CAC_WAITBACKUP)
    {
        if (am_walsender)
            ereport(FATAL,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("new replication connections are not allowed during database shutdown")));
        else
            ereport(FATAL,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("must be superuser to connect during database shutdown")));
    }

    /*
     * Binary upgrades only allowed super-user connections
     */
    if (IsBinaryUpgrade && !am_superuser)
    {
        ereport(FATAL,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("must be superuser to connect in binary upgrade mode")));
    }

    /*
     * The last few connections slots are reserved for superusers. Although
     * replication connections currently require superuser privileges, we
     * don't allow them to consume the reserved slots, which are intended for
     * interactive use.
     */
    if ((!am_superuser || am_walsender) &&
            ReservedBackends > 0 &&
            !HaveNFreeProcs(ReservedBackends))
        ereport(FATAL,
                (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
                 errmsg("remaining connection slots are reserved for non-replication superuser connections")));

    /* Check replication permissions needed for walsender processes. */
    if (am_walsender)
    {
        Assert(!bootstrap);

        if (!superuser() && !has_rolreplication(GetUserId()))
            ereport(FATAL,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("must be superuser or replication role to start walsender")));
    }

    /*
     * If this is a plain walsender only supporting physical replication, we
     * don't want to connect to any particular database. Just finish the
     * backend startup by processing any options from the startup packet, and
     * we're done.
     */
    if (am_walsender && !am_db_walsender)
    {
        /* process any options passed in the startup packet */
        if (MyProcPort != NULL)
            process_startup_options(MyProcPort, am_superuser);

        /* Apply PostAuthDelay as soon as we've read all options */
        if (PostAuthDelay > 0)
            pg_usleep(PostAuthDelay * 1000000L);

        /* initialize client encoding */
        InitializeClientEncoding();

        /* report this backend in the PgBackendStatus array */
        pgstat_bestart();

        /* close the transaction we started above */
        CommitTransactionCommand();

        return;
    }

    /*
     * Set up the global variables holding database id and default tablespace.
     * But note we won't actually try to touch the database just yet.
     *
     * We take a shortcut in the bootstrap case, otherwise we have to look up
     * the db's entry in pg_database.
     */
    if (bootstrap)
    {
        MyDatabaseId = TemplateDbOid;
        MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
    }
    else if (in_dbname != NULL)
    {
        HeapTuple	tuple;
        Form_pg_database dbform;

        tuple = GetDatabaseTuple(in_dbname);
        if (!HeapTupleIsValid(tuple))
            ereport(FATAL,
                    (errcode(ERRCODE_UNDEFINED_DATABASE),
                     errmsg("database \"%s\" does not exist", in_dbname)));
        dbform = (Form_pg_database) GETSTRUCT(tuple);
        MyDatabaseId = HeapTupleGetOid(tuple);
        MyDatabaseTableSpace = dbform->dattablespace;
        /* take database name from the caller, just for paranoia */
        strlcpy(dbname, in_dbname, sizeof(dbname));
    }
    else if (OidIsValid(dboid))
    {
        /* caller specified database by OID */
        HeapTuple	tuple;
        Form_pg_database dbform;

        tuple = GetDatabaseTupleByOid(dboid);
        if (!HeapTupleIsValid(tuple))
            ereport(FATAL,
                    (errcode(ERRCODE_UNDEFINED_DATABASE),
                     errmsg("database %u does not exist", dboid)));
        dbform = (Form_pg_database) GETSTRUCT(tuple);
        MyDatabaseId = HeapTupleGetOid(tuple);
        MyDatabaseTableSpace = dbform->dattablespace;
        Assert(MyDatabaseId == dboid);
        strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));
        /* pass the database name back to the caller */
        if (out_dbname)
            strcpy(out_dbname, dbname);
    }
    else
    {
        /*
         * If this is a background worker not bound to any particular
         * database, we're done now.  Everything that follows only makes
         * sense if we are bound to a specific database.  We do need to
         * close the transaction we started before returning.
         */
        if (!bootstrap)
            CommitTransactionCommand();
        return;
    }

    /*
     * Now, take a writer's lock on the database we are trying to connect to.
     * If there is a concurrently running DROP DATABASE on that database, this
     * will block us until it finishes (and has committed its update of
     * pg_database).
     *
     * Note that the lock is not held long, only until the end of this startup
     * transaction.  This is OK since we will advertise our use of the
     * database in the ProcArray before dropping the lock (in fact, that's the
     * next thing to do).  Anyone trying a DROP DATABASE after this point will
     * see us in the array once they have the lock.  Ordering is important for
     * this because we don't want to advertise ourselves as being in this
     * database until we have the lock; otherwise we create what amounts to a
     * deadlock with CountOtherDBBackends().
     *
     * Note: use of RowExclusiveLock here is reasonable because we envision
     * our session as being a concurrent writer of the database.  If we had a
     * way of declaring a session as being guaranteed-read-only, we could use
     * AccessShareLock for such sessions and thereby not conflict against
     * CREATE DATABASE.
     */
    if (!bootstrap)
        LockSharedObject(DatabaseRelationId, MyDatabaseId, 0,
                         RowExclusiveLock);

    /*
     * Now we can mark our PGPROC entry with the database ID.
     *
     * We assume this is an atomic store so no lock is needed; though actually
     * things would work fine even if it weren't atomic.  Anyone searching the
     * ProcArray for this database's ID should hold the database lock, so they
     * would not be executing concurrently with this store.  A process looking
     * for another database's ID could in theory see a chance match if it read
     * a partially-updated databaseId value; but as long as all such searches
     * wait and retry, as in CountOtherDBBackends(), they will certainly see
     * the correct value on their next try.
     */
    MyProc->databaseId = MyDatabaseId;

    /*
     * We established a catalog snapshot while reading pg_authid and/or
     * pg_database; but until we have set up MyDatabaseId, we won't react to
     * incoming sinval messages for unshared catalogs, so we won't realize it
     * if the snapshot has been invalidated.  Assume it's no good anymore.
     */
    InvalidateCatalogSnapshot();

    /*
     * Recheck pg_database to make sure the target database hasn't gone away.
     * If there was a concurrent DROP DATABASE, this ensures we will die
     * cleanly without creating a mess.
     */
    if (!bootstrap)
    {
        HeapTuple	tuple;

        tuple = GetDatabaseTuple(dbname);
        if (!HeapTupleIsValid(tuple) ||
                MyDatabaseId != HeapTupleGetOid(tuple) ||
                MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
            ereport(FATAL,
                    (errcode(ERRCODE_UNDEFINED_DATABASE),
                     errmsg("database \"%s\" does not exist", dbname),
                     errdetail("It seems to have just been dropped or renamed.")));
    }

    /*
     * Now we should be able to access the database directory safely. Verify
     * it's there and looks reasonable.
     */
    fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);

    if (!bootstrap)
    {
        if (access(fullpath, F_OK) == -1)
        {
            if (errno == ENOENT)
                ereport(FATAL,
                        (errcode(ERRCODE_UNDEFINED_DATABASE),
                         errmsg("database \"%s\" does not exist",
                                dbname),
                         errdetail("The database subdirectory \"%s\" is missing.",
                                   fullpath)));
            else
                ereport(FATAL,
                        (errcode_for_file_access(),
                         errmsg("could not access directory \"%s\": %m",
                                fullpath)));
        }

        ValidatePgVersion(fullpath);
    }

    SetDatabasePath(fullpath);

    /*
     * It's now possible to do real access to the system catalogs.
     *
     * Load relcache entries for the system catalogs.  This must create at
     * least the minimum set of "nailed-in" cache entries.
     */
    RelationCacheInitializePhase3();

    /* set up ACL framework (so CheckMyDatabase can check permissions) */
    initialize_acl();

    /*
     * Re-read the pg_database row for our database, check permissions and set
     * up database-specific GUC settings.  We can't do this until all the
     * database-access infrastructure is up.  (Also, it wants to know if the
     * user is a superuser, so the above stuff has to happen first.)
     */
    if (!bootstrap)
        CheckMyDatabase(dbname, am_superuser);

    /*
     * Now process any command-line switches and any additional GUC variable
     * settings passed in the startup packet.   We couldn't do this before
     * because we didn't know if client is a superuser.
     */
    if (MyProcPort != NULL)
        process_startup_options(MyProcPort, am_superuser);

    /* Process pg_db_role_setting options */
    process_settings(MyDatabaseId, GetSessionUserId());

    /* Apply PostAuthDelay as soon as we've read all options */
    if (PostAuthDelay > 0)
        pg_usleep(PostAuthDelay * 1000000L);

    /*
     * Initialize various default states that can't be set up until we've
     * selected the active user and gotten the right GUC settings.
     */

    /* set default namespace search path */
    InitializeSearchPath();

    /* initialize client encoding */
    InitializeClientEncoding();

    /* report this backend in the PgBackendStatus array */
    if (!bootstrap)
        pgstat_bestart();

    /* close the transaction we started above */
    if (!bootstrap)
        CommitTransactionCommand();
}
Beispiel #22
0
/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
	volatile LWLock *lock = &(LWLockArray[lockid].lock);
	PGPROC	   *proc = MyProc;
	bool		retry = false;
	int			extraWaits = 0;

	PRINT_LWDEBUG("LWLockAcquire", lockid, lock);

#ifdef LWLOCK_STATS
	/* Set up local count state first time through in a given process */
	if (counts_for_pid != MyProcPid)
	{
		int		   *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
		int			numLocks = LWLockCounter[1];

		sh_acquire_counts = calloc(numLocks, sizeof(int));
		ex_acquire_counts = calloc(numLocks, sizeof(int));
		block_counts = calloc(numLocks, sizeof(int));
		counts_for_pid = MyProcPid;
		on_shmem_exit(print_lwlock_stats, 0);
	}
	/* Count lock acquisition attempts */
	if (mode == LW_EXCLUSIVE)
		ex_acquire_counts[lockid]++;
	else
		sh_acquire_counts[lockid]++;
#endif   /* LWLOCK_STATS */

	/*
	 * We can't wait if we haven't got a PGPROC.  This should only occur
	 * during bootstrap or shared memory initialization.  Put an Assert here
	 * to catch unsafe coding practices.
	 */
	Assert(!(proc == NULL && IsUnderPostmaster));

	/* Ensure we will have room to remember the lock */
	if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
		elog(ERROR, "too many LWLocks taken");

	/*
	 * Lock out cancel/die interrupts until we exit the code section protected
	 * by the LWLock.  This ensures that interrupts will not interfere with
	 * manipulations of data structures in shared memory.
	 */
	HOLD_INTERRUPTS();

	/*
	 * Loop here to try to acquire lock after each time we are signaled by
	 * LWLockRelease.
	 *
	 * NOTE: it might seem better to have LWLockRelease actually grant us the
	 * lock, rather than retrying and possibly having to go back to sleep. But
	 * in practice that is no good because it means a process swap for every
	 * lock acquisition when two or more processes are contending for the same
	 * lock.  Since LWLocks are normally used to protect not-very-long
	 * sections of computation, a process needs to be able to acquire and
	 * release the same lock many times during a single CPU time slice, even
	 * in the presence of contention.  The efficiency of being able to do that
	 * outweighs the inefficiency of sometimes wasting a process dispatch
	 * cycle because the lock is not free when a released waiter finally gets
	 * to run.	See pgsql-hackers archives for 29-Dec-01.
	 */
	for (;;)
	{
		bool		mustwait;

		/* Acquire mutex.  Time spent holding mutex should be short! */
		SpinLockAcquire(&lock->mutex);

		/* If retrying, allow LWLockRelease to release waiters again */
		if (retry)
			lock->releaseOK = true;

		/* If I can get the lock, do so quickly. */
		if (mode == LW_EXCLUSIVE)
		{
			if (lock->exclusive == 0 && lock->shared == 0)
			{
				lock->exclusive++;
				mustwait = false;
			}
			else
				mustwait = true;
		}
		else
		{
			if (lock->exclusive == 0)
			{
				lock->shared++;
				mustwait = false;
			}
			else
				mustwait = true;
		}

		if (!mustwait)
			break;				/* got the lock */

		/*
		 * Add myself to wait queue.
		 *
		 * If we don't have a PGPROC structure, there's no way to wait. This
		 * should never occur, since MyProc should only be null during shared
		 * memory initialization.
		 */
		if (proc == NULL)
			elog(PANIC, "cannot wait without a PGPROC structure");

		proc->lwWaiting = true;
		proc->lwExclusive = (mode == LW_EXCLUSIVE);
		proc->lwWaitLink = NULL;
		if (lock->head == NULL)
			lock->head = proc;
		else
			lock->tail->lwWaitLink = proc;
		lock->tail = proc;

		/* Can release the mutex now */
		SpinLockRelease(&lock->mutex);

		/*
		 * Wait until awakened.
		 *
		 * Since we share the process wait semaphore with the regular lock
		 * manager and ProcWaitForSignal, and we may need to acquire an LWLock
		 * while one of those is pending, it is possible that we get awakened
		 * for a reason other than being signaled by LWLockRelease. If so,
		 * loop back and wait again.  Once we've gotten the LWLock,
		 * re-increment the sema by the number of additional signals received,
		 * so that the lock manager or signal manager will see the received
		 * signal when it next waits.
		 */
		LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");

#ifdef LWLOCK_STATS
		block_counts[lockid]++;
#endif

		TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);

		for (;;)
		{
			/* "false" means cannot accept cancel/die interrupt here. */
			PGSemaphoreLock(&proc->sem, false);
			if (!proc->lwWaiting)
				break;
			extraWaits++;
		}

		TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);

		LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");

		/* Now loop back and try to acquire lock again. */
		retry = true;
	}

	/* We are done updating shared state of the lock itself. */
	SpinLockRelease(&lock->mutex);

	TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);

	/* Add lock to list of locks held by this backend */
	held_lwlocks[num_held_lwlocks++] = lockid;

	/*
	 * Fix the process wait semaphore's count for any absorbed wakeups.
	 */
	while (extraWaits-- > 0)
		PGSemaphoreUnlock(&proc->sem);
}
Beispiel #23
0
/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
	volatile LWLock *lock = &(LWLockArray[lockid].lock);
#if LWLOCK_LOCK_PARTS > 1
	volatile LWLockPart *part = LWLOCK_PART(lock, lockid, MyBackendId);
#endif
	PGPROC	   *proc = MyProc;
	bool		retry = false;
	int			extraWaits = 0;

	PRINT_LWDEBUG("LWLockAcquire", lockid, lock);

#ifdef LWLOCK_STATS
	/* Set up local count state first time through in a given process */
	if (counts_for_pid != MyProcPid)
	{
		int		   *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
		int			numLocks = LWLockCounter[1];

		sh_acquire_counts = calloc(numLocks, sizeof(int));
		ex_acquire_counts = calloc(numLocks, sizeof(int));
		block_counts = calloc(numLocks, sizeof(int));
		counts_for_pid = MyProcPid;
		on_shmem_exit(print_lwlock_stats, 0);
	}
	/* Count lock acquisition attempts */
	if (mode == LW_EXCLUSIVE)
		ex_acquire_counts[lockid]++;
	else
		sh_acquire_counts[lockid]++;
#endif   /* LWLOCK_STATS */

	/*
	 * We can't wait if we haven't got a PGPROC.  This should only occur
	 * during bootstrap or shared memory initialization.  Put an Assert here
	 * to catch unsafe coding practices.
	 */
	Assert(!(proc == NULL && IsUnderPostmaster));

	/* Ensure we will have room to remember the lock */
	if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
		elog(ERROR, "too many LWLocks taken");
		
	/*
	 * Lock out cancel/die interrupts until we exit the code section protected
	 * by the LWLock.  This ensures that interrupts will not interfere with
	 * manipulations of data structures in shared memory.
	 */
	HOLD_INTERRUPTS();

	/*
	 * Loop here to try to acquire lock after each time we are signaled by
	 * LWLockRelease.
	 *
	 * NOTE: it might seem better to have LWLockRelease actually grant us the
	 * lock, rather than retrying and possibly having to go back to sleep. But
	 * in practice that is no good because it means a process swap for every
	 * lock acquisition when two or more processes are contending for the same
	 * lock.  Since LWLocks are normally used to protect not-very-long
	 * sections of computation, a process needs to be able to acquire and
	 * release the same lock many times during a single CPU time slice, even
	 * in the presence of contention.  The efficiency of being able to do that
	 * outweighs the inefficiency of sometimes wasting a process dispatch
	 * cycle because the lock is not free when a released waiter finally gets
	 * to run.	See pgsql-hackers archives for 29-Dec-01.
	 */
	for (;;)
	{
		bool		mustwait;
		
		if (mode == LW_SHARED)
		{
#ifdef		LWLOCK_PART_SHARED_OPS_ATOMIC
			/* Increment shared counter partition. If there's no contention,
			 * this is sufficient to take the lock
			 */
			LWLOCK_PART_SHARED_POSTINC_ATOMIC(lock, lockid, part, MyBackendId);
			LWLOCK_PART_SHARED_FENCE();
			
			/* A concurrent exclusive locking attempt does the following
			 * three steps
			 *   1) Acquire mutex
			 *   2) Check shared counter partitions for readers.
			 *   3a) If found add proc to wait queue, block, restart at (1)
			 *   3b) If not found, set exclusive flag, continue with (4)
			 *   4) Enter protected section
			 * The fence after the atomic add above ensures that no further
			 * such attempt can proceed to (3b) or beyond. There may be
			 * pre-existing exclusive locking attempts at step (3b) or beyond,
			 * but we can recognize those by either the mutex being taken, or
			 * the exclusive flag being set. Conversely, if we see neither, we
			 * may proceed and enter the protected section.
			 *
			 * FIXME: This doesn't work if slock_t is a struct or doesn't
			 * use 0 for state "unlocked".
			 */

			if ((lock->mutex == 0) && (lock->exclusive == 0)) {
				/* If retrying, allow LWLockRelease to release waiters again.
				 * Usually this happens after we acquired the mutex, but if
				 * we skip that, we still need to set releaseOK.
				 *
				 * Acquiring the mutex here is not really an option - if many
				 * reader are awoken simultaneously by an exclusive unlock,
				 * that would be a source of considerable contention.
				 *
				 * Fotunately, this is safe even without the mutex. First,
				 * there actually cannot be any non-fast path unlocking
				 * attempt in progress, because we'd then either still see
				 * the exclusive flag set or the mutex being taken. And
				 * even if there was, and such an attempt cleared the flag
				 * immediately after we set it, it'd also wake up some waiter
				 * who'd then re-set the flag.
				 *
				 * The only reason to do this here, and not directly
				 * after returning from PGSemaphoreLock(), is that it seems
				 * benefical to make SpinLockAcquire() the first thing to
				 * touch the lock if possible, in case we acquire the spin
				 * lock at all. That way, the cache line doesn't go through
				 * a possible shared state, but instead directly to exclusive.
				 * On Opterons at least, there seems to be a difference, c.f.
				 * the comment above tas() for x86_64 in s_lock.h
				 */
				if (retry && !lock->releaseOK)
					lock->releaseOK = true;
					
				goto lock_acquired;
			}
				
			/* At this point, we don't know if the concurrent exclusive locker
			 * has proceeded to (3b) or blocked. We must take the mutex and
			 * re-check
			 */
#endif /* LWLOCK_PART_SHARED_OPS_ATOMIC */
			
			/* Acquire mutex.  Time spent holding mutex should be short! */
			SpinLockAcquire(&lock->mutex);
			
			if (lock->exclusive == 0)
			{
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
				/* Already incremented the shared counter partition above */
#else
				lock->shared++;
#endif
				mustwait = false;
			}
			else
			{
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
				/* Must undo shared counter partition increment. Note that
				 * we *need* to do that while holding the mutex. Otherwise,
				 * the exclusive lock could be released and attempted to be
				 * re-acquired before we undo the increment. That attempt
				 * would then block, even though there'd be no lock holder
				 * left
				 */
				LWLOCK_PART_SHARED_POSTDEC_ATOMIC(lock, lockid, part, MyBackendId);
#endif
				mustwait = true;
			}
		}
		else
		{
			/* Step (1). Acquire mutex. Time spent holding mutex should be
			 *                          short!
			 */
			SpinLockAcquire(&lock->mutex);
			
			if (lock->exclusive == 0)
			{
				/* Step (2). Check for shared lockers. This surely happens
				 * after (1), otherwise SpinLockAcquire() is broken. Lock
				 * acquire semantics demand that no load must be re-ordered
				 * from after a lock acquisition to before, for obvious
				 * reasons.
				 */

				LWLOCK_IS_SHARED(mustwait, lock, lockid);
				
				if (!mustwait) {
					/* Step (3a). Set exclusive flag. This surely happens
					 * after (2) because it depends on the result of (2),
					 * no matter how much reordering is going on here.
					 */
					lock->exclusive++;
				}
			}
			else
				mustwait = true;
		}
		
		/* If retrying, allow LWLockRelease to release waiters again.
		 * This is also separately done in the LW_SHARED early exit case
		 * above, and in contrast to there we don't hold the mutex there.
		 * See the comment there for why this is safe
		 */
		if (retry)
			lock->releaseOK = true;
		
		if (!mustwait)
			break;				/* got the lock */
			
		/*
		 * Step (3b). Add myself to wait queue.
		 *
		 * If we don't have a PGPROC structure, there's no way to wait. This
		 * should never occur, since MyProc should only be null during shared
		 * memory initialization.
		 */
		if (proc == NULL)
			elog(PANIC, "cannot wait without a PGPROC structure");

		proc->lwWaiting = true;
		proc->lwExclusive = (mode == LW_EXCLUSIVE);
		proc->lwWaitLink = NULL;
		if (lock->head == NULL)
			lock->head = proc;
		else
			lock->tail->lwWaitLink = proc;
		lock->tail = proc;

		/* Can release the mutex now */
		SpinLockRelease(&lock->mutex);

		/*
		 * Wait until awakened.
		 *
		 * Since we share the process wait semaphore with the regular lock
		 * manager and ProcWaitForSignal, and we may need to acquire an LWLock
		 * while one of those is pending, it is possible that we get awakened
		 * for a reason other than being signaled by LWLockRelease. If so,
		 * loop back and wait again.  Once we've gotten the LWLock,
		 * re-increment the sema by the number of additional signals received,
		 * so that the lock manager or signal manager will see the received
		 * signal when it next waits.
		 */
		LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");

#ifdef LWLOCK_STATS
		block_counts[lockid]++;
#endif

		TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);

		for (;;)
		{
			/* "false" means cannot accept cancel/die interrupt here. */
			PGSemaphoreLock(&proc->sem, false);
			if (!proc->lwWaiting)
				break;
			extraWaits++;
		}

		TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);

		LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");

		/* Now loop back and try to acquire lock again. */
		retry = true;
	}

	/* We are done updating shared state of the lock itself. */
	SpinLockRelease(&lock->mutex);
	
	/* Step 4. Enter protected section. This surely happens after (3),
	 * this time because lock release semantics demand that no store
	 * must be moved from before a lock release to after the release,
	 * again for obvious reasons
	 */

#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
lock_acquired:
#endif

	TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);

	/* Add lock to list of locks held by this backend */
	held_lwlocks[num_held_lwlocks] = lockid;
	held_lwlocks_mode[num_held_lwlocks] = mode;
	++num_held_lwlocks;

	/*
	 * Fix the process wait semaphore's count for any absorbed wakeups.
	 */
	while (extraWaits-- > 0)
		PGSemaphoreUnlock(&proc->sem);
}
Beispiel #24
0
/*
 * shmem_startup hook: allocate or attach to shared memory,
 * then load any pre-existing statistics from file.
 */
static void
pgss_shmem_startup(void)
{
	bool		found;
	HASHCTL		info;
	FILE	   *file;
	uint32		header;
	int32		num;
	int32		i;
	int			query_size;
	int			buffer_size;
	char	   *buffer = NULL;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	/* reset in case this is a restart within the postmaster */
	pgss = NULL;
	pgss_hash = NULL;

	/*
	 * Create or attach to the shared memory state, including hash table
	 */
	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

	pgss = ShmemInitStruct("pg_stat_statements",
						   sizeof(pgssSharedState),
						   &found);

	if (!found)
	{
		/* First time through ... */
		pgss->lock = LWLockAssign();
		pgss->query_size = pgstat_track_activity_query_size;
	}

	/* Be sure everyone agrees on the hash table entry size */
	query_size = pgss->query_size;

	memset(&info, 0, sizeof(info));
	info.keysize = sizeof(pgssHashKey);
	info.entrysize = offsetof(pgssEntry, query) +query_size;
	info.hash = pgss_hash_fn;
	info.match = pgss_match_fn;
	pgss_hash = ShmemInitHash("pg_stat_statements hash",
							  pgss_max, pgss_max,
							  &info,
							  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);

	LWLockRelease(AddinShmemInitLock);

	/*
	 * If we're in the postmaster (or a standalone backend...), set up a shmem
	 * exit hook to dump the statistics to disk.
	 */
	if (!IsUnderPostmaster)
		on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);

	/*
	 * Attempt to load old statistics from the dump file, if this is the first
	 * time through and we weren't told not to.
	 */
	if (found || !pgss_save)
		return;

	/*
	 * Note: we don't bother with locks here, because there should be no other
	 * processes running when this code is reached.
	 */
	file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
	if (file == NULL)
	{
		if (errno == ENOENT)
			return;				/* ignore not-found error */
		goto error;
	}

	buffer_size = query_size;
	buffer = (char *) palloc(buffer_size);

	if (fread(&header, sizeof(uint32), 1, file) != 1 ||
		header != PGSS_FILE_HEADER ||
		fread(&num, sizeof(int32), 1, file) != 1)
		goto error;

	for (i = 0; i < num; i++)
	{
		pgssEntry	temp;
		pgssEntry  *entry;

		if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
			goto error;

		/* Encoding is the only field we can easily sanity-check */
		if (!PG_VALID_BE_ENCODING(temp.key.encoding))
			goto error;

		/* Previous incarnation might have had a larger query_size */
		if (temp.key.query_len >= buffer_size)
		{
			buffer = (char *) repalloc(buffer, temp.key.query_len + 1);
			buffer_size = temp.key.query_len + 1;
		}

		if (fread(buffer, 1, temp.key.query_len, file) != temp.key.query_len)
			goto error;
		buffer[temp.key.query_len] = '\0';

		/* Clip to available length if needed */
		if (temp.key.query_len >= query_size)
			temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding,
													   buffer,
													   temp.key.query_len,
													   query_size - 1);
		temp.key.query_ptr = buffer;

		/* make the hashtable entry (discards old entries if too many) */
		entry = entry_alloc(&temp.key);

		/* copy in the actual stats */
		entry->counters = temp.counters;
	}

	pfree(buffer);
	FreeFile(file);
	return;

error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not read pg_stat_statement file \"%s\": %m",
					PGSS_DUMP_FILE)));
	if (buffer)
		pfree(buffer);
	if (file)
		FreeFile(file);
	/* If possible, throw away the bogus file; ignore any error */
	unlink(PGSS_DUMP_FILE);
}
/* --------------------------------
 * InitPostgres
 *		Initialize POSTGRES.
 *
 * The database can be specified by name, using the in_dbname parameter, or by
 * OID, using the dboid parameter.	In the latter case, the actual database
 * name can be returned to the caller in out_dbname.  If out_dbname isn't
 * NULL, it must point to a buffer of size NAMEDATALEN.
 *
 * In bootstrap mode no parameters are used.  The autovacuum launcher process
 * doesn't use any parameters either, because it only goes far enough to be
 * able to read pg_database; it doesn't connect to any particular database.
 * In walsender mode only username is used.
 *
 * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
 * already have a PGPROC struct ... but it's not completely filled in yet.
 *
 * Note:
 *		Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
void
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
			 char *out_dbname)
{
	bool		bootstrap = IsBootstrapProcessingMode();
	bool		am_superuser;
	GucContext	gucctx;
	char	   *fullpath;
	char		dbname[NAMEDATALEN];

	elog(DEBUG3, "InitPostgres");

	/*
	 * Add my PGPROC struct to the ProcArray.
	 *
	 * Once I have done this, I am visible to other backends!
	 */
	InitProcessPhase2();

	/*
	 * Initialize my entry in the shared-invalidation manager's array of
	 * per-backend data.
	 *
	 * Sets up MyBackendId, a unique backend identifier.
	 */
	MyBackendId = InvalidBackendId;

	SharedInvalBackendInit(false);

	if (MyBackendId > MaxBackends || MyBackendId <= 0)
		elog(FATAL, "bad backend id: %d", MyBackendId);

	/* Now that we have a BackendId, we can participate in ProcSignal */
	ProcSignalInit(MyBackendId);

	/*
	 * bufmgr needs another initialization call too
	 */
	InitBufferPoolBackend();

	/*
	 * Initialize local process's access to XLOG, if appropriate.  In
	 * bootstrap case we skip this since StartupXLOG() was run instead.
	 */
	if (!bootstrap)
		(void) RecoveryInProgress();

	/*
	 * Initialize the relation cache and the system catalog caches.  Note that
	 * no catalog access happens here; we only set up the hashtable structure.
	 * We must do this before starting a transaction because transaction abort
	 * would try to touch these hashtables.
	 */
	RelationCacheInitialize();
	InitCatalogCache();
	InitPlanCache();

	/* Initialize portal manager */
	EnablePortalManager();

	/* Initialize stats collection --- must happen before first xact */
	if (!bootstrap)
		pgstat_initialize();

	/*
	 * Load relcache entries for the shared system catalogs.  This must create
	 * at least an entry for pg_database.
	 */
	RelationCacheInitializePhase2();

	/*
	 * Set up process-exit callback to do pre-shutdown cleanup.  This has to
	 * be after we've initialized all the low-level modules like the buffer
	 * manager, because during shutdown this has to run before the low-level
	 * modules start to close down.  On the other hand, we want it in place
	 * before we begin our first transaction --- if we fail during the
	 * initialization transaction, as is entirely possible, we need the
	 * AbortTransaction call to clean up.
	 */
	on_shmem_exit(ShutdownPostgres, 0);

	/* The autovacuum launcher is done here */
	if (IsAutoVacuumLauncherProcess())
		return;

	/*
	 * Start a new transaction here before first access to db, and get a
	 * snapshot.  We don't have a use for the snapshot itself, but we're
	 * interested in the secondary effect that it sets RecentGlobalXmin. (This
	 * is critical for anything that reads heap pages, because HOT may decide
	 * to prune them even if the process doesn't attempt to modify any
	 * tuples.)
	 */
	if (!bootstrap)
	{
		StartTransactionCommand();
		(void) GetTransactionSnapshot();
	}

	/*
	 * Set up the global variables holding database id and default tablespace.
	 * But note we won't actually try to touch the database just yet.
	 *
	 * We take a shortcut in the bootstrap and walsender case, otherwise we
	 * have to look up the db's entry in pg_database.
	 */
	if (bootstrap || am_walsender)
	{
		MyDatabaseId = TemplateDbOid;
		MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
	}
	else if (in_dbname != NULL)
	{
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTuple(in_dbname);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", in_dbname)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		/* take database name from the caller, just for paranoia */
		strlcpy(dbname, in_dbname, sizeof(dbname));
	}
	else
	{
		/* caller specified database by OID */
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTupleByOid(dboid);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database %u does not exist", dboid)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		Assert(MyDatabaseId == dboid);
		strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));
		/* pass the database name back to the caller */
		if (out_dbname)
			strcpy(out_dbname, dbname);
	}

	/* Now we can mark our PGPROC entry with the database ID */
	/* (We assume this is an atomic store so no lock is needed) */
	MyProc->databaseId = MyDatabaseId;

	/*
	 * Now, take a writer's lock on the database we are trying to connect to.
	 * If there is a concurrently running DROP DATABASE on that database, this
	 * will block us until it finishes (and has committed its update of
	 * pg_database).
	 *
	 * Note that the lock is not held long, only until the end of this startup
	 * transaction.  This is OK since we are already advertising our use of
	 * the database in the PGPROC array; anyone trying a DROP DATABASE after
	 * this point will see us there.
	 *
	 * Note: use of RowExclusiveLock here is reasonable because we envision
	 * our session as being a concurrent writer of the database.  If we had a
	 * way of declaring a session as being guaranteed-read-only, we could use
	 * AccessShareLock for such sessions and thereby not conflict against
	 * CREATE DATABASE.
	 */
	if (!bootstrap && !am_walsender)
		LockSharedObject(DatabaseRelationId, MyDatabaseId, 0,
						 RowExclusiveLock);

	/*
	 * Recheck pg_database to make sure the target database hasn't gone away.
	 * If there was a concurrent DROP DATABASE, this ensures we will die
	 * cleanly without creating a mess.
	 */
	if (!bootstrap && !am_walsender)
	{
		HeapTuple	tuple;

		tuple = GetDatabaseTuple(dbname);
		if (!HeapTupleIsValid(tuple) ||
			MyDatabaseId != HeapTupleGetOid(tuple) ||
			MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", dbname),
			   errdetail("It seems to have just been dropped or renamed.")));
	}

	/*
	 * Now we should be able to access the database directory safely. Verify
	 * it's there and looks reasonable.
	 */
	fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);

	if (!bootstrap && !am_walsender)
	{
		if (access(fullpath, F_OK) == -1)
		{
			if (errno == ENOENT)
				ereport(FATAL,
						(errcode(ERRCODE_UNDEFINED_DATABASE),
						 errmsg("database \"%s\" does not exist",
								dbname),
					errdetail("The database subdirectory \"%s\" is missing.",
							  fullpath)));
			else
				ereport(FATAL,
						(errcode_for_file_access(),
						 errmsg("could not access directory \"%s\": %m",
								fullpath)));
		}

		ValidatePgVersion(fullpath);
	}

	SetDatabasePath(fullpath);

	/*
	 * It's now possible to do real access to the system catalogs.
	 *
	 * Load relcache entries for the system catalogs.  This must create at
	 * least the minimum set of "nailed-in" cache entries.
	 */
	RelationCacheInitializePhase3();

	/*
	 * Perform client authentication if necessary, then figure out our
	 * postgres user ID, and see if we are a superuser.
	 *
	 * In standalone mode and in autovacuum worker processes, we use a fixed
	 * ID, otherwise we figure it out from the authenticated user name.
	 */
	if (bootstrap || IsAutoVacuumWorkerProcess())
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
	}
	else if (!IsUnderPostmaster)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
		if (!ThereIsAtLeastOneRole())
			ereport(WARNING,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("no roles are defined in this database system"),
					 errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.",
							 username)));
	}
	else
	{
		/* normal multiuser case */
		Assert(MyProcPort != NULL);
		PerformAuthentication(MyProcPort);
		InitializeSessionUserId(username);
		am_superuser = superuser();
	}

	/* set up ACL framework (so CheckMyDatabase can check permissions) */
	initialize_acl();

	/* Process pg_db_role_setting options */
	process_settings(MyDatabaseId, GetSessionUserId());

	/*
	 * Re-read the pg_database row for our database, check permissions and set
	 * up database-specific GUC settings.  We can't do this until all the
	 * database-access infrastructure is up.  (Also, it wants to know if the
	 * user is a superuser, so the above stuff has to happen first.)
	 */
	if (!bootstrap && !am_walsender)
		CheckMyDatabase(dbname, am_superuser);

	/*
	 * If we're trying to shut down, only superusers can connect.
	 */
	if (!am_superuser &&
		MyProcPort != NULL &&
		MyProcPort->canAcceptConnections == CAC_WAITBACKUP)
		ereport(FATAL,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
		   errmsg("must be superuser to connect during database shutdown")));

	/*
	 * Check a normal user hasn't connected to a superuser reserved slot.
	 */
	if (!am_superuser &&
		ReservedBackends > 0 &&
		!HaveNFreeProcs(ReservedBackends))
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("connection limit exceeded for non-superusers")));

	/*
	 * Now process any command-line switches that were included in the startup
	 * packet, if we are in a regular backend.	We couldn't do this before
	 * because we didn't know if client is a superuser.
	 */
	gucctx = am_superuser ? PGC_SUSET : PGC_BACKEND;

	if (MyProcPort != NULL &&
		MyProcPort->cmdline_options != NULL)
	{
		/*
		 * The maximum possible number of commandline arguments that could
		 * come from MyProcPort->cmdline_options is (strlen + 1) / 2; see
		 * pg_split_opts().
		 */
		char	  **av;
		int			maxac;
		int			ac;

		maxac = 2 + (strlen(MyProcPort->cmdline_options) + 1) / 2;

		av = (char **) palloc(maxac * sizeof(char *));
		ac = 0;

		av[ac++] = "postgres";

		/* Note this mangles MyProcPort->cmdline_options */
		pg_split_opts(av, &ac, MyProcPort->cmdline_options);

		av[ac] = NULL;

		Assert(ac < maxac);

		(void) process_postgres_switches(ac, av, gucctx);
	}

	/*
	 * Process any additional GUC variable settings passed in startup packet.
	 * These are handled exactly like command-line variables.
	 */
	if (MyProcPort != NULL)
	{
		ListCell   *gucopts = list_head(MyProcPort->guc_options);

		while (gucopts)
		{
			char	   *name;
			char	   *value;

			name = lfirst(gucopts);
			gucopts = lnext(gucopts);

			value = lfirst(gucopts);
			gucopts = lnext(gucopts);

			SetConfigOption(name, value, gucctx, PGC_S_CLIENT);
		}
	}

	/* Apply PostAuthDelay as soon as we've read all options */
	if (PostAuthDelay > 0)
		pg_usleep(PostAuthDelay * 1000000L);

	/*
	 * Initialize various default states that can't be set up until we've
	 * selected the active user and gotten the right GUC settings.
	 */

	/* set default namespace search path */
	InitializeSearchPath();

	/* initialize client encoding */
	InitializeClientEncoding();

	/* reset the database for walsender */
	if (am_walsender)
		MyProc->databaseId = MyDatabaseId = InvalidOid;

	/* report this backend in the PgBackendStatus array */
	if (!bootstrap)
		pgstat_bestart();

	/* close the transaction we started above */
	if (!bootstrap)
		CommitTransactionCommand();
}
Beispiel #26
0
/*
 * PGSharedMemoryCreate
 *
 * Create a shared memory segment of the given size and initialize its
 * standard header.  Also, register an on_shmem_exit callback to release
 * the storage.
 *
 * Dead Postgres segments are recycled if found, but we do not fail upon
 * collision with non-Postgres shmem segments.  The idea here is to detect and
 * re-use keys that may have been assigned by a crashed postmaster or backend.
 *
 * makePrivate means to always create a new segment, rather than attach to
 * or recycle any existing segment.
 *
 * The port number is passed for possible use as a key (for SysV, we use
 * it to generate the starting shmem key).  In a standalone backend,
 * zero will be passed.
 */
PGShmemHeader *
PGSharedMemoryCreate(Size size, bool makePrivate, int port,
					 PGShmemHeader **shim)
{
	IpcMemoryKey NextShmemSegID;
	void	   *memAddress;
	PGShmemHeader *hdr;
	IpcMemoryId shmid;
	struct stat statbuf;
	Size		sysvsize;

	/* Complain if hugepages demanded but we can't possibly support them */
#if !defined(USE_ANONYMOUS_SHMEM) || !defined(MAP_HUGETLB)
	if (huge_pages == HUGE_PAGES_ON)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("huge pages not supported on this platform")));
#endif

	/* Room for a header? */
	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));

#ifdef USE_ANONYMOUS_SHMEM
	AnonymousShmem = CreateAnonymousSegment(&size);
	AnonymousShmemSize = size;

	/* Register on-exit routine to unmap the anonymous segment */
	on_shmem_exit(AnonymousShmemDetach, (Datum) 0);

	/* Now we need only allocate a minimal-sized SysV shmem block. */
	sysvsize = sizeof(PGShmemHeader);
#else
	sysvsize = size;
#endif

	/* Make sure PGSharedMemoryAttach doesn't fail without need */
	UsedShmemSegAddr = NULL;

	/* Loop till we find a free IPC key */
	NextShmemSegID = port * 1000;

	for (NextShmemSegID++;; NextShmemSegID++)
	{
		/* Try to create new segment */
		memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
		if (memAddress)
			break;				/* successful create and attach */

		/* Check shared memory and possibly remove and recreate */

		if (makePrivate)		/* a standalone backend shouldn't do this */
			continue;

		if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL)
			continue;			/* can't attach, not one of mine */

		/*
		 * If I am not the creator and it belongs to an extant process,
		 * continue.
		 */
		hdr = (PGShmemHeader *) memAddress;
		if (hdr->creatorPID != getpid())
		{
			if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH)
			{
				shmdt(memAddress);
				continue;		/* segment belongs to a live process */
			}
		}

		/*
		 * The segment appears to be from a dead Postgres process, or from a
		 * previous cycle of life in this same process.  Zap it, if possible,
		 * and any associated dynamic shared memory segments, as well. This
		 * probably shouldn't fail, but if it does, assume the segment belongs
		 * to someone else after all, and continue quietly.
		 */
		if (hdr->dsm_control != 0)
			dsm_cleanup_using_control_segment(hdr->dsm_control);
		shmdt(memAddress);
		if (shmctl(shmid, IPC_RMID, NULL) < 0)
			continue;

		/*
		 * Now try again to create the segment.
		 */
		memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
		if (memAddress)
			break;				/* successful create and attach */

		/*
		 * Can only get here if some other process managed to create the same
		 * shmem key before we did.  Let him have that one, loop around to try
		 * next key.
		 */
	}

	/*
	 * OK, we created a new segment.  Mark it as created by this process. The
	 * order of assignments here is critical so that another Postgres process
	 * can't see the header as valid but belonging to an invalid PID!
	 */
	hdr = (PGShmemHeader *) memAddress;
	hdr->creatorPID = getpid();
	hdr->magic = PGShmemMagic;
	hdr->dsm_control = 0;

	/* Fill in the data directory ID info, too */
	if (stat(DataDir, &statbuf) < 0)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg("could not stat data directory \"%s\": %m",
						DataDir)));
	hdr->device = statbuf.st_dev;
	hdr->inode = statbuf.st_ino;

	/*
	 * Initialize space allocation status for segment.
	 */
	hdr->totalsize = size;
	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
	*shim = hdr;

	/* Save info for possible future use */
	UsedShmemSegAddr = memAddress;
	UsedShmemSegID = (unsigned long) NextShmemSegID;

	/*
	 * If AnonymousShmem is NULL here, then we're not using anonymous shared
	 * memory, and should return a pointer to the System V shared memory
	 * block. Otherwise, the System V shared memory block is only a shim, and
	 * we must return a pointer to the real block.
	 */
#ifdef USE_ANONYMOUS_SHMEM
	if (AnonymousShmem == NULL)
		return hdr;
	memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
	return (PGShmemHeader *) AnonymousShmem;
#else
	return hdr;
#endif
}
Beispiel #27
0
/*
 * InitAuxiliaryProcess -- create a per-auxiliary-process data structure
 *
 * This is called by bgwriter and similar processes so that they will have a
 * MyProc value that's real enough to let them wait for LWLocks.  The PGPROC
 * and sema that are assigned are one of the extra ones created during
 * InitProcGlobal.
 *
 * Auxiliary processes are presently not expected to wait for real (lockmgr)
 * locks, so we need not set up the deadlock checker.  They are never added
 * to the ProcArray or the sinval messaging mechanism, either.  They also
 * don't get a VXID assigned, since this is only useful when we actually
 * hold lockmgr locks.
 *
 * Startup process however uses locks but never waits for them in the
 * normal backend sense. Startup process also takes part in sinval messaging
 * as a sendOnly process, so never reads messages from sinval queue. So
 * Startup process does have a VXID and does show up in pg_locks.
 */
void
InitAuxiliaryProcess(void)
{
	PGPROC	   *auxproc;
	int			proctype;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (ProcGlobal == NULL || AuxiliaryProcs == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/*
	 * We use the ProcStructLock to protect assignment and releasing of
	 * AuxiliaryProcs entries.
	 *
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(ProcGlobal->spins_per_delay);

	/*
	 * Find a free auxproc ... *big* trouble if there isn't one ...
	 */
	for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++)
	{
		auxproc = &AuxiliaryProcs[proctype];
		if (auxproc->pid == 0)
			break;
	}
	if (proctype >= NUM_AUXILIARY_PROCS)
	{
		SpinLockRelease(ProcStructLock);
		elog(FATAL, "all AuxiliaryProcs are in use");
	}

	/* Mark auxiliary proc as in use by me */
	/* use volatile pointer to prevent code rearrangement */
	((volatile PGPROC *) auxproc)->pid = MyProcPid;

	MyProc = auxproc;
	MyPgXact = &ProcGlobal->allPgXact[auxproc->pgprocno];

	SpinLockRelease(ProcStructLock);

	/*
	 * Initialize all fields of MyProc, except for those previously
	 * initialized by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->lxid = InvalidLocalTransactionId;
	MyProc->fpVXIDLock = false;
	MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
	MyPgXact->xid = InvalidTransactionId;
	MyPgXact->xmin = InvalidTransactionId;
	MyProc->backendId = InvalidBackendId;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyPgXact->delayChkpt = false;
	MyPgXact->vacuumFlags = 0;
	MyProc->lwWaiting = false;
	MyProc->lwWaitMode = 0;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
	 * on it.  That allows us to repoint the process latch, which so far
	 * points to process local one, to the shared one.
	 */
	OwnLatch(&MyProc->procLatch);
	SwitchToSharedLatch();

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.  (This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	/*
	 * Arrange to clean up at process exit.
	 */
	on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype));
}
Beispiel #28
0
/*
 * InitProcess -- initialize a per-process data structure for this backend
 */
void
InitProcess(void)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile PROC_HDR *procglobal = ProcGlobal;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (procglobal == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/*
	 * Try to get a proc struct from the free list.  If this fails, we must be
	 * out of PGPROC structures (not to mention semaphores).
	 *
	 * While we are holding the ProcStructLock, also copy the current shared
	 * estimate of spins_per_delay to local storage.
	 */
	SpinLockAcquire(ProcStructLock);

	set_spins_per_delay(procglobal->spins_per_delay);

	if (IsAnyAutoVacuumProcess())
		MyProc = procglobal->autovacFreeProcs;
	else if (IsBackgroundWorker)
		MyProc = procglobal->bgworkerFreeProcs;
	else
		MyProc = procglobal->freeProcs;

	if (MyProc != NULL)
	{
		if (IsAnyAutoVacuumProcess())
			procglobal->autovacFreeProcs = (PGPROC *) MyProc->links.next;
		else if (IsBackgroundWorker)
			procglobal->bgworkerFreeProcs = (PGPROC *) MyProc->links.next;
		else
			procglobal->freeProcs = (PGPROC *) MyProc->links.next;
		SpinLockRelease(ProcStructLock);
	}
	else
	{
		/*
		 * If we reach here, all the PGPROCs are in use.  This is one of the
		 * possible places to detect "too many backends", so give the standard
		 * error message.  XXX do we need to give a different failure message
		 * in the autovacuum case?
		 */
		SpinLockRelease(ProcStructLock);
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("sorry, too many clients already")));
	}
	MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno];

	/*
	 * Now that we have a PGPROC, mark ourselves as an active postmaster
	 * child; this is so that the postmaster can detect it if we exit without
	 * cleaning up.  (XXX autovac launcher currently doesn't participate in
	 * this; it probably should.)
	 */
	if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess())
		MarkPostmasterChildActive();

	/*
	 * Initialize all fields of MyProc, except for those previously
	 * initialized by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->lxid = InvalidLocalTransactionId;
	MyProc->fpVXIDLock = false;
	MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
	MyPgXact->xid = InvalidTransactionId;
	MyPgXact->xmin = InvalidTransactionId;
	MyProc->pid = MyProcPid;
	/* backendId, databaseId and roleId will be filled in later */
	MyProc->backendId = InvalidBackendId;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyPgXact->delayChkpt = false;
	MyPgXact->vacuumFlags = 0;
	/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
	if (IsAutoVacuumWorkerProcess())
		MyPgXact->vacuumFlags |= PROC_IS_AUTOVACUUM;
	MyProc->lwWaiting = false;
	MyProc->lwWaitMode = 0;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
#ifdef USE_ASSERT_CHECKING
	{
		int			i;

		/* Last process should have released all locks. */
		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
			Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i])));
	}
#endif
	MyProc->recoveryConflictPending = false;

	/* Initialize fields for sync rep */
	MyProc->waitLSN = 0;
	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
	SHMQueueElemInit(&(MyProc->syncRepLinks));

	/*
	 * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
	 * on it.  That allows us to repoint the process latch, which so far
	 * points to process local one, to the shared one.
	 */
	OwnLatch(&MyProc->procLatch);
	SwitchToSharedLatch();

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.  (This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	/*
	 * Arrange to clean up at backend exit.
	 */
	on_shmem_exit(ProcKill, 0);

	/*
	 * Now that we have a PGPROC, we could try to acquire locks, so initialize
	 * local state needed for LWLocks, and the deadlock checker.
	 */
	InitLWLockAccess();
	InitDeadLockChecking();
}
Beispiel #29
0
/*
 *	InternalIpcMemoryCreate(memKey, size)
 *
 * Attempt to create a new shared memory segment with the specified key.
 * Will fail (return NULL) if such a segment already exists.  If successful,
 * attach the segment to the current process and return its attached address.
 * On success, callbacks are registered with on_shmem_exit to detach and
 * delete the segment when on_shmem_exit is called.
 *
 * If we fail with a failure code other than collision-with-existing-segment,
 * print out an error and abort.  Other types of errors are not recoverable.
 */
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
{
	IpcMemoryId shmid;
	void	   *memAddress;

	shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);

	if (shmid < 0)
	{
		int			shmget_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing segment.
		 * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
		 * we could get a permission violation instead?  Also, EIDRM might
		 * occur if an old seg is slated for destruction but not gone yet.
		 */
		if (shmget_errno == EEXIST || shmget_errno == EACCES
#ifdef EIDRM
			|| shmget_errno == EIDRM
#endif
			)
			return NULL;

		/*
		 * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
		 * there is an existing segment but it's smaller than "size" (this is
		 * a result of poorly-thought-out ordering of error tests). To
		 * distinguish between collision and invalid size in such cases, we
		 * make a second try with size = 0.  These kernels do not test size
		 * against SHMMIN in the preexisting-segment case, so we will not get
		 * EINVAL a second time if there is such a segment.
		 */
		if (shmget_errno == EINVAL)
		{
			shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);

			if (shmid < 0)
			{
				/* As above, fail quietly if we verify a collision */
				if (errno == EEXIST || errno == EACCES
#ifdef EIDRM
					|| errno == EIDRM
#endif
					)
					return NULL;
				/* Otherwise, fall through to report the original error */
			}
			else
			{
				/*
				 * On most platforms we cannot get here because SHMMIN is
				 * greater than zero.  However, if we do succeed in creating a
				 * zero-size segment, free it and then fall through to report
				 * the original error.
				 */
				if (shmctl(shmid, IPC_RMID, NULL) < 0)
					elog(LOG, "shmctl(%d, %d, 0) failed: %m",
						 (int) shmid, IPC_RMID);
			}
		}

		/*
		 * Else complain and abort.
		 *
		 * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
		 * is violated.  SHMALL violation might be reported as either ENOMEM
		 * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
		 * it should be.  SHMMNI violation is ENOSPC, per spec.  Just plain
		 * not-enough-RAM is ENOMEM.
		 */
		errno = shmget_errno;
		ereport(FATAL,
				(errmsg("could not create shared memory segment: %m"),
		  errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
					(unsigned long) memKey, size,
					IPC_CREAT | IPC_EXCL | IPCProtection),
				 (shmget_errno == EINVAL) ?
				 errhint("This error usually means that PostgreSQL's request for a shared memory "
		 "segment exceeded your kernel's SHMMAX parameter, or possibly that "
						 "it is less than "
						 "your kernel's SHMMIN parameter.\n"
		"The PostgreSQL documentation contains more information about shared "
						 "memory configuration.") : 0,
				 (shmget_errno == ENOMEM) ?
				 errhint("This error usually means that PostgreSQL's request for a shared "
						 "memory segment exceeded your kernel's SHMALL parameter.  You might need "
						 "to reconfigure the kernel with larger SHMALL.\n"
		"The PostgreSQL documentation contains more information about shared "
						 "memory configuration.") : 0,
				 (shmget_errno == ENOSPC) ?
				 errhint("This error does *not* mean that you have run out of disk space.  "
						 "It occurs either if all available shared memory IDs have been taken, "
						 "in which case you need to raise the SHMMNI parameter in your kernel, "
		  "or because the system's overall limit for shared memory has been "
						 "reached.\n"
		"The PostgreSQL documentation contains more information about shared "
						 "memory configuration.") : 0));
	}

	/* Register on-exit routine to delete the new segment */
	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));

	/* OK, should be able to attach to the segment */
	memAddress = shmat(shmid, NULL, PG_SHMAT_FLAGS);

	if (memAddress == (void *) -1)
		elog(FATAL, "shmat(id=%d) failed: %m", shmid);

	/* Register on-exit routine to detach new segment before deleting */
	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

	/*
	 * Store shmem key and ID in data directory lockfile.  Format to try to
	 * keep it the same length always (trailing junk in the lockfile won't
	 * hurt, but might confuse humans).
	 */
	{
		char		line[64];

		sprintf(line, "%9lu %9lu",
				(unsigned long) memKey, (unsigned long) shmid);
		AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line);
	}

	return memAddress;
}
Beispiel #30
0
/* --------------------------------
 * InitPostgres
 *		Initialize POSTGRES.
 *
 * The database can be specified by name, using the in_dbname parameter, or by
 * OID, using the dboid parameter.	In the latter case, the actual database
 * name can be returned to the caller in out_dbname.  If out_dbname isn't
 * NULL, it must point to a buffer of size NAMEDATALEN.
 *
 * In bootstrap mode no parameters are used.
 *
 * The return value indicates whether the userID is a superuser.  (That
 * can only be tested inside a transaction, so we want to do it during
 * the startup transaction rather than doing a separate one in postgres.c.)
 *
 * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
 * already have a PGPROC struct ... but it's not filled in yet.
 *
 * Note:
 *		Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
void
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
			 char *out_dbname)
{
	bool		bootstrap = IsBootstrapProcessingMode();
	bool		autovacuum = IsAutoVacuumProcess();
	bool		am_superuser;
	char	   *fullpath;
	char		dbname[NAMEDATALEN];

	/*
	 * Add my PGPROC struct to the ProcArray.
	 *
	 * Once I have done this, I am visible to other backends!
	 */
	InitProcessPhase2();

	/* Initialize SessionState entry */
	SessionState_Init();
	/* Initialize memory protection */
	GPMemoryProtect_Init();

	/*
	 * Initialize my entry in the shared-invalidation manager's array of
	 * per-backend data.
	 *
	 * Sets up MyBackendId, a unique backend identifier.
	 */
	MyBackendId = InvalidBackendId;

	SharedInvalBackendInit(false);

	if (MyBackendId > MaxBackends || MyBackendId <= 0)
		elog(FATAL, "bad backend id: %d", MyBackendId);

	/* Now that we have a BackendId, we can participate in ProcSignal */
	ProcSignalInit(MyBackendId);

	/*
	 * bufmgr needs another initialization call too
	 */
	InitBufferPoolBackend();

	/*
	 * Initialize local process's access to XLOG.  In bootstrap case we may
	 * skip this since StartupXLOG() was run instead.
	 */
	if (!bootstrap)
		InitXLOGAccess();

	/*
	 * Initialize the relation cache and the system catalog caches.  Note that
	 * no catalog access happens here; we only set up the hashtable structure.
	 * We must do this before starting a transaction because transaction abort
	 * would try to touch these hashtables.
	 */
	RelationCacheInitialize();
	InitCatalogCache();

	/* Initialize portal manager */
	EnablePortalManager();

	/* Initialize stats collection --- must happen before first xact */
	if (!bootstrap)
		pgstat_initialize();

	/*
	 * Load relcache entries for the shared system catalogs.  This must create
	 * at least entries for pg_database and catalogs used for authentication.
	 */
	RelationCacheInitializePhase2();

	/*
	 * Set up process-exit callback to do pre-shutdown cleanup.  This has to
	 * be after we've initialized all the low-level modules like the buffer
	 * manager, because during shutdown this has to run before the low-level
	 * modules start to close down.  On the other hand, we want it in place
	 * before we begin our first transaction --- if we fail during the
	 * initialization transaction, as is entirely possible, we need the
	 * AbortTransaction call to clean up.
	 */
	on_shmem_exit(ShutdownPostgres, 0);

	/* TODO: autovacuum launcher should be done here? */

	/*
	 * Start a new transaction here before first access to db, and get a
	 * snapshot.  We don't have a use for the snapshot itself, but we're
	 * interested in the secondary effect that it sets RecentGlobalXmin.
	 */
	if (!bootstrap)
	{
		StartTransactionCommand();
		(void) GetTransactionSnapshot();
	}

	/*
	 * Figure out our postgres user id, and see if we are a superuser.
	 *
	 * In standalone mode and in the autovacuum process, we use a fixed id,
	 * otherwise we figure it out from the authenticated user name.
	 */
	if (bootstrap || autovacuum)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
	}
	else if (!IsUnderPostmaster)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
		if (!ThereIsAtLeastOneRole())
			ereport(WARNING,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("no roles are defined in this database system"),
					 errhint("You should immediately run CREATE USER \"%s\" CREATEUSER;.",
							 username)));
	}
	else
	{
		/* normal multiuser case */
		Assert(MyProcPort != NULL);
		PerformAuthentication(MyProcPort);
		InitializeSessionUserId(username);
		am_superuser = superuser();
	}

	/*
	 * Check a normal user hasn't connected to a superuser reserved slot.
	 */
	if (!am_superuser &&
		ReservedBackends > 0 &&
		!HaveNFreeProcs(ReservedBackends))
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("connection limit exceeded for non-superusers"),
				 errSendAlert(true)));

	/*
	 * If walsender, we don't want to connect to any particular database. Just
	 * finish the backend startup by processing any options from the startup
	 * packet, and we're done.
	 */
	if (am_walsender)
	{
		Assert(!bootstrap);

		/*
		 * We don't have replication role, which existed in postgres.
		 */
		if (!superuser())
			ereport(FATAL,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("must be superuser role to start walsender")));

		/* process any options passed in the startup packet */
		if (MyProcPort != NULL)
			process_startup_options(MyProcPort, am_superuser);

		/* Apply PostAuthDelay as soon as we've read all options */
		if (PostAuthDelay > 0)
			pg_usleep(PostAuthDelay * 1000000L);

		/* initialize client encoding */
		InitializeClientEncoding();

		/* report this backend in the PgBackendStatus array */
		pgstat_bestart();

		/* close the transaction we started above */
		CommitTransactionCommand();

		return;
	}

	/*
	 * Set up the global variables holding database id and path.  But note we
	 * won't actually try to touch the database just yet.
	 *
	 * We take a shortcut in the bootstrap case, otherwise we have to look up
	 * the db name in pg_database.
	 */
	if (bootstrap)
	{
		MyDatabaseId = TemplateDbOid;
		MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
	}
	else if (in_dbname != NULL)
	{
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTuple(in_dbname);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", in_dbname)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		/* take database name from the caller, just for paranoia */
		strlcpy(dbname, in_dbname, sizeof(dbname));
		pfree(tuple);
	}
	else
	{
		/* caller specified database by OID */
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTupleByOid(dboid);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database %u does not exist", dboid)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		Assert(MyDatabaseId == dboid);
		strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));
		/* pass the database name back to the caller */
		if (out_dbname)
			strcpy(out_dbname, dbname);
		pfree(tuple);
	}

	/* Now we can mark our PGPROC entry with the database ID */
	/* (We assume this is an atomic store so no lock is needed) */
	MyProc->databaseId = MyDatabaseId;

	/*
	 * Now, take a writer's lock on the database we are trying to connect to.
	 * If there is a concurrently running DROP DATABASE on that database, this
	 * will block us until it finishes (and has committed its update of
	 * pg_database).
	 *
	 * Note that the lock is not held long, only until the end of this startup
	 * transaction.  This is OK since we are already advertising our use of
	 * the database in the PGPROC array; anyone trying a DROP DATABASE after
	 * this point will see us there.
	 *
	 * Note: use of RowExclusiveLock here is reasonable because we envision
	 * our session as being a concurrent writer of the database.  If we had a
	 * way of declaring a session as being guaranteed-read-only, we could use
	 * AccessShareLock for such sessions and thereby not conflict against
	 * CREATE DATABASE.
	 */
	if (!bootstrap)
		LockSharedObject(DatabaseRelationId, MyDatabaseId, 0,
						 RowExclusiveLock);

	/*
	 * Recheck pg_database to make sure the target database hasn't gone away.
	 * If there was a concurrent DROP DATABASE, this ensures we will die
	 * cleanly without creating a mess.
	 */
	if (!bootstrap)
	{
		HeapTuple	tuple;

		tuple = GetDatabaseTuple(dbname);
		if (!HeapTupleIsValid(tuple) ||
			MyDatabaseId != HeapTupleGetOid(tuple) ||
			MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", dbname),
			   errdetail("It seems to have just been dropped or renamed.")));
	}

	fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);

	if (!bootstrap)
	{
		if (access(fullpath, F_OK) == -1)
		{
			if (errno == ENOENT)
				ereport(FATAL,
						(errcode(ERRCODE_UNDEFINED_DATABASE),
						 errmsg("database \"%s\" does not exist",
								dbname),
					errdetail("The database subdirectory \"%s\" is missing.",
							  fullpath)));
			else
				ereport(FATAL,
						(errcode_for_file_access(),
						 errmsg("could not access directory \"%s\": %m",
								fullpath)));
		}

		ValidatePgVersion(fullpath);
	}

	SetDatabasePath(fullpath);

	/*
	 * It's now possible to do real access to the system catalogs.
	 *
	 * Load relcache entries for the system catalogs.  This must create at
	 * least the minimum set of "nailed-in" cache entries.
	 */
	RelationCacheInitializePhase3();

	/*
	 * Now we have full access to catalog including toast tables,
	 * we can process pg_authid.rolconfig.  This ought to come before
	 * processing startup options so that it can override the settings.
	 */
	if (!bootstrap)
		ProcessRoleGUC();

	/* set up ACL framework (so CheckMyDatabase can check permissions) */
	initialize_acl();

	/*
	 * Re-read the pg_database row for our database, check permissions and set
	 * up database-specific GUC settings.  We can't do this until all the
	 * database-access infrastructure is up.  (Also, it wants to know if the
	 * user is a superuser, so the above stuff has to happen first.)
	 */
	if (!bootstrap)
		CheckMyDatabase(dbname, am_superuser);

	/*
	 * Now process any command-line switches and any additional GUC variable
	 * settings passed in the startup packet.	We couldn't do this before
	 * because we didn't know if client is a superuser.
	 */
	if (MyProcPort != NULL)
		process_startup_options(MyProcPort, am_superuser);

	/*
	 * Maintenance Mode: allow superuser to connect when
	 * gp_maintenance_conn GUC is set.  We cannot check it until
	 * process_startup_options parses the GUC.
	 */
	if (gp_maintenance_mode && Gp_role == GP_ROLE_DISPATCH &&
		!(superuser() && gp_maintenance_conn))
		ereport(FATAL,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("maintenance mode: connected by superuser only"),
				 errSendAlert(false)));

	/*
	 * MPP:  If we were started in utility mode then we only want to allow
	 * incoming sessions that specify gp_session_role=utility as well.  This
	 * lets the bash scripts start the QD in utility mode and connect in but
	 * protect ourselves from normal clients who might be trying to connect to
	 * the system while we startup.
	 */
	if ((Gp_role == GP_ROLE_UTILITY) && (Gp_session_role != GP_ROLE_UTILITY))
	{
		ereport(FATAL,
				(errcode(ERRCODE_CANNOT_CONNECT_NOW),
				 errmsg("System was started in master-only utility mode - only utility mode connections are allowed")));
	}

	/* Apply PostAuthDelay as soon as we've read all options */
	if (PostAuthDelay > 0)
		pg_usleep(PostAuthDelay * 1000000L);

	/* set default namespace search path */
	InitializeSearchPath();

	/* initialize client encoding */
	InitializeClientEncoding();

	/* report this backend in the PgBackendStatus array */
	if (!bootstrap)
		pgstat_bestart();
		
	/* 
     * MPP package setup 
     *
     * Primary function is to establish connctions to the qExecs.
     * This is SKIPPED when the database is in bootstrap mode or 
     * Is not UnderPostmaster.
     */
    if (!bootstrap && IsUnderPostmaster)
    {
		cdb_setup();
		on_proc_exit( cdb_cleanup, 0 );
    }

    /* 
     * MPP SharedSnapshot Setup
	 */
	if (Gp_role == GP_ROLE_DISPATCH)
	{
		addSharedSnapshot("Query Dispatcher", gp_session_id);
	}
	else if (Gp_role == GP_ROLE_DISPATCHAGENT)
	{
		SharedLocalSnapshotSlot = NULL;
	}
    else if (Gp_segment == -1 && Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer)
    {
		/* 
		 * Entry db singleton QE is a user of the shared snapshot -- not a creator.
		 * The lookup will occur once the distributed snapshot has been received.
		 */	
		lookupSharedSnapshot("Entry DB Singleton", "Query Dispatcher", gp_session_id);
    }
    else if (Gp_role == GP_ROLE_EXECUTE)
	{
		if (Gp_is_writer)
		{
			addSharedSnapshot("Writer qExec", gp_session_id);
		}
		else
		{
			/*
			 * NOTE: This assumes that the Slot has already been
			 *       allocated by the writer.  Need to make sure we
			 *       always allocate the writer qExec first.
			 */			 			
			lookupSharedSnapshot("Reader qExec", "Writer qExec", gp_session_id);
		}
	}

	/* close the transaction we started above */
	if (!bootstrap)
		CommitTransactionCommand();

	return;
}