static void BgwPoolMainLoop(Datum arg) { BgwPoolExecutorCtx* ctx = (BgwPoolExecutorCtx*)arg; int id = ctx->id; BgwPool* pool = ctx->constructor(); int size; void* work; BackgroundWorkerUnblockSignals(); BackgroundWorkerInitializeConnection(pool->dbname, NULL); while(true) { PGSemaphoreLock(&pool->available); SpinLockAcquire(&pool->lock); size = *(int*)&pool->queue[pool->head]; Assert(size < pool->size); work = malloc(size); pool->pending -= 1; pool->active += 1; if (pool->lastPeakTime == 0 && pool->active == pool->nWorkers && pool->pending != 0) { pool->lastPeakTime = MtmGetSystemTime(); } if (pool->head + size + 4 > pool->size) { memcpy(work, pool->queue, size); pool->head = INTALIGN(size); } else { memcpy(work, &pool->queue[pool->head+4], size); pool->head += 4 + INTALIGN(size); } if (pool->size == pool->head) { pool->head = 0; } if (pool->producerBlocked) { pool->producerBlocked = false; PGSemaphoreUnlock(&pool->overflow); pool->lastPeakTime = 0; } SpinLockRelease(&pool->lock); pool->executor(id, work, size); free(work); SpinLockAcquire(&pool->lock); pool->active -= 1; pool->lastPeakTime = 0; SpinLockRelease(&pool->lock); } }
void BgwPoolExecute(BgwPool* pool, void* work, size_t size) { if (size+4 > pool->size) { /* * Size of work is larger than size of shared buffer: * run it immediately */ pool->executor(0, work, size); return; } SpinLockAcquire(&pool->lock); while (true) { if ((pool->head <= pool->tail && pool->size - pool->tail < size + 4 && pool->head < size) || (pool->head > pool->tail && pool->head - pool->tail < size + 4)) { if (pool->lastPeakTime == 0) { pool->lastPeakTime = MtmGetSystemTime(); } pool->producerBlocked = true; SpinLockRelease(&pool->lock); PGSemaphoreLock(&pool->overflow); SpinLockAcquire(&pool->lock); } else { pool->pending += 1; if (pool->lastPeakTime == 0 && pool->active == pool->nWorkers && pool->pending != 0) { pool->lastPeakTime = MtmGetSystemTime(); } *(int*)&pool->queue[pool->tail] = size; if (pool->size - pool->tail >= size + 4) { memcpy(&pool->queue[pool->tail+4], work, size); pool->tail += 4 + INTALIGN(size); } else { memcpy(pool->queue, work, size); pool->tail = INTALIGN(size); } if (pool->tail == pool->size) { pool->tail = 0; } PGSemaphoreUnlock(&pool->available); break; } } SpinLockRelease(&pool->lock); }
int main(int argc, char **argv) { MyStorage *storage; int cpid; printf("Creating shared memory ... "); fflush(stdout); storage = (MyStorage *) PGSharedMemoryCreate(8192, false, 5433); storage->flag = 1234; printf("OK\n"); printf("Creating semaphores ... "); fflush(stdout); PGReserveSemaphores(2, 5433); PGSemaphoreCreate(&storage->sem); printf("OK\n"); /* sema initial value is 1, so lock should work */ printf("Testing Lock ... "); fflush(stdout); PGSemaphoreLock(&storage->sem, false); printf("OK\n"); /* now sema value is 0, so trylock should fail */ printf("Testing TryLock ... "); fflush(stdout); if (PGSemaphoreTryLock(&storage->sem)) printf("unexpected result!\n"); else printf("OK\n"); /* unlocking twice and then locking twice should work... */ printf("Testing Multiple Lock ... "); fflush(stdout); PGSemaphoreUnlock(&storage->sem); PGSemaphoreUnlock(&storage->sem); PGSemaphoreLock(&storage->sem, false); PGSemaphoreLock(&storage->sem, false); printf("OK\n"); /* check Reset too */ printf("Testing Reset ... "); fflush(stdout); PGSemaphoreUnlock(&storage->sem); PGSemaphoreReset(&storage->sem); if (PGSemaphoreTryLock(&storage->sem)) printf("unexpected result!\n"); else printf("OK\n"); /* Fork a child process and see if it can communicate */ printf("Forking child process ... "); fflush(stdout); cpid = fork(); if (cpid == 0) { /* In child */ on_exit_reset(); sleep(3); storage->flag++; PGSemaphoreUnlock(&storage->sem); proc_exit(0); } if (cpid < 0) { /* Fork failed */ printf("failed: %s\n", strerror(errno)); proc_exit(1); } printf("forked child pid %d OK\n", cpid); if (storage->flag != 1234) printf("Wrong value found in shared memory!\n"); printf("Waiting for child (should wait 3 sec here) ... "); fflush(stdout); PGSemaphoreLock(&storage->sem, false); printf("OK\n"); if (storage->flag != 1235) printf("Wrong value found in shared memory!\n"); /* Test shutdown */ printf("Running shmem_exit processing ... "); fflush(stdout); shmem_exit(0); printf("OK\n"); printf("Tests complete.\n"); proc_exit(0); return 0; /* not reached */ }
/* * LWLockAcquire - acquire a lightweight lock in the specified mode * * If the lock is not available, sleep until it is. * * Side effect: cancel/die interrupts are held off until lock release. */ void LWLockAcquire(LWLockId lockid, LWLockMode mode) { volatile LWLock *lock = &(LWLockArray[lockid].lock); #if LWLOCK_LOCK_PARTS > 1 volatile LWLockPart *part = LWLOCK_PART(lock, lockid, MyBackendId); #endif PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; PRINT_LWDEBUG("LWLockAcquire", lockid, lock); #ifdef LWLOCK_STATS /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) { int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); int numLocks = LWLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); ex_acquire_counts = calloc(numLocks, sizeof(int)); block_counts = calloc(numLocks, sizeof(int)); counts_for_pid = MyProcPid; on_shmem_exit(print_lwlock_stats, 0); } /* Count lock acquisition attempts */ if (mode == LW_EXCLUSIVE) ex_acquire_counts[lockid]++; else sh_acquire_counts[lockid]++; #endif /* LWLOCK_STATS */ /* * We can't wait if we haven't got a PGPROC. This should only occur * during bootstrap or shared memory initialization. Put an Assert here * to catch unsafe coding practices. */ Assert(!(proc == NULL && IsUnderPostmaster)); /* Ensure we will have room to remember the lock */ if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) elog(ERROR, "too many LWLocks taken"); /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with * manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); /* * Loop here to try to acquire lock after each time we are signaled by * LWLockRelease. * * NOTE: it might seem better to have LWLockRelease actually grant us the * lock, rather than retrying and possibly having to go back to sleep. But * in practice that is no good because it means a process swap for every * lock acquisition when two or more processes are contending for the same * lock. Since LWLocks are normally used to protect not-very-long * sections of computation, a process needs to be able to acquire and * release the same lock many times during a single CPU time slice, even * in the presence of contention. The efficiency of being able to do that * outweighs the inefficiency of sometimes wasting a process dispatch * cycle because the lock is not free when a released waiter finally gets * to run. See pgsql-hackers archives for 29-Dec-01. */ for (;;) { bool mustwait; if (mode == LW_SHARED) { #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC /* Increment shared counter partition. If there's no contention, * this is sufficient to take the lock */ LWLOCK_PART_SHARED_POSTINC_ATOMIC(lock, lockid, part, MyBackendId); LWLOCK_PART_SHARED_FENCE(); /* A concurrent exclusive locking attempt does the following * three steps * 1) Acquire mutex * 2) Check shared counter partitions for readers. * 3a) If found add proc to wait queue, block, restart at (1) * 3b) If not found, set exclusive flag, continue with (4) * 4) Enter protected section * The fence after the atomic add above ensures that no further * such attempt can proceed to (3b) or beyond. There may be * pre-existing exclusive locking attempts at step (3b) or beyond, * but we can recognize those by either the mutex being taken, or * the exclusive flag being set. Conversely, if we see neither, we * may proceed and enter the protected section. * * FIXME: This doesn't work if slock_t is a struct or doesn't * use 0 for state "unlocked". */ if ((lock->mutex == 0) && (lock->exclusive == 0)) { /* If retrying, allow LWLockRelease to release waiters again. * Usually this happens after we acquired the mutex, but if * we skip that, we still need to set releaseOK. * * Acquiring the mutex here is not really an option - if many * reader are awoken simultaneously by an exclusive unlock, * that would be a source of considerable contention. * * Fotunately, this is safe even without the mutex. First, * there actually cannot be any non-fast path unlocking * attempt in progress, because we'd then either still see * the exclusive flag set or the mutex being taken. And * even if there was, and such an attempt cleared the flag * immediately after we set it, it'd also wake up some waiter * who'd then re-set the flag. * * The only reason to do this here, and not directly * after returning from PGSemaphoreLock(), is that it seems * benefical to make SpinLockAcquire() the first thing to * touch the lock if possible, in case we acquire the spin * lock at all. That way, the cache line doesn't go through * a possible shared state, but instead directly to exclusive. * On Opterons at least, there seems to be a difference, c.f. * the comment above tas() for x86_64 in s_lock.h */ if (retry && !lock->releaseOK) lock->releaseOK = true; goto lock_acquired; } /* At this point, we don't know if the concurrent exclusive locker * has proceeded to (3b) or blocked. We must take the mutex and * re-check */ #endif /* LWLOCK_PART_SHARED_OPS_ATOMIC */ /* Acquire mutex. Time spent holding mutex should be short! */ SpinLockAcquire(&lock->mutex); if (lock->exclusive == 0) { #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC /* Already incremented the shared counter partition above */ #else lock->shared++; #endif mustwait = false; } else { #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC /* Must undo shared counter partition increment. Note that * we *need* to do that while holding the mutex. Otherwise, * the exclusive lock could be released and attempted to be * re-acquired before we undo the increment. That attempt * would then block, even though there'd be no lock holder * left */ LWLOCK_PART_SHARED_POSTDEC_ATOMIC(lock, lockid, part, MyBackendId); #endif mustwait = true; } } else { /* Step (1). Acquire mutex. Time spent holding mutex should be * short! */ SpinLockAcquire(&lock->mutex); if (lock->exclusive == 0) { /* Step (2). Check for shared lockers. This surely happens * after (1), otherwise SpinLockAcquire() is broken. Lock * acquire semantics demand that no load must be re-ordered * from after a lock acquisition to before, for obvious * reasons. */ LWLOCK_IS_SHARED(mustwait, lock, lockid); if (!mustwait) { /* Step (3a). Set exclusive flag. This surely happens * after (2) because it depends on the result of (2), * no matter how much reordering is going on here. */ lock->exclusive++; } } else mustwait = true; } /* If retrying, allow LWLockRelease to release waiters again. * This is also separately done in the LW_SHARED early exit case * above, and in contrast to there we don't hold the mutex there. * See the comment there for why this is safe */ if (retry) lock->releaseOK = true; if (!mustwait) break; /* got the lock */ /* * Step (3b). Add myself to wait queue. * * If we don't have a PGPROC structure, there's no way to wait. This * should never occur, since MyProc should only be null during shared * memory initialization. */ if (proc == NULL) elog(PANIC, "cannot wait without a PGPROC structure"); proc->lwWaiting = true; proc->lwExclusive = (mode == LW_EXCLUSIVE); proc->lwWaitLink = NULL; if (lock->head == NULL) lock->head = proc; else lock->tail->lwWaitLink = proc; lock->tail = proc; /* Can release the mutex now */ SpinLockRelease(&lock->mutex); /* * Wait until awakened. * * Since we share the process wait semaphore with the regular lock * manager and ProcWaitForSignal, and we may need to acquire an LWLock * while one of those is pending, it is possible that we get awakened * for a reason other than being signaled by LWLockRelease. If so, * loop back and wait again. Once we've gotten the LWLock, * re-increment the sema by the number of additional signals received, * so that the lock manager or signal manager will see the received * signal when it next waits. */ LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); #ifdef LWLOCK_STATS block_counts[lockid]++; #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; } TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode); LOG_LWDEBUG("LWLockAcquire", lockid, "awakened"); /* Now loop back and try to acquire lock again. */ retry = true; } /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); /* Step 4. Enter protected section. This surely happens after (3), * this time because lock release semantics demand that no store * must be moved from before a lock release to after the release, * again for obvious reasons */ #ifdef LWLOCK_PART_SHARED_OPS_ATOMIC lock_acquired: #endif TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode); /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks] = lockid; held_lwlocks_mode[num_held_lwlocks] = mode; ++num_held_lwlocks; /* * Fix the process wait semaphore's count for any absorbed wakeups. */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); }
/* * LWLockAcquire - acquire a lightweight lock in the specified mode * * If the lock is not available, sleep until it is. * * Side effect: cancel/die interrupts are held off until lock release. */ void LWLockAcquire(LWLockId lockid, LWLockMode mode) { volatile LWLock *lock = &(LWLockArray[lockid].lock); PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; PRINT_LWDEBUG("LWLockAcquire", lockid, lock); #ifdef LWLOCK_STATS /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) { int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); int numLocks = LWLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); ex_acquire_counts = calloc(numLocks, sizeof(int)); block_counts = calloc(numLocks, sizeof(int)); counts_for_pid = MyProcPid; on_shmem_exit(print_lwlock_stats, 0); } /* Count lock acquisition attempts */ if (mode == LW_EXCLUSIVE) ex_acquire_counts[lockid]++; else sh_acquire_counts[lockid]++; #endif /* LWLOCK_STATS */ /* * We can't wait if we haven't got a PGPROC. This should only occur * during bootstrap or shared memory initialization. Put an Assert here * to catch unsafe coding practices. */ Assert(!(proc == NULL && IsUnderPostmaster)); /* Ensure we will have room to remember the lock */ if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) elog(ERROR, "too many LWLocks taken"); /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with * manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); /* * Loop here to try to acquire lock after each time we are signaled by * LWLockRelease. * * NOTE: it might seem better to have LWLockRelease actually grant us the * lock, rather than retrying and possibly having to go back to sleep. But * in practice that is no good because it means a process swap for every * lock acquisition when two or more processes are contending for the same * lock. Since LWLocks are normally used to protect not-very-long * sections of computation, a process needs to be able to acquire and * release the same lock many times during a single CPU time slice, even * in the presence of contention. The efficiency of being able to do that * outweighs the inefficiency of sometimes wasting a process dispatch * cycle because the lock is not free when a released waiter finally gets * to run. See pgsql-hackers archives for 29-Dec-01. */ for (;;) { bool mustwait; /* Acquire mutex. Time spent holding mutex should be short! */ SpinLockAcquire(&lock->mutex); /* If retrying, allow LWLockRelease to release waiters again */ if (retry) lock->releaseOK = true; /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) { if (lock->exclusive == 0 && lock->shared == 0) { lock->exclusive++; mustwait = false; } else mustwait = true; } else { if (lock->exclusive == 0) { lock->shared++; mustwait = false; } else mustwait = true; } if (!mustwait) break; /* got the lock */ /* * Add myself to wait queue. * * If we don't have a PGPROC structure, there's no way to wait. This * should never occur, since MyProc should only be null during shared * memory initialization. */ if (proc == NULL) elog(PANIC, "cannot wait without a PGPROC structure"); proc->lwWaiting = true; proc->lwExclusive = (mode == LW_EXCLUSIVE); proc->lwWaitLink = NULL; if (lock->head == NULL) lock->head = proc; else lock->tail->lwWaitLink = proc; lock->tail = proc; /* Can release the mutex now */ SpinLockRelease(&lock->mutex); /* * Wait until awakened. * * Since we share the process wait semaphore with the regular lock * manager and ProcWaitForSignal, and we may need to acquire an LWLock * while one of those is pending, it is possible that we get awakened * for a reason other than being signaled by LWLockRelease. If so, * loop back and wait again. Once we've gotten the LWLock, * re-increment the sema by the number of additional signals received, * so that the lock manager or signal manager will see the received * signal when it next waits. */ LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); #ifdef LWLOCK_STATS block_counts[lockid]++; #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; } TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode); LOG_LWDEBUG("LWLockAcquire", lockid, "awakened"); /* Now loop back and try to acquire lock again. */ retry = true; } /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode); /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks++] = lockid; /* * Fix the process wait semaphore's count for any absorbed wakeups. */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); }
/* * LWLockWaitForVar - Wait until lock is free, or a variable is updated. * * If the lock is held and *valptr equals oldval, waits until the lock is * either freed, or the lock holder updates *valptr by calling * LWLockUpdateVar. If the lock is free on exit (immediately or after * waiting), returns true. If the lock is still held, but *valptr no longer * matches oldval, returns false and sets *newval to the current value in * *valptr. * * It's possible that the lock holder releases the lock, but another backend * acquires it again before we get a chance to observe that the lock was * momentarily released. We wouldn't need to wait for the new lock holder, * but we cannot distinguish that case, so we will have to wait. * * Note: this function ignores shared lock holders; if the lock is held * in shared mode, returns 'true'. */ bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval) { PGPROC *proc = MyProc; int extraWaits = 0; bool result = false; #ifdef LWLOCK_STATS lwlock_stats *lwstats; #endif PRINT_LWDEBUG("LWLockWaitForVar", lock); #ifdef LWLOCK_STATS lwstats = get_lwlock_stats_entry(lock); #endif /* LWLOCK_STATS */ /* * Quick test first to see if it the slot is free right now. * * XXX: the caller uses a spinlock before this, so we don't need a memory * barrier here as far as the current usage is concerned. But that might * not be safe in general. */ if (lock->exclusive == 0) return true; /* * Lock out cancel/die interrupts while we sleep on the lock. There is no * cleanup mechanism to remove us from the wait queue if we got * interrupted. */ HOLD_INTERRUPTS(); /* * Loop here to check the lock's status after each time we are signaled. */ for (;;) { bool mustwait; uint64 value; /* Acquire mutex. Time spent holding mutex should be short! */ #ifdef LWLOCK_STATS lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex); #else SpinLockAcquire(&lock->mutex); #endif /* Is the lock now free, and if not, does the value match? */ if (lock->exclusive == 0) { result = true; mustwait = false; } else { value = *valptr; if (value != oldval) { result = false; mustwait = false; *newval = value; } else mustwait = true; } if (!mustwait) break; /* the lock was free or value didn't match */ /* * Add myself to wait queue. */ proc->lwWaiting = true; proc->lwWaitMode = LW_WAIT_UNTIL_FREE; /* waiters are added to the front of the queue */ proc->lwWaitLink = lock->head; if (lock->head == NULL) lock->tail = proc; lock->head = proc; /* * Set releaseOK, to make sure we get woken up as soon as the lock is * released. */ lock->releaseOK = true; /* Can release the mutex now */ SpinLockRelease(&lock->mutex); /* * Wait until awakened. * * Since we share the process wait semaphore with the regular lock * manager and ProcWaitForSignal, and we may need to acquire an LWLock * while one of those is pending, it is possible that we get awakened * for a reason other than being signaled by LWLockRelease. If so, * loop back and wait again. Once we've gotten the LWLock, * re-increment the sema by the number of additional signals received, * so that the lock manager or signal manager will see the received * signal when it next waits. */ LOG_LWDEBUG("LWLockWaitForVar", T_NAME(lock), T_ID(lock), "waiting"); #ifdef LWLOCK_STATS lwstats->block_count++; #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), LW_EXCLUSIVE); for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; } TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), LW_EXCLUSIVE); LOG_LWDEBUG("LWLockWaitForVar", T_NAME(lock), T_ID(lock), "awakened"); /* Now loop back and check the status of the lock again. */ } /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), T_ID(lock), LW_EXCLUSIVE); /* * Fix the process wait semaphore's count for any absorbed wakeups. */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); /* * Now okay to allow cancel/die interrupts. */ RESUME_INTERRUPTS(); return result; }
/* * LWLockAcquireOrWait - Acquire lock, or wait until it's free * * The semantics of this function are a bit funky. If the lock is currently * free, it is acquired in the given mode, and the function returns true. If * the lock isn't immediately free, the function waits until it is released * and returns false, but does not acquire the lock. * * This is currently used for WALWriteLock: when a backend flushes the WAL, * holding WALWriteLock, it can flush the commit records of many other * backends as a side-effect. Those other backends need to wait until the * flush finishes, but don't need to acquire the lock anymore. They can just * wake up, observe that their records have already been flushed, and return. */ bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode) { PGPROC *proc = MyProc; bool mustwait; int extraWaits = 0; #ifdef LWLOCK_STATS lwlock_stats *lwstats; #endif PRINT_LWDEBUG("LWLockAcquireOrWait", lock); #ifdef LWLOCK_STATS lwstats = get_lwlock_stats_entry(lock); #endif /* Ensure we will have room to remember the lock */ if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) elog(ERROR, "too many LWLocks taken"); /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with * manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); /* Acquire mutex. Time spent holding mutex should be short! */ SpinLockAcquire(&lock->mutex); /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) { if (lock->exclusive == 0 && lock->shared == 0) { lock->exclusive++; mustwait = false; } else mustwait = true; } else { if (lock->exclusive == 0) { lock->shared++; mustwait = false; } else mustwait = true; } if (mustwait) { /* * Add myself to wait queue. * * If we don't have a PGPROC structure, there's no way to wait. This * should never occur, since MyProc should only be null during shared * memory initialization. */ if (proc == NULL) elog(PANIC, "cannot wait without a PGPROC structure"); proc->lwWaiting = true; proc->lwWaitMode = LW_WAIT_UNTIL_FREE; proc->lwWaitLink = NULL; if (lock->head == NULL) lock->head = proc; else lock->tail->lwWaitLink = proc; lock->tail = proc; /* Can release the mutex now */ SpinLockRelease(&lock->mutex); /* * Wait until awakened. Like in LWLockAcquire, be prepared for bogus * wakups, because we share the semaphore with ProcWaitForSignal. */ LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock), "waiting"); #ifdef LWLOCK_STATS lwstats->block_count++; #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), mode); for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; } TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), mode); LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock), "awakened"); } else { /* We are done updating shared state of the lock itself. */ SpinLockRelease(&lock->mutex); } /* * Fix the process wait semaphore's count for any absorbed wakeups. */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); if (mustwait) { /* Failed to get lock, so release interrupt holdoff */ RESUME_INTERRUPTS(); LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock), "failed"); TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), T_ID(lock), mode); } else { /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks++] = lock; TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), T_ID(lock), mode); } return !mustwait; }
/* * ProcSleep -- put a process to sleep on the specified lock * * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * * The lock table's partition lock must be held at entry, and will be held * at exit. * * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * * ASSUME: that no one will fiddle with the queue until after * we release the partition lock. * * NOTES: The process queue is now a priority queue for locking. * * P() on the semaphore should put us to sleep. The process * semaphore is normally zero, so when we try to acquire it, we sleep. */ int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) { LOCKMODE lockmode = locallock->tag.mode; LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; LWLockId partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; bool allow_autovacuum_cancel = true; int myWaitStatus; PGPROC *proc; int i; /* * Determine where to add myself in the wait queue. * * Normally I should go at the end of the queue. However, if I already * hold locks that conflict with the request of any previous waiter, put * myself in the queue just in front of the first such waiter. This is not * a necessary step, since deadlock detection would move me to before that * waiter anyway; but it's relatively cheap to detect such a conflict * immediately, and avoid delaying till deadlock timeout. * * Special case: if I find I should go in front of some waiter, check to * see if I conflict with already-held locks or the requests before that * waiter. If not, then just grant myself the requested lock immediately. * This is the same as the test for immediate grant in LockAcquire, except * we are only considering the part of the wait queue before my insertion * point. */ if (myHeldLocks != 0) { LOCKMASK aheadRequests = 0; proc = (PGPROC *) waitQueue->links.next; for (i = 0; i < waitQueue->size; i++) { /* Must he wait for me? */ if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks) { /* Must I wait for him ? */ if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks) { /* * Yes, so we have a deadlock. Easiest way to clean up * correctly is to call RemoveFromWaitQueue(), but we * can't do that until we are *on* the wait queue. So, set * a flag to check below, and break out of loop. Also, * record deadlock info for later message. */ RememberSimpleDeadLock(MyProc, lockmode, lock, proc); early_deadlock = true; break; } /* I must go before this waiter. Check special case. */ if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 && LockCheckConflicts(lockMethodTable, lockmode, lock, proclock, MyProc) == STATUS_OK) { /* Skip the wait and just grant myself the lock. */ GrantLock(lock, proclock, lockmode); GrantAwaitedLock(); return STATUS_OK; } /* Break out of loop to put myself before him */ break; } /* Nope, so advance to next waiter */ aheadRequests |= LOCKBIT_ON(proc->waitLockMode); proc = (PGPROC *) proc->links.next; } /* * If we fall out of loop normally, proc points to waitQueue head, so * we will insert at tail of queue as desired. */ } else { /* I hold no locks, so I can't push in front of anyone. */ proc = (PGPROC *) &(waitQueue->links); } /* * Insert self into queue, ahead of the given proc (or at tail of queue). */ SHMQueueInsertBefore(&(proc->links), &(MyProc->links)); waitQueue->size++; lock->waitMask |= LOCKBIT_ON(lockmode); /* Set up wait information in PGPROC object, too */ MyProc->waitLock = lock; MyProc->waitProcLock = proclock; MyProc->waitLockMode = lockmode; MyProc->waitStatus = STATUS_WAITING; /* * If we detected deadlock, give up without waiting. This must agree with * CheckDeadLock's recovery code, except that we shouldn't release the * semaphore since we haven't tried to lock it yet. */ if (early_deadlock) { RemoveFromWaitQueue(MyProc, hashcode); return STATUS_ERROR; } /* mark that we are waiting for a lock */ lockAwaited = locallock; /* * Release the lock table's partition lock. * * NOTE: this may also cause us to exit critical-section state, possibly * allowing a cancel/die interrupt to be accepted. This is OK because we * have recorded the fact that we are waiting for a lock, and so * LockErrorCleanup will clean up if cancel/die happens. */ LWLockRelease(partitionLock); /* * Also, now that we will successfully clean up after an ereport, it's * safe to check to see if there's a buffer pin deadlock against the * Startup process. Of course, that's only necessary if we're doing Hot * Standby and are not the Startup process ourselves. */ if (RecoveryInProgress() && !InRecovery) CheckRecoveryConflictDeadlock(); /* Reset deadlock_state before enabling the timeout handler */ deadlock_state = DS_NOT_YET_CHECKED; /* * Set timer so we can wake up after awhile and check for a deadlock. If a * deadlock is detected, the handler releases the process's semaphore and * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we * must report failure rather than success. * * By delaying the check until we've waited for a bit, we can avoid * running the rather expensive deadlock-check code in most cases. */ enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); /* * If someone wakes us between LWLockRelease and PGSemaphoreLock, * PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore * implementation. While this is normally good, there are cases where a * saved wakeup might be leftover from a previous operation (for example, * we aborted ProcWaitForSignal just before someone did ProcSendSignal). * So, loop to wait again if the waitStatus shows we haven't been granted * nor denied the lock yet. * * We pass interruptOK = true, which eliminates a window in which * cancel/die interrupts would be held off undesirably. This is a promise * that we don't mind losing control to a cancel/die interrupt here. We * don't, because we have no shared-state-change work to do after being * granted the lock (the grantor did it all). We do have to worry about * canceling the deadlock timeout and updating the locallock table, but if * we lose control to an error, LockErrorCleanup will fix that up. */ do { PGSemaphoreLock(&MyProc->sem, true); /* * waitStatus could change from STATUS_WAITING to something else * asynchronously. Read it just once per loop to prevent surprising * behavior (such as missing log messages). */ myWaitStatus = MyProc->waitStatus; /* * If we are not deadlocked, but are waiting on an autovacuum-induced * task, send a signal to interrupt it. */ if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel) { PGPROC *autovac = GetBlockingAutoVacuumPgproc(); PGXACT *autovac_pgxact = &ProcGlobal->allPgXact[autovac->pgprocno]; LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); /* * Only do it if the worker is not working to protect against Xid * wraparound. */ if ((autovac != NULL) && (autovac_pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) && !(autovac_pgxact->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) { int pid = autovac->pid; StringInfoData locktagbuf; StringInfoData logbuf; /* errdetail for server log */ initStringInfo(&locktagbuf); initStringInfo(&logbuf); DescribeLockTag(&locktagbuf, &lock->tag); appendStringInfo(&logbuf, _("Process %d waits for %s on %s"), MyProcPid, GetLockmodeName(lock->tag.locktag_lockmethodid, lockmode), locktagbuf.data); /* release lock as quickly as possible */ LWLockRelease(ProcArrayLock); ereport(LOG, (errmsg("sending cancel to blocking autovacuum PID %d", pid), errdetail_log("%s", logbuf.data))); pfree(logbuf.data); pfree(locktagbuf.data); /* send the autovacuum worker Back to Old Kent Road */ if (kill(pid, SIGINT) < 0) { /* Just a warning to allow multiple callers */ ereport(WARNING, (errmsg("could not send signal to process %d: %m", pid))); } } else LWLockRelease(ProcArrayLock); /* prevent signal from being resent more than once */ allow_autovacuum_cancel = false; } /* * If awoken after the deadlock check interrupt has run, and * log_lock_waits is on, then report about the wait. */ if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED) { StringInfoData buf; const char *modename; long secs; int usecs; long msecs; initStringInfo(&buf); DescribeLockTag(&buf, &locallock->tag.lock); modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid, lockmode); TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT), GetCurrentTimestamp(), &secs, &usecs); msecs = secs * 1000 + usecs / 1000; usecs = usecs % 1000; if (deadlock_state == DS_SOFT_DEADLOCK) ereport(LOG, (errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); else if (deadlock_state == DS_HARD_DEADLOCK) { /* * This message is a bit redundant with the error that will be * reported subsequently, but in some cases the error report * might not make it to the log (eg, if it's caught by an * exception handler), and we want to ensure all long-wait * events get logged. */ ereport(LOG, (errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); } if (myWaitStatus == STATUS_WAITING) ereport(LOG, (errmsg("process %d still waiting for %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); else if (myWaitStatus == STATUS_OK) ereport(LOG, (errmsg("process %d acquired %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); else { Assert(myWaitStatus == STATUS_ERROR); /* * Currently, the deadlock checker always kicks its own * process, which means that we'll only see STATUS_ERROR when * deadlock_state == DS_HARD_DEADLOCK, and there's no need to * print redundant messages. But for completeness and * future-proofing, print a message if it looks like someone * else kicked us off the lock. */ if (deadlock_state != DS_HARD_DEADLOCK) ereport(LOG, (errmsg("process %d failed to acquire %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); } /* * At this point we might still need to wait for the lock. Reset * state so we don't print the above messages again. */ deadlock_state = DS_NO_DEADLOCK; pfree(buf.data); } } while (myWaitStatus == STATUS_WAITING); /* * Disable the timer, if it's still running */ disable_timeout(DEADLOCK_TIMEOUT, false); /* * Re-acquire the lock table's partition lock. We have to do this to hold * off cancel/die interrupts before we can mess with lockAwaited (else we * might have a missed or duplicated locallock update). */ LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * We no longer want LockErrorCleanup to do anything. */ lockAwaited = NULL; /* * If we got the lock, be sure to remember it in the locallock table. */ if (MyProc->waitStatus == STATUS_OK) GrantAwaitedLock(); /* * We don't have to do anything else, because the awaker did all the * necessary update of the lock table and MyProc. */ return MyProc->waitStatus; }
/* * ProcWaitForSignal - wait for a signal from another backend. * * This can share the semaphore normally used for waiting for locks, * since a backend could never be waiting for a lock and a signal at * the same time. As with locks, it's OK if the signal arrives just * before we actually reach the waiting state. Also as with locks, * it's necessary that the caller be robust against bogus wakeups: * always check that the desired state has occurred, and wait again * if not. This copes with possible "leftover" wakeups. */ void ProcWaitForSignal(void) { PGSemaphoreLock(&MyProc->sem, true); }
/* * LWLockAcquire - acquire a lightweight lock in the specified mode * * If the lock is not available, sleep until it is. * * Side effect: cancel/die interrupts are held off until lock release. */ void LWLockAcquire(LWLockId lockid, LWLockMode mode) { volatile LWLock *lock = LWLockArray + lockid; PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; PRINT_LWDEBUG("LWLockAcquire", lockid, lock); /* * We can't wait if we haven't got a PGPROC. This should only occur * during bootstrap or shared memory initialization. Put an Assert * here to catch unsafe coding practices. */ Assert(!(proc == NULL && IsUnderPostmaster)); /* * Lock out cancel/die interrupts until we exit the code section * protected by the LWLock. This ensures that interrupts will not * interfere with manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); /* * Loop here to try to acquire lock after each time we are signaled by * LWLockRelease. * * NOTE: it might seem better to have LWLockRelease actually grant us the * lock, rather than retrying and possibly having to go back to sleep. * But in practice that is no good because it means a process swap for * every lock acquisition when two or more processes are contending * for the same lock. Since LWLocks are normally used to protect * not-very-long sections of computation, a process needs to be able * to acquire and release the same lock many times during a single CPU * time slice, even in the presence of contention. The efficiency of * being able to do that outweighs the inefficiency of sometimes * wasting a process dispatch cycle because the lock is not free when * a released waiter finally gets to run. See pgsql-hackers archives * for 29-Dec-01. */ for (;;) { bool mustwait; /* Acquire mutex. Time spent holding mutex should be short! */ SpinLockAcquire_NoHoldoff(&lock->mutex); /* If retrying, allow LWLockRelease to release waiters again */ if (retry) lock->releaseOK = true; /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) { if (lock->exclusive == 0 && lock->shared == 0) { lock->exclusive++; mustwait = false; } else mustwait = true; } else { if (lock->exclusive == 0) { lock->shared++; mustwait = false; } else mustwait = true; } if (!mustwait) break; /* got the lock */ /* * Add myself to wait queue. * * If we don't have a PGPROC structure, there's no way to wait. This * should never occur, since MyProc should only be null during * shared memory initialization. */ if (proc == NULL) elog(FATAL, "cannot wait without a PGPROC structure"); proc->lwWaiting = true; proc->lwExclusive = (mode == LW_EXCLUSIVE); proc->lwWaitLink = NULL; if (lock->head == NULL) lock->head = proc; else lock->tail->lwWaitLink = proc; lock->tail = proc; /* Can release the mutex now */ SpinLockRelease_NoHoldoff(&lock->mutex); /* * Wait until awakened. * * Since we share the process wait semaphore with the regular lock * manager and ProcWaitForSignal, and we may need to acquire an * LWLock while one of those is pending, it is possible that we * get awakened for a reason other than being signaled by * LWLockRelease. If so, loop back and wait again. Once we've * gotten the LWLock, re-increment the sema by the number of * additional signals received, so that the lock manager or signal * manager will see the received signal when it next waits. */ LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); for (;;) { /* "false" means cannot accept cancel/die interrupt here. */ PGSemaphoreLock(&proc->sem, false); if (!proc->lwWaiting) break; extraWaits++; } LOG_LWDEBUG("LWLockAcquire", lockid, "awakened"); /* Now loop back and try to acquire lock again. */ retry = true; } /* We are done updating shared state of the lock itself. */ SpinLockRelease_NoHoldoff(&lock->mutex); /* Add lock to list of locks held by this backend */ Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS); held_lwlocks[num_held_lwlocks++] = lockid; /* * Fix the process wait semaphore's count for any absorbed wakeups. */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); }
/* * ProcSleep -- put a process to sleep on the specified lock * * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * * The lock table's partition lock must be held at entry, and will be held * at exit. * * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * * ASSUME: that no one will fiddle with the queue until after * we release the partition lock. * * NOTES: The process queue is now a priority queue for locking. * * P() on the semaphore should put us to sleep. The process * semaphore is normally zero, so when we try to acquire it, we sleep. */ int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) { LOCKMODE lockmode = locallock->tag.mode; LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; LWLockId partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; PGPROC *proc; int i; /* * Determine where to add myself in the wait queue. * * Normally I should go at the end of the queue. However, if I already * hold locks that conflict with the request of any previous waiter, put * myself in the queue just in front of the first such waiter. This is not * a necessary step, since deadlock detection would move me to before that * waiter anyway; but it's relatively cheap to detect such a conflict * immediately, and avoid delaying till deadlock timeout. * * Special case: if I find I should go in front of some waiter, check to * see if I conflict with already-held locks or the requests before that * waiter. If not, then just grant myself the requested lock immediately. * This is the same as the test for immediate grant in LockAcquire, except * we are only considering the part of the wait queue before my insertion * point. */ if (myHeldLocks != 0) { LOCKMASK aheadRequests = 0; proc = (PGPROC *) MAKE_PTR(waitQueue->links.next); for (i = 0; i < waitQueue->size; i++) { /* Must he wait for me? */ if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks) { /* Must I wait for him ? */ if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks) { /* * Yes, so we have a deadlock. Easiest way to clean up * correctly is to call RemoveFromWaitQueue(), but we * can't do that until we are *on* the wait queue. So, set * a flag to check below, and break out of loop. Also, * record deadlock info for later message. */ RememberSimpleDeadLock(MyProc, lockmode, lock, proc); early_deadlock = true; break; } /* I must go before this waiter. Check special case. */ if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 && LockCheckConflicts(lockMethodTable, lockmode, lock, proclock, MyProc) == STATUS_OK) { /* Skip the wait and just grant myself the lock. */ GrantLock(lock, proclock, lockmode); GrantAwaitedLock(); return STATUS_OK; } /* Break out of loop to put myself before him */ break; } /* Nope, so advance to next waiter */ aheadRequests |= LOCKBIT_ON(proc->waitLockMode); proc = (PGPROC *) MAKE_PTR(proc->links.next); } /* * If we fall out of loop normally, proc points to waitQueue head, so * we will insert at tail of queue as desired. */ } else { /* I hold no locks, so I can't push in front of anyone. */ proc = (PGPROC *) &(waitQueue->links); } /* * Insert self into queue, ahead of the given proc (or at tail of queue). */ SHMQueueInsertBefore(&(proc->links), &(MyProc->links)); waitQueue->size++; lock->waitMask |= LOCKBIT_ON(lockmode); /* Set up wait information in PGPROC object, too */ MyProc->waitLock = lock; MyProc->waitProcLock = proclock; MyProc->waitLockMode = lockmode; MyProc->waitStatus = STATUS_WAITING; /* * If we detected deadlock, give up without waiting. This must agree with * CheckDeadLock's recovery code, except that we shouldn't release the * semaphore since we haven't tried to lock it yet. */ if (early_deadlock) { RemoveFromWaitQueue(MyProc, hashcode); return STATUS_ERROR; } /* mark that we are waiting for a lock */ lockAwaited = locallock; /* * Release the lock table's partition lock. * * NOTE: this may also cause us to exit critical-section state, possibly * allowing a cancel/die interrupt to be accepted. This is OK because we * have recorded the fact that we are waiting for a lock, and so * LockWaitCancel will clean up if cancel/die happens. */ LWLockRelease(partitionLock); /* * Set timer so we can wake up after awhile and check for a deadlock. If a * deadlock is detected, the handler releases the process's semaphore and * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we * must report failure rather than success. * * By delaying the check until we've waited for a bit, we can avoid * running the rather expensive deadlock-check code in most cases. */ if (!enable_sig_alarm(DeadlockTimeout, false)) elog(FATAL, "could not set timer for process wakeup"); /* * If someone wakes us between LWLockRelease and PGSemaphoreLock, * PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore * implementation. While this is normally good, there are cases where a * saved wakeup might be leftover from a previous operation (for example, * we aborted ProcWaitForSignal just before someone did ProcSendSignal). * So, loop to wait again if the waitStatus shows we haven't been granted * nor denied the lock yet. * * We pass interruptOK = true, which eliminates a window in which * cancel/die interrupts would be held off undesirably. This is a promise * that we don't mind losing control to a cancel/die interrupt here. We * don't, because we have no shared-state-change work to do after being * granted the lock (the grantor did it all). We do have to worry about * updating the locallock table, but if we lose control to an error, * LockWaitCancel will fix that up. */ do { PGSemaphoreLock(&MyProc->sem, true); } while (MyProc->waitStatus == STATUS_WAITING); /* * Disable the timer, if it's still running */ if (!disable_sig_alarm(false)) elog(FATAL, "could not disable timer for process wakeup"); /* * Re-acquire the lock table's partition lock. We have to do this to hold * off cancel/die interrupts before we can mess with lockAwaited (else we * might have a missed or duplicated locallock update). */ LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * We no longer want LockWaitCancel to do anything. */ lockAwaited = NULL; /* * If we got the lock, be sure to remember it in the locallock table. */ if (MyProc->waitStatus == STATUS_OK) GrantAwaitedLock(); /* * We don't have to do anything else, because the awaker did all the * necessary update of the lock table and MyProc. */ return MyProc->waitStatus; }