/*
 * ProcWakeup -- wake up a process by releasing its private semaphore.
 *
 * Also remove the process from the wait queue and set its links invalid.
 * RETURN: the next process in the wait queue.
 *
 * The appropriate lock partition lock must be held by caller.
 *
 * XXX: presently, this code is only used for the "success" case, and only
 * works correctly for that case.  To clean up in failure case, would need
 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
 * Hence, in practice the waitStatus parameter must be STATUS_OK.
 */
PGPROC *
ProcWakeup(PGPROC *proc, int waitStatus)
{
    PGPROC     *retProc;

    /* Proc should be sleeping ... */
    if (proc->links.prev == NULL ||
        proc->links.next == NULL)
        return NULL;
    Assert(proc->waitStatus == STATUS_WAITING);

    /* Save next process before we zap the list link */
    retProc = (PGPROC *) proc->links.next;

    /* Remove process from wait queue */
    SHMQueueDelete(&(proc->links));
    (proc->waitLock->waitProcs.size)--;

    /* Clean up process' state and pass it the ok/fail signal */
    proc->waitLock = NULL;
    proc->waitProcLock = NULL;
    proc->waitStatus = waitStatus;

    /* And awaken it */
    PGSemaphoreUnlock(&proc->sem);

    return retProc;
}
/*
 * ProcSendSignal - send a signal to a backend identified by PID
 */
void
ProcSendSignal(int pid)
{
    PGPROC     *proc = NULL;

    if (RecoveryInProgress())
    {
        /* use volatile pointer to prevent code rearrangement */
        volatile PROC_HDR *procglobal = ProcGlobal;

        SpinLockAcquire(ProcStructLock);

        /*
         * Check to see whether it is the Startup process we wish to signal.
         * This call is made by the buffer manager when it wishes to wake up
         * a process that has been waiting for a pin, so that it can obtain a
         * cleanup lock using LockBufferForCleanup().  Startup is not a
         * normal backend, so BackendPidGetProc() will not return any pid at
         * all.  So we remember the information for this special case.
         */
        if (pid == procglobal->startupProcPid)
            proc = procglobal->startupProc;

        SpinLockRelease(ProcStructLock);
    }

    if (proc == NULL)
        proc = BackendPidGetProc(pid);

    if (proc != NULL)
        PGSemaphoreUnlock(&proc->sem);
}
/*
 * ProcSendSignal - send a signal to a backend identified by PID
 */
void
ProcSendSignal(int pid)
{
    PGPROC     *proc = BackendPidGetProc(pid);

    if (proc != NULL)
        PGSemaphoreUnlock(&proc->sem);
}
void
s_unlock_sema(volatile slock_t *lock)
{
    int         lockndx = *lock;

    if (lockndx <= 0 || lockndx > NUM_SPINLOCK_SEMAPHORES)
        elog(ERROR, "invalid spinlock number: %d", lockndx);
    PGSemaphoreUnlock(&SpinlockSemaArray[lockndx - 1]);
}
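/*
 * For context, a plausible locking counterpart under the same indexed
 * scheme (a sketch under that assumption, not taken from the snippet
 * above): each slock_t holds a 1-based index into SpinlockSemaArray, and
 * TAS() is expected to return 0 on success, hence the negation of the
 * PGSemaphoreTryLock result.
 */
int
tas_sema(volatile slock_t *lock)
{
    int         lockndx = *lock;

    if (lockndx <= 0 || lockndx > NUM_SPINLOCK_SEMAPHORES)
        elog(ERROR, "invalid spinlock number: %d", lockndx);
    /* TAS() macros return 0 if successful */
    return !PGSemaphoreTryLock(&SpinlockSemaArray[lockndx - 1]);
}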
/*
 * LWLockUpdateVar - Update a variable and wake up waiters atomically
 *
 * Sets *valptr to 'val', and wakes up all processes waiting for us with
 * LWLockWaitForVar().  Setting the value and waking up the processes happen
 * atomically so that any process calling LWLockWaitForVar() on the same lock
 * is guaranteed to see the new value, and act accordingly.
 *
 * The caller must be holding the lock in exclusive mode.
 */
void
LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
{
    PGPROC     *head;
    PGPROC     *proc;
    PGPROC     *next;

    /* Acquire mutex.  Time spent holding mutex should be short! */
    SpinLockAcquire(&lock->mutex);

    /* we should hold the lock */
    Assert(lock->exclusive == 1);

    /* Update the lock's value */
    *valptr = val;

    /*
     * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
     * up.  They are always in the front of the queue.
     */
    head = lock->head;

    if (head != NULL && head->lwWaitMode == LW_WAIT_UNTIL_FREE)
    {
        proc = head;
        next = proc->lwWaitLink;
        while (next && next->lwWaitMode == LW_WAIT_UNTIL_FREE)
        {
            proc = next;
            next = next->lwWaitLink;
        }

        /* proc is now the last PGPROC to be released */
        lock->head = next;
        proc->lwWaitLink = NULL;
    }
    else
        head = NULL;

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

    /*
     * Awaken any waiters I removed from the queue.
     */
    while (head != NULL)
    {
        proc = head;
        head = proc->lwWaitLink;
        proc->lwWaitLink = NULL;
        /* check comment in LWLockRelease() about this barrier */
        pg_write_barrier();
        proc->lwWaiting = false;
        PGSemaphoreUnlock(&proc->sem);
    }
}
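/*
 * Minimal caller sketch (illustrative; "lock" and the shared "insertpos"
 * variable are placeholders, not names from the function above).  The
 * writer holds the lock exclusively, publishes a new value, and wakes
 * anyone blocked in LWLockWaitForVar() in one step.
 */
static void
advance_position(LWLock *lock, uint64 *insertpos, uint64 newpos)
{
    /* caller is assumed to have done LWLockAcquire(lock, LW_EXCLUSIVE) */
    LWLockUpdateVar(lock, insertpos, newpos);
    /* ... further work under the lock, then LWLockRelease(lock) ... */
}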
static void
BgwPoolMainLoop(Datum arg)
{
    BgwPoolExecutorCtx *ctx = (BgwPoolExecutorCtx *) arg;
    int         id = ctx->id;
    BgwPool    *pool = ctx->constructor();
    int         size;
    void       *work;

    BackgroundWorkerUnblockSignals();
    BackgroundWorkerInitializeConnection(pool->dbname, NULL);

    while (true)
    {
        /* Wait until the producer has queued at least one work item */
        PGSemaphoreLock(&pool->available);
        SpinLockAcquire(&pool->lock);

        /* Each item is a 4-byte length header followed by the payload */
        size = *(int *) &pool->queue[pool->head];
        Assert(size < pool->size);
        work = malloc(size);
        pool->pending -= 1;
        pool->active += 1;
        if (pool->lastPeakTime == 0 && pool->active == pool->nWorkers && pool->pending != 0)
            pool->lastPeakTime = MtmGetSystemTime();

        if (pool->head + size + 4 > pool->size)
        {
            /* Payload was wrapped around to the start of the ring buffer */
            memcpy(work, pool->queue, size);
            pool->head = INTALIGN(size);
        }
        else
        {
            memcpy(work, &pool->queue[pool->head + 4], size);
            pool->head += 4 + INTALIGN(size);
        }
        if (pool->size == pool->head)
            pool->head = 0;

        /* Wake up a producer blocked on a full queue */
        if (pool->producerBlocked)
        {
            pool->producerBlocked = false;
            PGSemaphoreUnlock(&pool->overflow);
            pool->lastPeakTime = 0;
        }
        SpinLockRelease(&pool->lock);

        /* Execute the work item outside the spinlock */
        pool->executor(id, work, size);
        free(work);

        SpinLockAcquire(&pool->lock);
        pool->active -= 1;
        pool->lastPeakTime = 0;
        SpinLockRelease(&pool->lock);
    }
}
void
BgwPoolExecute(BgwPool *pool, void *work, size_t size)
{
    if (size + 4 > pool->size)
    {
        /*
         * Size of work is larger than size of shared buffer: run it
         * immediately
         */
        pool->executor(0, work, size);
        return;
    }

    SpinLockAcquire(&pool->lock);
    while (true)
    {
        /*
         * Block if there is not enough room for the 4-byte length header
         * plus payload, either between tail and the end of the buffer or
         * wrapped around before head.
         */
        if ((pool->head <= pool->tail && pool->size - pool->tail < size + 4 && pool->head < size)
            || (pool->head > pool->tail && pool->head - pool->tail < size + 4))
        {
            if (pool->lastPeakTime == 0)
                pool->lastPeakTime = MtmGetSystemTime();

            pool->producerBlocked = true;
            SpinLockRelease(&pool->lock);
            PGSemaphoreLock(&pool->overflow);
            SpinLockAcquire(&pool->lock);
        }
        else
        {
            pool->pending += 1;
            if (pool->lastPeakTime == 0 && pool->active == pool->nWorkers && pool->pending != 0)
                pool->lastPeakTime = MtmGetSystemTime();

            *(int *) &pool->queue[pool->tail] = size;
            if (pool->size - pool->tail >= size + 4)
            {
                memcpy(&pool->queue[pool->tail + 4], work, size);
                pool->tail += 4 + INTALIGN(size);
            }
            else
            {
                /* Not enough room at the end: wrap the payload to the start */
                memcpy(pool->queue, work, size);
                pool->tail = INTALIGN(size);
            }
            if (pool->tail == pool->size)
                pool->tail = 0;

            /* Signal a waiting worker that an item is available */
            PGSemaphoreUnlock(&pool->available);
            break;
        }
    }
    SpinLockRelease(&pool->lock);
}
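/*
 * Standalone layout sketch for the queue arithmetic above (illustrative
 * only; MY_INTALIGN is a local stand-in for PostgreSQL's INTALIGN macro).
 * Each enqueued item occupies a 4-byte length header plus the payload
 * rounded up to 4-byte alignment, so the next header stays int-aligned.
 */
#include <stdio.h>

#define MY_INTALIGN(x) (((x) + 3) & ~(size_t) 3)

int
main(void)
{
    size_t      tail = 0;
    size_t      sizes[] = {10, 7, 16};

    for (int i = 0; i < 3; i++)
    {
        printf("item %d: header at %zu, payload at %zu, next tail %zu\n",
               i, tail, tail + 4, tail + 4 + MY_INTALIGN(sizes[i]));
        tail += 4 + MY_INTALIGN(sizes[i]);
    }
    return 0;
}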
/*
 * LWLockRelease - release a previously acquired lock
 */
void
LWLockRelease(LWLockId lockid)
{
    volatile LWLock *lock = &(LWLockArray[lockid].lock);
    PGPROC     *head;
    PGPROC     *proc;
    int         i;

    PRINT_LWDEBUG("LWLockRelease", lockid, lock);

    /*
     * Remove lock from list of locks held.  Usually, but not always, it will
     * be the latest-acquired lock; so search array backwards.
     */
    for (i = num_held_lwlocks; --i >= 0;)
    {
        if (lockid == held_lwlocks[i])
            break;
    }
    if (i < 0)
        elog(ERROR, "lock %d is not held", (int) lockid);
    num_held_lwlocks--;
    for (; i < num_held_lwlocks; i++)
        held_lwlocks[i] = held_lwlocks[i + 1];

    /* Acquire mutex.  Time spent holding mutex should be short! */
    SpinLockAcquire(&lock->mutex);

    /* Release my hold on lock */
    if (lock->exclusive > 0)
        lock->exclusive--;
    else
    {
        Assert(lock->shared > 0);
        lock->shared--;
    }

    /*
     * See if I need to awaken any waiters.  If I released a non-last shared
     * hold, there cannot be anything to do.  Also, do not awaken any waiters
     * if someone has already awakened waiters that haven't yet acquired the
     * lock.
     */
    head = lock->head;
    if (head != NULL)
    {
        if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK)
        {
            /*
             * Remove the to-be-awakened PGPROCs from the queue.  If the
             * front waiter wants exclusive lock, awaken him only. Otherwise
             * awaken as many waiters as want shared access.
             */
            proc = head;
            if (!proc->lwExclusive)
            {
                while (proc->lwWaitLink != NULL &&
                       !proc->lwWaitLink->lwExclusive)
                    proc = proc->lwWaitLink;
            }
            /* proc is now the last PGPROC to be released */
            lock->head = proc->lwWaitLink;
            proc->lwWaitLink = NULL;
            /* prevent additional wakeups until retryer gets to run */
            lock->releaseOK = false;
        }
        else
        {
            /* lock is still held, can't awaken anything */
            head = NULL;
        }
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

    TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid);

    /*
     * Awaken any waiters I removed from the queue.
     */
    while (head != NULL)
    {
        LOG_LWDEBUG("LWLockRelease", lockid, "release waiter");
        proc = head;
        head = proc->lwWaitLink;
        proc->lwWaitLink = NULL;
        proc->lwWaiting = false;
        PGSemaphoreUnlock(&proc->sem);
    }

    /*
     * Now okay to allow cancel/die interrupts.
     */
    RESUME_INTERRUPTS();
}
/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
    volatile LWLock *lock = &(LWLockArray[lockid].lock);
    PGPROC     *proc = MyProc;
    bool        retry = false;
    int         extraWaits = 0;

    PRINT_LWDEBUG("LWLockAcquire", lockid, lock);

#ifdef LWLOCK_STATS
    /* Set up local count state first time through in a given process */
    if (counts_for_pid != MyProcPid)
    {
        int        *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
        int         numLocks = LWLockCounter[1];

        sh_acquire_counts = calloc(numLocks, sizeof(int));
        ex_acquire_counts = calloc(numLocks, sizeof(int));
        block_counts = calloc(numLocks, sizeof(int));
        counts_for_pid = MyProcPid;
        on_shmem_exit(print_lwlock_stats, 0);
    }
    /* Count lock acquisition attempts */
    if (mode == LW_EXCLUSIVE)
        ex_acquire_counts[lockid]++;
    else
        sh_acquire_counts[lockid]++;
#endif   /* LWLOCK_STATS */

    /*
     * We can't wait if we haven't got a PGPROC.  This should only occur
     * during bootstrap or shared memory initialization.  Put an Assert here
     * to catch unsafe coding practices.
     */
    Assert(!(proc == NULL && IsUnderPostmaster));

    /* Ensure we will have room to remember the lock */
    if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
        elog(ERROR, "too many LWLocks taken");

    /*
     * Lock out cancel/die interrupts until we exit the code section
     * protected by the LWLock.  This ensures that interrupts will not
     * interfere with manipulations of data structures in shared memory.
     */
    HOLD_INTERRUPTS();

    /*
     * Loop here to try to acquire lock after each time we are signaled by
     * LWLockRelease.
     *
     * NOTE: it might seem better to have LWLockRelease actually grant us the
     * lock, rather than retrying and possibly having to go back to sleep.
     * But in practice that is no good because it means a process swap for
     * every lock acquisition when two or more processes are contending for
     * the same lock.  Since LWLocks are normally used to protect
     * not-very-long sections of computation, a process needs to be able to
     * acquire and release the same lock many times during a single CPU time
     * slice, even in the presence of contention.  The efficiency of being
     * able to do that outweighs the inefficiency of sometimes wasting a
     * process dispatch cycle because the lock is not free when a released
     * waiter finally gets to run.  See pgsql-hackers archives for 29-Dec-01.
     */
    for (;;)
    {
        bool        mustwait;

        /* Acquire mutex.  Time spent holding mutex should be short! */
        SpinLockAcquire(&lock->mutex);

        /* If retrying, allow LWLockRelease to release waiters again */
        if (retry)
            lock->releaseOK = true;

        /* If I can get the lock, do so quickly. */
        if (mode == LW_EXCLUSIVE)
        {
            if (lock->exclusive == 0 && lock->shared == 0)
            {
                lock->exclusive++;
                mustwait = false;
            }
            else
                mustwait = true;
        }
        else
        {
            if (lock->exclusive == 0)
            {
                lock->shared++;
                mustwait = false;
            }
            else
                mustwait = true;
        }

        if (!mustwait)
            break;              /* got the lock */

        /*
         * Add myself to wait queue.
         *
         * If we don't have a PGPROC structure, there's no way to wait. This
         * should never occur, since MyProc should only be null during shared
         * memory initialization.
         */
        if (proc == NULL)
            elog(PANIC, "cannot wait without a PGPROC structure");

        proc->lwWaiting = true;
        proc->lwExclusive = (mode == LW_EXCLUSIVE);
        proc->lwWaitLink = NULL;
        if (lock->head == NULL)
            lock->head = proc;
        else
            lock->tail->lwWaitLink = proc;
        lock->tail = proc;

        /* Can release the mutex now */
        SpinLockRelease(&lock->mutex);

        /*
         * Wait until awakened.
         *
         * Since we share the process wait semaphore with the regular lock
         * manager and ProcWaitForSignal, and we may need to acquire an
         * LWLock while one of those is pending, it is possible that we get
         * awakened for a reason other than being signaled by LWLockRelease.
         * If so, loop back and wait again.  Once we've gotten the LWLock,
         * re-increment the sema by the number of additional signals
         * received, so that the lock manager or signal manager will see the
         * received signal when it next waits.
         */
        LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");

#ifdef LWLOCK_STATS
        block_counts[lockid]++;
#endif

        TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);

        for (;;)
        {
            /* "false" means cannot accept cancel/die interrupt here. */
            PGSemaphoreLock(&proc->sem, false);
            if (!proc->lwWaiting)
                break;
            extraWaits++;
        }

        TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);

        LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");

        /* Now loop back and try to acquire lock again. */
        retry = true;
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

    TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);

    /* Add lock to list of locks held by this backend */
    held_lwlocks[num_held_lwlocks++] = lockid;

    /*
     * Fix the process wait semaphore's count for any absorbed wakeups.
     */
    while (extraWaits-- > 0)
        PGSemaphoreUnlock(&proc->sem);
}
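/*
 * Typical call pattern (illustrative; "SomeLock" stands for any LWLockId
 * and is not defined in the code above).  Cancel/die interrupts stay held
 * off between the acquire and the matching release.
 */
LWLockAcquire(SomeLock, LW_EXCLUSIVE);
/* ... read or modify the shared state this lock protects ... */
LWLockRelease(SomeLock);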
/*
 * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
 *
 * If the lock is held and *valptr equals oldval, waits until the lock is
 * either freed, or the lock holder updates *valptr by calling
 * LWLockUpdateVar.  If the lock is free on exit (immediately or after
 * waiting), returns true.  If the lock is still held, but *valptr no longer
 * matches oldval, returns false and sets *newval to the current value in
 * *valptr.
 *
 * It's possible that the lock holder releases the lock, but another backend
 * acquires it again before we get a chance to observe that the lock was
 * momentarily released.  We wouldn't need to wait for the new lock holder,
 * but we cannot distinguish that case, so we will have to wait.
 *
 * Note: this function ignores shared lock holders; if the lock is held
 * in shared mode, returns 'true'.
 */
bool
LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
{
    PGPROC     *proc = MyProc;
    int         extraWaits = 0;
    bool        result = false;
#ifdef LWLOCK_STATS
    lwlock_stats *lwstats;
#endif

    PRINT_LWDEBUG("LWLockWaitForVar", lock);

#ifdef LWLOCK_STATS
    lwstats = get_lwlock_stats_entry(lock);
#endif   /* LWLOCK_STATS */

    /*
     * Quick test first to see if the slot is free right now.
     *
     * XXX: the caller uses a spinlock before this, so we don't need a memory
     * barrier here as far as the current usage is concerned.  But that might
     * not be safe in general.
     */
    if (lock->exclusive == 0)
        return true;

    /*
     * Lock out cancel/die interrupts while we sleep on the lock.  There is
     * no cleanup mechanism to remove us from the wait queue if we got
     * interrupted.
     */
    HOLD_INTERRUPTS();

    /*
     * Loop here to check the lock's status after each time we are signaled.
     */
    for (;;)
    {
        bool        mustwait;
        uint64      value;

        /* Acquire mutex.  Time spent holding mutex should be short! */
#ifdef LWLOCK_STATS
        lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
#else
        SpinLockAcquire(&lock->mutex);
#endif

        /* Is the lock now free, and if not, does the value match? */
        if (lock->exclusive == 0)
        {
            result = true;
            mustwait = false;
        }
        else
        {
            value = *valptr;
            if (value != oldval)
            {
                result = false;
                mustwait = false;
                *newval = value;
            }
            else
                mustwait = true;
        }

        if (!mustwait)
            break;              /* the lock was free or value didn't match */

        /*
         * Add myself to wait queue.
         */
        proc->lwWaiting = true;
        proc->lwWaitMode = LW_WAIT_UNTIL_FREE;
        /* waiters are added to the front of the queue */
        proc->lwWaitLink = lock->head;
        if (lock->head == NULL)
            lock->tail = proc;
        lock->head = proc;

        /*
         * Set releaseOK, to make sure we get woken up as soon as the lock is
         * released.
         */
        lock->releaseOK = true;

        /* Can release the mutex now */
        SpinLockRelease(&lock->mutex);

        /*
         * Wait until awakened.
         *
         * Since we share the process wait semaphore with the regular lock
         * manager and ProcWaitForSignal, and we may need to acquire an
         * LWLock while one of those is pending, it is possible that we get
         * awakened for a reason other than being signaled by LWLockRelease.
         * If so, loop back and wait again.  Once we've gotten the LWLock,
         * re-increment the sema by the number of additional signals
         * received, so that the lock manager or signal manager will see the
         * received signal when it next waits.
         */
        LOG_LWDEBUG("LWLockWaitForVar", T_NAME(lock), T_ID(lock), "waiting");

#ifdef LWLOCK_STATS
        lwstats->block_count++;
#endif

        TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock),
                                           LW_EXCLUSIVE);

        for (;;)
        {
            /* "false" means cannot accept cancel/die interrupt here. */
            PGSemaphoreLock(&proc->sem, false);
            if (!proc->lwWaiting)
                break;
            extraWaits++;
        }

        TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock),
                                          LW_EXCLUSIVE);

        LOG_LWDEBUG("LWLockWaitForVar", T_NAME(lock), T_ID(lock), "awakened");

        /* Now loop back and check the status of the lock again. */
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

    TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), T_ID(lock), LW_EXCLUSIVE);

    /*
     * Fix the process wait semaphore's count for any absorbed wakeups.
     */
    while (extraWaits-- > 0)
        PGSemaphoreUnlock(&proc->sem);

    /*
     * Now okay to allow cancel/die interrupts.
     */
    RESUME_INTERRUPTS();

    return result;
}
/*
 * LWLockAcquireOrWait - Acquire lock, or wait until it's free
 *
 * The semantics of this function are a bit funky.  If the lock is currently
 * free, it is acquired in the given mode, and the function returns true.  If
 * the lock isn't immediately free, the function waits until it is released
 * and returns false, but does not acquire the lock.
 *
 * This is currently used for WALWriteLock: when a backend flushes the WAL,
 * holding WALWriteLock, it can flush the commit records of many other
 * backends as a side-effect.  Those other backends need to wait until the
 * flush finishes, but don't need to acquire the lock anymore.  They can just
 * wake up, observe that their records have already been flushed, and return.
 */
bool
LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
{
    PGPROC     *proc = MyProc;
    bool        mustwait;
    int         extraWaits = 0;
#ifdef LWLOCK_STATS
    lwlock_stats *lwstats;
#endif

    PRINT_LWDEBUG("LWLockAcquireOrWait", lock);

#ifdef LWLOCK_STATS
    lwstats = get_lwlock_stats_entry(lock);
#endif

    /* Ensure we will have room to remember the lock */
    if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
        elog(ERROR, "too many LWLocks taken");

    /*
     * Lock out cancel/die interrupts until we exit the code section
     * protected by the LWLock.  This ensures that interrupts will not
     * interfere with manipulations of data structures in shared memory.
     */
    HOLD_INTERRUPTS();

    /* Acquire mutex.  Time spent holding mutex should be short! */
    SpinLockAcquire(&lock->mutex);

    /* If I can get the lock, do so quickly. */
    if (mode == LW_EXCLUSIVE)
    {
        if (lock->exclusive == 0 && lock->shared == 0)
        {
            lock->exclusive++;
            mustwait = false;
        }
        else
            mustwait = true;
    }
    else
    {
        if (lock->exclusive == 0)
        {
            lock->shared++;
            mustwait = false;
        }
        else
            mustwait = true;
    }

    if (mustwait)
    {
        /*
         * Add myself to wait queue.
         *
         * If we don't have a PGPROC structure, there's no way to wait.  This
         * should never occur, since MyProc should only be null during shared
         * memory initialization.
         */
        if (proc == NULL)
            elog(PANIC, "cannot wait without a PGPROC structure");

        proc->lwWaiting = true;
        proc->lwWaitMode = LW_WAIT_UNTIL_FREE;
        proc->lwWaitLink = NULL;
        if (lock->head == NULL)
            lock->head = proc;
        else
            lock->tail->lwWaitLink = proc;
        lock->tail = proc;

        /* Can release the mutex now */
        SpinLockRelease(&lock->mutex);

        /*
         * Wait until awakened.  Like in LWLockAcquire, be prepared for bogus
         * wakeups, because we share the semaphore with ProcWaitForSignal.
         */
        LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock),
                    "waiting");

#ifdef LWLOCK_STATS
        lwstats->block_count++;
#endif

        TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), mode);

        for (;;)
        {
            /* "false" means cannot accept cancel/die interrupt here. */
            PGSemaphoreLock(&proc->sem, false);
            if (!proc->lwWaiting)
                break;
            extraWaits++;
        }

        TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), mode);

        LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock),
                    "awakened");
    }
    else
    {
        /* We are done updating shared state of the lock itself. */
        SpinLockRelease(&lock->mutex);
    }

    /*
     * Fix the process wait semaphore's count for any absorbed wakeups.
     */
    while (extraWaits-- > 0)
        PGSemaphoreUnlock(&proc->sem);

    if (mustwait)
    {
        /* Failed to get lock, so release interrupt holdoff */
        RESUME_INTERRUPTS();
        LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(lock), T_ID(lock), "failed");
        TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), T_ID(lock),
                                                     mode);
    }
    else
    {
        /* Add lock to list of locks held by this backend */
        held_lwlocks[num_held_lwlocks++] = lock;
        TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), T_ID(lock),
                                                mode);
    }

    return !mustwait;
}
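/*
 * Caller pattern sketched in the header comment above (a simplified,
 * illustrative paraphrase of how a WAL flush might use this function; not
 * the actual xlog.c code, and "WALWriteLock" here is just the named lock
 * the comment refers to).
 */
if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
{
    /* Got the lock: we must perform the flush ourselves. */
    /* ... write and fsync WAL up to the requested record ... */
    LWLockRelease(WALWriteLock);
}
else
{
    /*
     * Another backend held the lock and has now finished its flush; our
     * records may already be on disk, so re-check before retrying.
     */
}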
/*
 * LWLockRelease - release a previously acquired lock
 */
void
LWLockRelease(LWLock *lock)
{
    PGPROC     *head;
    PGPROC     *proc;
    int         i;

    PRINT_LWDEBUG("LWLockRelease", lock);

    /*
     * Remove lock from list of locks held.  Usually, but not always, it will
     * be the latest-acquired lock; so search array backwards.
     */
    for (i = num_held_lwlocks; --i >= 0;)
    {
        if (lock == held_lwlocks[i])
            break;
    }
    if (i < 0)
        elog(ERROR, "lock %s %d is not held", T_NAME(lock), T_ID(lock));
    num_held_lwlocks--;
    for (; i < num_held_lwlocks; i++)
        held_lwlocks[i] = held_lwlocks[i + 1];

    /* Acquire mutex.  Time spent holding mutex should be short! */
    SpinLockAcquire(&lock->mutex);

    /* Release my hold on lock */
    if (lock->exclusive > 0)
        lock->exclusive--;
    else
    {
        Assert(lock->shared > 0);
        lock->shared--;
    }

    /*
     * See if I need to awaken any waiters.  If I released a non-last shared
     * hold, there cannot be anything to do.  Also, do not awaken any waiters
     * if someone has already awakened waiters that haven't yet acquired the
     * lock.
     */
    head = lock->head;
    if (head != NULL)
    {
        if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK)
        {
            /*
             * Remove the to-be-awakened PGPROCs from the queue.
             */
            bool        releaseOK = true;

            proc = head;

            /*
             * First wake up any backends that want to be woken up without
             * acquiring the lock.
             */
            while (proc->lwWaitMode == LW_WAIT_UNTIL_FREE && proc->lwWaitLink)
                proc = proc->lwWaitLink;

            /*
             * If the front waiter wants exclusive lock, awaken him only.
             * Otherwise awaken as many waiters as want shared access.
             */
            if (proc->lwWaitMode != LW_EXCLUSIVE)
            {
                while (proc->lwWaitLink != NULL &&
                       proc->lwWaitLink->lwWaitMode != LW_EXCLUSIVE)
                {
                    if (proc->lwWaitMode != LW_WAIT_UNTIL_FREE)
                        releaseOK = false;
                    proc = proc->lwWaitLink;
                }
            }
            /* proc is now the last PGPROC to be released */
            lock->head = proc->lwWaitLink;
            proc->lwWaitLink = NULL;

            /*
             * Prevent additional wakeups until retryer gets to run. Backends
             * that are just waiting for the lock to become free don't retry
             * automatically.
             */
            if (proc->lwWaitMode != LW_WAIT_UNTIL_FREE)
                releaseOK = false;

            lock->releaseOK = releaseOK;
        }
        else
        {
            /* lock is still held, can't awaken anything */
            head = NULL;
        }
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

    TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock), T_ID(lock));

    /*
     * Awaken any waiters I removed from the queue.
     */
    while (head != NULL)
    {
        LOG_LWDEBUG("LWLockRelease", T_NAME(lock), T_ID(lock),
                    "release waiter");
        proc = head;
        head = proc->lwWaitLink;
        proc->lwWaitLink = NULL;

        /*
         * Guarantee that lwWaiting being unset only becomes visible once the
         * unlink from the list has completed.  Otherwise the target backend
         * could be woken up for some other reason and enqueue for a new
         * lock; if that happens before the list unlink happens, the list
         * would end up being corrupted.
         *
         * The barrier pairs with the SpinLockAcquire() when enqueuing for
         * another lock.
         */
        pg_write_barrier();
        proc->lwWaiting = false;
        PGSemaphoreUnlock(&proc->sem);
    }

    /*
     * Now okay to allow cancel/die interrupts.
     */
    RESUME_INTERRUPTS();
}
int
main(int argc, char **argv)
{
    MyStorage  *storage;
    int         cpid;

    printf("Creating shared memory ... ");
    fflush(stdout);

    storage = (MyStorage *) PGSharedMemoryCreate(8192, false, 5433);

    storage->flag = 1234;

    printf("OK\n");

    printf("Creating semaphores ... ");
    fflush(stdout);

    PGReserveSemaphores(2, 5433);

    PGSemaphoreCreate(&storage->sem);

    printf("OK\n");

    /* sema initial value is 1, so lock should work */
    printf("Testing Lock ... ");
    fflush(stdout);

    PGSemaphoreLock(&storage->sem, false);

    printf("OK\n");

    /* now sema value is 0, so trylock should fail */
    printf("Testing TryLock ... ");
    fflush(stdout);

    if (PGSemaphoreTryLock(&storage->sem))
        printf("unexpected result!\n");
    else
        printf("OK\n");

    /* unlocking twice and then locking twice should work... */
    printf("Testing Multiple Lock ... ");
    fflush(stdout);

    PGSemaphoreUnlock(&storage->sem);
    PGSemaphoreUnlock(&storage->sem);

    PGSemaphoreLock(&storage->sem, false);
    PGSemaphoreLock(&storage->sem, false);

    printf("OK\n");

    /* check Reset too */
    printf("Testing Reset ... ");
    fflush(stdout);

    PGSemaphoreUnlock(&storage->sem);

    PGSemaphoreReset(&storage->sem);

    if (PGSemaphoreTryLock(&storage->sem))
        printf("unexpected result!\n");
    else
        printf("OK\n");

    /* Fork a child process and see if it can communicate */
    printf("Forking child process ... ");
    fflush(stdout);

    cpid = fork();
    if (cpid == 0)
    {
        /* In child */
        on_exit_reset();
        sleep(3);
        storage->flag++;
        PGSemaphoreUnlock(&storage->sem);
        proc_exit(0);
    }
    if (cpid < 0)
    {
        /* Fork failed */
        printf("failed: %s\n", strerror(errno));
        proc_exit(1);
    }

    printf("forked child pid %d OK\n", cpid);

    if (storage->flag != 1234)
        printf("Wrong value found in shared memory!\n");

    printf("Waiting for child (should wait 3 sec here) ... ");
    fflush(stdout);

    PGSemaphoreLock(&storage->sem, false);

    printf("OK\n");

    if (storage->flag != 1235)
        printf("Wrong value found in shared memory!\n");

    /* Test shutdown */
    printf("Running shmem_exit processing ... ");
    fflush(stdout);

    shmem_exit(0);

    printf("OK\n");

    printf("Tests complete.\n");

    proc_exit(0);

    return 0;                   /* not reached */
}
/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
    volatile LWLock *lock = &(LWLockArray[lockid].lock);
#if LWLOCK_LOCK_PARTS > 1
    volatile LWLockPart *part = LWLOCK_PART(lock, lockid, MyBackendId);
#endif
    PGPROC     *proc = MyProc;
    bool        retry = false;
    int         extraWaits = 0;

    PRINT_LWDEBUG("LWLockAcquire", lockid, lock);

#ifdef LWLOCK_STATS
    /* Set up local count state first time through in a given process */
    if (counts_for_pid != MyProcPid)
    {
        int        *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
        int         numLocks = LWLockCounter[1];

        sh_acquire_counts = calloc(numLocks, sizeof(int));
        ex_acquire_counts = calloc(numLocks, sizeof(int));
        block_counts = calloc(numLocks, sizeof(int));
        counts_for_pid = MyProcPid;
        on_shmem_exit(print_lwlock_stats, 0);
    }
    /* Count lock acquisition attempts */
    if (mode == LW_EXCLUSIVE)
        ex_acquire_counts[lockid]++;
    else
        sh_acquire_counts[lockid]++;
#endif   /* LWLOCK_STATS */

    /*
     * We can't wait if we haven't got a PGPROC.  This should only occur
     * during bootstrap or shared memory initialization.  Put an Assert here
     * to catch unsafe coding practices.
     */
    Assert(!(proc == NULL && IsUnderPostmaster));

    /* Ensure we will have room to remember the lock */
    if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
        elog(ERROR, "too many LWLocks taken");

    /*
     * Lock out cancel/die interrupts until we exit the code section
     * protected by the LWLock.  This ensures that interrupts will not
     * interfere with manipulations of data structures in shared memory.
     */
    HOLD_INTERRUPTS();

    /*
     * Loop here to try to acquire lock after each time we are signaled by
     * LWLockRelease.
     *
     * NOTE: it might seem better to have LWLockRelease actually grant us the
     * lock, rather than retrying and possibly having to go back to sleep.
     * But in practice that is no good because it means a process swap for
     * every lock acquisition when two or more processes are contending for
     * the same lock.  Since LWLocks are normally used to protect
     * not-very-long sections of computation, a process needs to be able to
     * acquire and release the same lock many times during a single CPU time
     * slice, even in the presence of contention.  The efficiency of being
     * able to do that outweighs the inefficiency of sometimes wasting a
     * process dispatch cycle because the lock is not free when a released
     * waiter finally gets to run.  See pgsql-hackers archives for 29-Dec-01.
     */
    for (;;)
    {
        bool        mustwait;

        if (mode == LW_SHARED)
        {
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
            /*
             * Increment shared counter partition.  If there's no contention,
             * this is sufficient to take the lock.
             */
            LWLOCK_PART_SHARED_POSTINC_ATOMIC(lock, lockid, part, MyBackendId);
            LWLOCK_PART_SHARED_FENCE();

            /*
             * A concurrent exclusive locking attempt does the following
             * three steps
             *   1) Acquire mutex
             *   2) Check shared counter partitions for readers.
             *   3a) If found, add proc to wait queue, block, restart at (1)
             *   3b) If not found, set exclusive flag, continue with (4)
             *   4) Enter protected section
             * The fence after the atomic add above ensures that no further
             * such attempt can proceed to (3b) or beyond.  There may be
             * pre-existing exclusive locking attempts at step (3b) or
             * beyond, but we can recognize those by either the mutex being
             * taken, or the exclusive flag being set.  Conversely, if we see
             * neither, we may proceed and enter the protected section.
             *
             * FIXME: This doesn't work if slock_t is a struct or doesn't
             * use 0 for state "unlocked".
             */
            if ((lock->mutex == 0) && (lock->exclusive == 0))
            {
                /*
                 * If retrying, allow LWLockRelease to release waiters again.
                 * Usually this happens after we acquired the mutex, but if
                 * we skip that, we still need to set releaseOK.
                 *
                 * Acquiring the mutex here is not really an option - if many
                 * readers are awoken simultaneously by an exclusive unlock,
                 * that would be a source of considerable contention.
                 *
                 * Fortunately, this is safe even without the mutex.  First,
                 * there actually cannot be any non-fast-path unlocking
                 * attempt in progress, because we'd then either still see
                 * the exclusive flag set or the mutex being taken.  And even
                 * if there was, and such an attempt cleared the flag
                 * immediately after we set it, it'd also wake up some waiter
                 * who'd then re-set the flag.
                 *
                 * The only reason to do this here, and not directly after
                 * returning from PGSemaphoreLock(), is that it seems
                 * beneficial to make SpinLockAcquire() the first thing to
                 * touch the lock if possible, in case we acquire the spin
                 * lock at all.  That way, the cache line doesn't go through
                 * a possible shared state, but instead directly to
                 * exclusive.  On Opterons at least, there seems to be a
                 * difference, c.f. the comment above tas() for x86_64 in
                 * s_lock.h
                 */
                if (retry && !lock->releaseOK)
                    lock->releaseOK = true;

                goto lock_acquired;
            }

            /*
             * At this point, we don't know if the concurrent exclusive
             * locker has proceeded to (3b) or blocked.  We must take the
             * mutex and re-check.
             */
#endif   /* LWLOCK_PART_SHARED_OPS_ATOMIC */

            /* Acquire mutex.  Time spent holding mutex should be short! */
            SpinLockAcquire(&lock->mutex);

            if (lock->exclusive == 0)
            {
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
                /* Already incremented the shared counter partition above */
#else
                lock->shared++;
#endif
                mustwait = false;
            }
            else
            {
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
                /*
                 * Must undo shared counter partition increment.  Note that
                 * we *need* to do that while holding the mutex.  Otherwise,
                 * the exclusive lock could be released and attempted to be
                 * re-acquired before we undo the increment.  That attempt
                 * would then block, even though there'd be no lock holder
                 * left.
                 */
                LWLOCK_PART_SHARED_POSTDEC_ATOMIC(lock, lockid, part, MyBackendId);
#endif
                mustwait = true;
            }
        }
        else
        {
            /*
             * Step (1).  Acquire mutex.  Time spent holding mutex should be
             * short!
             */
            SpinLockAcquire(&lock->mutex);

            if (lock->exclusive == 0)
            {
                /*
                 * Step (2).  Check for shared lockers.  This surely happens
                 * after (1), otherwise SpinLockAcquire() is broken.  Lock
                 * acquire semantics demand that no load must be re-ordered
                 * from after a lock acquisition to before, for obvious
                 * reasons.
                 */
                LWLOCK_IS_SHARED(mustwait, lock, lockid);

                if (!mustwait)
                {
                    /*
                     * Step (3a).  Set exclusive flag.  This surely happens
                     * after (2) because it depends on the result of (2), no
                     * matter how much reordering is going on here.
                     */
                    lock->exclusive++;
                }
            }
            else
                mustwait = true;
        }

        /*
         * If retrying, allow LWLockRelease to release waiters again.  This
         * is also separately done in the LW_SHARED early-exit case above,
         * and in contrast to here we don't hold the mutex there.  See the
         * comment there for why this is safe.
         */
        if (retry)
            lock->releaseOK = true;

        if (!mustwait)
            break;              /* got the lock */

        /*
         * Step (3b).  Add myself to wait queue.
         *
         * If we don't have a PGPROC structure, there's no way to wait. This
         * should never occur, since MyProc should only be null during shared
         * memory initialization.
         */
        if (proc == NULL)
            elog(PANIC, "cannot wait without a PGPROC structure");

        proc->lwWaiting = true;
        proc->lwExclusive = (mode == LW_EXCLUSIVE);
        proc->lwWaitLink = NULL;
        if (lock->head == NULL)
            lock->head = proc;
        else
            lock->tail->lwWaitLink = proc;
        lock->tail = proc;

        /* Can release the mutex now */
        SpinLockRelease(&lock->mutex);

        /*
         * Wait until awakened.
         *
         * Since we share the process wait semaphore with the regular lock
         * manager and ProcWaitForSignal, and we may need to acquire an
         * LWLock while one of those is pending, it is possible that we get
         * awakened for a reason other than being signaled by LWLockRelease.
         * If so, loop back and wait again.  Once we've gotten the LWLock,
         * re-increment the sema by the number of additional signals
         * received, so that the lock manager or signal manager will see the
         * received signal when it next waits.
         */
        LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");

#ifdef LWLOCK_STATS
        block_counts[lockid]++;
#endif

        TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);

        for (;;)
        {
            /* "false" means cannot accept cancel/die interrupt here. */
            PGSemaphoreLock(&proc->sem, false);
            if (!proc->lwWaiting)
                break;
            extraWaits++;
        }

        TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);

        LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");

        /* Now loop back and try to acquire lock again. */
        retry = true;
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

    /*
     * Step (4).  Enter protected section.  This surely happens after (3),
     * this time because lock release semantics demand that no store must be
     * moved from before a lock release to after the release, again for
     * obvious reasons.
     */
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
lock_acquired:
#endif

    TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);

    /* Add lock to list of locks held by this backend */
    held_lwlocks[num_held_lwlocks] = lockid;
    held_lwlocks_mode[num_held_lwlocks] = mode;
    ++num_held_lwlocks;

    /*
     * Fix the process wait semaphore's count for any absorbed wakeups.
     */
    while (extraWaits-- > 0)
        PGSemaphoreUnlock(&proc->sem);
}
/*
 * CheckDeadLock
 *
 * We only get to this routine if the DEADLOCK_TIMEOUT fired while waiting
 * for a lock to be released by some other process.  Look to see if there's
 * a deadlock; if not, just return and continue waiting.  (But signal
 * ProcSleep to log a message, if log_lock_waits is true.)  If we have a
 * real deadlock, remove ourselves from the lock's wait queue and signal an
 * error to ProcSleep.
 *
 * NB: this is run inside a signal handler, so be very wary about what is
 * done here or in called routines.
 */
void
CheckDeadLock(void)
{
    int         i;

    /*
     * Acquire exclusive lock on the entire shared lock data structures.
     * Must grab LWLocks in partition-number order to avoid LWLock deadlock.
     *
     * Note that the deadlock check interrupt had better not be enabled
     * anywhere that this process itself holds lock partition locks, else
     * this will wait forever.  Also note that LWLockAcquire creates a
     * critical section, so that this routine cannot be interrupted by
     * cancel/die interrupts.
     */
    for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
        LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);

    /*
     * Check to see if we've been awoken by anyone in the interim.
     *
     * If we have, we can return and resume our transaction -- happy day.
     * Before we are awoken the process releasing the lock grants it to us
     * so we know that we don't have to wait anymore.
     *
     * We check by looking to see if we've been unlinked from the wait
     * queue.  This is quicker than checking our semaphore's state, since no
     * kernel call is needed, and it is safe because we hold the lock
     * partition lock.
     */
    if (MyProc->links.prev == NULL ||
        MyProc->links.next == NULL)
        goto check_done;

#ifdef LOCK_DEBUG
    if (Debug_deadlocks)
        DumpAllLocks();
#endif

    /* Run the deadlock check, and set deadlock_state for use by ProcSleep */
    deadlock_state = DeadLockCheck(MyProc);

    if (deadlock_state == DS_HARD_DEADLOCK)
    {
        /*
         * Oops.  We have a deadlock.
         *
         * Get this process out of wait state. (Note: we could do this more
         * efficiently by relying on lockAwaited, but use this coding to
         * preserve the flexibility to kill some other transaction than the
         * one detecting the deadlock.)
         *
         * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
         * ProcSleep will report an error after we return from the signal
         * handler.
         */
        Assert(MyProc->waitLock != NULL);
        RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));

        /*
         * Unlock my semaphore so that the interrupted ProcSleep() call can
         * finish.
         */
        PGSemaphoreUnlock(&MyProc->sem);

        /*
         * We're done here.  Transaction abort caused by the error that
         * ProcSleep will raise will cause any other locks we hold to be
         * released, thus allowing other processes to wake up; we don't need
         * to do that here.  NOTE: an exception is that releasing locks we
         * hold doesn't consider the possibility of waiters that were blocked
         * behind us on the lock we just failed to get, and might now be
         * wakable because we're not in front of them anymore.  However,
         * RemoveFromWaitQueue took care of waking up any such processes.
         */
    }
    else if (log_lock_waits || deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
    {
        /*
         * Unlock my semaphore so that the interrupted ProcSleep() call can
         * print the log message (we daren't do it here because we are
         * inside a signal handler).  It will then sleep again until someone
         * releases the lock.
         *
         * If blocked by autovacuum, this wakeup will enable ProcSleep to
         * send the canceling signal to the autovacuum worker.
         */
        PGSemaphoreUnlock(&MyProc->sem);
    }

    /*
     * And release locks.  We do this in reverse order for two reasons: (1)
     * Anyone else who needs more than one of the locks will be trying to
     * lock them in increasing order; we don't want to release the other
     * process until it can get all the locks it needs.  (2) This avoids
     * O(N^2) behavior inside LWLockRelease.
     */
check_done:
    for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
        LWLockRelease(FirstLockMgrLock + i);
}
/*
 * LWLockRelease - release a previously acquired lock
 */
void
LWLockRelease(LWLockId lockid)
{
    volatile LWLock *lock = &(LWLockArray[lockid].lock);
    PGPROC     *head;
    PGPROC     *proc;
    int         i;
    bool        saveExclusive;

    PRINT_LWDEBUG("LWLockRelease", lockid, lock);

    /*
     * Remove lock from list of locks held.  Usually, but not always, it will
     * be the latest-acquired lock; so search array backwards.
     */
    for (i = num_held_lwlocks; --i >= 0;)
    {
        if (lockid == held_lwlocks[i])
            break;
    }
    if (i < 0)
        elog(ERROR, "lock %d is not held", (int) lockid);

    saveExclusive = held_lwlocks_exclusive[i];
    if (InterruptHoldoffCount <= 0)
        elog(PANIC, "upon entering lock release, the interrupt holdoff count is bad (%d) for release of lock %d (%s)",
             InterruptHoldoffCount,
             (int) lockid,
             (saveExclusive ? "Exclusive" : "Shared"));

#ifdef LWLOCK_TRACE_MIRROREDLOCK
    if (lockid == MirroredLock)
        elog(LOG, "LWLockRelease: release for MirroredLock by PID %u in held_lwlocks[%d] %s",
             MyProcPid,
             i,
             (held_lwlocks_exclusive[i] ? "Exclusive" : "Shared"));
#endif

    num_held_lwlocks--;
    for (; i < num_held_lwlocks; i++)
    {
        held_lwlocks_exclusive[i] = held_lwlocks_exclusive[i + 1];
        held_lwlocks[i] = held_lwlocks[i + 1];
#ifdef USE_TEST_UTILS_X86
        /* shift stack traces */
        held_lwlocks_depth[i] = held_lwlocks_depth[i + 1];
        memcpy(held_lwlocks_addresses[i],
               held_lwlocks_addresses[i + 1],
               held_lwlocks_depth[i] * sizeof(*held_lwlocks_depth));
#endif   /* USE_TEST_UTILS_X86 */
    }

    /* Clear out old last entry. */
    held_lwlocks_exclusive[num_held_lwlocks] = false;
    held_lwlocks[num_held_lwlocks] = 0;
#ifdef USE_TEST_UTILS_X86
    held_lwlocks_depth[num_held_lwlocks] = 0;
#endif   /* USE_TEST_UTILS_X86 */

    /* Acquire mutex.  Time spent holding mutex should be short! */
    SpinLockAcquire(&lock->mutex);

    /* Release my hold on lock */
    if (lock->exclusive > 0)
    {
        lock->exclusive--;
        lock->exclusivePid = 0;
    }
    else
    {
        Assert(lock->shared > 0);
        lock->shared--;
    }

    /*
     * See if I need to awaken any waiters.  If I released a non-last shared
     * hold, there cannot be anything to do.  Also, do not awaken any waiters
     * if someone has already awakened waiters that haven't yet acquired the
     * lock.
     */
    head = lock->head;
    if (head != NULL)
    {
        if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK)
        {
            /*
             * Remove the to-be-awakened PGPROCs from the queue.  If the
             * front waiter wants exclusive lock, awaken him only. Otherwise
             * awaken as many waiters as want shared access.
             */
            proc = head;
            if (!proc->lwExclusive)
            {
                while (proc->lwWaitLink != NULL &&
                       !proc->lwWaitLink->lwExclusive)
                {
                    proc = proc->lwWaitLink;
                    if (proc->pid != 0)
                        lock->releaseOK = false;
                }
            }
            /* proc is now the last PGPROC to be released */
            lock->head = proc->lwWaitLink;
            proc->lwWaitLink = NULL;

            /* proc->pid can be 0 if process exited while waiting for lock */
            if (proc->pid != 0)
            {
                /* prevent additional wakeups until retryer gets to run */
                lock->releaseOK = false;
            }
        }
        else
        {
            /* lock is still held, can't awaken anything */
            head = NULL;
        }
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

    PG_TRACE1(lwlock__release, lockid);

    /*
     * Awaken any waiters I removed from the queue.
     */
    while (head != NULL)
    {
#ifdef LWLOCK_TRACE_MIRROREDLOCK
        if (lockid == MirroredLock)
            elog(LOG, "LWLockRelease: release waiter for MirroredLock (this PID %u)",
                 MyProcPid);
#endif
        LOG_LWDEBUG("LWLockRelease", lockid, "release waiter");
        proc = head;
        head = proc->lwWaitLink;
        proc->lwWaitLink = NULL;
        pg_write_barrier();
        proc->lwWaiting = false;
        PGSemaphoreUnlock(&proc->sem);
    }

    /*
     * Now okay to allow cancel/die interrupts.
     */
    if (InterruptHoldoffCount <= 0)
        elog(PANIC, "upon exiting lock release, the interrupt holdoff count is bad (%d) for release of lock %d (%s)",
             InterruptHoldoffCount,
             (int) lockid,
             (saveExclusive ? "Exclusive" : "Shared"));

    RESUME_INTERRUPTS();
}
/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
    volatile LWLock *lock = LWLockArray + lockid;
    PGPROC     *proc = MyProc;
    bool        retry = false;
    int         extraWaits = 0;

    PRINT_LWDEBUG("LWLockAcquire", lockid, lock);

    /*
     * We can't wait if we haven't got a PGPROC.  This should only occur
     * during bootstrap or shared memory initialization.  Put an Assert here
     * to catch unsafe coding practices.
     */
    Assert(!(proc == NULL && IsUnderPostmaster));

    /*
     * Lock out cancel/die interrupts until we exit the code section
     * protected by the LWLock.  This ensures that interrupts will not
     * interfere with manipulations of data structures in shared memory.
     */
    HOLD_INTERRUPTS();

    /*
     * Loop here to try to acquire lock after each time we are signaled by
     * LWLockRelease.
     *
     * NOTE: it might seem better to have LWLockRelease actually grant us the
     * lock, rather than retrying and possibly having to go back to sleep.
     * But in practice that is no good because it means a process swap for
     * every lock acquisition when two or more processes are contending for
     * the same lock.  Since LWLocks are normally used to protect
     * not-very-long sections of computation, a process needs to be able to
     * acquire and release the same lock many times during a single CPU time
     * slice, even in the presence of contention.  The efficiency of being
     * able to do that outweighs the inefficiency of sometimes wasting a
     * process dispatch cycle because the lock is not free when a released
     * waiter finally gets to run.  See pgsql-hackers archives for 29-Dec-01.
     */
    for (;;)
    {
        bool        mustwait;

        /* Acquire mutex.  Time spent holding mutex should be short! */
        SpinLockAcquire_NoHoldoff(&lock->mutex);

        /* If retrying, allow LWLockRelease to release waiters again */
        if (retry)
            lock->releaseOK = true;

        /* If I can get the lock, do so quickly. */
        if (mode == LW_EXCLUSIVE)
        {
            if (lock->exclusive == 0 && lock->shared == 0)
            {
                lock->exclusive++;
                mustwait = false;
            }
            else
                mustwait = true;
        }
        else
        {
            if (lock->exclusive == 0)
            {
                lock->shared++;
                mustwait = false;
            }
            else
                mustwait = true;
        }

        if (!mustwait)
            break;              /* got the lock */

        /*
         * Add myself to wait queue.
         *
         * If we don't have a PGPROC structure, there's no way to wait. This
         * should never occur, since MyProc should only be null during shared
         * memory initialization.
         */
        if (proc == NULL)
            elog(FATAL, "cannot wait without a PGPROC structure");

        proc->lwWaiting = true;
        proc->lwExclusive = (mode == LW_EXCLUSIVE);
        proc->lwWaitLink = NULL;
        if (lock->head == NULL)
            lock->head = proc;
        else
            lock->tail->lwWaitLink = proc;
        lock->tail = proc;

        /* Can release the mutex now */
        SpinLockRelease_NoHoldoff(&lock->mutex);

        /*
         * Wait until awakened.
         *
         * Since we share the process wait semaphore with the regular lock
         * manager and ProcWaitForSignal, and we may need to acquire an
         * LWLock while one of those is pending, it is possible that we get
         * awakened for a reason other than being signaled by LWLockRelease.
         * If so, loop back and wait again.  Once we've gotten the LWLock,
         * re-increment the sema by the number of additional signals
         * received, so that the lock manager or signal manager will see the
         * received signal when it next waits.
         */
        LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");

        for (;;)
        {
            /* "false" means cannot accept cancel/die interrupt here. */
            PGSemaphoreLock(&proc->sem, false);
            if (!proc->lwWaiting)
                break;
            extraWaits++;
        }

        LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");

        /* Now loop back and try to acquire lock again. */
        retry = true;
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease_NoHoldoff(&lock->mutex);

    /* Add lock to list of locks held by this backend */
    Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
    held_lwlocks[num_held_lwlocks++] = lockid;

    /*
     * Fix the process wait semaphore's count for any absorbed wakeups.
     */
    while (extraWaits-- > 0)
        PGSemaphoreUnlock(&proc->sem);
}
/*
 * CheckDeadLock
 *
 * We only get to this routine if we got SIGALRM after DeadlockTimeout
 * while waiting for a lock to be released by some other process.  Look
 * to see if there's a deadlock; if not, just return and continue waiting.
 * (But signal ProcSleep to log a message, if log_lock_waits is true.)
 * If we have a real deadlock, remove ourselves from the lock's wait queue
 * and signal an error to ProcSleep.
 *
 * NB: this is run inside a signal handler, so be very wary about what is
 * done here or in called routines.
 */
static void
CheckDeadLock(void)
{
    int         i;

    /*
     * Acquire exclusive lock on the entire shared lock data structures.
     * Must grab LWLocks in partition-number order to avoid LWLock deadlock.
     *
     * Note that the deadlock check interrupt had better not be enabled
     * anywhere that this process itself holds lock partition locks, else
     * this will wait forever.  Also note that LWLockAcquire creates a
     * critical section, so that this routine cannot be interrupted by
     * cancel/die interrupts.
     */
    for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
        LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);

    /*
     * Check to see if we've been awoken by anyone in the interim.
     *
     * If we have, we can return and resume our transaction -- happy day.
     * Before we are awoken the process releasing the lock grants it to us
     * so we know that we don't have to wait anymore.
     *
     * We check by looking to see if we've been unlinked from the wait
     * queue.  This is quicker than checking our semaphore's state, since no
     * kernel call is needed, and it is safe because we hold the lock
     * partition lock.
     */
    if (MyProc->links.prev == INVALID_OFFSET ||
        MyProc->links.next == INVALID_OFFSET)
        goto check_done;

#ifdef LOCK_DEBUG
    if (Debug_deadlocks)
        DumpAllLocks();
#endif

    if (!DeadLockCheck(MyProc))
    {
        /* No deadlock, so keep waiting */
        goto check_done;
    }

    /*
     * Unlock my semaphore so that the interrupted ProcSleep() call can
     * finish.
     */
    PGSemaphoreUnlock(&MyProc->sem);

    /*
     * We're done here.  Transaction abort caused by the error that ProcSleep
     * will raise will cause any other locks we hold to be released, thus
     * allowing other processes to wake up; we don't need to do that here.
     * NOTE: an exception is that releasing locks we hold doesn't consider
     * the possibility of waiters that were blocked behind us on the lock we
     * just failed to get, and might now be wakable because we're not in
     * front of them anymore.  However, RemoveFromWaitQueue took care of
     * waking up any such processes.
     */

    /*
     * Release locks acquired at head of routine.  Order is not critical, so
     * do it back-to-front to avoid waking another CheckDeadLock instance
     * before it can get all the locks.
     */
check_done:
    for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
        LWLockRelease(FirstLockMgrLock + i);
}
/*
 * LWLockRelease - release a previously acquired lock
 */
void
LWLockRelease(LWLockId lockid)
{
    volatile LWLock *lock = &(LWLockArray[lockid].lock);
#if LWLOCK_LOCK_PARTS > 1
    volatile LWLockPart *part = LWLOCK_PART(lock, lockid, MyBackendId);
#endif
    PGPROC     *head = NULL;
    PGPROC     *proc;
    LWLockMode  mode;
    int         i;

    PRINT_LWDEBUG("LWLockRelease", lockid, lock);

    /*
     * Remove lock from list of locks held.  Usually, but not always, it will
     * be the latest-acquired lock; so search array backwards.
     */
    for (i = num_held_lwlocks; --i >= 0;)
    {
        if (lockid == held_lwlocks[i])
            break;
    }
    if (i < 0)
        elog(ERROR, "lock %d is not held", (int) lockid);
    mode = held_lwlocks_mode[i];
    num_held_lwlocks--;
    for (; i < num_held_lwlocks; i++)
    {
        held_lwlocks[i] = held_lwlocks[i + 1];
        held_lwlocks_mode[i] = held_lwlocks_mode[i + 1];
    }

    if (mode == LW_SHARED)
    {
#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
        int         shared_pre;

        /* Release my hold on lock */
        Assert(lock->exclusive == 0);
        shared_pre = LWLOCK_PART_SHARED_POSTDEC_ATOMIC(lock, lockid, part, MyBackendId);
        Assert(shared_pre > 0);

        /*
         * If the count didn't drop to zero (i.e., there are more lockers
         * using the same shared counter partition), we can leave waking up
         * blocked exclusive locking attempts to them.  Note that there may
         * also be shared lockers using a *different* partition, so we're
         * not necessarily the last shared locker, even if we continue.
         * Still, it's an easy optimization, so we go for it.
         */
        if (shared_pre > 1)
            goto lock_released;

        LWLOCK_PART_SHARED_FENCE();

        /*
         * A concurrent exclusive locking attempt does the following three
         * steps
         *   1) Acquire mutex
         *   2) Check shared counter partitions for readers.
         *   3a) If found, add proc to wait queue, block, restart at (1)
         *   3b) If not found, set exclusive flag, continue with (4)
         *   4) Enter protected section
         * Assume now that we're the last shared lock holder.  Then, the
         * fence after the atomic add above ensures that no further such
         * concurrent exclusive locking attempts will proceed to (3a) and
         * thus block.  There may be such attempts currently blocking or
         * about to block, but we can recognize those by either the wait
         * queue being non-empty or the mutex being taken.  Conversely, if
         * we see neither, we may assume that nobody needs to be signalled.
         *
         * Note that if two shared lockers release their lock while an
         * exclusive locking attempt is in progress, both may decide they
         * need to signal here.  Taking the mutex below will sort that out,
         * but it's a bit unfortunate that they have to race for the mutex
         * here.  Also, taking the mutex will force *other* shared lockers
         * to take the mutex also in their release path.
         * XXX: We may be able to improve that if we could distinguish
         * between mutexes held for the purpose of unlocking and mutexes
         * held for the purpose of locking.
         *
         * FIXME: This doesn't work if slock_t is a struct.
         */
        if ((lock->mutex == 0) && (lock->head == NULL))
            goto lock_released;

        /*
         * At this point, we don't know if the concurrent exclusive locker
         * has seen non-zero in our shared counter partition in its step (2)
         * or not.  We must thus take the mutex and re-check.
         */
#endif   /* LWLOCK_PART_SHARED_OPS_ATOMIC */

        /* Acquire mutex.  Time spent holding mutex should be short! */
        SpinLockAcquire(&lock->mutex);

#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
        /* Already decremented the shared counter partition above */
#else
        /* Release my hold on lock */
        lock->shared--;
#endif
    }
    else
    {
        /* Acquire mutex.  Time spent holding mutex should be short! */
        SpinLockAcquire(&lock->mutex);

        /* Release my hold on lock */
        lock->exclusive--;
        Assert(lock->exclusive == 0);
    }

    /*
     * See if I need to awaken any waiters.  If I released a non-last shared
     * hold, there cannot be anything to do.  Also, do not awaken any waiters
     * if someone has already awakened waiters that haven't yet acquired the
     * lock.
     */
    head = lock->head;
    if (head != NULL)
    {
        bool        is_shared;

        if (mode == LW_SHARED)
            LWLOCK_IS_SHARED(is_shared, lock, lockid);
        else
            is_shared = false;

        if (lock->exclusive == 0 && !is_shared && lock->releaseOK)
        {
            /*
             * Remove the to-be-awakened PGPROCs from the queue.  If the
             * front waiter wants exclusive lock, awaken him only. Otherwise
             * awaken as many waiters as want shared access.
             */
            proc = head;
            if (!proc->lwExclusive)
            {
                while (proc->lwWaitLink != NULL &&
                       !proc->lwWaitLink->lwExclusive)
                    proc = proc->lwWaitLink;
            }
            /* proc is now the last PGPROC to be released */
            lock->head = proc->lwWaitLink;
            proc->lwWaitLink = NULL;
            /* prevent additional wakeups until retryer gets to run */
            lock->releaseOK = false;
        }
        else
        {
            /* lock is still held, can't awaken anything */
            head = NULL;
        }
    }

    /* We are done updating shared state of the lock itself. */
    SpinLockRelease(&lock->mutex);

#ifdef LWLOCK_PART_SHARED_OPS_ATOMIC
lock_released:
#endif

    TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid);

    /*
     * Awaken any waiters I removed from the queue.
     */
    while (head != NULL)
    {
        LOG_LWDEBUG("LWLockRelease", lockid, "release waiter");
        proc = head;
        head = proc->lwWaitLink;
        proc->lwWaitLink = NULL;
        proc->lwWaiting = false;
        PGSemaphoreUnlock(&proc->sem);
    }

    /*
     * Now okay to allow cancel/die interrupts.
     */
    RESUME_INTERRUPTS();
}
void
s_unlock_sema(volatile slock_t *lock)
{
    PGSemaphoreUnlock((PGSemaphore) lock);
}
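/*
 * The matching lock side under this scheme, where the slock_t itself is
 * the semaphore (a sketch under that assumption, not taken from the
 * snippet above): TAS() is expected to return 0 on success, hence the
 * negation of the PGSemaphoreTryLock result.
 */
int
tas_sema(volatile slock_t *lock)
{
    /* Note that TAS() macros return 0 if successful */
    return !PGSemaphoreTryLock((PGSemaphore) lock);
}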
/*
 * Create a semaphore set with the given number of useful semaphores
 * (an additional sema is actually allocated to serve as identifier).
 * Dead Postgres sema sets are recycled if found, but we do not fail
 * upon collision with non-Postgres sema sets.
 *
 * The idea here is to detect and re-use keys that may have been assigned
 * by a crashed postmaster or backend.
 */
static IpcSemaphoreId
IpcSemaphoreCreate(int numSems)
{
    IpcSemaphoreId semId;
    union semun semun;
    PGSemaphoreData mysema;

    /* Loop till we find a free IPC key */
    for (nextSemaKey++;; nextSemaKey++)
    {
        pid_t       creatorPID;

        /* Try to create new semaphore set */
        semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
        if (semId >= 0)
            break;              /* successful create */

        /* See if it looks to be leftover from a dead Postgres process */
        semId = semget(nextSemaKey, numSems + 1, 0);
        if (semId < 0)
            continue;           /* failed: must be some other app's */
        if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
            continue;           /* sema belongs to a non-Postgres app */

        /*
         * If the creator PID is my own PID or does not belong to any extant
         * process, it's safe to zap it.
         */
        creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
        if (creatorPID <= 0)
            continue;           /* oops, GETPID failed */
        if (creatorPID != getpid())
        {
            if (kill(creatorPID, 0) == 0 || errno != ESRCH)
                continue;       /* sema belongs to a live process */
        }

        /*
         * The sema set appears to be from a dead Postgres process, or from
         * a previous cycle of life in this same process.  Zap it, if
         * possible.  This probably shouldn't fail, but if it does, assume
         * the sema set belongs to someone else after all, and continue
         * quietly.
         */
        semun.val = 0;          /* unused, but keep compiler quiet */
        if (semctl(semId, 0, IPC_RMID, semun) < 0)
            continue;

        /*
         * Now try again to create the sema set.
         */
        semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
        if (semId >= 0)
            break;              /* successful create */

        /*
         * Can only get here if some other process managed to create the
         * same sema key before we did.  Let him have that one, loop around
         * to try next key.
         */
    }

    /*
     * OK, we created a new sema set.  Mark it as created by this process.
     * We do this by setting the spare semaphore to PGSemaMagic-1 and then
     * incrementing it with semop().  That leaves it with value PGSemaMagic
     * and sempid referencing this process.
     */
    IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
    mysema.semId = semId;
    mysema.semNum = numSems;
    PGSemaphoreUnlock(&mysema);

    return semId;
}
void
s_unlock_sema(volatile slock_t *lock)
{
    PGSemaphoreUnlock(&SpinlockSemaArray[*lock]);
}