/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
 */
static inline uint32
ClockSweepTick(void)
{
    uint32      victim;

    /*
     * Atomically move hand ahead one buffer - if there are several processes
     * doing this, this can lead to buffers being returned slightly out of
     * apparent order.
     */
    victim = pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

    if (victim >= NBuffers)
    {
        uint32      originalVictim = victim;

        /* always wrap what we look up in BufferDescriptors */
        victim = victim % NBuffers;

        /*
         * If we're the one that just caused a wraparound, force
         * completePasses to be incremented while holding the spinlock. We
         * need the spinlock so StrategySyncStart() can return a consistent
         * value consisting of nextVictimBuffer and completePasses.
         */
        if (victim == 0)
        {
            uint32      expected;
            uint32      wrapped;
            bool        success = false;

            expected = originalVictim + 1;

            while (!success)
            {
                /*
                 * Acquire the spinlock while increasing completePasses. That
                 * allows other readers to read nextVictimBuffer and
                 * completePasses in a consistent manner which is required
                 * for StrategySyncStart(). In theory delaying the increment
                 * could lead to an overflow of nextVictimBuffer, but that's
                 * highly unlikely and wouldn't be particularly harmful.
                 */
                SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

                wrapped = expected % NBuffers;

                success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
                                                         &expected, wrapped);
                if (success)
                    StrategyControl->completePasses++;
                SpinLockRelease(&StrategyControl->buffer_strategy_lock);
            }
        }
    }
    return victim;
}
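/*
 * For context, a minimal sketch of the reader side that the spinlock dance
 * above exists for: StrategySyncStart() must hand the bgwriter a mutually
 * consistent (completePasses, hand position) pair. This is an illustrative
 * reconstruction under that assumption, not an authoritative copy of the
 * implementation.
 */
int
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
{
    uint32      nextVictimBuffer;
    int         result;

    SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
    nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
    result = nextVictimBuffer % NBuffers;

    if (complete_passes)
    {
        *complete_passes = StrategyControl->completePasses;

        /*
         * Account for wraparounds whose completePasses increment is still
         * pending in ClockSweepTick()'s CAS loop.
         */
        *complete_passes += nextVictimBuffer / NBuffers;
    }

    if (num_buf_alloc)
        *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);

    SpinLockRelease(&StrategyControl->buffer_strategy_lock);
    return result;
}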
void
cfs_lock_file(FileMap* map, char const* file_path)
{
    long delay = CFS_LOCK_MIN_TIMEOUT;

    while (true)
    {
        uint32 count = pg_atomic_fetch_add_u32(&map->lock, 1);

        if (count < CFS_GC_LOCK)
            break;

        if (InRecovery)
        {
            /*
             * Uhhh... looks like the last GC was interrupted.
             * Try to recover the file.
             */
            char* map_bck_path = psprintf("%s.map.bck", file_path);
            char* file_bck_path = psprintf("%s.bck", file_path);

            if (access(file_bck_path, R_OK) != 0)
            {
                /*
                 * There is no backup data file, so the data file itself was
                 * not modified: restore the map from its backup, if present.
                 */
                int md2 = open(map_bck_path, O_RDWR|PG_BINARY, 0);

                if (md2 >= 0)
                {
                    /* Recover map */
                    if (!cfs_read_file(md2, map, sizeof(FileMap)))
                        elog(LOG, "Failed to read file %s: %m", map_bck_path);
                    close(md2);
                }
            }
            else
            {
                /*
                 * Presence of the backup file means that we still have the
                 * unchanged data and map files. Just remove the backup
                 * files, grab the lock and continue processing.
                 */
                unlink(file_bck_path);
                unlink(map_bck_path);
            }
            pfree(file_bck_path);
            pfree(map_bck_path);
            break;
        }

        /* Lock is held by GC: undo our increment, back off and retry */
        pg_atomic_fetch_sub_u32(&map->lock, 1);
        pg_usleep(delay);
        if (delay < CFS_LOCK_MAX_TIMEOUT)
            delay *= 2;
    }

    if (IsUnderPostmaster && cfs_gc_workers != 0
        && pg_atomic_test_set_flag(&cfs_state->gc_started))
    {
        cfs_start_background_gc();
    }
}
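/*
 * A minimal sketch of the matching unlock path, assuming the shared lock is
 * simply the reader count incremented above (illustrative; the real CFS code
 * may differ in detail).
 */
void
cfs_unlock_file(FileMap* map)
{
    /* drop our share of the lock; GC waits for the count to drain */
    pg_atomic_fetch_sub_u32(&map->lock, 1);
}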
static void
test_atomic_uint32(void)
{
    pg_atomic_uint32 var;
    uint32      expected;
    int         i;

    pg_atomic_init_u32(&var, 0);

    if (pg_atomic_read_u32(&var) != 0)
        elog(ERROR, "atomic_read_u32() #1 wrong");

    pg_atomic_write_u32(&var, 3);

    if (pg_atomic_read_u32(&var) != 3)
        elog(ERROR, "atomic_read_u32() #2 wrong");

    if (pg_atomic_fetch_add_u32(&var, 1) != 3)
        elog(ERROR, "atomic_fetch_add_u32() #1 wrong");

    if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
        elog(ERROR, "atomic_fetch_sub_u32() #1 wrong");

    if (pg_atomic_sub_fetch_u32(&var, 3) != 0)
        elog(ERROR, "atomic_sub_fetch_u32() #1 wrong");

    if (pg_atomic_add_fetch_u32(&var, 10) != 10)
        elog(ERROR, "atomic_add_fetch_u32() #1 wrong");

    if (pg_atomic_exchange_u32(&var, 5) != 10)
        elog(ERROR, "pg_atomic_exchange_u32() #1 wrong");

    if (pg_atomic_exchange_u32(&var, 0) != 5)
        elog(ERROR, "pg_atomic_exchange_u32() #2 wrong");

    /* test around numerical limits */
    if (pg_atomic_fetch_add_u32(&var, INT_MAX) != 0)
        elog(ERROR, "pg_atomic_fetch_add_u32() #2 wrong");

    if (pg_atomic_fetch_add_u32(&var, INT_MAX) != INT_MAX)
        elog(ERROR, "pg_atomic_fetch_add_u32() #3 wrong");

    pg_atomic_fetch_add_u32(&var, 1);   /* top up to UINT_MAX */

    if (pg_atomic_read_u32(&var) != UINT_MAX)
        elog(ERROR, "atomic_read_u32() #3 wrong");

    if (pg_atomic_fetch_sub_u32(&var, INT_MAX) != UINT_MAX)
        elog(ERROR, "pg_atomic_fetch_sub_u32() #2 wrong");

    if (pg_atomic_read_u32(&var) != (uint32) INT_MAX + 1)
        elog(ERROR, "atomic_read_u32() #4 wrong: %u",
             pg_atomic_read_u32(&var));

    expected = pg_atomic_sub_fetch_u32(&var, INT_MAX);
    if (expected != 1)
        elog(ERROR, "pg_atomic_sub_fetch_u32() #3 wrong: %u", expected);

    pg_atomic_sub_fetch_u32(&var, 1);

    /* fail exchange because of old expected */
    expected = 10;
    if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
        elog(ERROR, "atomic_compare_exchange_u32() changed value spuriously");

    /* CAS is allowed to fail due to interrupts, try a couple of times */
    for (i = 0; i < 1000; i++)
    {
        expected = 0;
        if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
            break;
    }
    if (i == 1000)
        elog(ERROR, "atomic_compare_exchange_u32() never succeeded");
    if (pg_atomic_read_u32(&var) != 1)
        elog(ERROR, "atomic_compare_exchange_u32() didn't set value properly");

    pg_atomic_write_u32(&var, 0);

    /* try setting flagbits */
    if (pg_atomic_fetch_or_u32(&var, 1) & 1)
        elog(ERROR, "pg_atomic_fetch_or_u32() #1 wrong");

    if (!(pg_atomic_fetch_or_u32(&var, 2) & 1))
        elog(ERROR, "pg_atomic_fetch_or_u32() #2 wrong");

    if (pg_atomic_read_u32(&var) != 3)
        elog(ERROR, "invalid result after pg_atomic_fetch_or_u32()");

    /* try clearing flagbits */
    if ((pg_atomic_fetch_and_u32(&var, ~2) & 3) != 3)
        elog(ERROR, "pg_atomic_fetch_and_u32() #1 wrong");

    if (pg_atomic_fetch_and_u32(&var, ~1) != 1)
        elog(ERROR, "pg_atomic_fetch_and_u32() #2 wrong: is %u",
             pg_atomic_read_u32(&var));

    /* no bits set anymore */
    if (pg_atomic_fetch_and_u32(&var, ~0) != 0)
        elog(ERROR, "pg_atomic_fetch_and_u32() #3 wrong");
}
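/*
 * Beyond the test above, the canonical use of the compare-exchange API is a
 * retry loop: on failure, pg_atomic_compare_exchange_u32() stores the
 * currently observed value back into "expected", so no separate reread is
 * needed. A minimal sketch (the function name is illustrative):
 */
static void
atomic_advance_to_max(pg_atomic_uint32 *val, uint32 candidate)
{
    uint32      current = pg_atomic_read_u32(val);

    while (current < candidate)
    {
        /* succeeds only if *val still equals current */
        if (pg_atomic_compare_exchange_u32(val, &current, candidate))
            break;
        /* otherwise current now holds the latest value; loop re-checks */
    }
}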
/*
 * Get the position for storing an updated page.
 *
 * Adjust the logical used size by the (possibly negative) size delta -
 * unsigned wraparound makes the fetch_add correct either way - then
 * atomically claim newSize bytes at the end of the file. fetch_add returns
 * the old physical size, i.e. the offset at which the caller should write.
 */
uint32
cfs_alloc_page(FileMap* map, uint32 oldSize, uint32 newSize)
{
    pg_atomic_fetch_add_u32(&map->usedSize, newSize - oldSize);
    return pg_atomic_fetch_add_u32(&map->physSize, newSize);
}
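/*
 * An illustrative caller, showing why this is lock-free: concurrent writers
 * receive disjoint byte ranges. cfs_write_at() is a hypothetical stand-in
 * for the actual write path, not a name from the original source.
 */
static void
cfs_store_page_sketch(FileMap* map, int fd, char *compressedPage,
                      uint32 oldSize, uint32 newSize)
{
    /* claim a private byte range at the end of the file, lock-free */
    uint32 offs = cfs_alloc_page(map, oldSize, newSize);

    /*
     * Because each writer got its own [offs, offs + newSize) range, the
     * write itself needs no lock; only the map entry for the page must be
     * published afterwards.
     */
    cfs_write_at(fd, offs, compressedPage, newSize);
}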
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy)
{
    volatile BufferDesc *buf;
    int         bgwprocno;
    int         trycounter;

    /*
     * If given a strategy object, see whether it can select a buffer. We
     * assume strategy objects don't need buffer_strategy_lock.
     */
    if (strategy != NULL)
    {
        buf = GetBufferFromRing(strategy);
        if (buf != NULL)
            return buf;
    }

    /*
     * If asked, we need to waken the bgwriter. Since we don't want to rely
     * on a spinlock for this we force a read from shared memory once, and
     * then set the latch based on that value. We need to go through these
     * lengths because otherwise bgwprocno might be reset while/after we
     * check it, since the compiler might just reread from memory.
     *
     * This can possibly set the latch of the wrong process if the bgwriter
     * dies at just the wrong moment. But since PGPROC->procLatch is never
     * deallocated, the worst consequence is that we set the latch of some
     * arbitrary process.
     */
    bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
    if (bgwprocno != -1)
    {
        /* reset bgwprocno first, before setting the latch */
        StrategyControl->bgwprocno = -1;

        /*
         * Not acquiring ProcArrayLock here which is slightly icky. It's
         * actually fine because procLatch isn't ever freed, so we just can
         * potentially set the wrong process' (or no process') latch.
         */
        SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
    }

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption. Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

    /*
     * First check, without acquiring the lock, whether there are buffers in
     * the freelist. Since we otherwise don't require the spinlock in every
     * StrategyGetBuffer() invocation, it'd be sad to acquire it here -
     * uselessly in most cases. That obviously leaves a race where a buffer
     * is put on the freelist but we don't see the store yet - but that's
     * pretty harmless, it'll just get used during the next buffer
     * acquisition.
     *
     * If there are buffers on the freelist, acquire the spinlock to pop one
     * buffer off the freelist. Then check whether that buffer is usable and
     * repeat if not.
     *
     * Note that the freeNext fields are considered to be protected by the
     * buffer_strategy_lock, not the individual buffer header spinlocks, so
     * it's OK to manipulate them without holding a buffer's header lock.
     */
    if (StrategyControl->firstFreeBuffer >= 0)
    {
        while (true)
        {
            /* Acquire the spinlock to remove element from the freelist */
            SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

            if (StrategyControl->firstFreeBuffer < 0)
            {
                SpinLockRelease(&StrategyControl->buffer_strategy_lock);
                break;
            }

            buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
            Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

            /* Unconditionally remove buffer from freelist */
            StrategyControl->firstFreeBuffer = buf->freeNext;
            buf->freeNext = FREENEXT_NOT_IN_LIST;

            /*
             * Release the lock so someone else can access the freelist while
             * we check out this buffer.
             */
            SpinLockRelease(&StrategyControl->buffer_strategy_lock);

            /*
             * If the buffer is pinned or has a nonzero usage_count, we
             * cannot use it; discard it and retry. (This can only happen if
             * VACUUM put a valid buffer in the freelist and then someone
             * else used it before we got to it. It's probably impossible
             * altogether as of 8.3, but we'd better check anyway.)
             */
            LockBufHdr(buf);
            if (buf->refcount == 0 && buf->usage_count == 0)
            {
                if (strategy != NULL)
                    AddBufferToRing(strategy, buf);
                return buf;
            }
            UnlockBufHdr(buf);
        }
    }

    /* Nothing on the freelist, so run the "clock sweep" algorithm */
    trycounter = NBuffers;
    for (;;)
    {
        buf = GetBufferDescriptor(ClockSweepTick());

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot
         * use it; decrement the usage_count (unless pinned) and keep
         * scanning.
         */
        LockBufHdr(buf);
        if (buf->refcount == 0)
        {
            if (buf->usage_count > 0)
            {
                buf->usage_count--;
                trycounter = NBuffers;
            }
            else
            {
                /* Found a usable buffer */
                if (strategy != NULL)
                    AddBufferToRing(strategy, buf);
                return buf;
            }
        }
        else if (--trycounter == 0)
        {
            /*
             * We've scanned all the buffers without making any state
             * changes, so all the buffers are pinned (or were when we
             * looked at them). We could hope that someone will free one
             * eventually, but it's probably better to fail than to risk
             * getting stuck in an infinite loop.
             */
            UnlockBufHdr(buf);
            elog(ERROR, "no unpinned buffers available");
        }
        UnlockBufHdr(buf);
    }
}
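/*
 * For reference, the INT_ACCESS_ONCE() trick used above boils down to a
 * volatile read, which forbids the compiler from rereading the variable
 * later; a minimal sketch matching the usual definition:
 */
#define INT_ACCESS_ONCE(var)	((int) (*((volatile int *) &(var))))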
void
MMReceiverStarted(void)
{
    /*
     * fetch_add returns the value before the increment, so the receiver
     * that observes nNodes - 2 is the last of the nNodes - 1 peer receivers
     * to start; it flips the initialized flag for everyone.
     */
    if (pg_atomic_fetch_add_u32(&dtm->nReceivers, 1) == dtm->nNodes - 2)
    {
        dtm->initialized = true;
    }
}
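/*
 * A minimal sketch of the setup this "last worker in sets the flag" pattern
 * relies on: the counter must be initialized exactly once (e.g. during
 * shared-memory startup) before any receiver runs. The function name is
 * illustrative, not part of the multimaster API.
 */
static void
dtm_state_init_sketch(void)
{
    pg_atomic_init_u32(&dtm->nReceivers, 0);
    dtm->initialized = false;
}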