/*
 * StrategyGetBuffer
 *
 *  Called by the bufmgr to get the next candidate buffer to use in
 *  BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *  the selected buffer must not currently be pinned by anyone.
 *
 *  To ensure that no one else can pin the buffer before we do, we must
 *  return the buffer with the buffer header spinlock still held.  That
 *  means that we return with the BufFreelistLock still held, as well;
 *  the caller must release that lock once the spinlock is dropped.
 */
volatile BufferDesc *
StrategyGetBuffer(void)
{
    volatile BufferDesc *buf;

    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;
        buf->freePre = FREEPRE_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            elog(LOG, "Get buf %d", buf->buf_id);
            return buf;
        }
        UnlockBufHdr(buf);
    }

    /*
     * The "clock sweep" fallback has been removed in this variant: if the
     * freelist is empty, no candidate buffer is available and we return NULL
     * (still holding the BufFreelistLock, per the contract above).
     */
    return NULL;
}
/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 *      ring is empty.
 *
 * The bufhdr spin lock is held on the returned buffer.
 */
static volatile BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy)
{
    volatile BufferDesc *buf;
    Buffer bufnum;

    /* Advance to next ring slot */
    if (++strategy->current >= strategy->ring_size)
        strategy->current = 0;

    /*
     * If the slot hasn't been filled yet, tell the caller to allocate a new
     * buffer with the normal allocation strategy.  He will then fill this
     * slot by calling AddBufferToRing with the new buffer.
     */
    bufnum = strategy->buffers[strategy->current];
    if (bufnum == InvalidBuffer)
    {
        strategy->current_was_in_ring = false;
        return NULL;
    }

    /*
     * If the buffer is pinned we cannot use it under any circumstances.
     *
     * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
     * since our own previous usage of the ring element would have left it
     * there, but it might've been decremented by clock sweep since then). A
     * higher usage_count indicates someone else has touched the buffer, so we
     * shouldn't re-use it.
     */
    buf = &BufferDescriptors[bufnum - 1];
    LockBufHdr(buf);
    if (buf->refcount == 0 && buf->usage_count <= 1)
    {
        strategy->current_was_in_ring = true;
        return buf;
    }
    UnlockBufHdr(buf);

    /*
     * Tell caller to allocate a new buffer with the normal allocation
     * strategy.  He'll then replace this ring element via AddBufferToRing.
     */
    strategy->current_was_in_ring = false;
    return NULL;
}
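/*
 * GetBufferFromRing's counterpart, AddBufferToRing, is referenced here and
 * in the StrategyGetBuffer variants below but is not shown in this excerpt.
 * In stock PostgreSQL of this vintage it simply records the chosen buffer in
 * the current ring slot; a minimal sketch, assuming the same
 * BufferAccessStrategy layout used above:
 */
static void
AddBufferToRing(BufferAccessStrategy strategy, volatile BufferDesc *buf)
{
    /* Remember the buffer in the slot GetBufferFromRing last advanced to */
    strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
}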
/*
 * StrategyGetBuffer
 *
 *  Called by the bufmgr to get the next candidate buffer to use in
 *  BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *  the selected buffer must not currently be pinned by anyone.
 *
 *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *  To ensure that no one else can pin the buffer before we do, we must
 *  return the buffer with the buffer header spinlock still held.  If
 *  *lock_held is set on exit, we have returned with the BufFreelistLock
 *  still held, as well; the caller must release that lock once the spinlock
 *  is dropped.  We do it that way because releasing the BufFreelistLock
 *  might awaken other processes, and it would be bad to do the associated
 *  kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
    volatile BufferDesc *buf;
    Latch *bgwriterLatch;
    int trycounter;

    /*
     * If given a strategy object, see whether it can select a buffer.  We
     * assume strategy objects don't need the BufFreelistLock.
     */
    if (strategy != NULL)
    {
        buf = GetBufferFromRing(strategy);
        if (buf != NULL)
        {
            *lock_held = false;
            return buf;
        }
    }

    /* Nope, so lock the freelist */
    *lock_held = true;
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.  Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    StrategyControl->numBufferAllocs++;

    /*
     * If bgwriterLatch is set, we need to waken the bgwriter, but we should
     * not do so while holding BufFreelistLock; so release and re-grab.  This
     * is annoyingly tedious, but it happens at most once per bgwriter cycle,
     * so the performance hit is minimal.
     */
    bgwriterLatch = StrategyControl->bgwriterLatch;
    if (bgwriterLatch)
    {
        StrategyControl->bgwriterLatch = NULL;
        LWLockRelease(BufFreelistLock);
        SetLatch(bgwriterLatch);
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    }

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.  It's probably impossible altogether as of 8.3, but
         * we'd better check anyway.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            if (strategy != NULL)
                AddBufferToRing(strategy, buf);
            return buf;
        }
        UnlockBufHdr(buf);
    }

    if (true)
    {
        /*
         * LRU buffer replacement policy: scan every buffer and pick the
         * unpinned one with the smallest timestamp.  (The original clock
         * sweep is kept in the else branch below but is unreachable.)
         */
        int currMinTimeStamp = INT_MAX;
        volatile BufferDesc *currTargetBuffer = NULL;

        for (StrategyControl->nextVictimBuffer = 0;
             StrategyControl->nextVictimBuffer < NBuffers;
             StrategyControl->nextVictimBuffer++)
        {
            buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

            LockBufHdr(buf);
            if (buf->refcount == 0 && buf->timer < currMinTimeStamp)
            {
                currMinTimeStamp = buf->timer;
                currTargetBuffer = buf;
            }
            UnlockBufHdr(buf);
        }

        /* We made a full pass over the buffer pool */
        StrategyControl->completePasses++;

        /* If every buffer was pinned, there is nothing we can evict */
        if (currTargetBuffer == NULL)
            elog(ERROR, "no available buffer frame");

        /*
         * Re-acquire the header spinlock on the chosen buffer so we satisfy
         * the contract of returning with it held.  (The buffer could in
         * principle have been pinned since we inspected it; a production
         * implementation would recheck refcount here.)
         */
        LockBufHdr(currTargetBuffer);
        return currTargetBuffer;
    }
    else
    {
        /* Nothing on the freelist, so run the "clock sweep" algorithm */
        trycounter = NBuffers;
        for (;;)
        {
            buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

            if (++StrategyControl->nextVictimBuffer >= NBuffers)
            {
                StrategyControl->nextVictimBuffer = 0;
                StrategyControl->completePasses++;
            }

            /*
             * If the buffer is pinned or has a nonzero usage_count, we cannot
             * use it; decrement the usage_count (unless pinned) and keep
             * scanning.
             */
            LockBufHdr(buf);
            if (buf->refcount == 0)
            {
                if (buf->usage_count > 0)
                {
                    buf->usage_count--;
                    trycounter = NBuffers;
                }
                else
                {
                    /* Found a usable buffer */
                    if (strategy != NULL)
                        AddBufferToRing(strategy, buf);
                    return buf;
                }
            }
            else if (--trycounter == 0)
            {
                /*
                 * We've scanned all the buffers without making any state
                 * changes, so all the buffers are pinned (or were when we
                 * looked at them).  We could hope that someone will free one
                 * eventually, but it's probably better to fail than to risk
                 * getting stuck in an infinite loop.
                 */
                UnlockBufHdr(buf);
                elog(ERROR, "no unpinned buffers available");
            }
            UnlockBufHdr(buf);
        }

        /* not reached */
        return NULL;
    }
}
/*
 * StrategyGetBuffer
 *
 *  Called by the bufmgr to get the next candidate buffer to use in
 *  BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *  the selected buffer must not currently be pinned by anyone.
 *
 *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *  To ensure that no one else can pin the buffer before we do, we must
 *  return the buffer with the buffer header spinlock still held.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy)
{
    volatile BufferDesc *buf;
    int bgwprocno;
    int trycounter;

    /*
     * If given a strategy object, see whether it can select a buffer.  We
     * assume strategy objects don't need buffer_strategy_lock.
     */
    if (strategy != NULL)
    {
        buf = GetBufferFromRing(strategy);
        if (buf != NULL)
            return buf;
    }

    /*
     * If asked, we need to waken the bgwriter. Since we don't want to rely on
     * a spinlock for this we force a read from shared memory once, and then
     * set the latch based on that value. We need to go through that length
     * because otherwise bgprocno might be reset while/after we check because
     * the compiler might just reread from memory.
     *
     * This can possibly set the latch of the wrong process if the bgwriter
     * dies in the wrong moment. But since PGPROC->procLatch is never
     * deallocated the worst consequence of that is that we set the latch of
     * some arbitrary process.
     */
    bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
    if (bgwprocno != -1)
    {
        /* reset bgwprocno first, before setting the latch */
        StrategyControl->bgwprocno = -1;

        /*
         * Not acquiring ProcArrayLock here which is slightly icky. It's
         * actually fine because procLatch isn't ever freed, so we just can
         * potentially set the wrong process' (or no process') latch.
         */
        SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
    }

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.  Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

    /*
     * First check, without acquiring the lock, whether there's buffers in the
     * freelist. Since we otherwise don't require the spinlock in every
     * StrategyGetBuffer() invocation, it'd be sad to acquire it here -
     * uselessly in most cases. That obviously leaves a race where a buffer is
     * put on the freelist but we don't see the store yet - but that's pretty
     * harmless, it'll just get used during the next buffer acquisition.
     *
     * If there's buffers on the freelist, acquire the spinlock to pop one
     * buffer of the freelist. Then check whether that buffer is usable and
     * repeat if not.
     *
     * Note that the freeNext fields are considered to be protected by the
     * buffer_strategy_lock not the individual buffer spinlocks, so it's OK to
     * manipulate them without holding the spinlock.
     */
    if (StrategyControl->firstFreeBuffer >= 0)
    {
        while (true)
        {
            /* Acquire the spinlock to remove element from the freelist */
            SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

            if (StrategyControl->firstFreeBuffer < 0)
            {
                SpinLockRelease(&StrategyControl->buffer_strategy_lock);
                break;
            }

            buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
            Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

            /* Unconditionally remove buffer from freelist */
            StrategyControl->firstFreeBuffer = buf->freeNext;
            buf->freeNext = FREENEXT_NOT_IN_LIST;

            /*
             * Release the lock so someone else can access the freelist while
             * we check out this buffer.
             */
            SpinLockRelease(&StrategyControl->buffer_strategy_lock);

            /*
             * If the buffer is pinned or has a nonzero usage_count, we cannot
             * use it; discard it and retry.  (This can only happen if VACUUM
             * put a valid buffer in the freelist and then someone else used
             * it before we got to it.  It's probably impossible altogether as
             * of 8.3, but we'd better check anyway.)
             */
            LockBufHdr(buf);
            if (buf->refcount == 0 && buf->usage_count == 0)
            {
                if (strategy != NULL)
                    AddBufferToRing(strategy, buf);
                return buf;
            }
            UnlockBufHdr(buf);
        }
    }

    /* Nothing on the freelist, so run the "clock sweep" algorithm */
    trycounter = NBuffers;
    for (;;)
    {
        buf = GetBufferDescriptor(ClockSweepTick());

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; decrement the usage_count (unless pinned) and keep scanning.
         */
        LockBufHdr(buf);
        if (buf->refcount == 0)
        {
            if (buf->usage_count > 0)
            {
                buf->usage_count--;
                trycounter = NBuffers;
            }
            else
            {
                /* Found a usable buffer */
                if (strategy != NULL)
                    AddBufferToRing(strategy, buf);
                return buf;
            }
        }
        else if (--trycounter == 0)
        {
            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            UnlockBufHdr(buf);
            elog(ERROR, "no unpinned buffers available");
        }
        UnlockBufHdr(buf);
    }
}
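/*
 * ClockSweepTick() is called above but not shown in this excerpt.
 * Conceptually it advances the shared clock hand atomically and returns the
 * slot it pointed at.  A simplified sketch follows; the real routine lets the
 * atomic counter run freely, folds it into [0, NBuffers) in a race-free way,
 * and bumps StrategyControl->completePasses under buffer_strategy_lock on
 * wraparound, all of which this sketch glosses over.
 */
static inline uint32
ClockSweepTick(void)
{
    uint32 victim;

    /* Atomically claim the current hand position and advance it by one */
    victim = pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

    /* Fold the ever-growing counter back into the range of buffer ids */
    if (victim >= (uint32) NBuffers)
        victim = victim % NBuffers;

    return victim;
}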
/*
 * StrategyGetBuffer
 *
 *  Called by the bufmgr to get the next candidate buffer to use in
 *  BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *  the selected buffer must not currently be pinned by anyone.
 *
 *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *  To ensure that no one else can pin the buffer before we do, we must
 *  return the buffer with the buffer header spinlock still held.  If
 *  *lock_held is set on exit, we have returned with the BufFreelistLock
 *  still held, as well; the caller must release that lock once the spinlock
 *  is dropped.  We do it that way because releasing the BufFreelistLock
 *  might awaken other processes, and it would be bad to do the associated
 *  kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
    volatile BufferDesc *buf;
    volatile int bufIndex = -1;
    volatile int resultIndex = -1;
    int trycounter;

    /* Lock the freelist */
    *lock_held = true;
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.
     */
    StrategyControl->numBufferAllocs++;

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        bufIndex = StrategyControl->firstFreeBuffer;
        buf = &BufferDescriptors[bufIndex];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.  It's probably impossible altogether as of 8.3, but
         * we'd better check anyway.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            resultIndex = bufIndex;
            break;
        }
        UnlockBufHdr(buf);
    }

    /*
     * Nothing on the freelist, so use the buffer replacement policy
     * to select a buffer to evict.
     */
    if (resultIndex == -1)
    {
        if (BufferReplacementPolicy == POLICY_CLOCK)
        {
            /* Run the "clock sweep" algorithm */
            trycounter = NBuffers;
            for (;;)
            {
                bufIndex = StrategyControl->nextVictimBuffer;
                buf = &BufferDescriptors[bufIndex];

                /*
                 * If the clock sweep hand has reached the end of the
                 * buffer pool, start back at the beginning.
                 */
                if (++StrategyControl->nextVictimBuffer >= NBuffers)
                {
                    StrategyControl->nextVictimBuffer = 0;
                    StrategyControl->completePasses++;
                }

                /*
                 * If the buffer is pinned or has a nonzero usage_count, we
                 * cannot use it; decrement the usage_count (unless pinned)
                 * and keep scanning.
                 */
                LockBufHdr(buf);
                if (buf->refcount == 0)
                {
                    if (buf->usage_count > 0)
                    {
                        buf->usage_count--;
                        trycounter = NBuffers;
                    }
                    else
                    {
                        /* Found a usable buffer */
                        resultIndex = bufIndex;
                        break;
                    }
                }
                else if (--trycounter == 0)
                {
                    /*
                     * We've scanned all the buffers without making any state
                     * changes, so all the buffers are pinned (or were when we
                     * looked at them).  We could hope that someone will free
                     * one eventually, but it's probably better to fail than
                     * to risk getting stuck in an infinite loop.
                     */
                    UnlockBufHdr(buf);
                    elog(ERROR, "no unpinned buffers available");
                }
                UnlockBufHdr(buf);
            }
        }

        /*
         * CS186: LRU, MRU and 2Q buffer replacement policies.  Once a buffer
         * to evict has been selected, its index in the BufferDescriptors
         * array is assigned to "resultIndex", mirroring the CLOCK code above.
         */
        else if (BufferReplacementPolicy == POLICY_LRU)
        {
            resultIndex = StrategyHelper(&buf, &(StrategyControl->head),
                                         &(StrategyControl->tail));
            if (resultIndex == -1)
                elog(ERROR, "no unpinned buffers available");
        }
        else if (BufferReplacementPolicy == POLICY_MRU)
        {
            buf = StrategyControl->head;
            if (buf)
            {
                /* Head is unpinned: evict it directly */
                if (buf->refcount == 0)
                {
                    resultIndex = buf->buf_id;
                    if (StrategyControl->head == StrategyControl->tail)
                    {
                        StrategyControl->head = NULL;
                        StrategyControl->tail = NULL;
                    }
                    else
                    {
                        buf->prev->next = buf->next;
                        buf->next->prev = buf->prev;
                        StrategyControl->head = buf->next;
                    }
                    buf->prev = NULL;
                    buf->next = NULL;
                    buf->queueTag = 0;
                }
                else
                {
                    /* Head is pinned: walk the list for the next unpinned buffer */
                    buf = StrategyControl->head->next;
                    while (buf != StrategyControl->head)
                    {
                        if (buf->refcount == 0)
                        {
                            resultIndex = buf->buf_id;
                            buf->prev->next = buf->next;
                            buf->next->prev = buf->prev;
                            if (buf == StrategyControl->tail)
                                StrategyControl->tail = buf->prev;
                            buf->prev = NULL;
                            buf->next = NULL;
                            buf->queueTag = 0;
                            break;
                        }
                        buf = buf->next;
                    }
                }
            }
            if (resultIndex == -1)
                elog(ERROR, "no unpinned buffers available");
        }
        else if (BufferReplacementPolicy == POLICY_2Q)
        {
            int threshold = NBuffers / 2;

            if (size_of_list(&(StrategyControl->head2Q)) >= threshold ||
                !(StrategyControl->head))
            {
                resultIndex = StrategyHelper(&buf, &(StrategyControl->head2Q),
                                             &(StrategyControl->tail2Q));
            }
            else
            {
                resultIndex = StrategyHelper(&buf, &(StrategyControl->head),
                                             &(StrategyControl->tail));
            }
            if (resultIndex == -1)
                elog(ERROR, "no unpinned buffers available");
        }
        else
        {
            elog(ERROR, "invalid buffer pool replacement policy %d",
                 BufferReplacementPolicy);
        }

        /*
         * CS186 Grading LOG - DON'T TOUCH
         * Don't output logs starting with "GRADING" by yourself; they are for
         * grading purposes only.
         */
        elog(LOG, "GRADING: EVICT %2d", resultIndex);
    }

    if (resultIndex == -1)
        elog(ERROR, "reached end of StrategyGetBuffer() without selecting a buffer");

    return &BufferDescriptors[resultIndex];
}
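/*
 * StrategyHelper() is called above for the LRU and 2Q paths but is not shown
 * in this excerpt.  Judging by its call sites it walks a doubly linked list
 * from the head, unlinks the first unpinned buffer, and returns its buf_id
 * (or -1 if every listed buffer is pinned).  A hypothetical sketch follows,
 * assuming the same next/prev/queueTag fields used above and a
 * NULL-terminated list; if the lists are circular (as the MRU branch above
 * suggests), the traversal condition would differ.  The real helper may be
 * implemented differently.
 */
static int
StrategyHelper(volatile BufferDesc **bufOut,
               volatile BufferDesc **head, volatile BufferDesc **tail)
{
    volatile BufferDesc *buf;

    for (buf = *head; buf != NULL; buf = buf->next)
    {
        if (buf->refcount != 0)
            continue;           /* pinned: keep scanning */

        /* Unlink buf from the doubly linked list */
        if (buf->prev)
            buf->prev->next = buf->next;
        else
            *head = buf->next;
        if (buf->next)
            buf->next->prev = buf->prev;
        else
            *tail = buf->prev;

        buf->next = NULL;
        buf->prev = NULL;
        buf->queueTag = 0;

        *bufOut = buf;
        return buf->buf_id;
    }

    return -1;                  /* no unpinned buffer found */
}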
/*
 * StrategyGetBuffer
 *
 *  Called by the bufmgr to get the next candidate buffer to use in
 *  BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *  the selected buffer must not currently be pinned by anyone.
 *
 *  To ensure that no one else can pin the buffer before we do, we must
 *  return the buffer with the buffer header spinlock still held.  That
 *  means that we return with the BufFreelistLock still held, as well;
 *  the caller must release that lock once the spinlock is dropped.
 */
volatile BufferDesc *
StrategyGetBuffer(void)
{
    volatile BufferDesc *buf;
    int trycounter;

    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
            return buf;
        UnlockBufHdr(buf);
    }

    /* Nothing on the freelist, so run the "clock sweep" algorithm */
    trycounter = NBuffers;
    for (;;)
    {
        buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

        if (++StrategyControl->nextVictimBuffer >= NBuffers)
            StrategyControl->nextVictimBuffer = 0;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; decrement the usage_count and keep scanning.
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
            return buf;
        if (buf->usage_count > 0)
        {
            buf->usage_count--;
            trycounter = NBuffers;
        }
        else if (--trycounter == 0)
        {
            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            UnlockBufHdr(buf);
            elog(ERROR, "no unpinned buffers available");
        }
        UnlockBufHdr(buf);
    }

    /* not reached */
    return NULL;
}
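/*
 * The header comment above puts the unlock ordering on the caller.  A
 * schematic of how a caller such as BufferAlloc() is expected to consume the
 * result is sketched below; this is not the actual BufferAlloc() body (real
 * pinning also maintains per-backend reference counts and does much more
 * around this), only an illustration of the lock-release order implied by
 * the contract.  ConsumeVictimBuffer is a hypothetical name.
 */
static void
ConsumeVictimBuffer(void)
{
    volatile BufferDesc *buf = StrategyGetBuffer();

    buf->refcount++;                /* pin while the header spinlock is still held */
    UnlockBufHdr(buf);              /* drop the spinlock first... */
    LWLockRelease(BufFreelistLock); /* ...then the freelist lock, which may wake others */
}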
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum result;
    MemoryContext oldcontext;
    BufferCachePagesContext *fctx;  /* User function context. */
    TupleDesc tupledesc;
    HeapTuple tuple;

    if (SRF_IS_FIRSTCALL())
    {
        int i;
        volatile BufferDesc *bufHdr;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(NUM_BUFFERCACHE_PAGES_ELEM, false);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
                           INT2OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
                           BOOLOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
                           INT2OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /* Allocate NBuffers worth of BufferCachePagesRec records. */
        fctx->record = (BufferCachePagesRec *)
            palloc(sizeof(BufferCachePagesRec) * NBuffers);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = NBuffers;
        funcctx->user_fctx = fctx;

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        /*
         * To get a consistent picture of the buffer state, we must lock all
         * partitions of the buffer map.  Needless to say, this is horrible
         * for concurrency.  Must grab locks in increasing order to avoid
         * possible deadlocks.
         */
        for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
            LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         */
        for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
        {
            /* Lock each buffer header before inspecting. */
            LockBufHdr(bufHdr);

            fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
            fctx->record[i].relfilenode = bufHdr->tag.rnode.relNode;
            fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode;
            fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode;
            fctx->record[i].forknum = bufHdr->tag.forkNum;
            fctx->record[i].blocknum = bufHdr->tag.blockNum;
            fctx->record[i].usagecount = bufHdr->usage_count;

            if (bufHdr->flags & BM_DIRTY)
                fctx->record[i].isdirty = true;
            else
                fctx->record[i].isdirty = false;

            /* Note if the buffer is valid, and has storage created */
            if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID))
                fctx->record[i].isvalid = true;
            else
                fctx->record[i].isvalid = false;

            UnlockBufHdr(bufHdr);
        }

        /*
         * And release locks.  We do this in reverse order for two reasons:
         * (1) Anyone else who needs more than one of the locks will be trying
         * to lock them in increasing order; we don't want to release the
         * other process until it can get all the locks it needs.  (2) This
         * avoids O(N^2) behavior inside LWLockRelease.
         */
        for (i = NUM_BUFFER_PARTITIONS; --i >= 0;)
            LWLockRelease(FirstBufMappingLock + i);
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32 i = funcctx->call_cntr;
        Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
        bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        /*
         * Set all fields except the bufferid to null if the buffer is unused
         * or not valid.
         */
        if (fctx->record[i].blocknum == InvalidBlockNumber ||
            fctx->record[i].isvalid == false)
        {
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
            nulls[7] = true;
        }
        else
        {
            values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode);
            nulls[1] = false;
            values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
            nulls[2] = false;
            values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
            nulls[3] = false;
            values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
            nulls[4] = false;
            values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
            nulls[5] = false;
            values[6] = BoolGetDatum(fctx->record[i].isdirty);
            nulls[6] = false;
            values[7] = Int16GetDatum(fctx->record[i].usagecount);
            nulls[7] = false;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum result;
    MemoryContext oldcontext;
    BufferCachePagesContext *fctx;  /* User function context. */
    TupleDesc tupledesc;
    TupleDesc expected_tupledesc;
    HeapTuple tuple;

    if (SRF_IS_FIRSTCALL())
    {
        int i;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

        /*
         * To smoothly support upgrades from version 1.0 of this extension
         * transparently handle the (non-)existence of the pinning_backends
         * column. We unfortunately have to get the result type for that... -
         * we can't use the result type determined by the function definition
         * without potentially crashing when somebody uses the old (or even
         * wrong) function definition though.
         */
        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
            expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
            elog(ERROR, "incorrect number of output arguments");

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts, false);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
                           INT2OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
                           BOOLOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
                           INT2OID, -1, 0);

        if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
            TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
                               INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /* Allocate NBuffers worth of BufferCachePagesRec records. */
        fctx->record = (BufferCachePagesRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCachePagesRec) * NBuffers);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = NBuffers;
        funcctx->user_fctx = fctx;

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *bufHdr;
            uint32 buf_state;

            bufHdr = GetBufferDescriptor(i);

            /* Lock each buffer header before inspecting. */
            buf_state = LockBufHdr(bufHdr);

            fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
            fctx->record[i].relfilenode = bufHdr->tag.rnode.relNode;
            fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode;
            fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode;
            fctx->record[i].forknum = bufHdr->tag.forkNum;
            fctx->record[i].blocknum = bufHdr->tag.blockNum;
            fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
            fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                fctx->record[i].isdirty = true;
            else
                fctx->record[i].isdirty = false;

            /* Note if the buffer is valid, and has storage created */
            if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
                fctx->record[i].isvalid = true;
            else
                fctx->record[i].isvalid = false;

            UnlockBufHdr(bufHdr, buf_state);
        }
    }

    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32 i = funcctx->call_cntr;
        Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
        bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        /*
         * Set all fields except the bufferid to null if the buffer is unused
         * or not valid.
         */
        if (fctx->record[i].blocknum == InvalidBlockNumber ||
            fctx->record[i].isvalid == false)
        {
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
            nulls[7] = true;
            /* unused for v1.0 callers, but the array is always long enough */
            nulls[8] = true;
        }
        else
        {
            values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode);
            nulls[1] = false;
            values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
            nulls[2] = false;
            values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
            nulls[3] = false;
            values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
            nulls[4] = false;
            values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
            nulls[5] = false;
            values[6] = BoolGetDatum(fctx->record[i].isdirty);
            nulls[6] = false;
            values[7] = Int16GetDatum(fctx->record[i].usagecount);
            nulls[7] = false;
            /* unused for v1.0 callers, but the array is always long enough */
            values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
            nulls[8] = false;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}
static void
SaveBuffers(void)
{
    int i;
    int num_buffers;
    int log_level = DEBUG3;
    SavedBuffer *saved_buffers;
    volatile BufferDesc *bufHdr;    /* XXX: Do we really need volatile here? */
    FILE *file = NULL;
    int database_counter = 0;
    Oid prev_database = InvalidOid;
    Oid prev_filenode = InvalidOid;
    ForkNumber prev_forknum = InvalidForkNumber;
    BlockNumber prev_blocknum = InvalidBlockNumber;
    BlockNumber range_counter = 0;
    const char *savefile_path;

    /*
     * XXX: If the memory request fails, ask for a smaller memory chunk, and
     * use it to create chunks of save-files, and make the workers read those
     * chunks.
     *
     * This is not a concern as of now, so deferred; there's at least one
     * other place that allocates (NBuffers * (much_bigger_struct)), so this
     * seems to be an acceptable practice.
     */
    saved_buffers = (SavedBuffer *) palloc(sizeof(SavedBuffer) * NBuffers);

    /* Lock the buffer partitions for reading. */
    for (i = 0; i < NUM_BUFFER_PARTITIONS; ++i)
        LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);

    /* Scan and save a list of valid buffers. */
    for (num_buffers = 0, i = 0, bufHdr = BufferDescriptors; i < NBuffers; ++i, ++bufHdr)
    {
        /* Lock each buffer header before inspecting. */
        LockBufHdr(bufHdr);

        /* Skip invalid buffers */
        if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID))
        {
            saved_buffers[num_buffers].database = bufHdr->tag.rnode.dbNode;
            saved_buffers[num_buffers].filenode = bufHdr->tag.rnode.relNode;
            saved_buffers[num_buffers].forknum = bufHdr->tag.forkNum;
            saved_buffers[num_buffers].blocknum = bufHdr->tag.blockNum;
            ++num_buffers;
        }

        UnlockBufHdr(bufHdr);
    }

    /* Unlock the buffer partitions in reverse order, to avoid a deadlock. */
    for (i = NUM_BUFFER_PARTITIONS - 1; i >= 0; --i)
        LWLockRelease(FirstBufMappingLock + i);

    /*
     * Sort the list, so that we can optimize the storage of these buffers.
     *
     * The side-effect of this storage optimization is that when reading the
     * blocks back from relation forks, it leads to sequential reads, which
     * improve the restore speeds quite considerably as compared to random
     * reads from different blocks all over the data directory.
     */
    pg_qsort(saved_buffers, num_buffers, sizeof(SavedBuffer), SavedBufferCmp);

    /* Connect to the database and start a transaction for database name lookups. */
    BackgroundWorkerInitializeConnection(guc_default_database, NULL);
    SetCurrentStatementStartTimestamp();
    StartTransactionCommand();
    PushActiveSnapshot(GetTransactionSnapshot());
    pgstat_report_activity(STATE_RUNNING, "saving buffers");

    for (i = 0; i < num_buffers; ++i)
    {
        int j;
        SavedBuffer *buf = &saved_buffers[i];

        if (i == 0)
        {
            /*
             * Special case for global objects.  The sort brings them to the
             * front of the list.
             */

            /* Make sure the first buffer we save belongs to a global object. */
            Assert(buf->database == InvalidOid);

            /*
             * Database number (and save-file name) 1 is reserved for storing
             * the list of buffers of global objects.
             */
            database_counter = 1;

            savefile_path = getSavefileName(database_counter);
            file = fileOpen(savefile_path, PG_BINARY_W);
            writeDBName("", file, savefile_path);

            prev_database = buf->database;
        }

        if (buf->database != prev_database)
        {
            char *dbname;

            /*
             * We are beginning to process a different database than the
             * previous one; close the save-file of the previous database,
             * and open a new one.
             */
            ++database_counter;

            dbname = get_database_name(buf->database);

            Assert(dbname != NULL);

            if (file != NULL)
                fileClose(file, savefile_path);

            savefile_path = getSavefileName(database_counter);
            file = fileOpen(savefile_path, PG_BINARY_W);
            writeDBName(dbname, file, savefile_path);

            pfree(dbname);

            /* Reset trackers appropriately */
            prev_database = buf->database;
            prev_filenode = InvalidOid;
            prev_forknum = InvalidForkNumber;
            prev_blocknum = InvalidBlockNumber;
            range_counter = 0;
        }

        if (buf->filenode != prev_filenode)
        {
            /* We're beginning to process a new relation; emit a record for it. */
            fileWrite("r", 1, file, savefile_path);
            fileWrite(&(buf->filenode), sizeof(Oid), file, savefile_path);

            /* Reset trackers appropriately */
            prev_filenode = buf->filenode;
            prev_forknum = InvalidForkNumber;
            prev_blocknum = InvalidBlockNumber;
            range_counter = 0;
        }

        if (buf->forknum != prev_forknum)
        {
            /*
             * We're beginning to process a new fork of this relation; add a
             * record for it.
             */
            fileWrite("f", 1, file, savefile_path);
            fileWrite(&(buf->forknum), sizeof(ForkNumber), file, savefile_path);

            /* Reset trackers appropriately */
            prev_forknum = buf->forknum;
            prev_blocknum = InvalidBlockNumber;
            range_counter = 0;
        }

        ereport(log_level,
                (errmsg("writer: writing block db %d filenode %d forknum %d blocknum %d",
                        database_counter, prev_filenode, prev_forknum, buf->blocknum)));

        fileWrite("b", 1, file, savefile_path);
        fileWrite(&(buf->blocknum), sizeof(BlockNumber), file, savefile_path);

        prev_blocknum = buf->blocknum;

        /*
         * If a continuous range of blocks follows this block, then emit one
         * entry for the range, instead of one for each block.
         */
        range_counter = 0;

        for (j = i + 1; j < num_buffers; ++j)
        {
            SavedBuffer *tmp = &saved_buffers[j];

            if (tmp->database == prev_database &&
                tmp->filenode == prev_filenode &&
                tmp->forknum == prev_forknum &&
                tmp->blocknum == (prev_blocknum + range_counter + 1))
                ++range_counter;
            else
                break;          /* the list is sorted, so the range cannot resume */
        }

        if (range_counter != 0)
        {
            ereport(log_level,
                    (errmsg("writer: writing range db %d filenode %d forknum %d blocknum %d range %d",
                            database_counter, prev_filenode, prev_forknum,
                            prev_blocknum, range_counter)));

            fileWrite("N", 1, file, savefile_path);
            fileWrite(&range_counter, sizeof(range_counter), file, savefile_path);

            i += range_counter;
        }
    }

    ereport(LOG,
            (errmsg("Buffer Saver: saved metadata of %d blocks", num_buffers)));

    Assert(file != NULL);
    fileClose(file, savefile_path);

    pfree(saved_buffers);

    PopActiveSnapshot();
    CommitTransactionCommand();
    pgstat_report_activity(STATE_IDLE, NULL);
}
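/*
 * The save-file layout is implicit in the fileWrite calls above.  The
 * hypothetical decoder below makes it explicit; the fileRead helper is
 * assumed here (mirroring the writer's fileOpen/fileWrite helpers) and is
 * not part of the original source, nor is the ReadSavedRecords name.
 *
 * After the database-name header, the file is a stream of tagged records:
 *   'r' Oid          switch to a new relation filenode
 *   'f' ForkNumber   switch to a new fork of that relation
 *   'b' BlockNumber  one block to restore
 *   'N' BlockNumber  the previous 'b' extends into that many more
 *                    consecutive blocks
 */
static void
ReadSavedRecords(FILE *file, const char *savefile_path)
{
    char tag;
    Oid filenode = InvalidOid;
    ForkNumber forknum = InvalidForkNumber;
    BlockNumber blocknum = InvalidBlockNumber;
    BlockNumber range;

    while (fileRead(&tag, 1, file, savefile_path))
    {
        switch (tag)
        {
            case 'r':
                fileRead(&filenode, sizeof(Oid), file, savefile_path);
                break;
            case 'f':
                fileRead(&forknum, sizeof(ForkNumber), file, savefile_path);
                break;
            case 'b':
                fileRead(&blocknum, sizeof(BlockNumber), file, savefile_path);
                /* restore the single block (filenode, forknum, blocknum) */
                break;
            case 'N':
                fileRead(&range, sizeof(BlockNumber), file, savefile_path);
                /* restore blocknum+1 .. blocknum+range as a sequential run */
                break;
        }
    }
}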
/*
 * StrategyGetBuffer
 *
 *  Called by the bufmgr to get the next candidate buffer to use in
 *  BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *  the selected buffer must not currently be pinned by anyone.
 *
 *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *  To ensure that no one else can pin the buffer before we do, we must
 *  return the buffer with the buffer header spinlock still held.  If
 *  *lock_held is set on exit, we have returned with the BufFreelistLock
 *  still held, as well; the caller must release that lock once the spinlock
 *  is dropped.  We do it that way because releasing the BufFreelistLock
 *  might awaken other processes, and it would be bad to do the associated
 *  kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
    volatile BufferDesc *buf;
    Latch *bgwriterLatch;
    DllNode *d;

    elog(LOG, "in StrategyGetBuffer");

    /*
     * This variant ignores any strategy object and always works from the
     * shared freelist and the recency list, so lock the freelist (and the
     * DLL that tracks recency) unconditionally.
     */
    *lock_held = true;
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    LWLockAcquire(BufDllLock, LW_EXCLUSIVE);

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.  Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    StrategyControl->numBufferAllocs++;

    /*
     * If bgwriterLatch is set, we need to waken the bgwriter, but we should
     * not do so while holding BufFreelistLock; so release and re-grab.  This
     * is annoyingly tedious, but it happens at most once per bgwriter cycle,
     * so the performance hit is minimal.
     */
    bgwriterLatch = StrategyControl->bgwriterLatch;
    if (bgwriterLatch)
    {
        StrategyControl->bgwriterLatch = NULL;
        LWLockRelease(BufFreelistLock);
        SetLatch(bgwriterLatch);
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    }

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.  It's probably impossible altogether as of 8.3, but
         * we'd better check anyway.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            /*
             * Record the buffer at the tail of the recency list.  Note that
             * firstFreeBuffer has already been advanced above, so use the
             * buffer's own id here.
             */
            BufNodes[buf->buf_id] = dllInsertInt(BufDLL, buf->buf_id, TAIL);
            LWLockRelease(BufDllLock);
            return buf;
        }
        UnlockBufHdr(buf);
    }

    /*
     * Nothing on the freelist, so scan the recency list from its head (least
     * recently used) looking for an unpinned buffer.
     */
    for (d = BufDLL->head; d; d = d->next)
    {
        buf = &BufferDescriptors[d->data];

        /* If the buffer is pinned, we cannot use it; keep scanning. */
        LockBufHdr(buf);
        if (buf->refcount == 0)
        {
            /* Found a usable buffer; move it to the tail (most recent) */
            dllMove(BufDLL, buf, TAIL);
            LWLockRelease(BufDllLock);
            elog(LOG, "Successfully got buf from list");
            return buf;
        }
        UnlockBufHdr(buf);
    }

    /*
     * We've scanned all the buffers without making any state changes, so all
     * the buffers are pinned (or were when we looked at them).  We could hope
     * that someone will free one eventually, but it's probably better to fail
     * than to risk getting stuck in an infinite loop.
     */
    LWLockRelease(BufDllLock);
    elog(ERROR, "no unpinned buffers available");

    /* not reached */
    return NULL;
}
/*
 * StrategyGetBuffer
 *
 *  Called by the bufmgr to get the next candidate buffer to use in
 *  BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *  the selected buffer must not currently be pinned by anyone.
 *
 *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *  To ensure that no one else can pin the buffer before we do, we must
 *  return the buffer with the buffer header spinlock still held.  If
 *  *lock_held is set on exit, we have returned with the BufFreelistLock
 *  still held, as well; the caller must release that lock once the spinlock
 *  is dropped.  We do it that way because releasing the BufFreelistLock
 *  might awaken other processes, and it would be bad to do the associated
 *  kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
    volatile BufferDesc *buf;
    Latch *bgwriterLatch;
    int trycounter;
    volatile int resultIndex = -1;
    volatile BufferDesc *next;
    volatile BufferDesc *previous;

    /*
     * If given a strategy object, see whether it can select a buffer.  We
     * assume strategy objects don't need the BufFreelistLock.
     */
    if (strategy != NULL)
    {
        buf = GetBufferFromRing(strategy);
        if (buf != NULL)
        {
            *lock_held = false;
            return buf;
        }
    }

    /* Nope, so lock the freelist */
    *lock_held = true;
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.  Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    StrategyControl->numBufferAllocs++;

    /*
     * If bgwriterLatch is set, we need to waken the bgwriter, but we should
     * not do so while holding BufFreelistLock; so release and re-grab.  This
     * is annoyingly tedious, but it happens at most once per bgwriter cycle,
     * so the performance hit is minimal.
     */
    bgwriterLatch = StrategyControl->bgwriterLatch;
    if (bgwriterLatch)
    {
        StrategyControl->bgwriterLatch = NULL;
        LWLockRelease(BufFreelistLock);
        SetLatch(bgwriterLatch);
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    }

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.  It's probably impossible altogether as of 8.3, but
         * we'd better check anyway.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            if (strategy != NULL)
                AddBufferToRing(strategy, buf);
            return buf;
        }
        UnlockBufHdr(buf);
    }

    /*
     * Nothing on the freelist, so run the algorithm selected by the
     * BufferReplacementPolicy variable.
     */
    if (resultIndex == -1)
    {
        if (BufferReplacementPolicy == POLICY_CLOCK)
        {
            /* Run the clock sweep algorithm (default postgres behavior) */
            trycounter = NBuffers;
            for (;;)
            {
                buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

                /*
                 * If the clock sweep hand has reached the end of the buffer
                 * pool, start back at the beginning.
                 */
                if (++StrategyControl->nextVictimBuffer >= NBuffers)
                {
                    StrategyControl->nextVictimBuffer = 0;
                    StrategyControl->completePasses++;
                }

                /*
                 * If the buffer is pinned or has a nonzero usage_count, we
                 * cannot use it; decrement the usage_count (unless pinned)
                 * and keep scanning.
                 */
                LockBufHdr(buf);
                if (buf->refcount == 0)
                {
                    if (buf->usage_count > 0)
                    {
                        buf->usage_count--;
                        trycounter = NBuffers;
                    }
                    else
                    {
                        /* Found a usable buffer */
                        if (strategy != NULL)
                            AddBufferToRing(strategy, buf);
                        return buf;
                    }
                }
                else if (--trycounter == 0)
                {
                    /*
                     * We've scanned all the buffers without making any state
                     * changes, so all the buffers are pinned (or were when we
                     * looked at them).  We could hope that someone will free
                     * one eventually, but it's probably better to fail than
                     * to risk getting stuck in an infinite loop.
                     */
                    UnlockBufHdr(buf);
                    elog(ERROR, "no unpinned buffers available");
                }
                UnlockBufHdr(buf);
            }
        }

        /*
         * Implementation of the LRU, MRU and 2Q policies.  Once a buffer to
         * evict has been selected, its index in the BufferDescriptors array
         * is stored in "resultIndex".
         */
        else if (BufferReplacementPolicy == POLICY_LRU)
        {
            /* Scan from the least recently unpinned buffer forward */
            buf = StrategyControl->firstUnpinned;
            while (buf != NULL)
            {
                LockBufHdr(buf);
                if (buf->refcount == 0)
                {
                    resultIndex = buf->buf_id;
                    break;
                }
                else
                {
                    UnlockBufHdr(buf);
                    buf = buf->next;
                }
            }

            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            if (buf == NULL)
                elog(ERROR, "no unpinned buffers available");
        }
        else if (BufferReplacementPolicy == POLICY_MRU)
        {
            /* Scan from the most recently unpinned buffer backward */
            buf = StrategyControl->lastUnpinned;
            while (buf != NULL)
            {
                LockBufHdr(buf);
                if (buf->refcount == 0)
                {
                    resultIndex = buf->buf_id;
                    break;
                }
                else
                {
                    UnlockBufHdr(buf);
                    buf = buf->previous;
                }
            }

            /* Same as above: every buffer was pinned, so fail rather than loop forever. */
            if (buf == NULL)
                elog(ERROR, "no unpinned buffers available");
        }
        else if (BufferReplacementPolicy == POLICY_2Q)
        {
            int thres = NBuffers / 2;
            int sizeA1 = 0;
            volatile BufferDesc *head = StrategyControl->a1Head;

            /* Count the entries currently on the A1 queue */
            while (head != NULL)
            {
                head = head->next;
                sizeA1++;
            }

            if (sizeA1 >= thres || StrategyControl->lastUnpinned == NULL)
            {
                /* Evict from the head of A1 */
                buf = StrategyControl->a1Head;
                while (buf != NULL)
                {
                    if (buf->refcount == 0)
                    {
                        resultIndex = buf->buf_id;
                        next = buf->next;
                        previous = buf->previous;

                        /* Unlink buf from A1, adjusting its neighbors */
                        if (next != NULL)
                        {
                            if (previous != NULL)
                            {
                                /* buf is in the middle of the list */
                                previous->next = next;
                                next->previous = previous;
                            }
                            else
                            {
                                /* buf is at the beginning of the list */
                                next->previous = NULL;
                                StrategyControl->a1Head = next;
                            }
                        }
                        else if (previous == NULL)
                        {
                            /* buf is the only item in the list */
                            StrategyControl->a1Head = NULL;
                            StrategyControl->a1Tail = NULL;
                        }
                        else
                        {
                            /* buf is the last item in the list */
                            StrategyControl->a1Tail = previous;
                            previous->next = NULL;
                        }
                        buf->next = NULL;
                        buf->previous = NULL;
                        break;
                    }
                    else
                        buf = buf->next;
                }

                if (buf == NULL)
                    elog(ERROR, "no unpinned buffers available");
            }
            else
            {
                /* Evict from the head of Am (the main LRU queue) */
                buf = StrategyControl->firstUnpinned;
                while (buf != NULL)
                {
                    if (buf->refcount == 0)
                    {
                        resultIndex = buf->buf_id;
                        next = buf->next;
                        previous = buf->previous;

                        /* Unlink buf from Am, adjusting its neighbors */
                        if (next != NULL)
                        {
                            if (previous != NULL)
                            {
                                /* buf is in the middle of the list */
                                previous->next = next;
                                next->previous = previous;
                            }
                            else
                            {
                                /* buf is at the beginning of the list */
                                next->previous = NULL;
                                StrategyControl->firstUnpinned = next;
                            }
                        }
                        else if (previous == NULL)
                        {
                            /* buf is the only item in the list */
                            StrategyControl->firstUnpinned = NULL;
                            StrategyControl->lastUnpinned = NULL;
                        }
                        else
                        {
                            /* buf is the last item in the list */
                            previous->next = NULL;
                            StrategyControl->lastUnpinned = previous;
                        }
                        buf->next = NULL;
                        buf->previous = NULL;
                        break;
                    }
                    else
                        buf = buf->next;
                }

                if (buf == NULL)
                    elog(ERROR, "no unpinned buffers available");
            }
        }
        else
        {
            elog(ERROR, "invalid buffer pool replacement policy %d",
                 BufferReplacementPolicy);
        }
    }

    if (resultIndex == -1)
        elog(ERROR, "reached end of StrategyGetBuffer() without selecting a buffer");

    return &BufferDescriptors[resultIndex];
}