/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.  That
 *	means that we return with the BufFreelistLock still held, as well;
 *	the caller must release that lock once the spinlock is dropped.
 */
volatile BufferDesc *
StrategyGetBuffer(void)
{
	volatile BufferDesc *buf;

	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

	/*
	 * Try to get a buffer from the freelist.  Note that the freeNext fields
	 * are considered to be protected by the BufFreelistLock not the
	 * individual buffer spinlocks, so it's OK to manipulate them without
	 * holding the spinlock.
	 */
	while (StrategyControl->firstFreeBuffer >= 0)
	{
		buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
		Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

		/* Unconditionally remove buffer from freelist */
		StrategyControl->firstFreeBuffer = buf->freeNext;
		buf->freeNext = FREENEXT_NOT_IN_LIST;
	buf->freePre = FREEPRE_NOT_IN_LIST;

		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
		 * it; discard it and retry.  (This can only happen if VACUUM put a
		 * valid buffer in the freelist and then someone else used it before
		 * we got to it.)
		 */
		LockBufHdr(buf);
		if (buf->refcount == 0 && buf->usage_count == 0)
		{
			elog(LOG, "Get buf %d", buf->buf_id);
			return buf;
		}
		UnlockBufHdr(buf);
	}

	/*
	 * Nothing on the freelist.  The "clock sweep" fallback has been removed
	 * in this variant, so an empty freelist means no buffer is available;
	 * report that to the caller.
	 */
	return NULL;
}
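Note: the freelist this loop pops from is filled by a push-side counterpart.
Below is a minimal sketch of that counterpart, modeled on PostgreSQL's
StrategyFreeBuffer(); the stand-in types are illustrative, not the real
headers.

#define FREENEXT_NOT_IN_LIST	(-2)
#define FREENEXT_END_OF_LIST	(-1)

typedef struct BufferDescSketch
{
	int		buf_id;
	int		freeNext;			/* link protected by BufFreelistLock */
} BufferDescSketch;

typedef struct StrategyControlSketch
{
	int		firstFreeBuffer;	/* head of freelist, or negative if empty */
	int		lastFreeBuffer;
} StrategyControlSketch;

/* Push a buffer onto the freelist head; caller holds BufFreelistLock. */
static void
StrategyFreeBufferSketch(StrategyControlSketch *ctl, BufferDescSketch *buf)
{
	if (buf->freeNext == FREENEXT_NOT_IN_LIST)	/* avoid double insertion */
	{
		buf->freeNext = ctl->firstFreeBuffer;
		if (buf->freeNext < 0)
			ctl->lastFreeBuffer = buf->buf_id;
		ctl->firstFreeBuffer = buf->buf_id;
	}
}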
Example #2
/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 *		ring is empty.
 *
 * The bufhdr spin lock is held on the returned buffer.
 */
static volatile BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy)
{
	volatile BufferDesc *buf;
	Buffer		bufnum;

	/* Advance to next ring slot */
	if (++strategy->current >= strategy->ring_size)
		strategy->current = 0;

	/*
	 * If the slot hasn't been filled yet, tell the caller to allocate a new
	 * buffer with the normal allocation strategy.	He will then fill this
	 * slot by calling AddBufferToRing with the new buffer.
	 */
	bufnum = strategy->buffers[strategy->current];
	if (bufnum == InvalidBuffer)
	{
		strategy->current_was_in_ring = false;
		return NULL;
	}

	/*
	 * If the buffer is pinned we cannot use it under any circumstances.
	 *
	 * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
	 * since our own previous usage of the ring element would have left it
	 * there, but it might've been decremented by clock sweep since then). A
	 * higher usage_count indicates someone else has touched the buffer, so we
	 * shouldn't re-use it.
	 */
	buf = &BufferDescriptors[bufnum - 1];
	LockBufHdr(buf);
	if (buf->refcount == 0 && buf->usage_count <= 1)
	{
		strategy->current_was_in_ring = true;
		return buf;
	}
	UnlockBufHdr(buf);

	/*
	 * Tell caller to allocate a new buffer with the normal allocation
	 * strategy.  He'll then replace this ring element via AddBufferToRing.
	 */
	strategy->current_was_in_ring = false;
	return NULL;
}
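When GetBufferFromRing() returns NULL, the caller falls back to the normal
allocation path and then stores the newly chosen buffer into the slot the
ring just advanced to. A sketch of that write side, modeled on PostgreSQL's
AddBufferToRing() (stand-in types for illustration):

#include <stdbool.h>

typedef int Buffer;

typedef struct BufferAccessStrategySketch
{
	int		ring_size;
	int		current;			/* slot the last GetBufferFromRing advanced to */
	bool	current_was_in_ring;
	Buffer	buffers[1];			/* really ring_size entries */
} BufferAccessStrategySketch;

static void
AddBufferToRingSketch(BufferAccessStrategySketch *strategy, Buffer buf)
{
	/* Overwrite the slot so the next lap around the ring finds this buffer */
	strategy->buffers[strategy->current] = buf;
}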
Example #3
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.  If
 *	*lock_held is set on exit, we have returned with the BufFreelistLock
 *	still held, as well; the caller must release that lock once the spinlock
 *	is dropped.  We do it that way because releasing the BufFreelistLock
 *	might awaken other processes, and it would be bad to do the associated
 *	kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
	volatile BufferDesc *buf;
	Latch	   *bgwriterLatch;
	int			trycounter;

	/*
	 * If given a strategy object, see whether it can select a buffer. We
	 * assume strategy objects don't need the BufFreelistLock.
	 */
	if (strategy != NULL)
	{
		buf = GetBufferFromRing(strategy);
		if (buf != NULL)
		{
			*lock_held = false;
			return buf;
		}
	}

	/* Nope, so lock the freelist */
	*lock_held = true;
	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

	/*
	 * We count buffer allocation requests so that the bgwriter can estimate
	 * the rate of buffer consumption.	Note that buffers recycled by a
	 * strategy object are intentionally not counted here.
	 */
	StrategyControl->numBufferAllocs++;

	/*
	 * If bgwriterLatch is set, we need to waken the bgwriter, but we should
	 * not do so while holding BufFreelistLock; so release and re-grab.  This
	 * is annoyingly tedious, but it happens at most once per bgwriter cycle,
	 * so the performance hit is minimal.
	 */
	bgwriterLatch = StrategyControl->bgwriterLatch;
	if (bgwriterLatch)
	{
		StrategyControl->bgwriterLatch = NULL;
		LWLockRelease(BufFreelistLock);
		SetLatch(bgwriterLatch);
		LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
	}

	/*
	 * Try to get a buffer from the freelist.  Note that the freeNext fields
	 * are considered to be protected by the BufFreelistLock not the
	 * individual buffer spinlocks, so it's OK to manipulate them without
	 * holding the spinlock.
	 */
	while (StrategyControl->firstFreeBuffer >= 0)
	{
		buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
		Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

		/* Unconditionally remove buffer from freelist */
		StrategyControl->firstFreeBuffer = buf->freeNext;
		buf->freeNext = FREENEXT_NOT_IN_LIST;

		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
		 * it; discard it and retry.  (This can only happen if VACUUM put a
		 * valid buffer in the freelist and then someone else used it before
		 * we got to it.  It's probably impossible altogether as of 8.3, but
		 * we'd better check anyway.)
		 */
		LockBufHdr(buf);
		if (buf->refcount == 0 && buf->usage_count == 0)
		{
			if (strategy != NULL)
				AddBufferToRing(strategy, buf);
			return buf;
		}
		UnlockBufHdr(buf);
	}

	if (true)
	{
		/* Use an LRU replacement policy in place of the clock sweep */
		int			currMinTimeStamp = INT_MAX;
		volatile BufferDesc *currTargetBuffer = NULL;

		for (StrategyControl->nextVictimBuffer = 0;
			 StrategyControl->nextVictimBuffer < NBuffers;
			 StrategyControl->nextVictimBuffer++)
		{
			buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

			LockBufHdr(buf);
			if (buf->refcount == 0 && buf->timer < currMinTimeStamp)
			{
				currMinTimeStamp = buf->timer;
				currTargetBuffer = buf;
			}
			UnlockBufHdr(buf);
		}

		/* Every buffer was pinned, so there is nothing to evict */
		if (currTargetBuffer == NULL)
		{
			StrategyControl->completePasses++;
			elog(ERROR, "No available buffer frame");
		}

		/*
		 * Re-take the header spinlock so the buffer is returned with its
		 * spinlock held, as the contract above requires.
		 */
		LockBufHdr(currTargetBuffer);
		return currTargetBuffer;
	}
	else
	{
		/* Nothing on the freelist, so run the "clock sweep" algorithm */
		trycounter = NBuffers;
		for (;;)
		{
			buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

			if (++StrategyControl->nextVictimBuffer >= NBuffers)
			{
				StrategyControl->nextVictimBuffer = 0;
				StrategyControl->completePasses++;
			}

			/*
			 * If the buffer is pinned or has a nonzero usage_count, we cannot
			 * use it; decrement the usage_count (unless pinned) and keep
			 * scanning.
			 */
			LockBufHdr(buf);
			if (buf->refcount == 0)
			{
				if (buf->usage_count > 0)
				{
					buf->usage_count--;
					trycounter = NBuffers;
				}
				else
				{
					/* Found a usable buffer */
					if (strategy != NULL)
						AddBufferToRing(strategy, buf);
					return buf;
				}
			}
			else if (--trycounter == 0)
			{
				/*
				 * We've scanned all the buffers without making any state changes,
				 * so all the buffers are pinned (or were when we looked at them).
				 * We could hope that someone will free one eventually, but it's
				 * probably better to fail than to risk getting stuck in an
				 * infinite loop.
				 */
				UnlockBufHdr(buf);
				elog(ERROR, "no unpinned buffers available");
			}
			UnlockBufHdr(buf);
		}

		/* not reached */
		return NULL;
	}
}
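The LRU scan above reads buf->timer, but nothing shown here updates it. A
hypothetical sketch of the stamping hook this variant assumes (the timer
field and the counter are additions; stock PostgreSQL has neither):

typedef struct BufferDescTimerSketch
{
	int		refcount;
	int		timer;				/* last-access stamp read by the LRU scan */
} BufferDescTimerSketch;

static int	NextAccessStamp = 0;

/* Call with the buffer header spinlock held, e.g. from PinBuffer(). */
static void
StampBufferAccess(BufferDescTimerSketch *buf)
{
	buf->timer = NextAccessStamp++;
}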
Example #4
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy)
{
	volatile BufferDesc *buf;
	int			bgwprocno;
	int			trycounter;

	/*
	 * If given a strategy object, see whether it can select a buffer. We
	 * assume strategy objects don't need buffer_strategy_lock.
	 */
	if (strategy != NULL)
	{
		buf = GetBufferFromRing(strategy);
		if (buf != NULL)
			return buf;
	}

	/*
	 * If asked, we need to waken the bgwriter. Since we don't want to rely on
	 * a spinlock for this we force a read from shared memory once, and then
	 * set the latch based on that value. We need to go through that length
 * because otherwise bgwprocno might be reset while/after we check because
	 * the compiler might just reread from memory.
	 *
	 * This can possibly set the latch of the wrong process if the bgwriter
	 * dies in the wrong moment. But since PGPROC->procLatch is never
	 * deallocated the worst consequence of that is that we set the latch of
	 * some arbitrary process.
	 */
	bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
	if (bgwprocno != -1)
	{
		/* reset bgwprocno first, before setting the latch */
		StrategyControl->bgwprocno = -1;

		/*
		 * Not acquiring ProcArrayLock here which is slightly icky. It's
		 * actually fine because procLatch isn't ever freed, so we just can
		 * potentially set the wrong process' (or no process') latch.
		 */
		SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
	}

	/*
	 * We count buffer allocation requests so that the bgwriter can estimate
	 * the rate of buffer consumption.  Note that buffers recycled by a
	 * strategy object are intentionally not counted here.
	 */
	pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

	/*
	 * First check, without acquiring the lock, whether there are buffers in the
	 * freelist. Since we otherwise don't require the spinlock in every
	 * StrategyGetBuffer() invocation, it'd be sad to acquire it here -
	 * uselessly in most cases. That obviously leaves a race where a buffer is
	 * put on the freelist but we don't see the store yet - but that's pretty
	 * harmless, it'll just get used during the next buffer acquisition.
	 *
	 * If there are buffers on the freelist, acquire the spinlock to pop one
	 * buffer off the freelist. Then check whether that buffer is usable and
	 * repeat if not.
	 *
	 * Note that the freeNext fields are considered to be protected by the
	 * buffer_strategy_lock not the individual buffer spinlocks, so it's OK to
	 * manipulate them without holding the spinlock.
	 */
	if (StrategyControl->firstFreeBuffer >= 0)
	{
		while (true)
		{
			/* Acquire the spinlock to remove element from the freelist */
			SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

			if (StrategyControl->firstFreeBuffer < 0)
			{
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
				break;
			}

			buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
			Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

			/* Unconditionally remove buffer from freelist */
			StrategyControl->firstFreeBuffer = buf->freeNext;
			buf->freeNext = FREENEXT_NOT_IN_LIST;

			/*
			 * Release the lock so someone else can access the freelist while
			 * we check out this buffer.
			 */
			SpinLockRelease(&StrategyControl->buffer_strategy_lock);

			/*
			 * If the buffer is pinned or has a nonzero usage_count, we cannot
			 * use it; discard it and retry.  (This can only happen if VACUUM
			 * put a valid buffer in the freelist and then someone else used
			 * it before we got to it.  It's probably impossible altogether as
			 * of 8.3, but we'd better check anyway.)
			 */
			LockBufHdr(buf);
			if (buf->refcount == 0 && buf->usage_count == 0)
			{
				if (strategy != NULL)
					AddBufferToRing(strategy, buf);
				return buf;
			}
			UnlockBufHdr(buf);
		}
	}

	/* Nothing on the freelist, so run the "clock sweep" algorithm */
	trycounter = NBuffers;
	for (;;)
	{
		buf = GetBufferDescriptor(ClockSweepTick());

		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
		 * it; decrement the usage_count (unless pinned) and keep scanning.
		 */
		LockBufHdr(buf);
		if (buf->refcount == 0)
		{
			if (buf->usage_count > 0)
			{
				buf->usage_count--;
				trycounter = NBuffers;
			}
			else
			{
				/* Found a usable buffer */
				if (strategy != NULL)
					AddBufferToRing(strategy, buf);
				return buf;
			}
		}
		else if (--trycounter == 0)
		{
			/*
			 * We've scanned all the buffers without making any state changes,
			 * so all the buffers are pinned (or were when we looked at them).
			 * We could hope that someone will free one eventually, but it's
			 * probably better to fail than to risk getting stuck in an
			 * infinite loop.
			 */
			UnlockBufHdr(buf);
			elog(ERROR, "no unpinned buffers available");
		}
		UnlockBufHdr(buf);
	}
}
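ClockSweepTick() above advances the clock hand without holding
buffer_strategy_lock. A simplified sketch of the idea, modeled on
PostgreSQL's ClockSweepTick(); the real function also handles counter
wraparound and tallies completePasses under the spinlock:

#include <stdatomic.h>

static atomic_uint NextVictim;	/* stand-in for StrategyControl state */

static unsigned
ClockSweepTickSketch(unsigned nbuffers)
{
	/* fetch-and-add lets many backends advance the hand concurrently */
	unsigned	victim = atomic_fetch_add(&NextVictim, 1);

	return victim % nbuffers;
}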
Example #5
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.  If
 *	*lock_held is set on exit, we have returned with the BufFreelistLock
 *	still held, as well; the caller must release that lock once the spinlock
 *	is dropped.	 We do it that way because releasing the BufFreelistLock
 *	might awaken other processes, and it would be bad to do the associated
 *	kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
    volatile BufferDesc *buf;
    volatile int bufIndex = -1;
    volatile int resultIndex = -1;
    int			trycounter;

    /* Lock the freelist */
    *lock_held = true;
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.
     */
    StrategyControl->numBufferAllocs++;

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        bufIndex = StrategyControl->firstFreeBuffer;
        buf = &BufferDescriptors[bufIndex];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.  It's probably impossible altogether as of 8.3, but
         * we'd better check anyway.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            resultIndex = bufIndex;
            break;
        }
        UnlockBufHdr(buf);
    }

    /*
     * Nothing on the freelist, so use the buffer replacement policy
     * to select a buffer to evict.
     */
    if (resultIndex == -1)
    {
        if (BufferReplacementPolicy == POLICY_CLOCK)
        {
            /* Run the "clock sweep" algorithm */
            trycounter = NBuffers;
            for (;;)
            {
                bufIndex = StrategyControl->nextVictimBuffer;
                buf = &BufferDescriptors[bufIndex];

                /*
                 * If the clock sweep hand has reached the end of the
                 * buffer pool, start back at the beginning.
                 */
                if (++StrategyControl->nextVictimBuffer >= NBuffers)
                {
                    StrategyControl->nextVictimBuffer = 0;
                    StrategyControl->completePasses++;
                }

                /*
                 * If the buffer is pinned or has a nonzero usage_count, we cannot use
                 * it; decrement the usage_count (unless pinned) and keep scanning.
                 */
                LockBufHdr(buf);
                if (buf->refcount == 0)
                {
                    if (buf->usage_count > 0)
                    {
                        buf->usage_count--;
                        trycounter = NBuffers;
                    }
                    else
                    {
                        /* Found a usable buffer */
                        resultIndex = bufIndex;
                        break;
                    }
                }
                else if (--trycounter == 0)
                {
                    /*
                     * We've scanned all the buffers without making any state changes,
                     * so all the buffers are pinned (or were when we looked at them).
                     * We could hope that someone will free one eventually, but it's
                     * probably better to fail than to risk getting stuck in an
                     * infinite loop.
                     */
                    UnlockBufHdr(buf);
                    elog(ERROR, "no unpinned buffers available");
                }
                UnlockBufHdr(buf);
            }
        }
        /*
         * CS186: LRU, MRU and 2Q buffer replacement policies, modeled on
         * the CLOCK code above. Once a buffer to evict has been selected,
         * its index in the BufferDescriptors array is assigned to
         * "resultIndex".
         */
        else if (BufferReplacementPolicy == POLICY_LRU)
        {
            resultIndex = StrategyHelper(&buf, &(StrategyControl->head), &(StrategyControl->tail));
            if(resultIndex == -1)
                elog(ERROR, "no unpinned buffers available");
        }
        else if (BufferReplacementPolicy == POLICY_MRU)
        {
            buf = StrategyControl->head;
            if (buf) {
                /* Head is unpinned: unlink it directly */
                if (buf->refcount == 0) {
                    resultIndex = buf->buf_id;
                    if (StrategyControl->head == StrategyControl->tail) {
                        /* Single-element list becomes empty */
                        StrategyControl->head = NULL;
                        StrategyControl->tail = NULL;
                    }
                    else {
                        buf->prev->next = buf->next;
                        buf->next->prev = buf->prev;
                        StrategyControl->head = buf->next;
                    }
                    buf->prev = NULL;
                    buf->next = NULL;
                    buf->queueTag = 0;
                }
                else {
                    buf = StrategyControl->head->next;

                    while (buf != StrategyControl->head) {
                        if(buf->refcount == 0) {
                            resultIndex = buf->buf_id;

                            buf->prev->next = buf->next;
                            buf->next->prev = buf->prev;
                            if (buf == StrategyControl->tail) {
                                StrategyControl->tail = buf->prev;
                            }
                            buf->prev = NULL;
                            buf->next = NULL;
                            buf->queueTag = 0;

                            break;
                        }
                        buf = buf->next;
                    }

                }
            }
            if(resultIndex == -1)
                elog(ERROR, "no unpinned buffers available");
        }
        else if (BufferReplacementPolicy == POLICY_2Q)
        {
            int threshold = NBuffers / 2;

            if (size_of_list(&(StrategyControl->head2Q)) >= threshold || !(StrategyControl->head)) {
                resultIndex = StrategyHelper(&buf, &(StrategyControl->head2Q), &(StrategyControl->tail2Q));
            }
            else {
                resultIndex = StrategyHelper(&buf, &(StrategyControl->head), &(StrategyControl->tail));
            }
            if (resultIndex == -1) {
                elog(ERROR, "no unpinned buffers available");
            }

        }
        else
        {
            elog(ERROR, "invalid buffer pool replacement policy %d", BufferReplacementPolicy);
        }

        /*
         * CS186 Grading LOG - DON'T TOUCH
         * Don't output logs starting with "GRADING" by yourself; they are for grading purposes only.
         */
        elog(LOG, "GRADING: EVICT %2d", resultIndex);
    }

    if (resultIndex == -1)
        elog(ERROR, "reached end of StrategyGetBuffer() without selecting a buffer");

    return &BufferDescriptors[resultIndex];
}
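StrategyHelper() is called by the LRU and 2Q branches above but is not
shown. Below is a hypothetical reconstruction consistent with those call
sites, assuming a NULL-terminated doubly-linked queue: scan from the head
for an unpinned buffer, unlink it, and return its buf_id (or -1 if all are
pinned).

static int
StrategyHelperSketch(volatile BufferDesc **out,
                     volatile BufferDesc **head,
                     volatile BufferDesc **tail)
{
    volatile BufferDesc *buf = *head;

    while (buf != NULL)
    {
        if (buf->refcount == 0)
        {
            /* Unlink from the queue */
            if (buf->prev)
                buf->prev->next = buf->next;
            else
                *head = buf->next;
            if (buf->next)
                buf->next->prev = buf->prev;
            else
                *tail = buf->prev;
            buf->prev = buf->next = NULL;
            *out = buf;
            return buf->buf_id;
        }
        buf = buf->next;
    }
    return -1;
}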
Example #6
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.  That
 *	means that we return with the BufFreelistLock still held, as well;
 *	the caller must release that lock once the spinlock is dropped.
 */
volatile BufferDesc *
StrategyGetBuffer(void)
{
    volatile BufferDesc *buf;
    int			trycounter;

    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
            return buf;
        UnlockBufHdr(buf);
    }

    /* Nothing on the freelist, so run the "clock sweep" algorithm */
    trycounter = NBuffers;
    for (;;)
    {
        buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

        if (++StrategyControl->nextVictimBuffer >= NBuffers)
            StrategyControl->nextVictimBuffer = 0;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; decrement the usage_count and keep scanning.
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
            return buf;
        if (buf->usage_count > 0)
        {
            buf->usage_count--;
            trycounter = NBuffers;
        }
        else if (--trycounter == 0)
        {
            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            UnlockBufHdr(buf);
            elog(ERROR, "no unpinned buffers available");
        }
        UnlockBufHdr(buf);
    }

    /* not reached */
    return NULL;
}
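The header contract ("return with the buffer header spinlock still held,
and with BufFreelistLock held") means the caller drops both locks. A sketch
of the call-site discipline, loosely modeled on BufferAlloc(); the real
caller pins via PinBuffer_Locked() and does much more:

static void
VictimAcquireSketch(void)
{
	volatile BufferDesc *buf;

	buf = StrategyGetBuffer();		/* header spinlock + BufFreelistLock held */

	buf->refcount++;				/* pin while the spinlock protects us */
	UnlockBufHdr(buf);				/* drop the spinlock first... */
	LWLockRelease(BufFreelistLock);	/* ...then the lwlock, per the contract */
}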
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	Datum		result;
	MemoryContext oldcontext;
	BufferCachePagesContext *fctx;		/* User function context. */
	TupleDesc	tupledesc;
	HeapTuple	tuple;

	if (SRF_IS_FIRSTCALL())
	{
		int			i;
		volatile BufferDesc *bufHdr;

		funcctx = SRF_FIRSTCALL_INIT();

		/* Switch context when allocating stuff to be used in later calls */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* Create a user function context for cross-call persistence */
		fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

		/* Construct a tuple descriptor for the result rows. */
		tupledesc = CreateTemplateTupleDesc(NUM_BUFFERCACHE_PAGES_ELEM, false);
		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
						   INT2OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
						   BOOLOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
						   INT2OID, -1, 0);

		fctx->tupdesc = BlessTupleDesc(tupledesc);

		/* Allocate NBuffers worth of BufferCachePagesRec records. */
		fctx->record = (BufferCachePagesRec *) palloc(sizeof(BufferCachePagesRec) * NBuffers);

		/* Set max calls and remember the user function context. */
		funcctx->max_calls = NBuffers;
		funcctx->user_fctx = fctx;

		/* Return to original context when allocating transient memory */
		MemoryContextSwitchTo(oldcontext);

		/*
		 * To get a consistent picture of the buffer state, we must lock all
		 * partitions of the buffer map.  Needless to say, this is horrible
		 * for concurrency.  Must grab locks in increasing order to avoid
		 * possible deadlocks.
		 */
		for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
			LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);

		/*
		 * Scan though all the buffers, saving the relevant fields in the
		 * fctx->record structure.
		 */
		for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
		{
			/* Lock each buffer header before inspecting. */
			LockBufHdr(bufHdr);

			fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
			fctx->record[i].relfilenode = bufHdr->tag.rnode.relNode;
			fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode;
			fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode;
			fctx->record[i].forknum = bufHdr->tag.forkNum;
			fctx->record[i].blocknum = bufHdr->tag.blockNum;
			fctx->record[i].usagecount = bufHdr->usage_count;

			if (bufHdr->flags & BM_DIRTY)
				fctx->record[i].isdirty = true;
			else
				fctx->record[i].isdirty = false;

			/* Note if the buffer is valid, and has storage created */
			if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID))
				fctx->record[i].isvalid = true;
			else
				fctx->record[i].isvalid = false;

			UnlockBufHdr(bufHdr);
		}

		/*
		 * And release locks.  We do this in reverse order for two reasons:
		 * (1) Anyone else who needs more than one of the locks will be trying
		 * to lock them in increasing order; we don't want to release the
		 * other process until it can get all the locks it needs. (2) This
		 * avoids O(N^2) behavior inside LWLockRelease.
		 */
		for (i = NUM_BUFFER_PARTITIONS; --i >= 0;)
			LWLockRelease(FirstBufMappingLock + i);
	}

	funcctx = SRF_PERCALL_SETUP();

	/* Get the saved state */
	fctx = funcctx->user_fctx;

	if (funcctx->call_cntr < funcctx->max_calls)
	{
		uint32		i = funcctx->call_cntr;
		Datum		values[NUM_BUFFERCACHE_PAGES_ELEM];
		bool		nulls[NUM_BUFFERCACHE_PAGES_ELEM];

		values[0] = Int32GetDatum(fctx->record[i].bufferid);
		nulls[0] = false;

		/*
		 * Set all fields except the bufferid to null if the buffer is unused
		 * or not valid.
		 */
		if (fctx->record[i].blocknum == InvalidBlockNumber ||
			fctx->record[i].isvalid == false)
		{
			nulls[1] = true;
			nulls[2] = true;
			nulls[3] = true;
			nulls[4] = true;
			nulls[5] = true;
			nulls[6] = true;
			nulls[7] = true;
		}
		else
		{
			values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode);
			nulls[1] = false;
			values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
			nulls[2] = false;
			values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
			nulls[3] = false;
			values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
			nulls[4] = false;
			values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
			nulls[5] = false;
			values[6] = BoolGetDatum(fctx->record[i].isdirty);
			nulls[6] = false;
			values[7] = Int16GetDatum(fctx->record[i].usagecount);
			nulls[7] = false;
		}

		/* Build and return the tuple. */
		tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	else
		SRF_RETURN_DONE(funcctx);
}
Example #8
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	Datum		result;
	MemoryContext oldcontext;
	BufferCachePagesContext *fctx;		/* User function context. */
	TupleDesc	tupledesc;
	TupleDesc	expected_tupledesc;
	HeapTuple	tuple;

	if (SRF_IS_FIRSTCALL())
	{
		int			i;

		funcctx = SRF_FIRSTCALL_INIT();

		/* Switch context when allocating stuff to be used in later calls */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* Create a user function context for cross-call persistence */
		fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

		/*
		 * To smoothly support upgrades from version 1.0 of this extension
		 * transparently handle the (non-)existence of the pinning_backends
		 * column. We unfortunately have to get the result type for that... -
		 * we can't use the result type determined by the function definition
		 * without potentially crashing when somebody uses the old (or even
		 * wrong) function definition though.
		 */
		if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
			expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
			elog(ERROR, "incorrect number of output arguments");

		/* Construct a tuple descriptor for the result rows. */
		tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts, false);
		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
						   INT2OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
						   BOOLOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
						   INT2OID, -1, 0);

		if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
			TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
							   INT4OID, -1, 0);

		fctx->tupdesc = BlessTupleDesc(tupledesc);

		/* Allocate NBuffers worth of BufferCachePagesRec records. */
		fctx->record = (BufferCachePagesRec *)
			MemoryContextAllocHuge(CurrentMemoryContext,
								   sizeof(BufferCachePagesRec) * NBuffers);

		/* Set max calls and remember the user function context. */
		funcctx->max_calls = NBuffers;
		funcctx->user_fctx = fctx;

		/* Return to original context when allocating transient memory */
		MemoryContextSwitchTo(oldcontext);

		/*
		 * Scan through all the buffers, saving the relevant fields in the
		 * fctx->record structure.
		 *
		 * We don't hold the partition locks, so we don't get a consistent
		 * snapshot across all buffers, but we do grab the buffer header
		 * locks, so the information of each buffer is self-consistent.
		 */
		for (i = 0; i < NBuffers; i++)
		{
			BufferDesc *bufHdr;
			uint32		buf_state;

			bufHdr = GetBufferDescriptor(i);
			/* Lock each buffer header before inspecting. */
			buf_state = LockBufHdr(bufHdr);

			fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
			fctx->record[i].relfilenode = bufHdr->tag.rnode.relNode;
			fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode;
			fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode;
			fctx->record[i].forknum = bufHdr->tag.forkNum;
			fctx->record[i].blocknum = bufHdr->tag.blockNum;
			fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
			fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);

			if (buf_state & BM_DIRTY)
				fctx->record[i].isdirty = true;
			else
				fctx->record[i].isdirty = false;

			/* Note if the buffer is valid, and has storage created */
			if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
				fctx->record[i].isvalid = true;
			else
				fctx->record[i].isvalid = false;

			UnlockBufHdr(bufHdr, buf_state);
		}
	}

	funcctx = SRF_PERCALL_SETUP();

	/* Get the saved state */
	fctx = funcctx->user_fctx;

	if (funcctx->call_cntr < funcctx->max_calls)
	{
		uint32		i = funcctx->call_cntr;
		Datum		values[NUM_BUFFERCACHE_PAGES_ELEM];
		bool		nulls[NUM_BUFFERCACHE_PAGES_ELEM];

		values[0] = Int32GetDatum(fctx->record[i].bufferid);
		nulls[0] = false;

		/*
		 * Set all fields except the bufferid to null if the buffer is unused
		 * or not valid.
		 */
		if (fctx->record[i].blocknum == InvalidBlockNumber ||
			fctx->record[i].isvalid == false)
		{
			nulls[1] = true;
			nulls[2] = true;
			nulls[3] = true;
			nulls[4] = true;
			nulls[5] = true;
			nulls[6] = true;
			nulls[7] = true;
			/* unused for v1.0 callers, but the array is always long enough */
			nulls[8] = true;
		}
		else
		{
			values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode);
			nulls[1] = false;
			values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
			nulls[2] = false;
			values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
			nulls[3] = false;
			values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
			nulls[4] = false;
			values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
			nulls[5] = false;
			values[6] = BoolGetDatum(fctx->record[i].isdirty);
			nulls[6] = false;
			values[7] = Int16GetDatum(fctx->record[i].usagecount);
			nulls[7] = false;
			/* unused for v1.0 callers, but the array is always long enough */
			values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
			nulls[8] = false;
		}

		/* Build and return the tuple. */
		tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	else
		SRF_RETURN_DONE(funcctx);
}
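In this newer version, LockBufHdr() returns a buf_state word that packs the
refcount, usage count and flag bits into one atomic uint32. A sketch of the
layout behind the BUF_STATE_GET_* macros used above, modeled on PostgreSQL's
buf_internals.h (illustrative; consult the real header for exact values):

#define BUF_REFCOUNT_MASK		((1U << 18) - 1)	/* bits 0-17 */
#define BUF_USAGECOUNT_SHIFT	18
#define BUF_USAGECOUNT_MASK		(0xFU << BUF_USAGECOUNT_SHIFT)	/* bits 18-21 */
/* bits 22-31 hold flags such as BM_DIRTY and BM_VALID */

#define BUF_STATE_GET_REFCOUNT(state)	((state) & BUF_REFCOUNT_MASK)
#define BUF_STATE_GET_USAGECOUNT(state) \
	(((state) & BUF_USAGECOUNT_MASK) >> BUF_USAGECOUNT_SHIFT)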
static void
SaveBuffers(void)
{
	int						i;
	int						num_buffers;
	int						log_level		= DEBUG3;
	SavedBuffer			   *saved_buffers;
	volatile BufferDesc	   *bufHdr;			/* XXX: Do we really need volatile here? */
	FILE				   *file			= NULL;
	int						database_counter= 0;
	Oid						prev_database	= InvalidOid;
	Oid						prev_filenode	= InvalidOid;
	ForkNumber				prev_forknum	= InvalidForkNumber;
	BlockNumber				prev_blocknum	= InvalidBlockNumber;
	BlockNumber				range_counter	= 0;
	const char			   *savefile_path;

	/*
	 * XXX: If the memory request fails, ask for a smaller memory chunk, and use
	 * it to create chunks of save-files, and make the workers read those chunks.
	 *
	 * This is not a concern as of now, so deferred; there's at least one other
	 * place that allocates (NBuffers * (much_bigger_struct)), so this seems to
	 * be an acceptable practice.
	 */

	saved_buffers = (SavedBuffer *) palloc(sizeof(SavedBuffer) * NBuffers);

	/* Lock the buffer partitions for reading. */
	for (i = 0; i < NUM_BUFFER_PARTITIONS; ++i)
		LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);

	/* Scan and save a list of valid buffers. */
	for (num_buffers = 0, i = 0, bufHdr = BufferDescriptors; i < NBuffers; ++i, ++bufHdr)
	{
		/* Lock each buffer header before inspecting. */
		LockBufHdr(bufHdr);

		/* Skip invalid buffers */
		if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID))
		{
			saved_buffers[num_buffers].database	= bufHdr->tag.rnode.dbNode;
			saved_buffers[num_buffers].filenode	= bufHdr->tag.rnode.relNode;
			saved_buffers[num_buffers].forknum	= bufHdr->tag.forkNum;
			saved_buffers[num_buffers].blocknum	= bufHdr->tag.blockNum;

			++num_buffers;
		}

		UnlockBufHdr(bufHdr);
	}

	/* Unlock the buffer partitions in reverse order, to avoid a deadlock. */
	for (i = NUM_BUFFER_PARTITIONS - 1; i >= 0; --i)
		LWLockRelease(FirstBufMappingLock + i);

	/*
	 * Sort the list, so that we can optimize the storage of these buffers.
	 *
	 * The side-effect of this storage optimization is that when reading the
	 * blocks back from relation forks, it leads to sequential reads, which
	 * improve the restore speeds quite considerably as compared to random reads
	 * from different blocks all over the data directory.
	 */
	pg_qsort(saved_buffers, num_buffers, sizeof(SavedBuffer), SavedBufferCmp);

	/* Connect to the database and start a transaction for database name lookups. */
	BackgroundWorkerInitializeConnection(guc_default_database, NULL);
	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "saving buffers");

	for (i = 0; i < num_buffers; ++i)
	{
		int j;
		SavedBuffer *buf = &saved_buffers[i];

		if (i == 0)
		{
			/*
			 * Special case for global objects. The sort brings them to the
			 * front of the list.
			 */

			/* Make sure the first buffer we save belongs to a global object. */
			Assert(buf->database == InvalidOid);

			/*
			 * Database number (and save-file name) 1 is reserved for storing
			 * the list of buffers of global objects.
			 */
			database_counter = 1;

			savefile_path = getSavefileName(database_counter);
			file = fileOpen(savefile_path, PG_BINARY_W);
			writeDBName("", file, savefile_path);

			prev_database = buf->database;
		}

		if (buf->database != prev_database)
		{
			char *dbname;

			/*
			 * We are beginning to process a different database than the
			 * previous one; close the save-file of previous database, and open
			 * a new one.
			 */
			++database_counter;

			dbname = get_database_name(buf->database);

			Assert(dbname != NULL);

			if (file != NULL)
				fileClose(file, savefile_path);

			savefile_path = getSavefileName(database_counter);
			file = fileOpen(savefile_path, PG_BINARY_W);
			writeDBName(dbname, file, savefile_path);

			pfree(dbname);

			/* Reset trackers appropriately */
			prev_database	= buf->database;
			prev_filenode	= InvalidOid;
			prev_forknum	= InvalidForkNumber;
			prev_blocknum	= InvalidBlockNumber;
			range_counter	= 0;
		}

		if (buf->filenode != prev_filenode)
		{
			/* We're beginning to process a new relation; emit a record for it. */
			fileWrite("r", 1, file, savefile_path);
			fileWrite(&(buf->filenode), sizeof(Oid), file, savefile_path);

			/* Reset trackers appropriately */
			prev_filenode	= buf->filenode;
			prev_forknum	= InvalidForkNumber;
			prev_blocknum	= InvalidBlockNumber;
			range_counter	= 0;
		}

		if (buf->forknum != prev_forknum)
		{
			/*
			 * We're beginning to process a new fork of this relation; add a
			 * record for it.
			 */
			fileWrite("f", 1, file, savefile_path);
			fileWrite(&(buf->forknum), sizeof(ForkNumber), file, savefile_path);

			/* Reset trackers appropriately */
			prev_forknum	= buf->forknum;
			prev_blocknum	= InvalidBlockNumber;
			range_counter	= 0;
		}

		ereport(log_level,
				(errmsg("writer: writing block db %d filenode %d forknum %d blocknum %d",
						database_counter, prev_filenode, prev_forknum, buf->blocknum)));

		fileWrite("b", 1, file, savefile_path);
		fileWrite(&(buf->blocknum), sizeof(BlockNumber), file, savefile_path);

		prev_blocknum = buf->blocknum;

		/*
		 * If a continuous range of blocks follows this block, then emit one
		 * entry for the range, instead of one for each block.
		 */
		range_counter = 0;

		for (j = i + 1; j < num_buffers; ++j)
		{
			SavedBuffer *tmp = &saved_buffers[j];

			if (tmp->database		== prev_database
				&& tmp->filenode	== prev_filenode
				&& tmp->forknum		== prev_forknum
				&& tmp->blocknum	== (prev_blocknum + range_counter + 1))
			{
				++range_counter;
			}
			else
			{
				/* The list is sorted, so a broken run cannot resume later */
				break;
			}
		}

		if (range_counter != 0)
		{
			ereport(log_level,
				(errmsg("writer: writing range db %d filenode %d forknum %d blocknum %d range %d",
						database_counter, prev_filenode, prev_forknum, prev_blocknum, range_counter)));

			fileWrite("N", 1, file, savefile_path);
			fileWrite(&range_counter, sizeof(range_counter), file, savefile_path);

			i += range_counter;
		}
	}

	ereport(LOG,
			(errmsg("Buffer Saver: saved metadata of %d blocks", num_buffers)));

	Assert(file != NULL);
	fileClose(file, savefile_path);

	pfree(saved_buffers);

	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);
}
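The save-file written above is a stream of tagged records: 'r' + filenode,
'f' + fork number, 'b' + block number, and an optional 'N' + count that
run-length encodes a range of consecutive blocks. A hypothetical sketch of
the matching reader (plain fread() stands in for the author's fileRead()
wrapper; error handling omitted):

static void
RestoreRecordsSketch(FILE *file)
{
	char		rec;
	Oid			filenode = InvalidOid;
	ForkNumber	forknum = InvalidForkNumber;
	BlockNumber	blocknum = InvalidBlockNumber;
	BlockNumber	range;
	BlockNumber	j;

	while (fread(&rec, 1, 1, file) == 1)
	{
		switch (rec)
		{
			case 'r':
				fread(&filenode, sizeof(Oid), 1, file);
				break;
			case 'f':
				fread(&forknum, sizeof(ForkNumber), 1, file);
				break;
			case 'b':
				fread(&blocknum, sizeof(BlockNumber), 1, file);
				/* read or prefetch (filenode, forknum, blocknum) here */
				break;
			case 'N':
				fread(&range, sizeof(range), 1, file);
				for (j = 1; j <= range; j++)
				{
					/* read or prefetch blocknum + j of the current fork */
				}
				break;
		}
	}
}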
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.  If
 *	*lock_held is set on exit, we have returned with the BufFreelistLock
 *	still held, as well; the caller must release that lock once the spinlock
 *	is dropped.  We do it that way because releasing the BufFreelistLock
 *	might awaken other processes, and it would be bad to do the associated
 *	kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
	volatile BufferDesc *buf;
	Latch	   *bgwriterLatch;
	DllNode    *d;
	elog(LOG, "in StrategyGetBuffer");

	/*
	 * This variant does not consult a strategy ring; lock the freelist
	 * directly.
	 */
	*lock_held = true;
	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

	/*
	 * We count buffer allocation requests so that the bgwriter can estimate
	 * the rate of buffer consumption.	Note that buffers recycled by a
	 * strategy object are intentionally not counted here.
	 */
	StrategyControl->numBufferAllocs++;

	/*
	 * If bgwriterLatch is set, we need to waken the bgwriter, but we should
	 * not do so while holding BufFreelistLock; so release and re-grab.  This
	 * is annoyingly tedious, but it happens at most once per bgwriter cycle,
	 * so the performance hit is minimal.
	 */
	bgwriterLatch = StrategyControl->bgwriterLatch;
	if (bgwriterLatch)
	{
		StrategyControl->bgwriterLatch = NULL;
		LWLockRelease(BufFreelistLock);
		SetLatch(bgwriterLatch);
		LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
	}

	/*
	 * Acquire BufDllLock only after the latch dance above, so we never
	 * re-acquire BufFreelistLock while holding it.
	 */
	LWLockAcquire(BufDllLock, LW_EXCLUSIVE);

	/*
	 * Try to get a buffer from the freelist.  Note that the freeNext fields
	 * are considered to be protected by the BufFreelistLock not the
	 * individual buffer spinlocks, so it's OK to manipulate them without
	 * holding the spinlock.
	 */
	while (StrategyControl->firstFreeBuffer >= 0)
	{
		buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
		Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

		/* Unconditionally remove buffer from freelist */
		StrategyControl->firstFreeBuffer = buf->freeNext;
		buf->freeNext = FREENEXT_NOT_IN_LIST;

		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
		 * it; discard it and retry.  (This can only happen if VACUUM put a
		 * valid buffer in the freelist and then someone else used it before
		 * we got to it.  It's probably impossible altogether as of 8.3, but
		 * we'd better check anyway.)
		 */
		LockBufHdr(buf);
		if (buf->refcount == 0 && buf->usage_count == 0)
		{
			/*
			 * firstFreeBuffer was already advanced above, so index the node
			 * array with this buffer's own id.
			 */
			BufNodes[buf->buf_id] = dllInsertInt(BufDLL, buf->buf_id, TAIL);
			LWLockRelease(BufDllLock);
			return buf;
		}
		UnlockBufHdr(buf);
	}

	/* Nothing on the freelist, so evict in LRU order from the DLL */
	for (d = BufDLL->head; d; d = d->next)
	{
		buf = &BufferDescriptors[d->data];
		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
		 * it; decrement the usage_count (unless pinned) and keep scanning.
		 */
		LockBufHdr(buf);
		if (buf->refcount == 0)
		{
			/*
			 * Found a usable buffer; move its list node (not the descriptor)
			 * to the LRU tail.
			 */
			dllMove(BufDLL, d, TAIL);
			LWLockRelease(BufDllLock);
			elog(LOG, "Successfully got buf from list");
			return buf;
		}
		UnlockBufHdr(buf);
	}
	/*
	 * We've scanned all the buffers without making any state changes,
	 * so all the buffers are pinned (or were when we looked at them).
	 * We could hope that someone will free one eventually, but it's
	 * probably better to fail than to risk getting stuck in an
	 * infinite loop.  Each header was already unlocked inside the loop, so
	 * there is nothing left to unlock here.
	 */
	elog(ERROR, "no unpinned buffers available");

	/* not reached */
	LWLockRelease(BufDllLock);
	return NULL;
}
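The dll* API used above (dllInsertInt, dllMove, BufDLL, BufNodes) is not
shown. A hypothetical minimal version of the move-to-tail operation the LRU
scan depends on, self-contained for illustration:

typedef struct DllNodeSketch
{
	int						data;	/* buffer index */
	struct DllNodeSketch   *prev;
	struct DllNodeSketch   *next;
} DllNodeSketch;

typedef struct DllSketch
{
	DllNodeSketch  *head;			/* least recently used */
	DllNodeSketch  *tail;			/* most recently used */
} DllSketch;

/* Move a node already in the list to the tail (most recently used). */
static void
dllMoveToTailSketch(DllSketch *list, DllNodeSketch *node)
{
	if (list->tail == node)
		return;

	/* Unlink; node != tail implies node->next != NULL */
	if (node->prev)
		node->prev->next = node->next;
	else
		list->head = node->next;
	node->next->prev = node->prev;

	/* Relink at the tail */
	node->prev = list->tail;
	node->next = NULL;
	list->tail->next = node;
	list->tail = node;
}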
Example #11
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.  If
 *	*lock_held is set on exit, we have returned with the BufFreelistLock
 *	still held, as well; the caller must release that lock once the spinlock
 *	is dropped.  We do it that way because releasing the BufFreelistLock
 *	might awaken other processes, and it would be bad to do the associated
 *	kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
    volatile BufferDesc *buf;
    Latch	   *bgwriterLatch;
    int			trycounter;

    volatile int resultIndex = -1;

    volatile BufferDesc *next;
    volatile BufferDesc *previous;

    /*
     * If given a strategy object, see whether it can select a buffer. We
     * assume strategy objects don't need the BufFreelistLock.
     */
    if (strategy != NULL)
    {
        buf = GetBufferFromRing(strategy);
        if (buf != NULL)
        {
            *lock_held = false;
            return buf;
        }
    }

    /* Nope, so lock the freelist */
    *lock_held = true;
    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

    /*
     * We count buffer allocation requests so that the bgwriter can estimate
     * the rate of buffer consumption.  Note that buffers recycled by a
     * strategy object are intentionally not counted here.
     */
    StrategyControl->numBufferAllocs++;

    /*
     * If bgwriterLatch is set, we need to waken the bgwriter, but we should
     * not do so while holding BufFreelistLock; so release and re-grab.  This
     * is annoyingly tedious, but it happens at most once per bgwriter cycle,
     * so the performance hit is minimal.
     */
    bgwriterLatch = StrategyControl->bgwriterLatch;
    if (bgwriterLatch)
    {
        StrategyControl->bgwriterLatch = NULL;
        LWLockRelease(BufFreelistLock);
        SetLatch(bgwriterLatch);
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    }

    /*
     * Try to get a buffer from the freelist.  Note that the freeNext fields
     * are considered to be protected by the BufFreelistLock not the
     * individual buffer spinlocks, so it's OK to manipulate them without
     * holding the spinlock.
     */

    while (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

        /* Unconditionally remove buffer from freelist */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /*
         * If the buffer is pinned or has a nonzero usage_count, we cannot use
         * it; discard it and retry.  (This can only happen if VACUUM put a
         * valid buffer in the freelist and then someone else used it before
         * we got to it.  It's probably impossible altogether as of 8.3, but
         * we'd better check anyway.)
         */
        LockBufHdr(buf);
        if (buf->refcount == 0 && buf->usage_count == 0)
        {
            if (strategy != NULL)
                AddBufferToRing(strategy, buf);
            return buf;
        }
        UnlockBufHdr(buf);
    }

    /* Nothing on the freelist, so run the algorithm selected by the
     * BufferReplacementPolicy variable */
    if (resultIndex == -1)
    {
        if (BufferReplacementPolicy == POLICY_CLOCK)
        {
            /* Run the clock sweep algorithm (the Postgres default) */
            trycounter = NBuffers;
            for (;;)
            {
                buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];
                /*
                * If the clock sweep hand has reached the end of the
                * buffer pool, start back at the beginning.
                */

                if (++StrategyControl->nextVictimBuffer >= NBuffers)
                {
                    StrategyControl->nextVictimBuffer = 0;
                    StrategyControl->completePasses++;
                }

                /*
                 * If the buffer is pinned or has a nonzero usage_count, we cannot use
                 * it; decrement the usage_count (unless pinned) and keep scanning.
                 */
                LockBufHdr(buf);
                if (buf->refcount == 0)
                {
                    if (buf->usage_count > 0)
                    {
                        buf->usage_count--;
                        trycounter = NBuffers;
                    }
                    else
                    {
                        /* Found a usable buffer */
                        if (strategy != NULL)
                            AddBufferToRing(strategy, buf);
                        return buf;
                    }
                }
                else if (--trycounter == 0)
                {
                    /*
                     * We've scanned all the buffers without making any state changes,
                     * so all the buffers are pinned (or were when we looked at them).
                     * We could hope that someone will free one eventually, but it's
                     * probably better to fail than to risk getting stuck in an
                     * infinite loop.
                     */
                    UnlockBufHdr(buf);
                    elog(ERROR, "no unpinned buffers available");
                }
                UnlockBufHdr(buf);
            }
        }
        /* Implementation of LRU, MRU and 2Q Algorithms.
         * Once we've selected a buffer to evict, its index in
         * the BufferDescriptors array is stored in "resultIndex" */
        else if (BufferReplacementPolicy == POLICY_LRU)
        {
            buf = StrategyControl->firstUnpinned;

            while (buf != NULL) {
                LockBufHdr(buf);
                if (buf->refcount == 0) {
                    resultIndex = buf->buf_id;
                    break;
                } else {
                    UnlockBufHdr(buf);
                    buf = buf->next;
                }

            }
            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            if (buf == NULL) {
                /* buf is NULL here, so there is no header lock to release */
                elog(ERROR, "no unpinned buffers available");
            }

        }
        else if (BufferReplacementPolicy == POLICY_MRU)
        {
            buf = StrategyControl->lastUnpinned;
            while (buf != NULL) {
                LockBufHdr(buf);
                if (buf->refcount == 0) {
                    resultIndex = buf->buf_id;
                    break;
                } else {
                    UnlockBufHdr(buf);
                    buf = buf->previous;
                }
            }
            /*
             * We've scanned all the buffers without making any state changes,
             * so all the buffers are pinned (or were when we looked at them).
             * We could hope that someone will free one eventually, but it's
             * probably better to fail than to risk getting stuck in an
             * infinite loop.
             */
            if (buf == NULL) {
                /* buf is NULL here, so there is no header lock to release */
                elog(ERROR, "no unpinned buffers available");
            }

        }
        else if (BufferReplacementPolicy == POLICY_2Q)
        {
            int thres = NBuffers/2;
            int sizeA1 = 0;
            volatile BufferDesc *head = StrategyControl->a1Head;
            while (head != NULL) {
                head = head->next;
                sizeA1++;
            }
            if (sizeA1 >= thres || StrategyControl->lastUnpinned == NULL) {
                buf = StrategyControl->a1Head;
                while (buf != NULL) {
                    LockBufHdr(buf);
                    if (buf->refcount == 0) {
                        resultIndex = buf->buf_id;
                        next = buf->next;
                        previous = buf->previous;
                        //adjust neighbors
                        if (next != NULL) {
                            if (previous != NULL) { //next and prev != null, buf is already in middle of list
                                previous->next = next;
                                next->previous = previous;
                            } else { //next != null, prev == null, buf is at beginning of list
                                next->previous = NULL;
                                StrategyControl->a1Head = next;
                            }
                        } else if (previous == NULL) { //next == NULL, prev == null, buf is only item in list
                            StrategyControl->a1Head = NULL;
                            StrategyControl->a1Tail = NULL;
                        } else { //buf is last item in list, next == null, prev != null
                            StrategyControl->a1Tail = previous;
                            previous->next = NULL;
                        }
                        buf->next = NULL;
                        buf->previous = NULL;
                        break;

                    } else {
                        UnlockBufHdr(buf);
                        buf = buf->next;
                    }

                }
                if (buf == NULL) {

                    elog(ERROR, "no unpinned buffers available");
                }

            } else { // delete from the head of AM
                buf = StrategyControl->firstUnpinned;
                while (buf != NULL) {
                    LockBufHdr(buf);
                    if (buf->refcount == 0) {
                        resultIndex = buf->buf_id;
                        next = buf->next;
                        previous = buf->previous;
                        //adjust neighbors
                        if (next != NULL) {
                            if (previous != NULL) { //next and prev != null, buf is already in middle of list
                                previous->next = next;
                                next->previous = previous;
                            } else { //next != null, prev == null, buf is at beginning of list
                                next->previous = NULL;
                                StrategyControl->firstUnpinned = next;
                            }
                        } else if (previous == NULL) { //next == NULL, prev == null, buf is new to list
                            StrategyControl->firstUnpinned = NULL;
                            StrategyControl->lastUnpinned = NULL;
                        } else {
                            previous->next = NULL;
                            StrategyControl->lastUnpinned = previous;
                        }
                        buf->next = NULL;
                        buf->previous = NULL;
                        break;
                    } else {
                        UnlockBufHdr(buf);
                        buf = buf->next;
                    }

                }
                if (buf == NULL) {
                    /* buf is NULL here; nothing to unlock */
                    elog(ERROR, "no unpinned buffers available");
                }
            }
        }
        else
        {
            elog(ERROR, "invalid buffer pool replacement policy %d", BufferReplacementPolicy);
        }

    }

    if (resultIndex == -1)
    {
        elog(ERROR, "reached end of StrategyGetBuffer() without selecting a buffer");
    }

    return &BufferDescriptors[resultIndex];
}
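The 2Q branch above distills to one admission decision: evict from the A1
FIFO while it holds at least NBuffers/2 buffers, or when the AM queue is
empty; otherwise evict from AM. A sketch of just that rule (hypothetical
helper, stand-in parameters):

#include <stdbool.h>

static bool
EvictFromA1Sketch(int sizeA1, int nbuffers, bool amEmpty)
{
	/* Prefer the FIFO of once-touched pages until it shrinks below the
	 * threshold; fall back to it anyway if AM has nothing to offer. */
	return sizeA1 >= nbuffers / 2 || amEmpty;
}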