/*
 * DropRelFileNodeAllLocalBuffers
 *		This function removes from the buffer pool all pages of all forks
 *		of the specified relation.
 *
 * See DropRelFileNodeAllBuffers in bufmgr.c for more notes.
 */
void
DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
{
    int         i;

    for (i = 0; i < NLocBuffer; i++)
    {
        BufferDesc *bufHdr = &LocalBufferDescriptors[i];
        LocalBufferLookupEnt *hresult;

        if ((bufHdr->flags & BM_TAG_VALID) &&
            RelFileNodeEquals(bufHdr->tag.rnode, rnode))
        {
            if (LocalRefCount[i] != 0)
                elog(ERROR, "block %u of %s is still referenced (local %u)",
                     bufHdr->tag.blockNum,
                     relpathbackend(bufHdr->tag.rnode, MyBackendId,
                                    bufHdr->tag.forkNum),
                     LocalRefCount[i]);
            /* Remove entry from hashtable */
            hresult = (LocalBufferLookupEnt *)
                hash_search(LocalBufHash, (void *) &bufHdr->tag,
                            HASH_REMOVE, NULL);
            if (!hresult)       /* shouldn't happen */
                elog(ERROR, "local buffer hash table corrupted");
            /* Mark buffer invalid */
            CLEAR_BUFFERTAG(bufHdr->tag);
            bufHdr->flags = 0;
            bufHdr->usage_count = 0;
        }
    }
}
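/*
 * Hedged usage sketch (not part of the sources above): bufmgr.c's
 * DropRelFileNodeAllBuffers dispatches to the local-buffer routine when
 * the relation is a temporary one belonging to this backend.  The
 * RelFileNodeBackend struct and RelFileNodeBackendIsTemp macro match
 * 9.1-era headers; the _sketch name marks this as an illustration, not
 * the verbatim caller.
 */
static void
drop_rel_all_buffers_sketch(RelFileNodeBackend rnode)
{
    if (RelFileNodeBackendIsTemp(rnode))
    {
        /* Temp relations are visible only to their owning backend */
        if (rnode.backend == MyBackendId)
            DropRelFileNodeAllLocalBuffers(rnode.node);
        return;
    }
    /* ... scan of shared buffers elided ... */
}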
/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
    bool        foundBufs,
                foundDescs;

    BufferDescriptors = (BufferDesc *)
        ShmemInitStruct("Buffer Descriptors",
                        NBuffers * sizeof(BufferDesc), &foundDescs);

    BufferBlocks = (char *)
        ShmemInitStruct("Buffer Blocks",
                        NBuffers * (Size) BLCKSZ, &foundBufs);

    if (foundDescs || foundBufs)
    {
        /* both should be present or neither */
        Assert(foundDescs && foundBufs);
        /* note: this path is only taken in EXEC_BACKEND case */
    }
    else
    {
        BufferDesc *buf;
        int         i;

        buf = BufferDescriptors;

        /*
         * Initialize all the buffer headers.
         */
        for (i = 0; i < NBuffers; buf++, i++)
        {
            CLEAR_BUFFERTAG(buf->tag);
            buf->flags = 0;
            buf->usage_count = 0;
            buf->refcount = 0;
            buf->wait_backend_pid = 0;

            SpinLockInit(&buf->buf_hdr_lock);

            buf->buf_id = i;

            /*
             * Initially link all the buffers together as unused. Subsequent
             * management of this list is done by freelist.c.
             */
            buf->freeNext = i + 1;

            buf->io_in_progress_lock = LWLockAssign();
            buf->content_lock = LWLockAssign();
        }

        /* Correct last entry of linked list */
        BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
    }

    /* Init other shared buffer-management stuff */
    StrategyInitialize(!foundDescs);
}
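/*
 * Hedged sketch: how freelist.c consumes the freeNext chain built by the
 * function above.  StrategyControl->firstFreeBuffer and the FREENEXT_*
 * constants come from buf_internals.h/freelist.c of the same era; this
 * loop is a simplified illustration of StrategyGetBuffer(), not the
 * verbatim code (in particular, all locking is omitted).
 */
static BufferDesc *
pop_free_buffer_sketch(void)
{
    while (StrategyControl->firstFreeBuffer >= 0)
    {
        BufferDesc *buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];

        /* Unlink the head of the free list */
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;

        /* A free-list buffer may have been touched since it was freed */
        if (buf->refcount == 0 && buf->usage_count == 0)
            return buf;
    }
    return NULL;                /* list empty: caller falls back to clock sweep */
}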
/*
 * StrategyInvalidateBuffer
 *
 * Called by the buffer manager to inform us that a buffer's content
 * is no longer valid.  We simply throw away any existing buffer hash
 * entry and move the CDB and buffer to the free lists.
 */
void
StrategyInvalidateBuffer(BufferDesc *buf)
{
    int         cdb_id;
    BufferStrategyCDB *cdb;

    /* The buffer cannot be dirty or pinned */
    Assert(!(buf->flags & BM_DIRTY) || !(buf->flags & BM_VALID));
    Assert(buf->refcount == 0);

    /*
     * Look up the cache directory block for this buffer
     */
    cdb_id = BufTableLookup(&(buf->tag));
    if (cdb_id < 0)
        elog(ERROR, "buffer %d not in buffer hash table", buf->buf_id);
    cdb = &StrategyCDB[cdb_id];

    /*
     * Remove the CDB from the hashtable and the queue it is currently on.
     */
    BufTableDelete(&(cdb->buf_tag));
    STRAT_LIST_REMOVE(cdb);

    /*
     * Clear out the CDB's buffer tag and association with the buffer, and
     * add it to the list of unused CDBs.
     */
    CLEAR_BUFFERTAG(cdb->buf_tag);
    cdb->buf_id = -1;
    cdb->next = StrategyControl->listUnusedCDB;
    StrategyControl->listUnusedCDB = cdb_id;

    /*
     * Clear out the buffer's tag and add it to the list of currently unused
     * buffers.  We must do this to ensure that linear scans of the buffer
     * array don't think the buffer is valid.
     */
    CLEAR_BUFFERTAG(buf->tag);
    buf->flags &= ~(BM_VALID | BM_DIRTY);
    buf->cntxDirty = false;
    buf->bufNext = StrategyControl->listFreeBuffers;
    StrategyControl->listFreeBuffers = buf->buf_id;
}
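/*
 * Hedged sketch: the inverse of the listUnusedCDB push performed above.
 * When a new page enters the cache, the 7.5-era 2Q code pops a CDB from
 * the same LIFO; this helper (hypothetical name) shows just that pop.
 */
static int
get_unused_cdb_sketch(void)
{
    int         cdb_id = StrategyControl->listUnusedCDB;

    if (cdb_id >= 0)
    {
        StrategyControl->listUnusedCDB = StrategyCDB[cdb_id].next;
        StrategyCDB[cdb_id].next = -1;
    }
    return cdb_id;              /* -1 means no unused CDB is available */
}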
/*
 * InitBufferPool (standalone variant)
 *
 * Altered to allocate the buffer pool with plain malloc() instead of
 * shared memory.  ShmemBase is pointed at the buffer blocks so that
 * MAKE_PTR() offsets resolve into the malloc'd region.
 */
void
InitBufferPool(void)
{
    int         i;
    BufferDesc *buf;

    /* Use regular memory; calloc so all ref counts start at zero */
    PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
    BufferDescriptors = (BufferDesc *) malloc(NBuffers * sizeof(BufferDesc));
    BufferBlocks = (char *) malloc(NBuffers * BLCKSZ);

    /* Make MAKE_PTR() offsets relative to our malloc'd block array */
    ShmemBase = BufferBlocks;

    buf = BufferDescriptors;
    for (i = 0; i < NBuffers; buf++, i++)
    {
        CLEAR_BUFFERTAG(buf->tag);
        buf->flags = 0;
        buf->usage_count = 0;
        buf->refcount = 0;
        buf->wait_backend_pid = 0;
        buf->freeNext = i + 1;
        buf->buf_id = i;

        /* Touch each block to verify the offset mapping works */
        *((char *) MAKE_PTR(i * BLCKSZ)) = '#';

        buf->io_in_progress_lock = LWLockAssign();
        buf->content_lock = LWLockAssign();
    }

    /* Mark the end of the free list */
    BufferDescriptors[NBuffers - 1].freeNext = -1;

    StrategyInitialize(true);
}
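/*
 * Hedged sketch of the offset <-> pointer macros the malloc variant above
 * relies on.  In historical shmem.h they are defined roughly as below;
 * pointing ShmemBase at BufferBlocks is what makes MAKE_PTR(i * BLCKSZ)
 * resolve to the i'th malloc'd block.  Types are simplified (ShmemBase
 * was an unsigned long in some releases), so treat this as illustrative.
 */
extern char *ShmemBase;

#define MAKE_PTR(off)    (ShmemBase + (unsigned long) (off))
#define MAKE_OFFSET(ptr) ((unsigned long) ((char *) (ptr) - ShmemBase))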
/*
 * DropRelFileNodeLocalBuffers
 *		This function removes from the buffer pool all the pages of the
 *		specified relation that have block numbers >= firstDelBlock.
 *		(In particular, with firstDelBlock = 0, all pages are removed.)
 *		Dirty pages are simply dropped, without bothering to write them
 *		out first.  Therefore, this is NOT rollback-able, and so should be
 *		used only with extreme caution!
 *
 * See DropRelFileNodeBuffers in bufmgr.c for more notes.
 */
void
DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
                            BlockNumber firstDelBlock)
{
    int         i;

    for (i = 0; i < NLocBuffer; i++)
    {
        BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
        LocalBufferLookupEnt *hresult;
        uint32      buf_state;

        buf_state = pg_atomic_read_u32(&bufHdr->state);

        if ((buf_state & BM_TAG_VALID) &&
            RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
            bufHdr->tag.forkNum == forkNum &&
            bufHdr->tag.blockNum >= firstDelBlock)
        {
            if (LocalRefCount[i] != 0)
                elog(ERROR, "block %u of %s is still referenced (local %u)",
                     bufHdr->tag.blockNum,
                     relpathbackend(bufHdr->tag.rnode, MyBackendId,
                                    bufHdr->tag.forkNum),
                     LocalRefCount[i]);
            /* Remove entry from hashtable */
            hresult = (LocalBufferLookupEnt *)
                hash_search(LocalBufHash, (void *) &bufHdr->tag,
                            HASH_REMOVE, NULL);
            if (!hresult)       /* shouldn't happen */
                elog(ERROR, "local buffer hash table corrupted");
            /* Mark buffer invalid */
            CLEAR_BUFFERTAG(bufHdr->tag);
            buf_state &= ~BUF_FLAG_MASK;
            buf_state &= ~BUF_USAGECOUNT_MASK;
            pg_atomic_write_u32(&bufHdr->state, buf_state);
        }
    }
}
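/*
 * Hedged sketch of the packed state-word layout assumed by the function
 * above, per 9.6-era buf_internals.h: the low 18 bits hold the refcount,
 * the next 4 bits the usage count, and the high 10 bits the BM_* flags.
 * Clearing BUF_FLAG_MASK and BUF_USAGECOUNT_MASK therefore invalidates
 * the buffer while leaving the refcount bits alone (local buffers track
 * pins in the separate LocalRefCount array anyway).
 */
#define BUF_REFCOUNT_MASK   ((1U << 18) - 1)
#define BUF_USAGECOUNT_MASK 0x003C0000U
#define BUF_USAGECOUNT_SHIFT 18
#define BUF_FLAG_MASK       0xFFC00000U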
/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
    bool        foundBufs,
                foundDescs,
                foundIOLocks,
                foundBufCkpt;

    /* Align descriptors to a cacheline boundary. */
    BufferDescriptors = (BufferDescPadded *)
        ShmemInitStruct("Buffer Descriptors",
                        NBuffers * sizeof(BufferDescPadded),
                        &foundDescs);

    BufferBlocks = (char *)
        ShmemInitStruct("Buffer Blocks",
                        NBuffers * (Size) BLCKSZ, &foundBufs);

    /* Align lwlocks to cacheline boundary */
    BufferIOLWLockArray = (LWLockMinimallyPadded *)
        ShmemInitStruct("Buffer IO Locks",
                        NBuffers * (Size) sizeof(LWLockMinimallyPadded),
                        &foundIOLocks);

    LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io");
    LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content");

    /*
     * The array used to sort to-be-checkpointed buffer ids is located in
     * shared memory, to avoid having to allocate significant amounts of
     * memory at runtime.  As that'd be in the middle of a checkpoint, or
     * when the checkpointer is restarted, memory allocation failures would
     * be painful.
     */
    CkptBufferIds = (CkptSortItem *)
        ShmemInitStruct("Checkpoint BufferIds",
                        NBuffers * sizeof(CkptSortItem), &foundBufCkpt);

    if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
    {
        /* should find all of these, or none of them */
        Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);
        /* note: this path is only taken in EXEC_BACKEND case */
    }
    else
    {
        int         i;

        /*
         * Initialize all the buffer headers.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *buf = GetBufferDescriptor(i);

            CLEAR_BUFFERTAG(buf->tag);

            pg_atomic_init_u32(&buf->state, 0);
            buf->wait_backend_pid = 0;

            buf->buf_id = i;

            /*
             * Initially link all the buffers together as unused. Subsequent
             * management of this list is done by freelist.c.
             */
            buf->freeNext = i + 1;

            LWLockInitialize(BufferDescriptorGetContentLock(buf),
                             LWTRANCHE_BUFFER_CONTENT);

            LWLockInitialize(BufferDescriptorGetIOLock(buf),
                             LWTRANCHE_BUFFER_IO_IN_PROGRESS);
        }

        /* Correct last entry of linked list */
        GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
    }

    /* Init other shared buffer-management stuff */
    StrategyInitialize(!foundDescs);

    /* Initialize per-backend file flush context */
    WritebackContextInit(&BackendWritebackContext,
                         &backend_flush_after);
}
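/*
 * Hedged sketch: reading the packed fields back out of the atomic state
 * word initialized above.  BUF_STATE_GET_REFCOUNT/USAGECOUNT are the
 * 9.6-era accessor macros; the helper itself is hypothetical and takes
 * no header lock, so the values are only a consistent snapshot of one
 * 32-bit read.
 */
static void
report_buffer_state_sketch(BufferDesc *buf)
{
    uint32      state = pg_atomic_read_u32(&buf->state);

    elog(DEBUG1, "buf %d: refcount=%u usagecount=%u dirty=%d valid=%d",
         buf->buf_id,
         BUF_STATE_GET_REFCOUNT(state),
         BUF_STATE_GET_USAGECOUNT(state),
         (state & BM_DIRTY) != 0,
         (state & BM_VALID) != 0);
}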
/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
    bool        foundBufs,
                foundDescs,
                foundIOLocks;

    /* Align descriptors to a cacheline boundary. */
    BufferDescriptors = (BufferDescPadded *) CACHELINEALIGN(
                    ShmemInitStruct("Buffer Descriptors",
                                    NBuffers * sizeof(BufferDescPadded)
                                    + PG_CACHE_LINE_SIZE,
                                    &foundDescs));

    BufferBlocks = (char *)
        ShmemInitStruct("Buffer Blocks",
                        NBuffers * (Size) BLCKSZ, &foundBufs);

    /* Align lwlocks to cacheline boundary */
    BufferIOLWLockArray = (LWLockMinimallyPadded *)
        CACHELINEALIGN(ShmemInitStruct("Buffer IO Locks",
                                       NBuffers *
                                       (Size) sizeof(LWLockMinimallyPadded)
                                       + PG_CACHE_LINE_SIZE,
                                       &foundIOLocks));

    BufferIOLWLockTranche.name = "buffer_io";
    BufferIOLWLockTranche.array_base = BufferIOLWLockArray;
    BufferIOLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
    LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS,
                          &BufferIOLWLockTranche);

    BufferContentLWLockTranche.name = "buffer_content";
    BufferContentLWLockTranche.array_base =
        ((char *) BufferDescriptors) + offsetof(BufferDesc, content_lock);
    BufferContentLWLockTranche.array_stride = sizeof(BufferDescPadded);
    LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT,
                          &BufferContentLWLockTranche);

    if (foundDescs || foundBufs || foundIOLocks)
    {
        /* should find all of these, or none of them */
        Assert(foundDescs && foundBufs && foundIOLocks);
        /* note: this path is only taken in EXEC_BACKEND case */
    }
    else
    {
        int         i;

        /*
         * Initialize all the buffer headers.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *buf = GetBufferDescriptor(i);

            CLEAR_BUFFERTAG(buf->tag);
            buf->flags = 0;
            buf->usage_count = 0;
            buf->refcount = 0;
            buf->wait_backend_pid = 0;

            SpinLockInit(&buf->buf_hdr_lock);

            buf->buf_id = i;

            /*
             * Initially link all the buffers together as unused. Subsequent
             * management of this list is done by freelist.c.
             */
            buf->freeNext = i + 1;

            LWLockInitialize(BufferDescriptorGetContentLock(buf),
                             LWTRANCHE_BUFFER_CONTENT);

            LWLockInitialize(BufferDescriptorGetIOLock(buf),
                             LWTRANCHE_BUFFER_IO_IN_PROGRESS);
        }

        /* Correct last entry of linked list */
        GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
    }

    /* Init other shared buffer-management stuff */
    StrategyInitialize(!foundDescs);
}
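/*
 * Hedged sketch of the alignment macros used above, as defined in c.h:
 * CACHELINEALIGN() rounds a length or address up to the next
 * PG_CACHE_LINE_SIZE boundary.  Over-allocating each ShmemInitStruct()
 * request by PG_CACHE_LINE_SIZE, as the calls above do, guarantees the
 * rounded-up pointer still lies within the allocation.
 */
#define TYPEALIGN(ALIGNVAL, LEN) \
    (((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uintptr_t) ((ALIGNVAL) - 1)))
#define CACHELINEALIGN(LEN)     TYPEALIGN(PG_CACHE_LINE_SIZE, (LEN))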
/*
 * LocalBufferAlloc -
 *	  Find or create a local buffer for the given page of the given
 *	  relation.
 *
 * API is similar to bufmgr.c's BufferAlloc, except that we do not need to
 * do any locking since this is all local.  Also, IO_IN_PROGRESS does not
 * get set.  Lastly, we support only default access strategy (hence,
 * usage_count is always advanced).
 */
BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
                 bool *foundPtr)
{
    BufferTag   newTag;         /* identity of requested block */
    LocalBufferLookupEnt *hresult;
    BufferDesc *bufHdr;
    int         b;
    int         trycounter;
    bool        found;

    INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);

    /* Initialize local buffers if first request in this session */
    if (LocalBufHash == NULL)
        InitLocalBuffers();

    /* See if the desired buffer already exists */
    hresult = (LocalBufferLookupEnt *)
        hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);

    if (hresult)
    {
        b = hresult->id;
        bufHdr = &LocalBufferDescriptors[b];
        Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag));
#ifdef LBDEBUG
        fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
                smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1);
#endif
        /* this part is equivalent to PinBuffer for a shared buffer */
        if (LocalRefCount[b] == 0)
        {
            if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
                bufHdr->usage_count++;
        }
        LocalRefCount[b]++;
        ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                    BufferDescriptorGetBuffer(bufHdr));
        if (bufHdr->flags & BM_VALID)
            *foundPtr = TRUE;
        else
        {
            /* Previous read attempt must have failed; try again */
            *foundPtr = FALSE;
        }
        return bufHdr;
    }

#ifdef LBDEBUG
    fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
            smgr->smgr_rnode.node.relNode, forkNum, blockNum,
            -nextFreeLocalBuf - 1);
#endif

    /*
     * Need to get a new buffer.  We use a clock sweep algorithm
     * (essentially the same as what freelist.c does now...)
     */
    trycounter = NLocBuffer;
    for (;;)
    {
        b = nextFreeLocalBuf;

        if (++nextFreeLocalBuf >= NLocBuffer)
            nextFreeLocalBuf = 0;

        bufHdr = &LocalBufferDescriptors[b];

        if (LocalRefCount[b] == 0)
        {
            if (bufHdr->usage_count > 0)
            {
                bufHdr->usage_count--;
                trycounter = NLocBuffer;
            }
            else
            {
                /* Found a usable buffer */
                LocalRefCount[b]++;
                ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                        BufferDescriptorGetBuffer(bufHdr));
                break;
            }
        }
        else if (--trycounter == 0)
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                     errmsg("no empty local buffer available")));
    }

    /*
     * this buffer is not referenced but it might still be dirty. if that's
     * the case, write it out before reusing it!
     */
    if (bufHdr->flags & BM_DIRTY)
    {
        SMgrRelation oreln;

        /* Find smgr relation for buffer */
        oreln = smgropen(bufHdr->tag.rnode, MyBackendId);

        /* And write... */
        smgrwrite(oreln,
                  bufHdr->tag.forkNum,
                  bufHdr->tag.blockNum,
                  (char *) LocalBufHdrGetBlock(bufHdr),
                  false);

        /* Mark not-dirty now in case we error out below */
        bufHdr->flags &= ~BM_DIRTY;

        pgBufferUsage.local_blks_written++;
    }

    /*
     * lazy memory allocation: allocate space on first use of a buffer.
     */
    if (LocalBufHdrGetBlock(bufHdr) == NULL)
    {
        /* Set pointer for use by BufferGetBlock() macro */
        LocalBufHdrGetBlock(bufHdr) = GetLocalBufferStorage();
    }

    /*
     * Update the hash table: remove old entry, if any, and make new one.
     */
    if (bufHdr->flags & BM_TAG_VALID)
    {
        hresult = (LocalBufferLookupEnt *)
            hash_search(LocalBufHash, (void *) &bufHdr->tag,
                        HASH_REMOVE, NULL);
        if (!hresult)           /* shouldn't happen */
            elog(ERROR, "local buffer hash table corrupted");
        /* mark buffer invalid just in case hash insert fails */
        CLEAR_BUFFERTAG(bufHdr->tag);
        bufHdr->flags &= ~(BM_VALID | BM_TAG_VALID);
    }

    hresult = (LocalBufferLookupEnt *)
        hash_search(LocalBufHash, (void *) &newTag, HASH_ENTER, &found);
    if (found)                  /* shouldn't happen */
        elog(ERROR, "local buffer hash table corrupted");
    hresult->id = b;

    /*
     * it's all ours now.
     */
    bufHdr->tag = newTag;
    bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
    bufHdr->flags |= BM_TAG_VALID;
    bufHdr->usage_count = 1;

    *foundPtr = FALSE;
    return bufHdr;
}
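/*
 * Hedged usage sketch: the ReadBuffer path for temporary relations.  On
 * a miss reported through foundPtr, the caller reads the page via smgr
 * and only then marks the buffer valid, which is why LocalBufferAlloc
 * treats a !BM_VALID hit as a failed previous read.  Simplified from
 * bufmgr.c's ReadBuffer_common (error handling and zero-page modes
 * omitted); the function name is hypothetical.
 */
static Buffer
read_local_buffer_sketch(SMgrRelation smgr, ForkNumber forkNum,
                         BlockNumber blockNum)
{
    bool        found;
    BufferDesc *bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);

    if (!found)
    {
        smgrread(smgr, forkNum, blockNum,
                 (char *) LocalBufHdrGetBlock(bufHdr));
        bufHdr->flags |= BM_VALID;  /* pre-9.6 flags field, as above */
    }
    return BufferDescriptorGetBuffer(bufHdr);
}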
/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
    char       *BufferBlocks;
    bool        foundBufs,
                foundDescs;
    int         i;

    Data_Descriptors = NBuffers;
    Free_List_Descriptor = Data_Descriptors;
    Lookup_List_Descriptor = Data_Descriptors + 1;
    Num_Descriptors = Data_Descriptors + 1;

    /*
     * It's probably not really necessary to grab the lock --- if there's
     * anyone else attached to the shmem at this point, we've got problems.
     */
    LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);

#ifdef BMTRACE
    CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
                            (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
                                           &foundDescs);
    if (!foundDescs)
        MemSet(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));

    TraceBuf = (bmtrace *) &(CurTraceBuf[1]);
#endif

    BufferDescriptors = (BufferDesc *)
        ShmemInitStruct("Buffer Descriptors",
                        Num_Descriptors * sizeof(BufferDesc), &foundDescs);

    BufferBlocks = (char *)
        ShmemInitStruct("Buffer Blocks",
                        NBuffers * BLCKSZ, &foundBufs);

    if (foundDescs || foundBufs)
    {
        /* both should be present or neither */
        Assert(foundDescs && foundBufs);
    }
    else
    {
        BufferDesc *buf;
        char       *block;

        buf = BufferDescriptors;
        block = BufferBlocks;

        /*
         * link the buffers into a circular, doubly-linked list to
         * initialize free list, and initialize the buffer headers. Still
         * don't know anything about replacement strategy in this file.
         */
        for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
        {
            Assert(ShmemIsValid((unsigned long) block));

            buf->freeNext = i + 1;
            buf->freePrev = i - 1;

            CLEAR_BUFFERTAG(&(buf->tag));
            buf->buf_id = i;

            buf->data = MAKE_OFFSET(block);
            buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
            buf->refcount = 0;
            buf->io_in_progress_lock = LWLockAssign();
            buf->cntx_lock = LWLockAssign();
            buf->cntxDirty = false;
            buf->wait_backend_id = 0;
        }

        /* close the circular queue */
        BufferDescriptors[0].freePrev = Data_Descriptors - 1;
        BufferDescriptors[Data_Descriptors - 1].freeNext = 0;
    }

    /* Init other shared buffer-management stuff */
    InitBufTable();
    InitFreeList(!foundDescs);

    LWLockRelease(BufMgrLock);
}
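/*
 * Hedged sketch: unlinking a descriptor from the circular doubly-linked
 * free list built above, as the freelist code of this era would when a
 * buffer gets pinned.  Pure index-based pointer surgery; illustrative,
 * not the verbatim historical code.
 */
static void
freelist_unlink_sketch(BufferDesc *buf)
{
    BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
    BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
    buf->flags &= ~BM_FREE;
}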
/*
 * Initialize shared buffer pool
 *
 * This is called once during shared-memory initialization (either in the
 * postmaster, or in a standalone backend).
 */
void
InitBufferPool(void)
{
    char       *BufferBlocks;
    bool        foundBufs,
                foundDescs;
    int         i;

    BufferDescriptors = (BufferDesc *)
        ShmemInitStruct("Buffer Descriptors",
                        NBuffers * sizeof(BufferDesc), &foundDescs);

    BufferBlocks = (char *)
        ShmemInitStruct("Buffer Blocks",
                        NBuffers * BLCKSZ, &foundBufs);

    if (foundDescs || foundBufs)
    {
        /* both should be present or neither */
        Assert(foundDescs && foundBufs);
    }
    else
    {
        BufferDesc *buf;
        char       *block;

        /*
         * It's probably not really necessary to grab the lock --- if
         * there's anyone else attached to the shmem at this point, we've
         * got problems.
         */
        LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);

        buf = BufferDescriptors;
        block = BufferBlocks;

        /*
         * Initialize all the buffer headers.
         */
        for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
        {
            Assert(ShmemIsValid((unsigned long) block));

            /*
             * The bufNext fields link together all totally-unused buffers.
             * Subsequent management of this list is done by
             * StrategyGetBuffer().
             */
            buf->bufNext = i + 1;

            CLEAR_BUFFERTAG(buf->tag);
            buf->buf_id = i;

            buf->data = MAKE_OFFSET(block);
            buf->flags = 0;
            buf->refcount = 0;
            buf->io_in_progress_lock = LWLockAssign();
            buf->cntx_lock = LWLockAssign();
            buf->cntxDirty = false;
            buf->wait_backend_id = 0;
        }

        /* Correct last entry of linked list */
        BufferDescriptors[NBuffers - 1].bufNext = -1;

        LWLockRelease(BufMgrLock);
    }

    /* Init other shared buffer-management stuff */
    StrategyInitialize(!foundDescs);
}
/*
 * StrategyInitialize -- initialize the buffer cache replacement strategy.
 *
 * Assumes that all of the buffers have already been linked into a list by
 * InitBufferPool().  Only called by the postmaster and only during
 * initialization.
 */
void
StrategyInitialize(bool init)
{
    /* A1out list can hold 50% of NBuffers, per Johnson and Shasha */
    int         nCDBs = NBuffers + NBuffers / 2;
    bool        found;
    int         i;

    /*
     * Initialize the shared CDB lookup hashtable
     */
    InitBufTable(nCDBs);

    /*
     * Get or create the shared strategy control block and the CDBs
     */
    StrategyControl = (BufferStrategyControl *)
        ShmemInitStruct("Buffer Strategy Status",
                        sizeof(BufferStrategyControl) +
                        sizeof(BufferStrategyCDB) * (nCDBs - 1),
                        &found);
    StrategyCDB = &(StrategyControl->cdb[0]);

    if (!found)
    {
        /*
         * Only done once, usually in postmaster
         */
        Assert(init);

        /*
         * Grab the whole linked list of free buffers for our strategy. We
         * assume it was previously set up by InitBufferPool().
         */
        StrategyControl->listFreeBuffers = 0;

        /*
         * We set the target T1 size to 1/4th of available buffers.
         * Possibly this should be a runtime tunable.
         */
        StrategyControl->target_T1_size = NBuffers / 4;

        /*
         * Initialize all lists to be empty
         */
        for (i = 0; i < STRAT_NUM_LISTS; i++)
        {
            StrategyControl->listHead[i] = -1;
            StrategyControl->listTail[i] = -1;
            StrategyControl->listSize[i] = 0;
            StrategyControl->num_hit[i] = 0;
        }
        StrategyControl->num_lookup = 0;
        StrategyControl->stat_report = 0;

        /*
         * All CDBs are linked onto listUnusedCDB
         */
        for (i = 0; i < nCDBs; i++)
        {
            StrategyCDB[i].next = i + 1;
            StrategyCDB[i].list = STRAT_LIST_UNUSED;
            CLEAR_BUFFERTAG(StrategyCDB[i].buf_tag);
            StrategyCDB[i].buf_id = -1;
        }
        StrategyCDB[nCDBs - 1].next = -1;
        StrategyControl->listUnusedCDB = 0;
    }
    else
        Assert(!init);
}
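/*
 * Hedged sketch of the doubly-linked list removal that
 * STRAT_LIST_REMOVE() (used by StrategyInvalidateBuffer above) performs
 * on the listHead/listTail/listSize arrays initialized here.  Written as
 * a function for readability; the original is a macro, and the
 * prev/next/list field names follow the 7.5-era BufferStrategyCDB.
 */
static void
strat_list_remove_sketch(BufferStrategyCDB *cdb)
{
    int         list = cdb->list;

    if (cdb->prev < 0)
        StrategyControl->listHead[list] = cdb->next;
    else
        StrategyCDB[cdb->prev].next = cdb->next;

    if (cdb->next < 0)
        StrategyControl->listTail[list] = cdb->prev;
    else
        StrategyCDB[cdb->next].prev = cdb->prev;

    StrategyControl->listSize[list]--;
    cdb->list = STRAT_LIST_UNUSED;
}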