/*
 * Initialize metapage for bloom index.
 */
void
BloomInitMetapage(Relation index)
{
    Page        metaPage;
    Buffer      metaBuffer;
    BloomMetaPageData *metadata;
    GenericXLogState *state;

    /*
     * Make a new buffer; since it is the first buffer, it should be
     * associated with block number 0 (BLOOM_METAPAGE_BLKNO).
     */
    metaBuffer = BloomNewBuffer(index);
    Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);

    /* Initialize bloom index options */
    if (!index->rd_options)
        index->rd_options = palloc0(sizeof(BloomOptions));
    adjustBloomOptions((BloomOptions *) index->rd_options);

    /* Initialize contents of meta page */
    state = GenericXLogStart(index);
    metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
                                         GENERIC_XLOG_FULL_IMAGE);

    BloomInitPage(metaPage, BLOOM_META);
    metadata = BloomPageGetMeta(metaPage);
    memset(metadata, 0, sizeof(BloomMetaPageData));
    metadata->magickNumber = BLOOM_MAGICK_NUMBER;
    metadata->opts = *((BloomOptions *) index->rd_options);
    ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);

    GenericXLogFinish(state);
    UnlockReleaseBuffer(metaBuffer);
}
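/*
 * A minimal sketch (not from the original source) of how a reader would
 * consult the metapage created above: pin and share-lock block 0, map the
 * page to BloomMetaPageData, and sanity-check the magic number before
 * trusting the contents. The helper name and error wording are illustrative
 * assumptions; only the macros and buffer calls come from the code above.
 */
static BloomMetaPageData
readBloomMetapage(Relation index)   /* hypothetical helper */
{
    Buffer      metaBuffer;
    BloomMetaPageData metadata;

    metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
    LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);

    /* copy the metadata out while we hold the lock */
    metadata = *BloomPageGetMeta(BufferGetPage(metaBuffer));
    if (metadata.magickNumber != BLOOM_MAGICK_NUMBER)
        elog(ERROR, "relation is not a bloom index");

    UnlockReleaseBuffer(metaBuffer);
    return metadata;
}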
/*
 * Flush page cached in BloomBuildState.
 */
static void
flushCachedPage(Relation index, BloomBuildState *buildstate)
{
    Page        page;
    Buffer      buffer = BloomNewBuffer(index);
    GenericXLogState *state;

    state = GenericXLogStart(index);
    page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
    memcpy(page, buildstate->data, BLCKSZ);
    GenericXLogFinish(state);
    UnlockReleaseBuffer(buffer);
}
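/*
 * A sketch, not part of the original file, of the caller-side pattern that
 * flushCachedPage() serves during an index build: tuples are appended to a
 * page image kept in local memory (buildstate->data), and only when a tuple
 * no longer fits is the full BLCKSZ image written out through one generic
 * WAL record. The helper name and the "blstate" field are assumptions about
 * BloomBuildState; only "data" is visible in the function above.
 */
static void
addItemToCachedPage(Relation index, BloomBuildState *buildstate,
                    BloomTuple *itup)   /* hypothetical helper */
{
    Page        cachedPage = (Page) buildstate->data;

    if (!BloomPageAddItem(&buildstate->blstate, cachedPage, itup))
    {
        /* cached page is full: write it out and start a fresh one */
        flushCachedPage(index, buildstate);
        BloomInitPage(cachedPage, 0);
        if (!BloomPageAddItem(&buildstate->blstate, cachedPage, itup))
            elog(ERROR, "could not add new bloom tuple to empty page");
    }
}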
/*
 * Initialize metapage for bloom index.
 */
void
BloomInitMetapage(Relation index)
{
    Buffer      metaBuffer;
    Page        metaPage;
    GenericXLogState *state;

    /*
     * Make a new page; since it is the first page, it should be associated
     * with block number 0 (BLOOM_METAPAGE_BLKNO).
     */
    metaBuffer = BloomNewBuffer(index);
    Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);

    /* Initialize contents of meta page */
    state = GenericXLogStart(index);
    metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
                                         GENERIC_XLOG_FULL_IMAGE);
    BloomFillMetapage(index, metaPage);
    GenericXLogFinish(state);

    UnlockReleaseBuffer(metaBuffer);
}
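/*
 * BloomFillMetapage() is called above but not shown in this section. A
 * sketch of what it plausibly does, reconstructed from the older inline
 * version of BloomInitMetapage() earlier in this section: make sure the
 * reloptions are valid, initialize the page, zero and fill the metadata
 * (magic number plus a copy of the options), and bump pd_lower so the
 * metadata is treated as page contents. The real implementation may differ.
 */
static void
BloomFillMetapage(Relation index, Page metaPage)
{
    BloomMetaPageData *metadata;

    /* Ensure the index has valid reloptions before copying them */
    if (!index->rd_options)
        index->rd_options = palloc0(sizeof(BloomOptions));
    adjustBloomOptions((BloomOptions *) index->rd_options);

    BloomInitPage(metaPage, BLOOM_META);
    metadata = BloomPageGetMeta(metaPage);
    memset(metadata, 0, sizeof(BloomMetaPageData));
    metadata->magickNumber = BLOOM_MAGICK_NUMBER;
    metadata->opts = *((BloomOptions *) index->rd_options);
    ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
}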
/*
 * Insert value (stored in RumBtree) into tree described by stack
 *
 * During an index build, buildStats is non-null and the counters
 * it contains should be incremented as needed.
 *
 * NB: the passed-in stack is freed, as though by freeRumBtreeStack.
 */
void
rumInsertValue(Relation index, RumBtree btree, RumBtreeStack *stack,
               GinStatsData *buildStats)
{
    RumBtreeStack *parent;
    BlockNumber rootBlkno;
    Page        page,
                rpage,
                lpage;
    GenericXLogState *state = NULL;

    /* extract root BlockNumber from stack */
    Assert(stack != NULL);
    parent = stack;
    while (parent->parent)
        parent = parent->parent;
    rootBlkno = parent->blkno;
    Assert(BlockNumberIsValid(rootBlkno));

    /* this loop crawls up the stack until the insertion is complete */
    for (;;)
    {
        BlockNumber savedLeftLink,
                    savedRightLink;

        page = BufferGetPage(stack->buffer);
        savedLeftLink = RumPageGetOpaque(page)->leftlink;
        savedRightLink = RumPageGetOpaque(page)->rightlink;

        if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
        {
            if (btree->rumstate->isBuild)
            {
                page = BufferGetPage(stack->buffer);
                START_CRIT_SECTION();
            }
            else
            {
                state = GenericXLogStart(index);
                page = GenericXLogRegisterBuffer(state, stack->buffer, 0);
            }

            btree->placeToPage(btree, page, stack->off);

            if (btree->rumstate->isBuild)
            {
                MarkBufferDirty(stack->buffer);
                END_CRIT_SECTION();
            }
            else
                GenericXLogFinish(state);

            LockBuffer(stack->buffer, RUM_UNLOCK);
            freeRumBtreeStack(stack);

            return;
        }
        else
        {
            Buffer      rbuffer = RumNewBuffer(btree->index);
            Page        newlpage;

            /* During index build, count the newly-split page */
            if (buildStats)
            {
                if (btree->isData)
                    buildStats->nDataPages++;
                else
                    buildStats->nEntryPages++;
            }

            parent = stack->parent;

            if (parent == NULL)
            {
                Buffer      lbuffer;

                if (btree->rumstate->isBuild)
                {
                    page = BufferGetPage(stack->buffer);
                    rpage = BufferGetPage(rbuffer);
                }
                else
                {
                    state = GenericXLogStart(index);
                    page = GenericXLogRegisterBuffer(state, stack->buffer, 0);
                    rpage = GenericXLogRegisterBuffer(state, rbuffer,
                                                      GENERIC_XLOG_FULL_IMAGE);
                }

                /*
                 * newlpage points to an in-memory page not associated with
                 * any buffer; stack->buffer should be untouched
                 */
                newlpage = btree->splitPage(btree, stack->buffer, rbuffer,
                                            page, rpage, stack->off);

                /*
                 * We split the root, so we need to allocate a new left page
                 * and place pointers to the left and right pages on the root
                 */
                lbuffer = RumNewBuffer(btree->index);

                if (btree->rumstate->isBuild)
                    lpage = BufferGetPage(lbuffer);
                else
                    lpage = GenericXLogRegisterBuffer(state, lbuffer,
                                                      GENERIC_XLOG_FULL_IMAGE);

                RumPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
                RumPageGetOpaque(newlpage)->leftlink = InvalidBlockNumber;
                RumPageGetOpaque(rpage)->leftlink = BufferGetBlockNumber(lbuffer);
                RumPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

                RumInitPage(page, RumPageGetOpaque(newlpage)->flags & ~RUM_LEAF,
                            BufferGetPageSize(stack->buffer));
                PageRestoreTempPage(newlpage, lpage);
                btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer,
                                page, lpage, rpage);

                PredicateLockPageSplit(btree->index,
                                       BufferGetBlockNumber(stack->buffer),
                                       BufferGetBlockNumber(lbuffer));
                PredicateLockPageSplit(btree->index,
                                       BufferGetBlockNumber(stack->buffer),
                                       BufferGetBlockNumber(rbuffer));

                if (btree->rumstate->isBuild)
                {
                    START_CRIT_SECTION();
                    MarkBufferDirty(rbuffer);
                    MarkBufferDirty(lbuffer);
                    MarkBufferDirty(stack->buffer);
                }
                else
                    GenericXLogFinish(state);

                UnlockReleaseBuffer(rbuffer);
                UnlockReleaseBuffer(lbuffer);
                LockBuffer(stack->buffer, RUM_UNLOCK);

                if (btree->rumstate->isBuild)
                    END_CRIT_SECTION();

                freeRumBtreeStack(stack);

                /* During index build, count the newly-added root page */
                if (buildStats)
                {
                    if (btree->isData)
                        buildStats->nDataPages++;
                    else
                        buildStats->nEntryPages++;
                }

                return;
            }
            else
            {
                BlockNumber rightrightBlkno = InvalidBlockNumber;
                Buffer      rightrightBuffer = InvalidBuffer;

                /* split non-root page */
                if (btree->rumstate->isBuild)
                {
                    lpage = BufferGetPage(stack->buffer);
                    rpage = BufferGetPage(rbuffer);
                }
                else
                {
                    state = GenericXLogStart(index);
                    lpage = GenericXLogRegisterBuffer(state, stack->buffer, 0);
                    rpage = GenericXLogRegisterBuffer(state, rbuffer, 0);
                }

                rightrightBlkno = RumPageGetOpaque(lpage)->rightlink;

                /*
                 * newlpage points to an in-memory page not associated with
                 * any buffer; stack->buffer should be untouched
                 */
                newlpage = btree->splitPage(btree, stack->buffer, rbuffer,
                                            lpage, rpage, stack->off);

                RumPageGetOpaque(rpage)->rightlink = savedRightLink;
                RumPageGetOpaque(newlpage)->leftlink = savedLeftLink;
                RumPageGetOpaque(rpage)->leftlink = BufferGetBlockNumber(stack->buffer);
                RumPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

                PredicateLockPageSplit(btree->index,
                                       BufferGetBlockNumber(stack->buffer),
                                       BufferGetBlockNumber(rbuffer));

                /*
                 * This is safe because nothing walks right-to-left while
                 * locking both pages, except vacuum, and vacuum locks pages
                 * only with a conditional lock
                 */
                if (rightrightBlkno != InvalidBlockNumber)
                {
                    Page        rightrightPage;

                    rightrightBuffer = ReadBuffer(btree->index,
                                                  rightrightBlkno);
                    LockBuffer(rightrightBuffer, RUM_EXCLUSIVE);

                    if (btree->rumstate->isBuild)
                        rightrightPage = BufferGetPage(rightrightBuffer);
                    else
                        rightrightPage = GenericXLogRegisterBuffer(state,
                                                                   rightrightBuffer, 0);
                    RumPageGetOpaque(rightrightPage)->leftlink =
                        BufferGetBlockNumber(rbuffer);
                }

                if (btree->rumstate->isBuild)
                    START_CRIT_SECTION();

                PageRestoreTempPage(newlpage, lpage);

                if (btree->rumstate->isBuild)
                {
                    MarkBufferDirty(rbuffer);
                    MarkBufferDirty(stack->buffer);
                    if (rightrightBlkno != InvalidBlockNumber)
                        MarkBufferDirty(rightrightBuffer);
                    END_CRIT_SECTION();
                }
                else
                    GenericXLogFinish(state);

                UnlockReleaseBuffer(rbuffer);
                if (rightrightBlkno != InvalidBlockNumber)
                    UnlockReleaseBuffer(rightrightBuffer);
            }
        }

        btree->isDelete = false;

        /* search parent to lock */
        LockBuffer(parent->buffer, RUM_EXCLUSIVE);

        /* move right if needed */
        page = BufferGetPage(parent->buffer);
        while ((parent->off = btree->findChildPtr(btree, page, stack->blkno,
                                                  parent->off)) == InvalidOffsetNumber)
        {
            BlockNumber rightlink = RumPageGetOpaque(page)->rightlink;

            if (rightlink == InvalidBlockNumber)
            {
                /*
                 * rightmost page, but we didn't find the parent, so we must
                 * fall back to a plain search...
                 */
                LockBuffer(parent->buffer, RUM_UNLOCK);
                rumFindParents(btree, stack, rootBlkno);
                parent = stack->parent;
                Assert(parent != NULL);
                break;
            }

            parent->buffer = rumStep(parent->buffer, btree->index,
                                     RUM_EXCLUSIVE, ForwardScanDirection);
            parent->blkno = rightlink;
            page = BufferGetPage(parent->buffer);
        }

        UnlockReleaseBuffer(stack->buffer);
        pfree(stack);
        stack = parent;
    }
}
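/*
 * rumInsertValue() above alternates between two write protocols: during an
 * index build it modifies buffers directly inside a critical section, while
 * during normal operation every change goes through PostgreSQL's generic WAL
 * API. A minimal sketch of that second protocol, using only the stock
 * generic-WAL calls from access/generic_xlog.h; the page-modification step
 * in the middle is a placeholder, and the helper itself is illustrative.
 */
static void
modifyPageWithGenericWAL(Relation index, Buffer buffer)
{
    GenericXLogState *state;
    Page        page;

    /* the buffer must already be exclusively locked by the caller */
    state = GenericXLogStart(index);

    /* returns a working copy of the page; modify this, not the buffer */
    page = GenericXLogRegisterBuffer(state, buffer, 0);

    /* ... modify "page" here ... */

    /* emits one WAL record and copies the new image back into the buffer */
    GenericXLogFinish(state);

    /* a failed or no-op path would call GenericXLogAbort(state) instead */
}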
/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
IndexBulkDeleteResult *
blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
             IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    index = info->index;
    BlockNumber blkno,
                npages;
    FreeBlockNumberArray notFullPage;
    int         countPage = 0;
    BloomState  state;
    Buffer      buffer;
    Page        page;
    GenericXLogState *gxlogState;

    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    initBloomState(&state, index);

    /*
     * Iterate over the pages. We don't care about concurrently added pages,
     * they can't contain tuples to delete.
     */
    npages = RelationGetNumberOfBlocks(index);
    for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
    {
        BloomTuple *itup,
                   *itupPtr,
                   *itupEnd;

        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, info->strategy);

        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        gxlogState = GenericXLogStart(index);
        page = GenericXLogRegisterBuffer(gxlogState, buffer, 0);

        if (BloomPageIsDeleted(page))
        {
            UnlockReleaseBuffer(buffer);
            GenericXLogAbort(gxlogState);
            CHECK_FOR_INTERRUPTS();
            continue;
        }

        /* Iterate over the tuples */
        itup = itupPtr = BloomPageGetTuple(&state, page, FirstOffsetNumber);
        itupEnd = BloomPageGetTuple(&state, page,
                                    OffsetNumberNext(BloomPageGetMaxOffset(page)));
        while (itup < itupEnd)
        {
            /* Do we have to delete this tuple? */
            if (callback(&itup->heapPtr, callback_state))
            {
                stats->tuples_removed += 1;
                BloomPageGetOpaque(page)->maxoff--;
            }
            else
            {
                if (itupPtr != itup)
                {
                    /*
                     * If we already deleted something before, we have to
                     * move this tuple backwards.
                     */
                    memmove((Pointer) itupPtr, (Pointer) itup,
                            state.sizeOfBloomTuple);
                }
                stats->num_index_tuples++;
                itupPtr = BloomPageGetNextTuple(&state, itupPtr);
            }

            itup = BloomPageGetNextTuple(&state, itup);
        }

        Assert(itupPtr == BloomPageGetTuple(&state, page,
                                            OffsetNumberNext(BloomPageGetMaxOffset(page))));

        /*
         * Add page to notFullPage list if we will not mark the page as
         * deleted and there is free space on it
         */
        if (BloomPageGetMaxOffset(page) != 0 &&
            BloomPageGetFreeSpace(&state, page) > state.sizeOfBloomTuple &&
            countPage < BloomMetaBlockN)
            notFullPage[countPage++] = blkno;

        /* Did we delete something? */
        if (itupPtr != itup)
        {
            /* Is the page now empty? */
            if (BloomPageGetMaxOffset(page) == 0)
                BloomPageSetDeleted(page);

            /* Adjust pd_lower */
            ((PageHeader) page)->pd_lower = (Pointer) itupPtr - page;

            /* Finish WAL-logging */
            GenericXLogFinish(gxlogState);
        }
        else
        {
            /* Didn't change anything: abort WAL-logging */
            GenericXLogAbort(gxlogState);
        }
        UnlockReleaseBuffer(buffer);
        CHECK_FOR_INTERRUPTS();
    }

    if (countPage > 0)
    {
        BloomMetaPageData *metaData;

        buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

        gxlogState = GenericXLogStart(index);
        page = GenericXLogRegisterBuffer(gxlogState, buffer, 0);

        metaData = BloomPageGetMeta(page);
        memcpy(metaData->notFullPage, notFullPage,
               sizeof(BlockNumber) * countPage);
        metaData->nStart = 0;
        metaData->nEnd = countPage;

        GenericXLogFinish(gxlogState);
        UnlockReleaseBuffer(buffer);
    }

    return stats;
}
/*
 * Insert new tuple into the bloom index.
 */
bool
blinsert(Relation index, Datum *values, bool *isnull,
         ItemPointer ht_ctid, Relation heapRel,
         IndexUniqueCheck checkUnique,
         IndexInfo *indexInfo)
{
    BloomState  blstate;
    BloomTuple *itup;
    MemoryContext oldCtx;
    MemoryContext insertCtx;
    BloomMetaPageData *metaData;
    Buffer      buffer,
                metaBuffer;
    Page        page,
                metaPage;
    BlockNumber blkno = InvalidBlockNumber;
    OffsetNumber nStart;
    GenericXLogState *state;

    insertCtx = AllocSetContextCreate(CurrentMemoryContext,
                                      "Bloom insert temporary context",
                                      ALLOCSET_DEFAULT_SIZES);

    oldCtx = MemoryContextSwitchTo(insertCtx);

    initBloomState(&blstate, index);
    itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);

    /*
     * At first, try to insert new tuple to the first page in notFullPage
     * array. If successful, we don't need to modify the meta page.
     */
    metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
    LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
    metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));

    if (metaData->nEnd > metaData->nStart)
    {
        Page        page;

        blkno = metaData->notFullPage[metaData->nStart];
        Assert(blkno != InvalidBlockNumber);

        /* Don't hold metabuffer lock while doing insert */
        LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);

        buffer = ReadBuffer(index, blkno);
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

        state = GenericXLogStart(index);
        page = GenericXLogRegisterBuffer(state, buffer, 0);

        /*
         * We might have found a page that was recently deleted by VACUUM. If
         * so, we can reuse it, but we must reinitialize it.
         */
        if (PageIsNew(page) || BloomPageIsDeleted(page))
            BloomInitPage(page, 0);

        if (BloomPageAddItem(&blstate, page, itup))
        {
            /* Success!  Apply the change, clean up, and exit */
            GenericXLogFinish(state);
            UnlockReleaseBuffer(buffer);
            ReleaseBuffer(metaBuffer);
            MemoryContextSwitchTo(oldCtx);
            MemoryContextDelete(insertCtx);
            return false;
        }

        /* Didn't fit, must try other pages */
        GenericXLogAbort(state);
        UnlockReleaseBuffer(buffer);
    }
    else
    {
        /* No entries in notFullPage */
        LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
    }

    /*
     * Try other pages in notFullPage array. We will have to change nStart in
     * metapage. Thus, grab exclusive lock on metapage.
     */
    LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);

    /* nStart might have changed while we didn't have lock */
    nStart = metaData->nStart;

    /* Skip first page if we already tried it above */
    if (nStart < metaData->nEnd &&
        blkno == metaData->notFullPage[nStart])
        nStart++;

    /*
     * This loop iterates for each page we try from the notFullPage array,
     * and will also initialize a GenericXLogState for the fallback case of
     * having to allocate a new page.
     */
    for (;;)
    {
        state = GenericXLogStart(index);

        /* get modifiable copy of metapage */
        metaPage = GenericXLogRegisterBuffer(state, metaBuffer, 0);
        metaData = BloomPageGetMeta(metaPage);

        if (nStart >= metaData->nEnd)
            break;              /* no more entries in notFullPage array */

        blkno = metaData->notFullPage[nStart];
        Assert(blkno != InvalidBlockNumber);

        buffer = ReadBuffer(index, blkno);
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        page = GenericXLogRegisterBuffer(state, buffer, 0);

        /* Basically same logic as above */
        if (PageIsNew(page) || BloomPageIsDeleted(page))
            BloomInitPage(page, 0);

        if (BloomPageAddItem(&blstate, page, itup))
        {
            /* Success!  Apply the changes, clean up, and exit */
            metaData->nStart = nStart;
            GenericXLogFinish(state);
            UnlockReleaseBuffer(buffer);
            UnlockReleaseBuffer(metaBuffer);
            MemoryContextSwitchTo(oldCtx);
            MemoryContextDelete(insertCtx);
            return false;
        }

        /* Didn't fit, must try other pages */
        GenericXLogAbort(state);
        UnlockReleaseBuffer(buffer);
        nStart++;
    }

    /*
     * Didn't find place to insert in notFullPage array.  Allocate new page.
     * (XXX is it good to do this while holding ex-lock on the metapage??)
     */
    buffer = BloomNewBuffer(index);

    page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
    BloomInitPage(page, 0);

    if (!BloomPageAddItem(&blstate, page, itup))
    {
        /* We shouldn't be here since we're inserting to an empty page */
        elog(ERROR, "could not add new bloom tuple to empty page");
    }

    /* Reset notFullPage array to contain just this new page */
    metaData->nStart = 0;
    metaData->nEnd = 1;
    metaData->notFullPage[0] = BufferGetBlockNumber(buffer);

    /* Apply the changes, clean up, and exit */
    GenericXLogFinish(state);
    UnlockReleaseBuffer(buffer);
    UnlockReleaseBuffer(metaBuffer);
    MemoryContextSwitchTo(oldCtx);
    MemoryContextDelete(insertCtx);

    return false;
}
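/*
 * blinsert() relies on BloomPageAddItem() to report, via its boolean result,
 * whether the tuple fits on the page. That function is not shown in this
 * section; below is a plausible sketch, inferred from how blbulkdelete()
 * manipulates maxoff and pd_lower on these line-pointer-less pages. The name
 * carries a "Sketch" suffix because the real implementation may differ.
 */
static bool
BloomPageAddItemSketch(BloomState *state, Page page, BloomTuple *tuple)
{
    BloomTuple *itup;
    BloomPageOpaque opaque;
    Pointer     ptr;

    /* Does the new tuple fit on the page? */
    if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
        return false;

    /* Copy the new tuple to just after the last existing one */
    opaque = BloomPageGetOpaque(page);
    itup = BloomPageGetTuple(state, page,
                             OffsetNumberNext(BloomPageGetMaxOffset(page)));
    memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);

    /* Account for it: bump maxoff and move pd_lower past the new tuple */
    opaque->maxoff++;
    ptr = (Pointer) BloomPageGetTuple(state, page,
                                      OffsetNumberNext(BloomPageGetMaxOffset(page)));
    ((PageHeader) page)->pd_lower = ptr - page;

    return true;
}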
/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
IndexBulkDeleteResult *
blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
             IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    index = info->index;
    BlockNumber blkno,
                npages;
    FreeBlockNumberArray notFullPage;
    int         countPage = 0;
    BloomState  state;
    Buffer      buffer;
    Page        page;
    BloomMetaPageData *metaData;
    GenericXLogState *gxlogState;

    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    initBloomState(&state, index);

    /*
     * Iterate over the pages. We don't care about concurrently added pages,
     * they can't contain tuples to delete.
     */
    npages = RelationGetNumberOfBlocks(index);
    for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
    {
        BloomTuple *itup,
                   *itupPtr,
                   *itupEnd;

        vacuum_delay_point();

        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, info->strategy);

        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        gxlogState = GenericXLogStart(index);
        page = GenericXLogRegisterBuffer(gxlogState, buffer, 0);

        /* Ignore empty/deleted pages until blvacuumcleanup() */
        if (PageIsNew(page) || BloomPageIsDeleted(page))
        {
            UnlockReleaseBuffer(buffer);
            GenericXLogAbort(gxlogState);
            continue;
        }

        /*
         * Iterate over the tuples.  itup points to current tuple being
         * scanned, itupPtr points to where to save next non-deleted tuple.
         */
        itup = itupPtr = BloomPageGetTuple(&state, page, FirstOffsetNumber);
        itupEnd = BloomPageGetTuple(&state, page,
                                    OffsetNumberNext(BloomPageGetMaxOffset(page)));
        while (itup < itupEnd)
        {
            /* Do we have to delete this tuple? */
            if (callback(&itup->heapPtr, callback_state))
            {
                /* Yes; adjust count of tuples that will be left on page */
                BloomPageGetOpaque(page)->maxoff--;
                stats->tuples_removed += 1;
            }
            else
            {
                /* No; copy it to itupPtr++, but skip copy if not needed */
                if (itupPtr != itup)
                    memmove((Pointer) itupPtr, (Pointer) itup,
                            state.sizeOfBloomTuple);
                itupPtr = BloomPageGetNextTuple(&state, itupPtr);
            }

            itup = BloomPageGetNextTuple(&state, itup);
        }

        /* Assert that we counted correctly */
        Assert(itupPtr == BloomPageGetTuple(&state, page,
                                            OffsetNumberNext(BloomPageGetMaxOffset(page))));

        /*
         * Add page to new notFullPage list if we will not mark the page as
         * deleted and there is free space on it
         */
        if (BloomPageGetMaxOffset(page) != 0 &&
            BloomPageGetFreeSpace(&state, page) >= state.sizeOfBloomTuple &&
            countPage < BloomMetaBlockN)
            notFullPage[countPage++] = blkno;

        /* Did we delete something? */
        if (itupPtr != itup)
        {
            /* Is the page now empty? */
            if (BloomPageGetMaxOffset(page) == 0)
                BloomPageSetDeleted(page);

            /* Adjust pd_lower */
            ((PageHeader) page)->pd_lower = (Pointer) itupPtr - page;

            /* Finish WAL-logging */
            GenericXLogFinish(gxlogState);
        }
        else
        {
            /* Didn't change anything: abort WAL-logging */
            GenericXLogAbort(gxlogState);
        }
        UnlockReleaseBuffer(buffer);
    }

    /*
     * Update the metapage's notFullPage list with whatever we found.  Our
     * info could already be out of date at this point, but blinsert() will
     * cope if so.
     */
    buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

    gxlogState = GenericXLogStart(index);
    page = GenericXLogRegisterBuffer(gxlogState, buffer, 0);

    metaData = BloomPageGetMeta(page);
    memcpy(metaData->notFullPage, notFullPage,
           sizeof(BlockNumber) * countPage);
    metaData->nStart = 0;
    metaData->nEnd = countPage;

    GenericXLogFinish(gxlogState);
    UnlockReleaseBuffer(buffer);

    return stats;
}
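/*
 * Example (not from the original source) of the callback contract used by
 * blbulkdelete(): VACUUM supplies an IndexBulkDeleteCallback that returns
 * true for every heap TID whose index entries must be removed. Below is a
 * toy implementation that looks TIDs up in a sorted array; the struct and
 * function names are invented for illustration, while ItemPointerCompare is
 * the standard PostgreSQL TID comparator.
 */
typedef struct DeadTidList
{
    ItemPointerData *tids;      /* sorted array of dead heap TIDs */
    int         ntids;
} DeadTidList;

static bool
dead_tid_callback(ItemPointer itemptr, void *state)
{
    DeadTidList *list = (DeadTidList *) state;
    int         lo = 0,
                hi = list->ntids - 1;

    /* binary search over the sorted TID array */
    while (lo <= hi)
    {
        int         mid = lo + (hi - lo) / 2;
        int32       cmp = ItemPointerCompare(itemptr, &list->tids[mid]);

        if (cmp == 0)
            return true;        /* index entry points to a dead tuple */
        if (cmp < 0)
            hi = mid - 1;
        else
            lo = mid + 1;
    }
    return false;               /* tuple survives */
}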