/*
 * Build a non-leaf (interior) entry tuple from the key portion of the given
 * tuple, which may itself be a leaf or a non-leaf entry tuple.
 *
 * A posting list stored in the source tuple is left behind; only the part
 * in front of it is copied.  The result is palloc'd and its t_tid is set to
 * point at childblk.
 */
static IndexTuple
GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk)
{
	bool		dropPostingList;
	uint32		copysize;
	IndexTuple	result;

	dropPostingList = GinPageIsLeaf(page) && !GinIsPostingTree(itup);

	if (dropPostingList)
	{
		/* Leaf tuple with a posting list: keep only what precedes it */
		copysize = GinGetPostingOffset(itup);
		copysize = MAXALIGN(copysize);
	}
	else
	{
		/* Nothing to strip, take the tuple as it stands */
		copysize = IndexTupleSize(itup);
	}

	result = (IndexTuple) palloc(copysize);
	memcpy(result, itup, copysize);

	if (dropPostingList)
	{
		/* The copy is shorter than the source, so fix the size header */
		result->t_info &= ~INDEX_SIZE_MASK;
		result->t_info |= copysize;
	}

	/* Now insert the correct downlink */
	GinSetDownlink(result, childblk);

	return result;
}
/* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. */ static void ginEntryInsert(Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild) { GinBtreeData btree; GinBtreeStack *stack; IndexTuple itup; Page page; prepareEntryScan(&btree, index, value, ginstate); stack = ginFindLeafPage(&btree, NULL); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { /* found entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { /* lock root of posting tree */ GinPostingTreeScan *gdi; BlockNumber rootPostingTree = GinGetPostingTree(itup); /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ gdi = prepareScanPostingTree(index, rootPostingTree, FALSE); gdi->btree.isBuild = isBuild; insertItemPointer(gdi, items, nitem); return; } itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild); btree.isDelete = TRUE; } else { /* We suppose, that tuple can store at list one itempointer */ itup = GinFormTuple(ginstate, value, items, 1); if (itup == NULL || IndexTupleSize(itup) >= GinMaxItemSize) elog(ERROR, "huge tuple"); if (nitem > 1) { IndexTuple previtup = itup; itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild); pfree(previtup); } } btree.entry = itup; ginInsertValue(&btree, stack); pfree(itup); }
/* * Insert one or more heap TIDs associated with the given key value. * This will either add a single key entry, or enlarge a pre-existing entry. * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. */ void ginEntryInsert(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { GinBtreeData btree; GinBtreeStack *stack; IndexTuple itup; Page page; /* During index build, count the to-be-inserted entry */ if (buildStats) buildStats->nEntries++; ginPrepareEntryScan(&btree, attnum, key, category, ginstate); stack = ginFindLeafPage(&btree, GIN_ROOT_BLKNO, false); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { /* found pre-existing entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { /* add entries to existing posting tree */ BlockNumber rootPostingTree = GinGetPostingTree(itup); /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ ginInsertItemPointers(ginstate->index, rootPostingTree, items, nitem, buildStats); return; } /* modify an existing leaf entry */ itup = addItemPointersToLeafTuple(ginstate, itup, items, nitem, buildStats); btree.isDelete = TRUE; } else { /* no match, so construct a new leaf entry */ itup = buildFreshLeafTuple(ginstate, attnum, key, category, items, nitem, buildStats); } /* Insert the new or modified leaf tuple */ btree.entry = itup; ginInsertValue(&btree, stack, buildStats); pfree(itup); }
/*
 * Return a palloc'd copy of the given tuple's value part.
 *
 * When the tuple comes from a leaf page and carries a posting list (rather
 * than pointing to a posting tree), the posting list is not copied and the
 * copy's size header is reduced accordingly.  In every other case the tuple
 * is duplicated whole.
 */
static IndexTuple
copyIndexTuple(IndexTuple itup, Page page)
{
	IndexTuple	copy;
	Size		nbytes;

	if (!GinPageIsLeaf(page) || GinIsPostingTree(itup))
	{
		/* no posting list to strip: plain duplicate */
		nbytes = IndexTupleSize(itup);
		copy = (IndexTuple) palloc(MAXALIGN(nbytes));
		memcpy(copy, itup, nbytes);
		return copy;
	}

	/* copy only the part in front of the posting list */
	nbytes = GinGetOrigSizePosting(itup);
	copy = (IndexTuple) palloc(MAXALIGN(nbytes));
	memcpy(copy, itup, nbytes);

	/* record the shortened length in the copy's header */
	copy->t_info &= ~INDEX_SIZE_MASK;
	copy->t_info |= nbytes;

	return copy;
}
/* * Adds array of item pointers to tuple's posting list, or * creates posting tree and tuple pointing to tree in case * of not enough space. Max size of tuple is defined in * GinFormTuple(). Returns a new, modified index tuple. * items[] must be in sorted order with no duplicates. */ static IndexTuple addItemPointersToLeafTuple(GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer) { OffsetNumber attnum; Datum key; GinNullCategory category; IndexTuple res; ItemPointerData *newItems, *oldItems; int oldNPosting, newNPosting; GinPostingList *compressedList; Assert(!GinIsPostingTree(old)); attnum = gintuple_get_attrnum(ginstate, old); key = gintuple_get_key(ginstate, old, &category); /* merge the old and new posting lists */ oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting); newItems = ginMergeItemPointers(items, nitem, oldItems, oldNPosting, &newNPosting); /* Compress the posting list, and try to a build tuple with room for it */ res = NULL; compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, NULL); pfree(newItems); if (compressedList) { res = GinFormTuple(ginstate, attnum, key, category, (char *) compressedList, SizeOfGinPostingList(compressedList), newNPosting, false); pfree(compressedList); } if (!res) { /* posting list would be too big, convert to posting tree */ BlockNumber postingRoot; /* * Initialize posting tree with the old tuple's posting list. It's * surely small enough to fit on one posting-tree page, and should * already be in order with no duplicates. 
*/ postingRoot = createPostingTree(ginstate->index, oldItems, oldNPosting, buildStats, buffer); /* Now insert the TIDs-to-be-added into the posting tree */ ginInsertItemPointers(ginstate->index, postingRoot, items, nitem, buildStats); /* And build a new posting-tree-only result tuple */ res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true); GinSetPostingTree(res, postingRoot); } pfree(oldItems); return res; }
/*
 * Vacuum the posting lists stored in the tuples of one entry page.
 *
 * Returns the modified page, or NULL if the page did not need to change.
 * The original page is used until the first change occurs; at that point
 * the page is copied into a temporary one and all further changes are made
 * to the copy.
 *
 * Posting-tree roots referenced from the page are collected into roots[]
 * and counted in *nroot, for the caller to process afterwards.
 */
static Page
ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot)
{
	Page		origpage = BufferGetPage(buffer);
	Page		tmppage = origpage;
	OffsetNumber maxoff = PageGetMaxOffsetNumber(origpage);
	OffsetNumber off;

	*nroot = 0;

	for (off = FirstOffsetNumber; off <= maxoff; off++)
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, off));
		ItemPointerData *kept;
		uint32		nkept;
		OffsetNumber attnum;
		Datum		key;
		GinNullCategory category;

		if (GinIsPostingTree(itup))
		{
			/*
			 * Remember the posting tree's root for further processing; it
			 * can't be vacuumed just now due to risk of deadlocks with
			 * scans/inserts.
			 */
			roots[*nroot] = GinGetDownlink(itup);
			(*nroot)++;
			continue;
		}

		if (!(GinGetNPosting(itup) > 0))
			continue;			/* no posting list to clean */

		/*
		 * Once a temporary page exists, the posting list is cleaned in
		 * place on it; before that, start with NULL.
		 */
		kept = (tmppage == origpage) ? NULL : GinGetPosting(itup);
		nkept = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &kept);

		if (GinGetNPosting(itup) == nkept)
			continue;			/* nothing was removed from this tuple */

		/* Some ItemPointers were deleted, so the tuple must be remade */
		if (tmppage == origpage)
		{
			/*
			 * First difference on this page: create the temporary page in
			 * memory and copy the cleaned posting list into it.
			 */
			tmppage = PageGetTempPageCopy(origpage);

			if (nkept > 0)
			{
				Size		pos = ((char *) GinGetPosting(itup)) - ((char *) origpage);

				memcpy(tmppage + pos, kept, sizeof(ItemPointerData) * nkept);
			}

			pfree(kept);

			/* from here on, itup must refer to the tuple on the new page */
			itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, off));
		}

		attnum = gintuple_get_attrnum(&gvs->ginstate, itup);
		key = gintuple_get_key(&gvs->ginstate, itup, &category);
		itup = GinFormTuple(&gvs->ginstate, attnum, key, category,
							GinGetPosting(itup), nkept, true);

		/* swap the old tuple for the rebuilt one at the same offset */
		PageIndexTupleDelete(tmppage, off);

		if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), off, false, false) != off)
			elog(ERROR, "failed to add item to index page in \"%s\"",
				 RelationGetRelationName(gvs->index));

		pfree(itup);
	}

	return (tmppage == origpage) ? NULL : tmppage;
}
/* * Adds array of item pointers to tuple's posting list, or * creates posting tree and tuple pointing to tree in case * of not enough space. Max size of tuple is defined in * GinFormTuple(). Returns a new, modified index tuple. * items[] must be in sorted order with no duplicates. */ static IndexTuple addItemPointersToLeafTuple(GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { OffsetNumber attnum; Datum key; GinNullCategory category; IndexTuple res; Assert(!GinIsPostingTree(old)); attnum = gintuple_get_attrnum(ginstate, old); key = gintuple_get_key(ginstate, old, &category); /* try to build tuple with room for all the items */ res = GinFormTuple(ginstate, attnum, key, category, NULL, nitem + GinGetNPosting(old), false); if (res) { /* good, small enough */ uint32 newnitem; /* fill in the posting list with union of old and new TIDs */ newnitem = ginMergeItemPointers(GinGetPosting(res), GinGetPosting(old), GinGetNPosting(old), items, nitem); /* merge might have eliminated some duplicate items */ GinShortenTuple(res, newnitem); } else { /* posting list would be too big, convert to posting tree */ BlockNumber postingRoot; /* * Initialize posting tree with the old tuple's posting list. It's * surely small enough to fit on one posting-tree page, and should * already be in order with no duplicates. */ postingRoot = createPostingTree(ginstate->index, GinGetPosting(old), GinGetNPosting(old), buildStats); /* Now insert the TIDs-to-be-added into the posting tree */ ginInsertItemPointers(ginstate->index, postingRoot, items, nitem, buildStats); /* And build a new posting-tree-only result tuple */ res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true); GinSetPostingTree(res, postingRoot); } return res; }
/* * returns modified page or NULL if page isn't modified. * Function works with original page until first change is occurred, * then page is copied into temporary one. */ static Page ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) { Page origpage = BufferGetPage(buffer), tmppage; OffsetNumber i, maxoff = PageGetMaxOffsetNumber(origpage); tmppage = origpage; *nroot = 0; for (i = FirstOffsetNumber; i <= maxoff; i++) { IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); if (GinIsPostingTree(itup)) { /* * store posting tree's roots for further processing, we can't * vacuum it just now due to risk of deadlocks with scans/inserts */ roots[*nroot] = GinGetDownlink(itup); (*nroot)++; } else if (GinGetNPosting(itup) > 0) { int nitems; ItemPointer items_orig; bool free_items_orig; ItemPointer items; /* Get list of item pointers from the tuple. */ if (GinItupIsCompressed(itup)) { items_orig = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems); free_items_orig = true; } else { items_orig = (ItemPointer) GinGetPosting(itup); nitems = GinGetNPosting(itup); free_items_orig = false; } /* Remove any items from the list that need to be vacuumed. */ items = ginVacuumItemPointers(gvs, items_orig, nitems, &nitems); if (free_items_orig) pfree(items_orig); /* If any item pointers were removed, recreate the tuple. */ if (items) { OffsetNumber attnum; Datum key; GinNullCategory category; GinPostingList *plist; int plistsize; if (nitems > 0) { plist = ginCompressPostingList(items, nitems, GinMaxItemSize, NULL); plistsize = SizeOfGinPostingList(plist); } else { plist = NULL; plistsize = 0; } /* * if we already created a temporary page, make changes in * place */ if (tmppage == origpage) { /* * On first difference, create a temporary copy of the * page and copy the tuple's posting list to it. 
*/ tmppage = PageGetTempPageCopy(origpage); /* set itup pointer to new page */ itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); } attnum = gintuple_get_attrnum(&gvs->ginstate, itup); key = gintuple_get_key(&gvs->ginstate, itup, &category); itup = GinFormTuple(&gvs->ginstate, attnum, key, category, (char *) plist, plistsize, nitems, true); if (plist) pfree(plist); PageIndexTupleDelete(tmppage, i); if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gvs->index)); pfree(itup); pfree(items); } } } return (tmppage == origpage) ? NULL : tmppage; }
/*
 * Start* functions set up the state of searches: they find the correct
 * buffer and lock it.  Stop* functions unlock the buffer (but don't
 * release it!).
 *
 * startScanEntry locates the scan entry's key in the entry tree and keeps
 * a copy of its item pointers in entry->list: either the posting list
 * stored in the tuple, or the contents of the page at which the
 * posting-tree scan begins.  An entry that has a master just inherits the
 * master's isFinished flag.
 */
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
{
	GinBtreeData btreeEntry;
	GinBtreeStack *stackEntry;
	Page		page;
	bool		needUnlock = TRUE;

	if (entry->master != NULL)
	{
		entry->isFinished = entry->master->isFinished;
		return;
	}

	/*
	 * We should find the entry, and begin a scan of the posting tree or
	 * just store the posting list in memory
	 */
	prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
	btreeEntry.searchMode = TRUE;
	stackEntry = ginFindLeafPage(&btreeEntry, NULL);
	page = BufferGetPage(stackEntry->buffer);

	/* start from the "nothing found" state */
	entry->isFinished = TRUE;
	entry->buffer = InvalidBuffer;
	entry->offset = InvalidOffsetNumber;
	entry->list = NULL;
	entry->nlist = 0;
	entry->reduceResult = FALSE;
	entry->predictNumberResult = 0;

	if (btreeEntry.findItem(&btreeEntry, stackEntry))
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));

		if (GinIsPostingTree(itup))
		{
			BlockNumber rootPostingTree = GinGetPostingTree(itup);
			GinPostingTreeScan *gdi;
			Page		postingPage;
			OffsetNumber postingMaxoff;

			/* done with the entry page; remember it is already unlocked */
			LockBuffer(stackEntry->buffer, GIN_UNLOCK);
			needUnlock = FALSE;

			gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
			entry->buffer = scanBeginPostingTree(gdi);

			/*
			 * We keep the buffer pinned because we need to prevent deletion
			 * of the page during the scan.  See GIN's vacuum implementation.
			 * RefCount is increased to keep the buffer pinned after the
			 * freeGinBtreeStack() call.
			 */
			IncrBufferRefCount(entry->buffer);

			postingPage = BufferGetPage(entry->buffer);
			postingMaxoff = GinPageGetOpaque(postingPage)->maxoff;
			entry->predictNumberResult = gdi->stack->predictNumber * postingMaxoff;

			/*
			 * Keep page content in memory to prevent durable page locking
			 */
			entry->list = (ItemPointerData *) palloc(BLCKSZ);
			entry->nlist = postingMaxoff;
			memcpy(entry->list,
				   GinDataPageGetItem(postingPage, FirstOffsetNumber),
				   postingMaxoff * sizeof(ItemPointerData));

			LockBuffer(entry->buffer, GIN_UNLOCK);
			freeGinBtreeStack(gdi->stack);
			pfree(gdi);
			entry->isFinished = FALSE;
		}
		else if (GinGetNPosting(itup) > 0)
		{
			/* posting list is stored in the tuple: copy it out */
			entry->nlist = GinGetNPosting(itup);
			entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
			memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
			entry->isFinished = FALSE;
		}
	}

	if (needUnlock)
		LockBuffer(stackEntry->buffer, GIN_UNLOCK);
	freeGinBtreeStack(stackEntry);
}
/* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. */ void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { GinBtreeData btree; GinBtreeStack *stack; IndexTuple itup; Page page; /* During index build, count the to-be-inserted entry */ if (buildStats) buildStats->nEntries++; ginPrepareEntryScan(&btree, index, attnum, value, ginstate); stack = ginFindLeafPage(&btree, NULL); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { /* found entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { /* lock root of posting tree */ GinPostingTreeScan *gdi; BlockNumber rootPostingTree = GinGetPostingTree(itup); /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ gdi = ginPrepareScanPostingTree(index, rootPostingTree, FALSE); gdi->btree.isBuild = (buildStats != NULL); ginInsertItemPointer(gdi, items, nitem, buildStats); pfree(gdi); return; } itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, buildStats); btree.isDelete = TRUE; } else { /* We suppose that tuple can store at least one itempointer */ itup = GinFormTuple(index, ginstate, attnum, value, items, 1, true); if (nitem > 1) { /* Add the rest, making a posting tree if necessary */ IndexTuple previtup = itup; itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, buildStats); pfree(previtup); } } btree.entry = itup; ginInsertValue(&btree, stack, buildStats); pfree(itup); }