/* * Adds array of item pointers to tuple's posting list or * creates posting tree and tuple pointed to tree in a case * of not enough space. Max size of tuple is defined in * GinFormTuple(). */ static IndexTuple addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { Datum key = gin_index_getattr(ginstate, old); OffsetNumber attnum = gintuple_get_attrnum(ginstate, old); IndexTuple res = GinFormTuple(index, ginstate, attnum, key, NULL, nitem + GinGetNPosting(old), false); if (res) { /* good, small enough */ uint32 newnitem; newnitem = ginMergeItemPointers(GinGetPosting(res), GinGetPosting(old), GinGetNPosting(old), items, nitem); /* merge might have eliminated some duplicate items */ GinShortenTuple(res, newnitem); } else { BlockNumber postingRoot; GinPostingTreeScan *gdi; /* posting list becomes big, so we need to make posting's tree */ res = GinFormTuple(index, ginstate, attnum, key, NULL, 0, true); postingRoot = createPostingTree(index, GinGetPosting(old), GinGetNPosting(old)); GinSetPostingTree(res, postingRoot); gdi = ginPrepareScanPostingTree(index, postingRoot, FALSE); gdi->btree.isBuild = (buildStats != NULL); ginInsertItemPointer(gdi, items, nitem, buildStats); pfree(gdi); /* During index build, count the newly-added data page */ if (buildStats) buildStats->nDataPages++; } return res; }
/* * Read item pointers from leaf entry tuple. * * Returns a palloc'd array of ItemPointers. The number of items is returned * in *nitems. */ ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, int *nitems) { Pointer ptr = GinGetPosting(itup); int nipd = GinGetNPosting(itup); ItemPointer ipd; int ndecoded; if (GinItupIsCompressed(itup)) { if (nipd > 0) { ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded); if (nipd != ndecoded) elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded", nipd, ndecoded); } else { ipd = palloc(0); } } else { ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd); memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd); } *nitems = nipd; return ipd; }
/* * Adds array of item pointers to tuple's posting list or * creates posting tree and tuple pointed to tree in a case * of not enough space. Max size of tuple is defined in * GinFormTuple(). */ static IndexTuple addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack, IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild) { bool isnull; Datum key = index_getattr(old, FirstOffsetNumber, ginstate->tupdesc, &isnull); IndexTuple res = GinFormTuple(ginstate, key, NULL, nitem + GinGetNPosting(old)); if (res) { /* good, small enough */ MergeItemPointers(GinGetPosting(res), GinGetPosting(old), GinGetNPosting(old), items, nitem ); GinSetNPosting(res, nitem + GinGetNPosting(old)); } else { BlockNumber postingRoot; GinPostingTreeScan *gdi; /* posting list becomes big, so we need to make posting's tree */ res = GinFormTuple(ginstate, key, NULL, 0); postingRoot = createPostingTree(index, GinGetPosting(old), GinGetNPosting(old)); GinSetPostingTree(res, postingRoot); gdi = prepareScanPostingTree(index, postingRoot, FALSE); gdi->btree.isBuild = isBuild; insertItemPointer(gdi, items, nitem); pfree(gdi); } return res; }
/* * Sometimes we reduce the number of posting list items in a tuple after * having built it with GinFormTuple. This function adjusts the size * fields to match. */ void GinShortenTuple(IndexTuple itup, uint32 nipd) { uint32 newsize; Assert(nipd <= GinGetNPosting(itup)); newsize = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd); Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK)); itup->t_info &= ~INDEX_SIZE_MASK; itup->t_info |= newsize; GinSetNPosting(itup, nipd); }
/* * returns modified page or NULL if page isn't modified. * Function works with original page until first change is occurred, * then page is copied into temporary one. */ static Page ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) { Page origpage = BufferGetPage(buffer), tmppage; OffsetNumber i, maxoff = PageGetMaxOffsetNumber(origpage); tmppage = origpage; *nroot = 0; for (i = FirstOffsetNumber; i <= maxoff; i++) { IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); if (GinIsPostingTree(itup)) { /* * store posting tree's roots for further processing, we can't * vacuum it just now due to risk of deadlocks with scans/inserts */ roots[*nroot] = GinGetDownlink(itup); (*nroot)++; } else if (GinGetNPosting(itup) > 0) { /* * if we already create temporary page, we will make changes in * place */ ItemPointerData *cleaned = (tmppage == origpage) ? NULL : GinGetPosting(itup); uint32 newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned); if (GinGetNPosting(itup) != newN) { OffsetNumber attnum; Datum key; GinNullCategory category; /* * Some ItemPointers was deleted, so we should remake our * tuple */ if (tmppage == origpage) { /* * On first difference we create temporary page in memory * and copies content in to it. */ tmppage = PageGetTempPageCopy(origpage); if (newN > 0) { Size pos = ((char *) GinGetPosting(itup)) - ((char *) origpage); memcpy(tmppage + pos, cleaned, sizeof(ItemPointerData) * newN); } pfree(cleaned); /* set itup pointer to new page */ itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); } attnum = gintuple_get_attrnum(&gvs->ginstate, itup); key = gintuple_get_key(&gvs->ginstate, itup, &category); itup = GinFormTuple(&gvs->ginstate, attnum, key, category, GinGetPosting(itup), newN, true); PageIndexTupleDelete(tmppage, i); if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gvs->index)); pfree(itup); } } } return (tmppage == origpage) ? NULL : tmppage; }
/* * Adds array of item pointers to tuple's posting list, or * creates posting tree and tuple pointing to tree in case * of not enough space. Max size of tuple is defined in * GinFormTuple(). Returns a new, modified index tuple. * items[] must be in sorted order with no duplicates. */ static IndexTuple addItemPointersToLeafTuple(GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { OffsetNumber attnum; Datum key; GinNullCategory category; IndexTuple res; Assert(!GinIsPostingTree(old)); attnum = gintuple_get_attrnum(ginstate, old); key = gintuple_get_key(ginstate, old, &category); /* try to build tuple with room for all the items */ res = GinFormTuple(ginstate, attnum, key, category, NULL, nitem + GinGetNPosting(old), false); if (res) { /* good, small enough */ uint32 newnitem; /* fill in the posting list with union of old and new TIDs */ newnitem = ginMergeItemPointers(GinGetPosting(res), GinGetPosting(old), GinGetNPosting(old), items, nitem); /* merge might have eliminated some duplicate items */ GinShortenTuple(res, newnitem); } else { /* posting list would be too big, convert to posting tree */ BlockNumber postingRoot; /* * Initialize posting tree with the old tuple's posting list. It's * surely small enough to fit on one posting-tree page, and should * already be in order with no duplicates. */ postingRoot = createPostingTree(ginstate->index, GinGetPosting(old), GinGetNPosting(old), buildStats); /* Now insert the TIDs-to-be-added into the posting tree */ ginInsertItemPointers(ginstate->index, postingRoot, items, nitem, buildStats); /* And build a new posting-tree-only result tuple */ res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true); GinSetPostingTree(res, postingRoot); } return res; }
/* * returns modified page or NULL if page isn't modified. * Function works with original page until first change is occurred, * then page is copied into temporary one. */ static Page ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) { Page origpage = BufferGetPage(buffer), tmppage; OffsetNumber i, maxoff = PageGetMaxOffsetNumber(origpage); tmppage = origpage; *nroot = 0; for (i = FirstOffsetNumber; i <= maxoff; i++) { IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); if (GinIsPostingTree(itup)) { /* * store posting tree's roots for further processing, we can't * vacuum it just now due to risk of deadlocks with scans/inserts */ roots[*nroot] = GinGetDownlink(itup); (*nroot)++; } else if (GinGetNPosting(itup) > 0) { int nitems; ItemPointer items_orig; bool free_items_orig; ItemPointer items; /* Get list of item pointers from the tuple. */ if (GinItupIsCompressed(itup)) { items_orig = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems); free_items_orig = true; } else { items_orig = (ItemPointer) GinGetPosting(itup); nitems = GinGetNPosting(itup); free_items_orig = false; } /* Remove any items from the list that need to be vacuumed. */ items = ginVacuumItemPointers(gvs, items_orig, nitems, &nitems); if (free_items_orig) pfree(items_orig); /* If any item pointers were removed, recreate the tuple. */ if (items) { OffsetNumber attnum; Datum key; GinNullCategory category; GinPostingList *plist; int plistsize; if (nitems > 0) { plist = ginCompressPostingList(items, nitems, GinMaxItemSize, NULL); plistsize = SizeOfGinPostingList(plist); } else { plist = NULL; plistsize = 0; } /* * if we already created a temporary page, make changes in * place */ if (tmppage == origpage) { /* * On first difference, create a temporary copy of the * page and copy the tuple's posting list to it. */ tmppage = PageGetTempPageCopy(origpage); /* set itup pointer to new page */ itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); } attnum = gintuple_get_attrnum(&gvs->ginstate, itup); key = gintuple_get_key(&gvs->ginstate, itup, &category); itup = GinFormTuple(&gvs->ginstate, attnum, key, category, (char *) plist, plistsize, nitems, true); if (plist) pfree(plist); PageIndexTupleDelete(tmppage, i); if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gvs->index)); pfree(itup); pfree(items); } } } return (tmppage == origpage) ? NULL : tmppage; }
/* * Start* functions setup state of searches: find correct buffer and locks it, * Stop* functions unlock buffer (but don't release!) */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) { GinBtreeData btreeEntry; GinBtreeStack *stackEntry; Page page; bool needUnlock = TRUE; if (entry->master != NULL) { entry->isFinished = entry->master->isFinished; return; } /* * We should find entry, and begin scan of posting tree * or just store posting list in memory */ prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); btreeEntry.searchMode = TRUE; stackEntry = ginFindLeafPage(&btreeEntry, NULL); page = BufferGetPage(stackEntry->buffer); entry->isFinished = TRUE; entry->buffer = InvalidBuffer; entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; entry->reduceResult = FALSE; entry->predictNumberResult = 0; if (btreeEntry.findItem(&btreeEntry, stackEntry)) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); if (GinIsPostingTree(itup)) { BlockNumber rootPostingTree = GinGetPostingTree(itup); GinPostingTreeScan *gdi; Page page; LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); entry->buffer = scanBeginPostingTree(gdi); /* * We keep buffer pinned because we need to prevent deletition * page during scan. See GIN's vacuum implementation. RefCount * is increased to keep buffer pinned after freeGinBtreeStack() call. */ IncrBufferRefCount(entry->buffer); page = BufferGetPage(entry->buffer); entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; /* * Keep page content in memory to prevent durable page locking */ entry->list = (ItemPointerData *) palloc( BLCKSZ ); entry->nlist = GinPageGetOpaque(page)->maxoff; memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); freeGinBtreeStack(gdi->stack); pfree(gdi); entry->isFinished = FALSE; } else if (GinGetNPosting(itup) > 0) { entry->nlist = GinGetNPosting(itup); entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); entry->isFinished = FALSE; } } if (needUnlock) LockBuffer(stackEntry->buffer, GIN_UNLOCK); freeGinBtreeStack(stackEntry); }