/* * Build a fresh leaf tuple, either posting-list or posting-tree format * depending on whether the given items list will fit. * items[] must be in sorted order with no duplicates. * * This is basically the same logic as in addItemPointersToLeafTuple, * but working from slightly different input. */ static IndexTuple buildFreshLeafTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer) { IndexTuple res = NULL; GinPostingList *compressedList; /* try to build a posting list tuple with all the items */ compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL); if (compressedList) { res = GinFormTuple(ginstate, attnum, key, category, (char *) compressedList, SizeOfGinPostingList(compressedList), nitem, false); pfree(compressedList); } if (!res) { /* posting list would be too big, build posting tree */ BlockNumber postingRoot; /* * Build posting-tree-only result tuple. We do this first so as to * fail quickly if the key is too big. */ res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true); /* * Initialize a new posting tree with the TIDs. */ postingRoot = createPostingTree(ginstate->index, items, nitem, buildStats, buffer); /* And save the root link in the result tuple */ GinSetPostingTree(res, postingRoot); } return res; }
/* * Adds array of item pointers to tuple's posting list, or * creates posting tree and tuple pointing to tree in case * of not enough space. Max size of tuple is defined in * GinFormTuple(). Returns a new, modified index tuple. * items[] must be in sorted order with no duplicates. */ static IndexTuple addItemPointersToLeafTuple(GinState *ginstate, IndexTuple old, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats, Buffer buffer) { OffsetNumber attnum; Datum key; GinNullCategory category; IndexTuple res; ItemPointerData *newItems, *oldItems; int oldNPosting, newNPosting; GinPostingList *compressedList; Assert(!GinIsPostingTree(old)); attnum = gintuple_get_attrnum(ginstate, old); key = gintuple_get_key(ginstate, old, &category); /* merge the old and new posting lists */ oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting); newItems = ginMergeItemPointers(items, nitem, oldItems, oldNPosting, &newNPosting); /* Compress the posting list, and try to a build tuple with room for it */ res = NULL; compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, NULL); pfree(newItems); if (compressedList) { res = GinFormTuple(ginstate, attnum, key, category, (char *) compressedList, SizeOfGinPostingList(compressedList), newNPosting, false); pfree(compressedList); } if (!res) { /* posting list would be too big, convert to posting tree */ BlockNumber postingRoot; /* * Initialize posting tree with the old tuple's posting list. It's * surely small enough to fit on one posting-tree page, and should * already be in order with no duplicates. 
*/ postingRoot = createPostingTree(ginstate->index, oldItems, oldNPosting, buildStats, buffer); /* Now insert the TIDs-to-be-added into the posting tree */ ginInsertItemPointers(ginstate->index, postingRoot, items, nitem, buildStats); /* And build a new posting-tree-only result tuple */ res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true); GinSetPostingTree(res, postingRoot); } pfree(oldItems); return res; }
/*
 * Redo recompression of the posting list on a leaf data page, modifying the
 * segments in place.
 *
 * 'data' is the fixed-size record header; it is immediately followed by
 * data->nactions actions.  Every action starts with two bytes (the number of
 * the segment it applies to, and the action code).  INSERT and REPLACE
 * actions then carry a complete segment; ADDITEMS carries a uint16 count
 * followed by that many item pointers.  Actions are expected in
 * non-decreasing segment order.
 *
 * NOTE(review): segments are shifted within the page with memmove as each
 * action is applied, so this presumably relies on every intermediate state
 * fitting on the page, not just the final one -- confirm.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
    int         actionno;
    int         segno;
    GinPostingList *oldseg;
    Pointer     segmentend;
    char       *walbuf;
    int         totalsize;

    /*
     * If the page is in pre-9.4 format, convert to new format first.
     */
    if (!GinPageIsCompressed(page))
    {
        ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
        int         nuncompressed = GinPageGetOpaque(page)->maxoff;

        /*
         * An uncompressed page may hold no items at all.  In that case there
         * is nothing to compress: avoid handing an empty array to
         * ginCompressPostingList and simply mark the page as compressed with
         * zero bytes of data.
         */
        if (nuncompressed > 0)
        {
            int         npacked;
            GinPostingList *plist;

            plist = ginCompressPostingList(uncompressed, nuncompressed,
                                           BLCKSZ, &npacked);
            Assert(npacked == nuncompressed);

            totalsize = SizeOfGinPostingList(plist);

            memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
            pfree(plist);
        }
        else
            totalsize = 0;

        GinDataPageSetDataSize(page, totalsize);
        GinPageSetCompressed(page);
        GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
    }

    oldseg = GinDataLeafPageGetPostingList(page);
    segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
    segno = 0;

    /* The actions follow the fixed-size record header. */
    walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
    for (actionno = 0; actionno < data->nactions; actionno++)
    {
        uint8       a_segno = *((uint8 *) (walbuf++));
        uint8       a_action = *((uint8 *) (walbuf++));
        GinPostingList *newseg = NULL;
        GinPostingList *rebuiltseg = NULL;  /* segment palloc'd by us, if any */
        int         newsegsize = 0;
        ItemPointerData *items = NULL;
        uint16      nitems = 0;
        ItemPointerData *olditems;
        int         nolditems;
        ItemPointerData *newitems;
        int         nnewitems;
        int         segsize;
        Pointer     segptr;
        int         szleft;

        /* Extract all the information we need from the WAL record */
        if (a_action == GIN_SEGMENT_INSERT ||
            a_action == GIN_SEGMENT_REPLACE)
        {
            newseg = (GinPostingList *) walbuf;
            newsegsize = SizeOfGinPostingList(newseg);
            walbuf += SHORTALIGN(newsegsize);
        }

        if (a_action == GIN_SEGMENT_ADDITEMS)
        {
            /* the count may be unaligned in the record, so copy it out */
            memcpy(&nitems, walbuf, sizeof(uint16));
            walbuf += sizeof(uint16);
            items = (ItemPointerData *) walbuf;
            walbuf += nitems * sizeof(ItemPointerData);
        }

        /* Skip to the segment that this action concerns */
        Assert(segno <= a_segno);
        while (segno < a_segno)
        {
            oldseg = GinNextPostingListSegment(oldseg);
            segno++;
        }

        /*
         * ADDITEMS action is handled like REPLACE, but the new segment to
         * replace the old one is reconstructed using the old segment from
         * disk and the new items from the WAL record.
         */
        if (a_action == GIN_SEGMENT_ADDITEMS)
        {
            int         npacked;

            olditems = ginPostingListDecode(oldseg, &nolditems);

            newitems = ginMergeItemPointers(items, nitems,
                                            olditems, nolditems,
                                            &nnewitems);
            Assert(nnewitems == nolditems + nitems);

            newseg = ginCompressPostingList(newitems, nnewitems,
                                            BLCKSZ, &npacked);
            Assert(npacked == nnewitems);

            /* The item arrays are not needed once the segment is built. */
            pfree(newitems);
            pfree(olditems);

            /* Remember that this segment is ours to free after copying. */
            rebuiltseg = newseg;

            newsegsize = SizeOfGinPostingList(newseg);
            a_action = GIN_SEGMENT_REPLACE;
        }

        segptr = (Pointer) oldseg;
        if (segptr != segmentend)
            segsize = SizeOfGinPostingList(oldseg);
        else
        {
            /*
             * Positioned after the last existing segment. Only INSERTs
             * expected here.
             */
            Assert(a_action == GIN_SEGMENT_INSERT);
            segsize = 0;
        }

        /* bytes from the current segment to the end of the posting list */
        szleft = segmentend - segptr;

        switch (a_action)
        {
            case GIN_SEGMENT_DELETE:
                /* close the gap; segptr now addresses the next segment */
                memmove(segptr, segptr + segsize, szleft - segsize);
                segmentend -= segsize;

                segno++;
                break;

            case GIN_SEGMENT_INSERT:
                /* make room for the new segment */
                memmove(segptr + newsegsize, segptr, szleft);
                /* copy the new segment in place */
                memcpy(segptr, newseg, newsegsize);
                segmentend += newsegsize;
                /* segno is not advanced: the old segment still follows */
                segptr += newsegsize;
                break;

            case GIN_SEGMENT_REPLACE:
                /* shift the segments that follow */
                memmove(segptr + newsegsize,
                        segptr + segsize,
                        szleft - segsize);
                /* copy the replacement segment in place */
                memcpy(segptr, newseg, newsegsize);
                segmentend -= segsize;
                segmentend += newsegsize;
                segptr += newsegsize;
                segno++;
                break;

            default:
                elog(ERROR, "unexpected GIN leaf action: %u", a_action);
        }
        oldseg = (GinPostingList *) segptr;

        /* Release the segment built for ADDITEMS; it has been copied. */
        if (rebuiltseg != NULL)
            pfree(rebuiltseg);
    }

    totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
    GinDataPageSetDataSize(page, totalsize);
}
/* * returns modified page or NULL if page isn't modified. * Function works with original page until first change is occurred, * then page is copied into temporary one. */ static Page ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) { Page origpage = BufferGetPage(buffer), tmppage; OffsetNumber i, maxoff = PageGetMaxOffsetNumber(origpage); tmppage = origpage; *nroot = 0; for (i = FirstOffsetNumber; i <= maxoff; i++) { IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); if (GinIsPostingTree(itup)) { /* * store posting tree's roots for further processing, we can't * vacuum it just now due to risk of deadlocks with scans/inserts */ roots[*nroot] = GinGetDownlink(itup); (*nroot)++; } else if (GinGetNPosting(itup) > 0) { int nitems; ItemPointer items_orig; bool free_items_orig; ItemPointer items; /* Get list of item pointers from the tuple. */ if (GinItupIsCompressed(itup)) { items_orig = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems); free_items_orig = true; } else { items_orig = (ItemPointer) GinGetPosting(itup); nitems = GinGetNPosting(itup); free_items_orig = false; } /* Remove any items from the list that need to be vacuumed. */ items = ginVacuumItemPointers(gvs, items_orig, nitems, &nitems); if (free_items_orig) pfree(items_orig); /* If any item pointers were removed, recreate the tuple. */ if (items) { OffsetNumber attnum; Datum key; GinNullCategory category; GinPostingList *plist; int plistsize; if (nitems > 0) { plist = ginCompressPostingList(items, nitems, GinMaxItemSize, NULL); plistsize = SizeOfGinPostingList(plist); } else { plist = NULL; plistsize = 0; } /* * if we already created a temporary page, make changes in * place */ if (tmppage == origpage) { /* * On first difference, create a temporary copy of the * page and copy the tuple's posting list to it. 
*/ tmppage = PageGetTempPageCopy(origpage); /* set itup pointer to new page */ itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); } attnum = gintuple_get_attrnum(&gvs->ginstate, itup); key = gintuple_get_key(&gvs->ginstate, itup, &category); itup = GinFormTuple(&gvs->ginstate, attnum, key, category, (char *) plist, plistsize, nitems, true); if (plist) pfree(plist); PageIndexTupleDelete(tmppage, i); if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gvs->index)); pfree(itup); pfree(items); } } } return (tmppage == origpage) ? NULL : tmppage; }
/*
 * Redo recompression of posting list.  Doing all the changes in-place is not
 * always possible, because it might require more space than we have on the
 * page.  Instead, once a modification is required we copy the unprocessed
 * tail of the page into a separately allocated chunk of memory, and read the
 * original versions of the segments from there.  Thanks to that we don't
 * have to bother about moving page data in-place: the page is simply
 * rewritten front to back through 'writePtr'.
 *
 * 'data' is the fixed-size record header; it is immediately followed by
 * data->nactions actions.  Every action starts with two bytes (the number of
 * the segment it applies to, and the action code).  INSERT and REPLACE
 * actions then carry a complete segment; ADDITEMS carries a uint16 count
 * followed by that many item pointers.  Actions are expected in
 * non-decreasing segment order.
 */
static void
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
{
    int         actionno;
    int         segno;
    GinPostingList *oldseg;
    Pointer     segmentend;
    char       *walbuf;
    int         totalsize;
    Pointer     tailCopy = NULL;
    Pointer     writePtr;
    Pointer     segptr;

    /*
     * If the page is in pre-9.4 format, convert to new format first.
     */
    if (!GinPageIsCompressed(page))
    {
        ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
        int         nuncompressed = GinPageGetOpaque(page)->maxoff;

        /*
         * Empty leaf pages are deleted as part of vacuum, but leftmost and
         * rightmost pages are never deleted.  So, pg_upgrade'd from pre-9.4
         * instances might contain empty leaf pages, and we need to handle
         * them correctly.
         */
        if (nuncompressed > 0)
        {
            int         npacked;
            GinPostingList *plist;

            plist = ginCompressPostingList(uncompressed, nuncompressed,
                                           BLCKSZ, &npacked);
            totalsize = SizeOfGinPostingList(plist);

            Assert(npacked == nuncompressed);

            memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
            pfree(plist);
        }
        else
        {
            totalsize = 0;
        }

        GinDataPageSetDataSize(page, totalsize);
        GinPageSetCompressed(page);
        GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
    }

    /*
     * oldseg walks the original segments (on the page, or in tailCopy once
     * it exists); writePtr marks where the next output byte goes on the page.
     */
    oldseg = GinDataLeafPageGetPostingList(page);
    writePtr = (Pointer) oldseg;
    segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
    segno = 0;

    /* The actions follow the fixed-size record header. */
    walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
    for (actionno = 0; actionno < data->nactions; actionno++)
    {
        uint8       a_segno = *((uint8 *) (walbuf++));
        uint8       a_action = *((uint8 *) (walbuf++));
        GinPostingList *newseg = NULL;
        GinPostingList *rebuiltseg = NULL;  /* segment palloc'd by us, if any */
        int         newsegsize = 0;
        ItemPointerData *items = NULL;
        uint16      nitems = 0;
        ItemPointerData *olditems;
        int         nolditems;
        ItemPointerData *newitems;
        int         nnewitems;
        int         segsize;

        /* Extract all the information we need from the WAL record */
        if (a_action == GIN_SEGMENT_INSERT ||
            a_action == GIN_SEGMENT_REPLACE)
        {
            newseg = (GinPostingList *) walbuf;
            newsegsize = SizeOfGinPostingList(newseg);
            walbuf += SHORTALIGN(newsegsize);
        }

        if (a_action == GIN_SEGMENT_ADDITEMS)
        {
            /* the count may be unaligned in the record, so copy it out */
            memcpy(&nitems, walbuf, sizeof(uint16));
            walbuf += sizeof(uint16);
            items = (ItemPointerData *) walbuf;
            walbuf += nitems * sizeof(ItemPointerData);
        }

        /* Skip to the segment that this action concerns */
        Assert(segno <= a_segno);
        while (segno < a_segno)
        {
            /*
             * Once modification has started and the page tail is copied, we
             * have to copy unmodified segments back onto the page.  Before
             * that, they are already where they belong.
             */
            segsize = SizeOfGinPostingList(oldseg);
            if (tailCopy)
            {
                /*
                 * The segment may end exactly at the special area (e.g. when
                 * everything after it gets deleted), hence "<=".
                 */
                Assert(writePtr + segsize <= PageGetSpecialPointer(page));
                memcpy(writePtr, (Pointer) oldseg, segsize);
            }
            writePtr += segsize;
            oldseg = GinNextPostingListSegment(oldseg);
            segno++;
        }

        /*
         * ADDITEMS action is handled like REPLACE, but the new segment to
         * replace the old one is reconstructed using the old segment from
         * disk and the new items from the WAL record.
         */
        if (a_action == GIN_SEGMENT_ADDITEMS)
        {
            int         npacked;

            olditems = ginPostingListDecode(oldseg, &nolditems);

            newitems = ginMergeItemPointers(items, nitems,
                                            olditems, nolditems,
                                            &nnewitems);
            Assert(nnewitems == nolditems + nitems);

            newseg = ginCompressPostingList(newitems, nnewitems,
                                            BLCKSZ, &npacked);
            Assert(npacked == nnewitems);

            /* The item arrays are not needed once the segment is built. */
            pfree(newitems);
            pfree(olditems);

            /* Remember that this segment is ours to free after copying. */
            rebuiltseg = newseg;

            newsegsize = SizeOfGinPostingList(newseg);
            a_action = GIN_SEGMENT_REPLACE;
        }

        segptr = (Pointer) oldseg;
        if (segptr != segmentend)
            segsize = SizeOfGinPostingList(oldseg);
        else
        {
            /*
             * Positioned after the last existing segment. Only INSERTs
             * expected here.
             */
            Assert(a_action == GIN_SEGMENT_INSERT);
            segsize = 0;
        }

        /*
         * We're about to start modification of the page.  So, copy the tail
         * of the page if that has not been done already, and continue
         * reading the original segments from the copy.
         */
        if (!tailCopy && segptr != segmentend)
        {
            int         tailSize = segmentend - segptr;

            tailCopy = (Pointer) palloc(tailSize);
            memcpy(tailCopy, segptr, tailSize);
            segptr = tailCopy;
            oldseg = (GinPostingList *) segptr;
            segmentend = segptr + tailSize;
        }

        switch (a_action)
        {
            case GIN_SEGMENT_DELETE:
                /* just step over the old segment without writing it */
                segptr += segsize;
                segno++;
                break;

            case GIN_SEGMENT_INSERT:
                /* copy the new segment in place */
                Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
                memcpy(writePtr, newseg, newsegsize);
                writePtr += newsegsize;
                /* segno is not advanced: the old segment still follows */
                break;

            case GIN_SEGMENT_REPLACE:
                /* copy the new version of segment in place */
                Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
                memcpy(writePtr, newseg, newsegsize);
                writePtr += newsegsize;
                segptr += segsize;
                segno++;
                break;

            default:
                elog(ERROR, "unexpected GIN leaf action: %u", a_action);
        }
        oldseg = (GinPostingList *) segptr;

        /* Release the segment built for ADDITEMS; it has been copied. */
        if (rebuiltseg != NULL)
            pfree(rebuiltseg);
    }

    /* Copy the rest of unmodified segments if any. */
    segptr = (Pointer) oldseg;
    if (segptr != segmentend && tailCopy)
    {
        int         restSize = segmentend - segptr;

        Assert(writePtr + restSize <= PageGetSpecialPointer(page));
        memcpy(writePtr, segptr, restSize);
        writePtr += restSize;
    }

    /* Nothing reads from the tail copy any more. */
    if (tailCopy)
        pfree(tailCopy);

    totalsize = writePtr - (Pointer) GinDataLeafPageGetPostingList(page);
    GinDataPageSetDataSize(page, totalsize);
}