/*
 * Decide whether the search must continue on the right sibling.
 *
 * The item pointer currently being inserted (btree->items[btree->curitem])
 * is compared with the right bound stored on 'page'.  Returns TRUE only when
 * the page is not flagged as rightmost and the item sorts after that bound;
 * otherwise returns FALSE.
 */
static bool
dataIsMoveRight(GinBtree btree, Page page)
{
    ItemPointer rightBound = GinDataPageGetRightBound(page);

    /* a rightmost page has no right link to follow */
    if (GinPageRightMost(page))
        return FALSE;

    if (ginCompareItemPointers(btree->items + btree->curitem, rightBound) > 0)
        return TRUE;

    return FALSE;
}
/*
 * Decide whether the entry being searched for lies to the right of 'page'.
 *
 * Returns FALSE for a page flagged as rightmost.  Otherwise the search key
 * (btree->entryAttnum, btree->entryValue) is compared with the tuple returned
 * by getRightMostTuple(); TRUE is returned when the search key is greater.
 */
static bool
entryIsMoveRight(GinBtree btree, Page page)
{
    IndexTuple  lastTuple;

    if (GinPageRightMost(page))
        return FALSE;

    lastTuple = getRightMostTuple(page);

    return (compareAttEntries(btree->ginstate,
                              btree->entryAttnum,
                              btree->entryValue,
                              gintuple_get_attrnum(btree->ginstate, lastTuple),
                              gin_index_getattr(btree->ginstate, lastTuple)) > 0)
        ? TRUE
        : FALSE;
}
/*
 * Decide whether the entry being searched for lies to the right of 'page'.
 *
 * Returns FALSE for a page flagged as rightmost.  Otherwise the attribute
 * number, key and null category are extracted from the tuple returned by
 * getRightMostTuple() and compared with the search key held in 'btree'
 * (entryAttnum, entryKey, entryCategory); TRUE is returned when the search
 * key is greater.
 */
static bool
entryIsMoveRight(GinBtree btree, Page page)
{
    IndexTuple      lastTuple;
    OffsetNumber    lastAttnum;
    Datum           lastKey;
    GinNullCategory lastCategory;
    int             cmp;

    if (GinPageRightMost(page))
        return FALSE;

    lastTuple = getRightMostTuple(page);
    lastAttnum = gintuple_get_attrnum(btree->ginstate, lastTuple);
    lastKey = gintuple_get_key(btree->ginstate, lastTuple, &lastCategory);

    cmp = ginCompareAttEntries(btree->ginstate,
                               btree->entryAttnum, btree->entryKey,
                               btree->entryCategory,
                               lastAttnum, lastKey, lastCategory);

    return (cmp > 0) ? TRUE : FALSE;
}
/*
 * Report whether the pending insertion fits into the free space of the data
 * page held in 'buf'.  Deletions are never handled here (asserted).
 *
 * Space required:
 *   - internal page:                       one PostingItem
 *   - leaf page, appending past the last
 *     item of a rightmost page:            all remaining items
 *                                          (btree->nitem - btree->curitem)
 *   - any other leaf insertion:            one ItemPointerData
 */
static bool
dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
{
    Page        page = BufferGetPage(buf);
    size_t      needed;

    Assert(GinPageIsData(page));
    Assert(!btree->isDelete);

    if (!GinPageIsLeaf(page))
        needed = sizeof(PostingItem);
    else if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
        needed = (btree->nitem - btree->curitem) * sizeof(ItemPointerData);
    else
        needed = sizeof(ItemPointerData);

    return needed <= GinDataPageGetFreeSpace(page);
}
/* * scans posting tree and deletes empty pages */ static bool ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) { DataPageDeleteStack *me; Buffer buffer; Page page; bool meDelete = FALSE; if (isRoot) { me = parent; } else { if (!parent->child) { me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack)); me->parent = parent; parent->child = me; me->leftBlkno = InvalidBlockNumber; } else me = parent->child; } buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page)) { OffsetNumber i; me->blkno = blkno; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) i--; } } if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page))) { /* we never delete right most branch */ Assert(!isRoot); if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); meDelete = TRUE; } } } ReleaseBuffer(buffer); if (!meDelete) me->leftBlkno = blkno; return meDelete; }
/*
 * Choose the downlink to follow on a non-leaf entry page.
 *
 * The page in stack->buffer is assumed to be the correct one, i.e. the
 * searched value should belong to one of its children.  stack->off is set to
 * the offset of the chosen tuple and the block number of its downlink is
 * returned.
 *
 * For a full scan no comparison is done: stack->off is set to the first
 * offset, stack->predictNumber is multiplied by the number of items on the
 * page, and btree->getLeftMostChild() supplies the result.
 */
static BlockNumber
entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
{
    Page        page = BufferGetPage(stack->buffer);
    IndexTuple  itup = NULL;
    OffsetNumber lo,
                hi,
                last;

    Assert(!GinPageIsLeaf(page));
    Assert(!GinPageIsData(page));

    if (btree->fullScan)
    {
        stack->off = FirstOffsetNumber;
        stack->predictNumber *= PageGetMaxOffsetNumber(page);
        return btree->getLeftMostChild(btree, page);
    }

    lo = FirstOffsetNumber;
    last = hi = PageGetMaxOffsetNumber(page);
    Assert(hi >= lo);

    /* binary search over the half-open range [lo, hi) */
    hi++;

    while (lo < hi)
    {
        OffsetNumber mid = lo + ((hi - lo) / 2);
        int         cmp;

        if (mid == last && GinPageRightMost(page))
        {
            /* last tuple of a rightmost page acts as "right infinity" */
            cmp = -1;
        }
        else
        {
            OffsetNumber midAttnum;
            Datum       midKey;
            GinNullCategory midCategory;

            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
            midAttnum = gintuple_get_attrnum(btree->ginstate, itup);
            midKey = gintuple_get_key(btree->ginstate, itup, &midCategory);
            cmp = ginCompareAttEntries(btree->ginstate,
                                       btree->entryAttnum,
                                       btree->entryKey,
                                       btree->entryCategory,
                                       midAttnum, midKey, midCategory);
        }

        if (cmp > 0)
            lo = mid + 1;
        else if (cmp < 0)
            hi = mid;
        else
        {
            /* exact match: follow this tuple's downlink */
            stack->off = mid;
            Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
            return GinGetDownlink(itup);
        }
    }

    /* no exact match: 'hi' is the first tuple not less than the key */
    Assert(hi >= FirstOffsetNumber && hi <= last);

    stack->off = hi;
    itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, hi));
    Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
    return GinGetDownlink(itup);
}
/*
 * Finish a split by inserting the downlink for the new page to parent.
 *
 * On entry, stack->buffer is exclusively locked.
 *
 * If freestack is true, all the buffers are released and unlocked as we
 * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
 * locked, and stack is unmodified, except for possibly moving right to find
 * the correct parent of page.
 *
 * btree      - tree descriptor; supplies the findChildPtr and prepareDownlink
 *              callbacks and the index relation
 * stack      - stack entry of the page whose split is being finished; its
 *              ->parent chain is followed upwards
 * freestack  - see above
 * buildStats - passed through unchanged to ginPlaceToPage() and to recursive
 *              calls
 *
 * The loop repeats one level higher for as long as ginPlaceToPage() returns
 * false for the parent.
 */
static void
ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
               GinStatsData *buildStats)
{
    Page        page;
    bool        done;
    bool        first = true;   /* true only during the first loop iteration */

    /*
     * freestack == false when we encounter an incompletely split page during
     * a scan, while freestack == true is used in the normal scenario that a
     * split is finished right after the initial insert.
     */
    if (!freestack)
        elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
             stack->blkno, RelationGetRelationName(btree->index));

    /* this loop crawls up the stack until the insertion is complete */
    do
    {
        GinBtreeStack *parent = stack->parent;
        void       *insertdata;
        BlockNumber updateblkno;

        /* search parent to lock */
        LockBuffer(parent->buffer, GIN_EXCLUSIVE);

        /*
         * If the parent page was incompletely split, finish that split
         * first, then continue with the current one.
         *
         * Note: we have to finish *all* incomplete splits we encounter, even
         * if we have to move right. Otherwise we might choose as the target
         * a page that has no downlink in the parent, and splitting it
         * further would fail.
         */
        if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
            ginFinishSplit(btree, parent, false, buildStats);

        /*
         * Move right if it's needed: keep stepping to the right sibling until
         * findChildPtr() locates the downlink to stack->blkno on the page.
         */
        page = BufferGetPage(parent->buffer);
        while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
        {
            if (GinPageRightMost(page))
            {
                /*
                 * rightmost page, but we don't find parent, we should use
                 * plain search...
                 *
                 * The parent is unlocked first; ginFindParents() then
                 * re-establishes stack->parent, which is picked up below.
                 */
                LockBuffer(parent->buffer, GIN_UNLOCK);
                ginFindParents(btree, stack);
                parent = stack->parent;
                Assert(parent != NULL);
                break;
            }

            /* step right, refreshing the stack entry's buffer and block */
            parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
            parent->blkno = BufferGetBlockNumber(parent->buffer);
            page = BufferGetPage(parent->buffer);

            /* finish any incomplete split met while moving right as well */
            if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
                ginFinishSplit(btree, parent, false, buildStats);
        }

        /*
         * Insert the downlink.  updateblkno is the child's current right
         * link; 'done' is false when ginPlaceToPage() reports that another
         * round, one level up, is required.
         */
        insertdata = btree->prepareDownlink(btree, stack->buffer);
        updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
        done = ginPlaceToPage(btree, parent,
                              insertdata, updateblkno,
                              stack->buffer, buildStats);
        pfree(insertdata);

        /*
         * If the caller requested to free the stack, unlock and release the
         * child buffer now. Otherwise keep it pinned and locked, but if we
         * have to recurse up the tree, we can unlock the upper pages, only
         * keeping the page at the bottom of the stack locked.
         */
        if (!first || freestack)
            LockBuffer(stack->buffer, GIN_UNLOCK);
        if (freestack)
        {
            ReleaseBuffer(stack->buffer);
            pfree(stack);
        }

        /* continue one level higher */
        stack = parent;
        first = false;
    } while (!done);

    /* unlock the parent */
    LockBuffer(stack->buffer, GIN_UNLOCK);

    if (freestack)
        freeGinBtreeStack(stack);
}
/*
 * Split a posting-tree page and fill in the WAL record for the split.
 *
 * The original buffer (lbuf) is left untouched; the new contents of the left
 * page are built in a temporary copy, which is returned.  The right half is
 * written directly into rbuf's page.
 *
 * btree  - tree descriptor; supplies the items to insert (leaf) or the
 *          posting item (internal).  On return btree->pitem holds the
 *          downlink for the left page (block number of lbuf, key = its
 *          highest item) and btree->rightblkno the block number of rbuf.
 * lbuf   - buffer of the page being split
 * rbuf   - buffer receiving the right half
 * off    - offset at which the new item is inserted
 * prdata - receives a pointer to the (static) WAL record data chain
 *
 * When appending past the end of a rightmost leaf page, as many of the
 * remaining items as the scratch-buffer bound allows are consumed in one go.
 * In build mode a rightmost page is split so that the left page is filled
 * completely; otherwise the items are divided in half.
 */
static Page
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
              XLogRecData **prdata)
{
    char       *ptr;
    OffsetNumber separator;
    ItemPointer bound;
    Page        lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
    ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
    int         sizeofitem = GinSizeOfDataPageItem(lpage);
    OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
    Page        rpage = BufferGetPage(rbuf);
    Size        pageSize = PageGetPageSize(lpage);
    Size        freeSpace;

    /* these must be static so they can be returned to caller */
    static ginxlogSplit data;
    static XLogRecData rdata[4];
    static char vector[2 * BLCKSZ];

    /* initialize the right page now so the free space of an empty page is known */
    GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
    freeSpace = GinDataPageGetFreeSpace(rpage);

    *prdata = rdata;

    /*
     * NOTE(review): InvalidOffsetNumber is an offset sentinel, yet it is
     * stored in a block-number field here; InvalidBlockNumber looks like what
     * was meant.  Left unchanged because the value goes into the WAL record
     * and the replay code is not visible from here -- confirm before fixing.
     */
    data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
        InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
    data.updateBlkno = dataPrepareData(btree, lpage, off);

    /* gather all existing items into the scratch vector */
    memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
           maxoff * sizeofitem);

    if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) &&
        off > GinPageGetOpaque(lpage)->maxoff)
    {
        /* append at the end: take as many pending items as the bound allows */
        while (btree->curitem < btree->nitem &&
               maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
        {
            memcpy(vector + maxoff * sizeof(ItemPointerData),
                   btree->items + btree->curitem,
                   sizeof(ItemPointerData));
            maxoff++;
            btree->curitem++;
        }
    }
    else
    {
        /* open a one-item gap at 'off' and place the new item there */
        ptr = vector + (off - 1) * sizeofitem;
        if (maxoff + 1 - off != 0)
            memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
        if (GinPageIsLeaf(lpage))
        {
            memcpy(ptr, btree->items + btree->curitem, sizeofitem);
            btree->curitem++;
        }
        else
            memcpy(ptr, &(btree->pitem), sizeofitem);

        maxoff++;
    }

    /*
     * We assume that during index creation the table is scanned from
     * beginning to end, so ItemPointers are monotonically increasing; the
     * left page is therefore filled completely in that case.
     */
    if (btree->isBuild && GinPageRightMost(lpage))
        separator = freeSpace / sizeofitem;
    else
        separator = maxoff / 2;

    /* lpage's flags are read before lpage is reset, then taken from rpage */
    GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
    GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);

    /* first 'separator' items go left, the remainder goes right */
    memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector,
           separator * sizeofitem);
    GinPageGetOpaque(lpage)->maxoff = separator;
    memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
           vector + separator * sizeofitem,
           (maxoff - separator) * sizeofitem);
    GinPageGetOpaque(rpage)->maxoff = maxoff - separator;

    /* build the downlink for the left page in btree->pitem */
    PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
    if (GinPageIsLeaf(lpage))
        btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
                                            GinPageGetOpaque(lpage)->maxoff);
    else
        btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
                                       GinPageGetOpaque(lpage)->maxoff))->key;
    btree->rightblkno = BufferGetBlockNumber(rbuf);

    /* set up right bound for left page */
    bound = GinDataPageGetRightBound(lpage);
    *bound = btree->pitem.key;

    /* set up right bound for right page */
    bound = GinDataPageGetRightBound(rpage);
    *bound = oldbound;

    /* WAL record header */
    data.node = btree->index->rd_node;
    data.rootBlkno = InvalidBlockNumber;
    data.lblkno = BufferGetBlockNumber(lbuf);
    data.rblkno = BufferGetBlockNumber(rbuf);
    data.separator = separator;
    data.nitem = maxoff;
    data.isData = TRUE;
    data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
    data.isRootSplit = FALSE;
    data.rightbound = oldbound;

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) &data;
    rdata[0].len = sizeof(ginxlogSplit);
    rdata[0].next = &rdata[1];

    /* WAL payload: all items of both halves, in order */
    rdata[1].buffer = InvalidBuffer;
    rdata[1].data = vector;
    rdata[1].len = MAXALIGN(maxoff * sizeofitem);
    rdata[1].next = NULL;

    return lpage;
}
/*
 * Insert into a posting-tree page that has room, and fill in the WAL record.
 *
 * On a leaf page one item pointer is added at 'off'; when appending past the
 * last item of a rightmost leaf page (the usual case during index creation)
 * all remaining items are added.  On an internal page btree->pitem is added.
 *
 * *prdata receives a pointer to the (static) WAL record data chain.
 */
static void
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
                XLogRecData **prdata)
{
    Page        page = BufferGetPage(buf);
    int         itemSize = GinSizeOfDataPageItem(page);
    int         slot = 0;       /* index of the rdata entry being filled */

    /* these must be static so they can be returned to caller */
    static XLogRecData rdata[3];
    static ginxlogInsert data;

    *prdata = rdata;
    Assert(GinPageIsData(page));

    data.updateBlkno = dataPrepareData(btree, page, off);
    data.node = btree->index->rd_node;
    data.blkno = BufferGetBlockNumber(buf);
    data.offset = off;
    data.nitem = 1;
    data.isDelete = FALSE;
    data.isData = TRUE;
    data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;

    /*
     * Prevent a full-page write if a child's split occurred; that is needed
     * to remove incomplete splits while replaying WAL.  data.updateBlkno
     * contains the block number of the newly created right page of a
     * recently split child, so the buffer is attached to the record only
     * when there was no such split.
     */
    if (data.updateBlkno == InvalidBlockNumber)
    {
        rdata[slot].buffer = buf;
        rdata[slot].buffer_std = FALSE;
        rdata[slot].data = NULL;
        rdata[slot].len = 0;
        rdata[slot].next = &rdata[slot + 1];
        slot++;
    }

    /* record header */
    rdata[slot].buffer = InvalidBuffer;
    rdata[slot].data = (char *) &data;
    rdata[slot].len = sizeof(ginxlogInsert);
    rdata[slot].next = &rdata[slot + 1];
    slot++;

    /* record payload: the item(s) being inserted */
    rdata[slot].buffer = InvalidBuffer;
    if (GinPageIsLeaf(page))
        rdata[slot].data = (char *) (btree->items + btree->curitem);
    else
        rdata[slot].data = (char *) &(btree->pitem);
    rdata[slot].len = itemSize;
    rdata[slot].next = NULL;

    /* internal page: a single posting item */
    if (!GinPageIsLeaf(page))
    {
        GinDataPageAddItem(page, &(btree->pitem), off);
        return;
    }

    if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
    {
        /* usually, create index: append every remaining item */
        uint32      firstPos = btree->curitem;

        for (; btree->curitem < btree->nitem; btree->curitem++, off++)
            GinDataPageAddItem(page, btree->items + btree->curitem, off);

        data.nitem = btree->curitem - firstPos;
        rdata[slot].len = itemSize * data.nitem;
    }
    else
    {
        GinDataPageAddItem(page, btree->items + btree->curitem, off);
        btree->curitem++;
    }
}
/*
 * Split a posting-tree page and fill in the WAL record for the split.
 *
 * The original buffer (lbuf) is left untouched; the new contents of the left
 * page are built in a temporary copy, which is returned.  The right half is
 * written directly into rbuf's page.
 *
 * btree       - tree descriptor (only isBuild is consulted here)
 * lbuf        - buffer of the page being split
 * rbuf        - buffer receiving the right half
 * off         - offset at which the new item is inserted
 * insertdata  - GinBtreeDataLeafInsertData for a leaf page, PostingItem for
 *               an internal page
 * updateblkno - on an internal page, the existing downlink at 'off' is
 *               redirected to this block before the new item is inserted
 * prdata      - receives a pointer to the (static) WAL record data chain
 *
 * When appending past the end of a rightmost leaf page, as many of the
 * remaining items as the scratch-buffer bound allows are consumed in one go.
 * In build mode a rightmost page is split so that the left page is filled
 * completely; otherwise the items are divided in half.
 */
static Page
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
              void *insertdata, BlockNumber updateblkno, XLogRecData **prdata)
{
    char       *ptr;
    OffsetNumber separator;
    ItemPointer bound;
    Page        lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
    bool        isleaf = GinPageIsLeaf(lpage);
    ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
    int         sizeofitem = GinSizeOfDataPageItem(lpage);
    OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
    Page        rpage = BufferGetPage(rbuf);
    Size        pageSize = PageGetPageSize(lpage);
    Size        freeSpace;

    /* these must be static so they can be returned to caller */
    static ginxlogSplitData data;
    static XLogRecData rdata[2];
    static char vector[2 * BLCKSZ];

    /* initialize the right page now so the free space of an empty page is known */
    GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
    freeSpace = GinDataPageGetFreeSpace(rpage);

    *prdata = rdata;

    /* Update existing downlink to point to next page (on internal page) */
    if (!isleaf)
    {
        PostingItem *pitem = GinDataPageGetPostingItem(lpage, off);

        PostingItemSetBlockNumber(pitem, updateblkno);
    }

    /* gather all existing items into the scratch vector */
    if (isleaf)
    {
        memcpy(vector,
               GinDataPageGetItemPointer(lpage, FirstOffsetNumber),
               maxoff * sizeof(ItemPointerData));
    }
    else
    {
        memcpy(vector,
               GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
               maxoff * sizeof(PostingItem));
    }

    if (isleaf && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
    {
        /* append new items to the end */
        GinBtreeDataLeafInsertData *items = insertdata;

        /* the bound keeps the collected items within twice the free space of an empty page */
        while (items->curitem < items->nitem &&
               maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
        {
            memcpy(vector + maxoff * sizeof(ItemPointerData),
                   items->items + items->curitem,
                   sizeof(ItemPointerData));
            maxoff++;
            items->curitem++;
        }
    }
    else
    {
        /* open a one-item gap at 'off' and place the new item there */
        ptr = vector + (off - 1) * sizeofitem;
        if (maxoff + 1 - off != 0)
            memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
        if (isleaf)
        {
            GinBtreeDataLeafInsertData *items = insertdata;

            memcpy(ptr, items->items + items->curitem, sizeofitem);
            items->curitem++;
        }
        else
        {
            PostingItem *pitem = insertdata;

            memcpy(ptr, pitem, sizeofitem);
        }

        maxoff++;
    }

    /*
     * we assume that during index creation the table scanned from beginning
     * to end, so ItemPointers are in monotonically increasing order.
     */
    if (btree->isBuild && GinPageRightMost(lpage))
        separator = freeSpace / sizeofitem;
    else
        separator = maxoff / 2;

    /* lpage's flags are read before lpage is reset, then taken from rpage */
    GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
    GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);

    /* first 'separator' items go to the left page ... */
    if (isleaf)
        memcpy(GinDataPageGetItemPointer(lpage, FirstOffsetNumber),
               vector, separator * sizeof(ItemPointerData));
    else
        memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
               vector, separator * sizeof(PostingItem));

    GinPageGetOpaque(lpage)->maxoff = separator;

    /* ... and the remainder to the right page */
    if (isleaf)
        memcpy(GinDataPageGetItemPointer(rpage, FirstOffsetNumber),
               vector + separator * sizeof(ItemPointerData),
               (maxoff - separator) * sizeof(ItemPointerData));
    else
        memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
               vector + separator * sizeof(PostingItem),
               (maxoff - separator) * sizeof(PostingItem));

    GinPageGetOpaque(rpage)->maxoff = maxoff - separator;

    /* set up right bound for left page: its own highest item */
    bound = GinDataPageGetRightBound(lpage);
    if (GinPageIsLeaf(lpage))
        *bound = *GinDataPageGetItemPointer(lpage,
                                            GinPageGetOpaque(lpage)->maxoff);
    else
        *bound = GinDataPageGetPostingItem(lpage,
                                       GinPageGetOpaque(lpage)->maxoff)->key;

    /* set up right bound for right page: the bound of the page before the split */
    bound = GinDataPageGetRightBound(rpage);
    *bound = oldbound;

    /* WAL record header */
    data.separator = separator;
    data.nitem = maxoff;
    data.rightbound = oldbound;

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) &data;
    rdata[0].len = sizeof(ginxlogSplitData);
    rdata[0].next = &rdata[1];

    /* WAL payload: all items of both halves, in order */
    rdata[1].buffer = InvalidBuffer;
    rdata[1].data = vector;
    rdata[1].len = maxoff * sizeofitem;
    rdata[1].next = NULL;

    return lpage;
}
/* * Places keys to page and fills WAL record. In case leaf page and * build mode puts all ItemPointers to page. * * If none of the keys fit, returns false without modifying the page. * * On insertion to an internal node, in addition to inserting the given item, * the downlink of the existing item at 'off' is updated to point to * 'updateblkno'. */ static bool dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, void *insertdata, BlockNumber updateblkno, XLogRecData **prdata) { Page page = BufferGetPage(buf); /* these must be static so they can be returned to caller */ static XLogRecData rdata[2]; /* quick exit if it doesn't fit */ if (!dataIsEnoughSpace(btree, buf, off, insertdata)) return false; *prdata = rdata; Assert(GinPageIsData(page)); /* Update existing downlink to point to next page (on internal page) */ if (!GinPageIsLeaf(page)) { PostingItem *pitem = GinDataPageGetPostingItem(page, off); PostingItemSetBlockNumber(pitem, updateblkno); } if (GinPageIsLeaf(page)) { GinBtreeDataLeafInsertData *items = insertdata; static ginxlogInsertDataLeaf data; uint32 savedPos = items->curitem; if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff) { /* usually, create index... 
*/ while (items->curitem < items->nitem) { GinDataPageAddItemPointer(page, items->items + items->curitem, off); off++; items->curitem++; } data.nitem = items->curitem - savedPos; } else { GinDataPageAddItemPointer(page, items->items + items->curitem, off); items->curitem++; data.nitem = 1; } rdata[0].buffer = buf; rdata[0].buffer_std = false; rdata[0].data = (char *) &data; rdata[0].len = offsetof(ginxlogInsertDataLeaf, items); rdata[0].next = &rdata[1]; rdata[1].buffer = buf; rdata[1].buffer_std = false; rdata[1].data = (char *) &items->items[savedPos]; rdata[1].len = sizeof(ItemPointerData) * data.nitem; rdata[1].next = NULL; } else { PostingItem *pitem = insertdata; GinDataPageAddPostingItem(page, pitem, off); rdata[0].buffer = buf; rdata[0].buffer_std = false; rdata[0].data = (char *) pitem; rdata[0].len = sizeof(PostingItem); rdata[0].next = NULL; } return true; }
/*
 * Choose the downlink to follow on a non-leaf entry page.
 *
 * The page in stack->buffer is assumed to have been chosen correctly, i.e.
 * the searched value should belong to one of its children.  stack->off is
 * set to the offset of the chosen tuple and the block number stored in that
 * tuple's t_tid is returned.
 *
 * For a full scan no comparison is done: stack->off is set to the first
 * offset, stack->predictNumber is multiplied by the number of items on the
 * page, and btree->getLeftMostPage() supplies the result.
 */
static BlockNumber
entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
{
    Page        page = BufferGetPage(stack->buffer);
    IndexTuple  itup = NULL;
    OffsetNumber lo,
                hi,
                last;

    Assert(!GinPageIsLeaf(page));
    Assert(!GinPageIsData(page));

    if (btree->fullScan)
    {
        stack->off = FirstOffsetNumber;
        stack->predictNumber *= PageGetMaxOffsetNumber(page);
        return btree->getLeftMostPage(btree, page);
    }

    lo = FirstOffsetNumber;
    last = hi = PageGetMaxOffsetNumber(page);
    Assert(hi >= lo);

    /* binary search over the half-open range [lo, hi) */
    hi++;

    while (lo < hi)
    {
        OffsetNumber mid = lo + ((hi - lo) / 2);
        int         cmp;

        if (mid == last && GinPageRightMost(page))
        {
            /* last tuple of a rightmost page acts as "right infinity" */
            cmp = -1;
        }
        else
        {
            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
            cmp = compareAttEntries(btree->ginstate,
                                    btree->entryAttnum,
                                    btree->entryValue,
                                    gintuple_get_attrnum(btree->ginstate, itup),
                                    gin_index_getattr(btree->ginstate, itup));
        }

        if (cmp > 0)
            lo = mid + 1;
        else if (cmp < 0)
            hi = mid;
        else
        {
            /* exact match: follow this tuple's downlink */
            stack->off = mid;
            Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
            return GinItemPointerGetBlockNumber(&(itup)->t_tid);
        }
    }

    /* no exact match: 'hi' is the first tuple not less than the key */
    Assert(hi >= FirstOffsetNumber && hi <= last);

    stack->off = hi;
    itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, hi));
    Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
    return GinItemPointerGetBlockNumber(&(itup)->t_tid);
}