/* * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple * at that offset is atomically removed along with inserting the new tuples. * This is used to replace a tuple with a new one. * * If 'leftchildbuf' is valid, we're inserting the downlink for the page * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'. * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set. * * If 'markfollowright' is true and the page is split, the left child is * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered * index build, however, there is no concurrent access and the page splitting * is done in a slightly simpler fashion, and false is passed. * * If there is not enough room on the page, it is split. All the split * pages are kept pinned and locked and returned in *splitinfo, the caller * is responsible for inserting the downlinks for them. However, if * 'buffer' is the root page and it needs to be split, gistplacetopage() * performs the split as one atomic operation, and *splitinfo is set to NIL. * In that case, we continue to hold the root page locked, and the child * pages are released; note that new tuple(s) are *not* on the root page * but in one of the new child pages. * * If 'newblkno' is not NULL, returns the block number of page the first * new/updated tuple was inserted to. Usually it's the given page, but could * be its right sibling if the page was split. * * Returns 'true' if the page was split, 'false' otherwise. */ bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, Buffer buffer, IndexTuple *itup, int ntup, OffsetNumber oldoffnum, BlockNumber *newblkno, Buffer leftchildbuf, List **splitinfo, bool markfollowright) { BlockNumber blkno = BufferGetBlockNumber(buffer); Page page = BufferGetPage(buffer); bool is_leaf = (GistPageIsLeaf(page)) ? true : false; XLogRecPtr recptr; int i; bool is_split; /* * Refuse to modify a page that's incompletely split. 
This should not * happen because we finish any incomplete splits while we walk down the * tree. However, it's remotely possible that another concurrent inserter * splits a parent page, and errors out before completing the split. We * will just throw an error in that case, and leave any split we had in * progress unfinished too. The next insert that comes along will clean up * the mess. */ if (GistFollowRight(page)) elog(ERROR, "concurrent GiST page split was incomplete"); *splitinfo = NIL; /* * if isupdate, remove old key: This node's key has been modified, either * because a child split occurred or because we needed to adjust our key * for an insert in a child node. Therefore, remove the old version of * this node's key. * * for WAL replay, in the non-split case we handle this by setting up a * one-element todelete array; in the split case, it's handled implicitly * because the tuple vector passed to gistSplit won't include this tuple. */ is_split = gistnospace(page, itup, ntup, oldoffnum, freespace); if (is_split) { /* no space for insertion */ IndexTuple *itvec; int tlen; SplitedPageLayout *dist = NULL, *ptr; BlockNumber oldrlink = InvalidBlockNumber; GistNSN oldnsn = 0; SplitedPageLayout rootpg; bool is_rootsplit; is_rootsplit = (blkno == GIST_ROOT_BLKNO); /* * Form index tuples vector to split. If we're replacing an old tuple, * remove the old version from the vector. */ itvec = gistextractpage(page, &tlen); if (OffsetNumberIsValid(oldoffnum)) { /* on inner page we should remove old tuple */ int pos = oldoffnum - FirstOffsetNumber; tlen--; if (pos != tlen) memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos)); } itvec = gistjoinvector(itvec, &tlen, itup, ntup); dist = gistSplit(rel, page, itvec, tlen, giststate); /* * Set up pages to work with. Allocate new buffers for all but the * leftmost page. The original page becomes the new leftmost page, and * is just replaced with the new contents. 
* * For a root-split, allocate new buffers for all child pages, the * original page is overwritten with new root page containing * downlinks to the new child pages. */ ptr = dist; if (!is_rootsplit) { /* save old rightlink and NSN */ oldrlink = GistPageGetOpaque(page)->rightlink; oldnsn = GistPageGetNSN(page); dist->buffer = buffer; dist->block.blkno = BufferGetBlockNumber(buffer); dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer)); /* clean all flags except F_LEAF */ GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0; ptr = ptr->next; } for (; ptr; ptr = ptr->next) { /* Allocate new page */ ptr->buffer = gistNewBuffer(rel); GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0); ptr->page = BufferGetPage(ptr->buffer); ptr->block.blkno = BufferGetBlockNumber(ptr->buffer); } /* * Now that we know which blocks the new pages go to, set up downlink * tuples to point to them. */ for (ptr = dist; ptr; ptr = ptr->next) { ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno); GistTupleSetValid(ptr->itup); } /* * If this is a root split, we construct the new root page with the * downlinks here directly, instead of requiring the caller to insert * them. Add the new root page to the list along with the child pages. 
*/ if (is_rootsplit) { IndexTuple *downlinks; int ndownlinks = 0; int i; rootpg.buffer = buffer; rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer)); GistPageGetOpaque(rootpg.page)->flags = 0; /* Prepare a vector of all the downlinks */ for (ptr = dist; ptr; ptr = ptr->next) ndownlinks++; downlinks = palloc(sizeof(IndexTuple) * ndownlinks); for (i = 0, ptr = dist; ptr; ptr = ptr->next) downlinks[i++] = ptr->itup; rootpg.block.blkno = GIST_ROOT_BLKNO; rootpg.block.num = ndownlinks; rootpg.list = gistfillitupvec(downlinks, ndownlinks, &(rootpg.lenlist)); rootpg.itup = NULL; rootpg.next = dist; dist = &rootpg; } else { /* Prepare split-info to be returned to caller */ for (ptr = dist; ptr; ptr = ptr->next) { GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo)); si->buf = ptr->buffer; si->downlink = ptr->itup; *splitinfo = lappend(*splitinfo, si); } } /* * Fill all pages. All the pages are new, ie. freshly allocated empty * pages, or a temporary copy of the old page. */ for (ptr = dist; ptr; ptr = ptr->next) { char *data = (char *) (ptr->list); for (i = 0; i < ptr->block.num; i++) { IndexTuple thistup = (IndexTuple) data; if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel)); /* * If this is the first inserted/updated tuple, let the caller * know which page it landed on. */ if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid)) *newblkno = ptr->block.blkno; data += IndexTupleSize(thistup); } /* Set up rightlinks */ if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO) GistPageGetOpaque(ptr->page)->rightlink = ptr->next->block.blkno; else GistPageGetOpaque(ptr->page)->rightlink = oldrlink; /* * Mark the all but the right-most page with the follow-right * flag. 
It will be cleared as soon as the downlink is inserted * into the parent, but this ensures that if we error out before * that, the index is still consistent. (in buffering build mode, * any error will abort the index build anyway, so this is not * needed.) */ if (ptr->next && !is_rootsplit && markfollowright) GistMarkFollowRight(ptr->page); else GistClearFollowRight(ptr->page); /* * Copy the NSN of the original page to all pages. The * F_FOLLOW_RIGHT flags ensure that scans will follow the * rightlinks until the downlinks are inserted. */ GistPageSetNSN(ptr->page, oldnsn); } START_CRIT_SECTION(); /* * Must mark buffers dirty before XLogInsert, even though we'll still * be changing their opaque fields below. */ for (ptr = dist; ptr; ptr = ptr->next) MarkBufferDirty(ptr->buffer); if (BufferIsValid(leftchildbuf)) MarkBufferDirty(leftchildbuf); /* * The first page in the chain was a temporary working copy meant to * replace the old page. Copy it over the old page. */ PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer)); dist->page = BufferGetPage(dist->buffer); /* Write the WAL record */ if (RelationNeedsWAL(rel)) recptr = gistXLogSplit(rel->rd_node, blkno, is_leaf, dist, oldrlink, oldnsn, leftchildbuf, markfollowright); else recptr = gistGetFakeLSN(rel); for (ptr = dist; ptr; ptr = ptr->next) { PageSetLSN(ptr->page, recptr); } /* * Return the new child buffers to the caller. * * If this was a root split, we've already inserted the downlink * pointers, in the form of a new root page. Therefore we can release * all the new buffers, and keep just the root page locked. */ if (is_rootsplit) { for (ptr = dist->next; ptr; ptr = ptr->next) UnlockReleaseBuffer(ptr->buffer); } } else { /* * Enough space. We also get here if ntuples==0. 
*/ START_CRIT_SECTION(); if (OffsetNumberIsValid(oldoffnum)) PageIndexTupleDelete(page, oldoffnum); gistfillbuffer(page, itup, ntup, InvalidOffsetNumber); MarkBufferDirty(buffer); if (BufferIsValid(leftchildbuf)) MarkBufferDirty(leftchildbuf); if (RelationNeedsWAL(rel)) { OffsetNumber ndeloffs = 0, deloffs[1]; if (OffsetNumberIsValid(oldoffnum)) { deloffs[0] = oldoffnum; ndeloffs = 1; } recptr = gistXLogUpdate(rel->rd_node, buffer, deloffs, ndeloffs, itup, ntup, leftchildbuf); PageSetLSN(page, recptr); } else { recptr = gistGetFakeLSN(rel); PageSetLSN(page, recptr); } if (newblkno) *newblkno = blkno; } /* * If we inserted the downlink for a child page, set NSN and clear * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to * follow the rightlink if and only if they looked at the parent page * before we inserted the downlink. * * Note that we do this *after* writing the WAL record. That means that * the possible full page image in the WAL record does not include these * changes, and they must be replayed even if the page is restored from * the full page image. There's a chicken-and-egg problem: if we updated * the child pages first, we wouldn't know the recptr of the WAL record * we're about to write. */ if (BufferIsValid(leftchildbuf)) { Page leftpg = BufferGetPage(leftchildbuf); GistPageSetNSN(leftpg, recptr); GistClearFollowRight(leftpg); PageSetLSN(leftpg, recptr); } END_CRIT_SECTION(); return is_split; }
/*
 * vacuumSplitPage -- split a page during vacuum.
 *
 * The tuples currently on 'tempPage' are joined with the 'curlenaddon'
 * tuples in 'addon' and handed to gistSplit(); the resulting page layout is
 * then materialized.  'buffer' is the buffer of the page being split; its
 * block number decides whether this is a root split.
 *
 * For a non-root split, the first page of the layout reuses 'buffer' and
 * 'tempPage' (which is copied back over the buffer's page inside the
 * critical section).  For a root split, 'tempPage' is freed, every page of
 * the layout gets a new buffer, and gistnewroot() is called on 'buffer' with
 * the collected downlink tuples.
 *
 * Returns an ArrayTuple whose 'itup' array holds a freshly palloc'd copy of
 * each resulting page's downlink tuple and whose 'ituplen' is their count.
 * The split computation itself runs in gv->opCtx, which is reset before
 * returning; the returned copies are allocated after switching back to the
 * caller's context.
 */
static ArrayTuple
vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon, int curlenaddon)
{
	ArrayTuple	res = {NULL, 0, false};
	IndexTuple *vec;
	SplitedPageLayout *dist = NULL,
			   *ptr;
	int			i,
				veclen = 0;
	BlockNumber blkno = BufferGetBlockNumber(buffer);
	/* do the split work in the per-operation context */
	MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);

	vec = gistextractpage(tempPage, &veclen);
	vec = gistjoinvector(vec, &veclen, addon, curlenaddon);
	dist = gistSplit(gv->index, tempPage, vec, veclen, &(gv->giststate));

	MemoryContextSwitchTo(oldCtx);

	if (blkno != GIST_ROOT_BLKNO)
	{
		/* if non-root split then we should not allocate new buffer */
		dist->buffer = buffer;
		dist->page = tempPage;
		/* during vacuum we never split leaf page */
		GistPageGetOpaque(dist->page)->flags = 0;
	}
	else
		pfree(tempPage);

	/* sized by the tuple count, an upper bound on the number of pages */
	res.itup = (IndexTuple *) palloc(sizeof(IndexTuple) * veclen);
	res.ituplen = 0;

	/* make new pages and fill them */
	for (ptr = dist; ptr; ptr = ptr->next)
	{
		char	   *data;

		/* pages that were not given a buffer above get a new one */
		if (ptr->buffer == InvalidBuffer)
		{
			ptr->buffer = gistNewBuffer(gv->index);
			GISTInitBuffer(ptr->buffer, 0);
			ptr->page = BufferGetPage(ptr->buffer);
		}
		ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);

		/* ptr->list is a packed sequence of ptr->block.num index tuples */
		data = (char *) (ptr->list);
		for (i = 0; i < ptr->block.num; i++)
		{
			if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
				elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gv->index));
			data += IndexTupleSize((IndexTuple) data);
		}

		/* point the downlink at its page and hand a copy to the caller */
		ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
		res.itup[res.ituplen] = (IndexTuple) palloc(IndexTupleSize(ptr->itup));
		memcpy(res.itup[res.ituplen], ptr->itup, IndexTupleSize(ptr->itup));
		res.ituplen++;
	}

	START_CRIT_SECTION();

	/* buffers are marked dirty before the WAL record is inserted below */
	for (ptr = dist; ptr; ptr = ptr->next)
	{
		MarkBufferDirty(ptr->buffer);
		GistPageGetOpaque(ptr->page)->rightlink = InvalidBlockNumber;
	}

	/* restore the split non-root page from its temporary copy */
	if (blkno != GIST_ROOT_BLKNO)
	{
		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
		dist->page = BufferGetPage(dist->buffer);
	}

	if (!gv->index->rd_istemp)
	{
		XLogRecPtr	recptr;
		XLogRecData *rdata;
		ItemPointerData key;	/* set key for incomplete insert */
		char	   *xlinfo;

		ItemPointerSet(&key, blkno, TUPLE_IS_VALID);

		rdata = formSplitRdata(gv->index->rd_node, blkno,
							   false, &key, dist);
		/* remember the first data chunk so it can be freed after insert */
		xlinfo = rdata->data;

		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			PageSetLSN(BufferGetPage(ptr->buffer), recptr);
			PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
		}

		pfree(xlinfo);
		pfree(rdata);
	}
	else
	{
		/* temp relation: no WAL record, use the placeholder LSN */
		for (ptr = dist; ptr; ptr = ptr->next)
			PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
	}

	for (ptr = dist; ptr; ptr = ptr->next)
	{
		/* we must keep the buffer pin on the head page */
		if (BufferGetBlockNumber(ptr->buffer) != blkno)
			UnlockReleaseBuffer(ptr->buffer);
	}

	if (blkno == GIST_ROOT_BLKNO)
	{
		ItemPointerData key;	/* set key for incomplete insert */

		ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
		gistnewroot(gv->index, buffer, res.itup, res.ituplen, &key);
	}

	END_CRIT_SECTION();

	/* discard everything allocated by the split computation */
	MemoryContextReset(gv->opCtx);

	return res;
}
/*
 * gistlayerinsert -- recursively insert tuples into the subtree whose top
 * page is block 'blkno' of relation 'r'.
 *
 * On an internal page, gistchoose() picks a child entry, the function
 * recurses into that child, and the child's old key on this page is then
 * deleted and replaced by the tuple(s) the recursion left in *itup.  On a
 * leaf page the tuples in *itup are placed directly.
 *
 * 'itup' / 'len' are in-out: on entry they hold the tuples to insert; on
 * return they hold the tuple(s) the parent level has to insert for this
 * page (one per resulting page after a split, otherwise a single key).
 * 'res', if not NULL, receives the item pointer at which the tuple was
 * stored on a leaf page.
 *
 * Returns a bit mask: INSERTED if this page was modified, additionally
 * SPLITED if it had to be split, or 0x00 if the recursion changed nothing
 * that requires updating this page.
 */
static int
gistlayerinsert(Relation r, BlockNumber blkno,
				IndexTuple **itup,		/* in - out, has compressed entry */
				int *len,		/* in - out */
				InsertIndexResult *res, /* out */
				GISTSTATE *giststate)
{
	Buffer		buffer;
	Page		page;
	OffsetNumber child;
	int			ret;
	GISTPageOpaque opaque;

	buffer = ReadBuffer(r, blkno);
	page = (Page) BufferGetPage(buffer);
	opaque = (GISTPageOpaque) PageGetSpecialPointer(page);

	if (!(opaque->flags & F_LEAF))
	{
		/* internal page, so we must walk down the tree */
		/* len IS equal to 1 */
		ItemId		iid;
		BlockNumber nblkno;
		ItemPointerData oldtid;
		IndexTuple	oldtup;

		child = gistchoose(r, page, *(*itup), giststate);
		iid = PageGetItemId(page, child);
		oldtup = (IndexTuple) PageGetItem(page, iid);
		nblkno = ItemPointerGetBlockNumber(&(oldtup->t_tid));

		/*
		 * After this call: 1. if the child page was split, then itup
		 * contains keys for each page 2. if the child page wasn't split,
		 * then itup contains the addition needed to adjust the current key
		 */
		ret = gistlayerinsert(r, nblkno, itup, len, res, giststate);

		/* nothing inserted in child */
		if (!(ret & INSERTED))
		{
			ReleaseBuffer(buffer);
			return 0x00;
		}

		/* child was not split */
		if (!(ret & SPLITED))
		{
			IndexTuple	newtup = gistgetadjusted(r, oldtup, (*itup)[0], giststate);

			if (!newtup)
			{
				/* no need to update the key */
				ReleaseBuffer(buffer);
				return 0x00;
			}

			/* the adjusted key replaces the tuple passed up from the child */
			pfree((*itup)[0]);	/* !!! */
			(*itup)[0] = newtup;
		}

		/* key is modified, so old version must be deleted */
		ItemPointerSet(&oldtid, blkno, child);
		gistdelete(r, &oldtid);

		/*
		 * if the child was split, the new key for the child will be inserted
		 * at the end of the item list, so we must tell any scans that the
		 * page has changed beginning from the 'child' offset
		 */
		if (ret & SPLITED)
			gistadjscans(r, GISTOP_SPLIT, blkno, child);
	}

	/* from here on, 'ret' describes what happens to this page */
	ret = INSERTED;

	if (gistnospace(page, (*itup), *len))
	{
		/* no space for insertion */
		IndexTuple *itvec,
				   *newitup;
		int			tlen,
					oldlen;

		ret |= SPLITED;
		itvec = gistreadbuffer(buffer, &tlen);
		itvec = gistjoinvector(itvec, &tlen, (*itup), *len);
		oldlen = *len;
		newitup = gistSplit(r, buffer, itvec, &tlen, giststate,
							(opaque->flags & F_LEAF) ? res : NULL);		/* res only for
																		 * inserting in leaf */
		ReleaseBuffer(buffer);
		/* free the input tuples, last to first, then the array itself */
		do
			pfree((*itup)[oldlen - 1]);
		while ((--oldlen) > 0);
		pfree((*itup));
		pfree(itvec);
		/* hand the per-page keys produced by the split up to the caller */
		*itup = newitup;
		*len = tlen;			/* now tlen >= 2 */
	}
	else
	{
		/* enough space */
		OffsetNumber off,
					l;

		/* append after the last existing item */
		off = (PageIsEmpty(page)) ?
			FirstOffsetNumber
			:
			OffsetNumberNext(PageGetMaxOffsetNumber(page));
		l = gistwritebuffer(r, page, (*itup), *len, off);
		WriteBuffer(buffer);

		/*
		 * set res if inserting into a leaf page; in this case, len = 1 always
		 */
		if (res && (opaque->flags & F_LEAF))
			ItemPointerSet(&((*res)->pointerData), blkno, l);

		if (*len > 1)
		{						/* previous insert ret & SPLITED != 0 */
			int			i;

			/*
			 * child was split, so we must form the union for insertion in
			 * the parent
			 */
			IndexTuple	newtup = gistunion(r, (*itup), *len, giststate);

			ItemPointerSet(&(newtup->t_tid), blkno, 1);

			/* the individual keys are replaced by their single union */
			for (i = 0; i < *len; i++)
				pfree((*itup)[i]);
			(*itup)[0] = newtup;
			*len = 1;
		}
	}

	return ret;
}
/*
 * gistplacetopage -- place state->itup[0 .. state->ituplen-1] on the page at
 * the top of the insertion stack (state->stack).
 *
 * On an inner page the entry at state->stack->childoffnum is replaced by
 * the new tuples; on a leaf page the tuples are simply added.  If the page
 * does not have room (per gistnospace() with state->freespace), it is split
 * via gistSplit(), and state->itup / state->ituplen are replaced by the
 * downlink tuples of the resulting pages so the caller can insert them into
 * the parent.  A root split additionally builds the new root through
 * gistnewroot().
 *
 * In the non-split case, state->itup is reduced to a single tuple carrying
 * this page's block number (the union of the tuples if there were several).
 *
 * All page modifications and the WAL insertion happen inside a critical
 * section.  Returns true if the page was split, false otherwise.
 */
static bool
gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
{
	bool		is_splitted = false;
	bool		is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	/*
	 * if (!is_leaf) remove old key: This node's key has been modified, either
	 * because a child split occurred or because we needed to adjust our key
	 * for an insert in a child node.  Therefore, remove the old version of
	 * this node's key.
	 *
	 * for WAL replay, in the non-split case we handle this by setting up a
	 * one-element todelete array; in the split case, it's handled implicitly
	 * because the tuple vector passed to gistSplit won't include this tuple.
	 *
	 * XXX: If we want to change fillfactors between node and leaf, fillfactor
	 * = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
	 */
	if (gistnospace(state->stack->page, state->itup, state->ituplen,
					is_leaf ? InvalidOffsetNumber : state->stack->childoffnum,
					state->freespace))
	{
		/* no space for insertion */
		IndexTuple *itvec;
		int			tlen;
		SplitedPageLayout *dist = NULL,
				   *ptr;
		BlockNumber rrlink = InvalidBlockNumber;
		GistNSN		oldnsn;

		is_splitted = true;

		/*
		 * Form index tuples vector to split: remove old tuple if it's needed
		 * and add new tuples to vector
		 */
		itvec = gistextractpage(state->stack->page, &tlen);
		if (!is_leaf)
		{
			/* on inner page we should remove old tuple */
			int			pos = state->stack->childoffnum - FirstOffsetNumber;

			tlen--;
			/* close the gap unless the removed tuple was the last one */
			if (pos != tlen)
				memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
		}
		itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
		dist = gistSplit(state->r, state->stack->page, itvec, tlen, giststate);

		/* state->itup is rebuilt below with one downlink per new page */
		state->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * tlen);
		state->ituplen = 0;

		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			/*
			 * if non-root split then we should not allocate new buffer, but
			 * we must create temporary page to operate
			 */
			dist->buffer = state->stack->buffer;
			dist->page = PageGetTempPage(BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData));

			/* clean all flags except F_LEAF */
			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
		}

		/* make new pages and fill them */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			int			i;
			char	   *data;

			/* get new page */
			if (ptr->buffer == InvalidBuffer)
			{
				ptr->buffer = gistNewBuffer(state->r);
				GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
				ptr->page = BufferGetPage(ptr->buffer);
			}
			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);

			/*
			 * fill page, we can do it because all these pages are new (ie
			 * not linked in tree or masked by temp page)
			 */
			data = (char *) (ptr->list);
			for (i = 0; i < ptr->block.num; i++)
			{
				if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
					elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->r));
				data += IndexTupleSize((IndexTuple) data);
			}

			/* set up ItemPointer and remember it for parent */
			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
			state->itup[state->ituplen] = ptr->itup;
			state->ituplen++;
		}

		/* save the old rightlink (read from the temporary page copy) */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
			rrlink = GistPageGetOpaque(dist->page)->rightlink;

		START_CRIT_SECTION();

		/*
		 * must mark buffers dirty before XLogInsert, even though we'll still
		 * be changing their opaque fields below. set up right links: each
		 * page points to the next one, the last one gets the old rightlink.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			MarkBufferDirty(ptr->buffer);
			GistPageGetOpaque(ptr->page)->rightlink = (ptr->next) ?
				ptr->next->block.blkno : rrlink;
		}

		/* restore the split non-root page from its temporary copy */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
			dist->page = BufferGetPage(dist->buffer);
		}

		if (!state->r->rd_istemp)
		{
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			rdata = formSplitRdata(state->r, state->stack->blkno,
								   is_leaf, &(state->key), dist);

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);

			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, recptr);
				PageSetTLI(ptr->page, ThisTimeLineID);
			}
		}
		else
		{
			/* temp relation: no WAL record, use the placeholder LSN */
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, XLogRecPtrForTemp);
			}
		}

		/* set up NSN */
		oldnsn = GistPageGetOpaque(dist->page)->nsn;
		if (state->stack->blkno == GIST_ROOT_BLKNO)
			/* if root split we should put initial value */
			oldnsn = PageGetLSN(dist->page);

		for (ptr = dist; ptr; ptr = ptr->next)
		{
			/* only the last page gets oldnsn; the others get their new LSN */
			GistPageGetOpaque(ptr->page)->nsn = (ptr->next) ?
				PageGetLSN(ptr->page) : oldnsn;
		}

		/*
		 * release buffers; if it was a root split then release all buffers
		 * because we created all of them, otherwise keep the first one
		 * (the caller's buffer)
		 */
		ptr = (state->stack->blkno == GIST_ROOT_BLKNO) ? dist : dist->next;
		for (; ptr; ptr = ptr->next)
			UnlockReleaseBuffer(ptr->buffer);

		if (state->stack->blkno == GIST_ROOT_BLKNO)
		{
			gistnewroot(state->r, state->stack->buffer,
						state->itup, state->ituplen,
						&(state->key));
			state->needInsertComplete = false;
		}

		END_CRIT_SECTION();
	}
	else
	{
		/* enough space */
		START_CRIT_SECTION();

		if (!is_leaf)
			PageIndexTupleDelete(state->stack->page, state->stack->childoffnum);

		gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber);

		MarkBufferDirty(state->stack->buffer);

		if (!state->r->rd_istemp)
		{
			OffsetNumber noffs = 0,
						offs[1];
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			if (!is_leaf)
			{
				/* only on inner page we should delete previous version */
				offs[0] = state->stack->childoffnum;
				noffs = 1;
			}

			rdata = formUpdateRdata(state->r, state->stack->buffer,
									offs, noffs,
									state->itup, state->ituplen,
									&(state->key));

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
			PageSetLSN(state->stack->page, recptr);
			PageSetTLI(state->stack->page, ThisTimeLineID);
		}
		else
			PageSetLSN(state->stack->page, XLogRecPtrForTemp);

		if (state->stack->blkno == GIST_ROOT_BLKNO)
			state->needInsertComplete = false;

		END_CRIT_SECTION();

		if (state->ituplen > 1)
		{						/* previous is_splitted==true */

			/*
			 * child was split, so we must form the union for insertion in
			 * the parent
			 */
			IndexTuple	newtup = gistunion(state->r, state->itup, state->ituplen, giststate);

			ItemPointerSetBlockNumber(&(newtup->t_tid), state->stack->blkno);
			state->itup[0] = newtup;
			state->ituplen = 1;
		}
		else if (is_leaf)
		{
			/*
			 * itup[0] stores the key used to adjust the parent; we set it to
			 * valid so the GistTupleIsInvalid macro check in
			 * gistgetadjusted() is correct
			 */
			ItemPointerSetBlockNumber(&(state->itup[0]->t_tid), state->stack->blkno);
			GistTupleSetValid(state->itup[0]);
		}
	}
	return is_splitted;
}
/*
 * gistSplit -- split a page in the tree.
 *
 * 'buffer' holds the page to split and 'itup' the *len tuples (those on the
 * page plus the new ones) to distribute.  The user-defined picksplit
 * function for the first column divides the tuples into a left and a right
 * group.  Each group is written to its page, or split again recursively if
 * it still does not fit.
 *
 * If the page is the root (GISTP_ROOT), both halves go to newly allocated
 * pages; otherwise the left half is built on a temporary copy that is
 * restored over the original page and the right half gets a new page.
 *
 * 'res', if not NULL, receives the item pointer of the location written by
 * gistwritebuffer(); in recursive calls it is only passed on to the half
 * whose last tuple is the last tuple of 'itup'.
 *
 * Returns a palloc'd array with one downlink tuple per resulting page and
 * stores the number of entries in *len.
 */
static IndexTuple *
gistSplit(Relation r,
		  Buffer buffer,
		  IndexTuple *itup,		/* contains compressed entry */
		  int *len,
		  GISTSTATE *giststate,
		  InsertIndexResult *res)
{
	Page		p;
	Buffer		leftbuf,
				rightbuf;
	Page		left,
				right;
	IndexTuple *lvectup,
			   *rvectup,
			   *newtup;
	BlockNumber lbknum,
				rbknum;
	GISTPageOpaque opaque;
	GIST_SPLITVEC v;
	GistEntryVector *entryvec;
	bool	   *decompvec;
	int			i,
				j,
				nlen;
	int			MaxGrpId = 1;
	Datum		datum;
	bool		IsNull;

	p = (Page) BufferGetPage(buffer);
	opaque = (GISTPageOpaque) PageGetSpecialPointer(p);

	/*
	 * The root of the tree is the first block in the relation.  If we're
	 * about to split the root, we need to do some hocus-pocus to enforce
	 * this guarantee.
	 */
	if (BufferGetBlockNumber(buffer) == GISTP_ROOT)
	{
		leftbuf = ReadBuffer(r, P_NEW);
		GISTInitBuffer(leftbuf, opaque->flags);
		lbknum = BufferGetBlockNumber(leftbuf);
		left = (Page) BufferGetPage(leftbuf);
	}
	else
	{
		/* reuse the original buffer; work on a temporary copy of the page */
		leftbuf = buffer;
		IncrBufferRefCount(buffer);
		lbknum = BufferGetBlockNumber(buffer);
		left = (Page) PageGetTempPage(p, sizeof(GISTPageOpaqueData));
	}

	rightbuf = ReadBuffer(r, P_NEW);
	GISTInitBuffer(rightbuf, opaque->flags);
	rbknum = BufferGetBlockNumber(rightbuf);
	right = (Page) BufferGetPage(rightbuf);

	/* generate the item array; entries are 1-based, slot 0 is unused here */
	entryvec = palloc(GEVHDRSZ + (*len + 1) * sizeof(GISTENTRY));
	entryvec->n = *len + 1;
	decompvec = (bool *) palloc((*len + 1) * sizeof(bool));

	for (i = 1; i <= *len; i++)
	{
		datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
		gistdentryinit(giststate, 0, &(entryvec->vector[i]),
					   datum, r, p, i,
					   ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
					   FALSE, IsNull);
		/* remember entries whose key differs from the stored datum */
		if ((!isAttByVal(giststate, 0)) && entryvec->vector[i].key != datum)
			decompvec[i] = TRUE;
		else
			decompvec[i] = FALSE;
	}

	/*
	 * now let the user-defined picksplit function set up the split
	 * vector; in entryvec have no null value!!
	 */
	FunctionCall2(&giststate->picksplitFn[0],
				  PointerGetDatum(entryvec),
				  PointerGetDatum(&v));

	/* compatibility with old code */
	if (v.spl_left[v.spl_nleft - 1] == InvalidOffsetNumber)
		v.spl_left[v.spl_nleft - 1] = (OffsetNumber) *len;
	if (v.spl_right[v.spl_nright - 1] == InvalidOffsetNumber)
		v.spl_right[v.spl_nright - 1] = (OffsetNumber) *len;

	/* the picksplit result supplies the first-column union keys */
	v.spl_lattr[0] = v.spl_ldatum;
	v.spl_rattr[0] = v.spl_rdatum;
	v.spl_lisnull[0] = false;
	v.spl_risnull[0] = false;

	/*
	 * if the index is multikey, then we must try to get a smaller bounding
	 * box for the subkey(s)
	 */
	if (r->rd_att->natts > 1)
	{
		v.spl_idgrp = (int *) palloc0(sizeof(int) * (*len + 1));
		v.spl_grpflag = (char *) palloc0(sizeof(char) * (*len + 1));
		v.spl_ngrp = (int *) palloc(sizeof(int) * (*len + 1));

		MaxGrpId = gistfindgroup(giststate, entryvec->vector, &v);

		/* form union of sub keys for each page (l,p) */
		gistunionsubkey(r, giststate, itup, &v);

		/*
		 * if possible, we insert equivalent tuples with control by
		 * penalty for a subkey(s)
		 */
		if (MaxGrpId > 1)
			gistadjsubkey(r, itup, len, &v, giststate);

		pfree(v.spl_idgrp);
		pfree(v.spl_grpflag);
		pfree(v.spl_ngrp);
	}

	/* clean up the entry vector: its keys need to be deleted, too */
	for (i = 1; i <= *len; i++)
		if (decompvec[i])
			pfree(DatumGetPointer(entryvec->vector[i].key));
	pfree(entryvec);
	pfree(decompvec);

	/* form left and right vector (split offsets are 1-based) */
	lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nleft);
	rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nright);

	for (i = 0; i < v.spl_nleft; i++)
		lvectup[i] = itup[v.spl_left[i] - 1];

	for (i = 0; i < v.spl_nright; i++)
		rvectup[i] = itup[v.spl_right[i] - 1];

	/* write on disk (may need another split) */
	if (gistnospace(right, rvectup, v.spl_nright))
	{
		/* right half still does not fit: split it again */
		nlen = v.spl_nright;
		newtup = gistSplit(r, rightbuf, rvectup, &nlen, giststate,
						   (res && rvectup[nlen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(rightbuf);
		/* the right-side subkey unions are not used in this branch */
		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_risnull[j])
				pfree(DatumGetPointer(v.spl_rattr[j]));
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber);
		WriteBuffer(rightbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), rbknum, l);

		/* start the result array with the right page's downlink */
		nlen = 1;
		newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1);
		newtup[0] = gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull);
		ItemPointerSet(&(newtup[0]->t_tid), rbknum, 1);
	}

	if (gistnospace(left, lvectup, v.spl_nleft))
	{
		/* left half still does not fit: split it again */
		int			llen = v.spl_nleft;
		IndexTuple *lntup;

		lntup = gistSplit(r, leftbuf, lvectup, &llen, giststate,
						  (res && lvectup[llen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(leftbuf);

		/* the left-side subkey unions are not used in this branch */
		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_lisnull[j])
				pfree(DatumGetPointer(v.spl_lattr[j]));

		/* append the left side's downlinks to the result array */
		newtup = gistjoinvector(newtup, &nlen, lntup, llen);
		pfree(lntup);
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber);
		/* non-root: 'left' is a temp copy, put it over the original page */
		if (BufferGetBlockNumber(buffer) != GISTP_ROOT)
			PageRestoreTempPage(left, p);

		WriteBuffer(leftbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), lbknum, l);

		/* append the left page's downlink to the result array */
		nlen += 1;
		newtup = (IndexTuple *) repalloc((void *) newtup, sizeof(IndexTuple) * nlen);
		newtup[nlen - 1] = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull);
		ItemPointerSet(&(newtup[nlen - 1]->t_tid), lbknum, 1);
	}

	/* !!! pfree */
	pfree(rvectup);
	pfree(lvectup);
	pfree(v.spl_left);
	pfree(v.spl_right);

	*len = nlen;
	return newtup;
}