/* * Process an index tuple. Runs the tuple down the tree until we reach a leaf * page or node buffer, and inserts the tuple there. Returns true if we have * to stop buffer emptying process (because one of child buffers can't take * index tuples anymore). */ static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, BlockNumber startblkno, int startlevel) { GISTSTATE *giststate = buildstate->giststate; GISTBuildBuffers *gfbb = buildstate->gfbb; Relation indexrel = buildstate->indexrel; BlockNumber childblkno; Buffer buffer; bool result = false; BlockNumber blkno; int level; OffsetNumber downlinkoffnum = InvalidOffsetNumber; BlockNumber parentblkno = InvalidBlockNumber; CHECK_FOR_INTERRUPTS(); /* * Loop until we reach a leaf page (level == 0) or a level with buffers * (not including the level we start at, because we would otherwise make * no progress). */ blkno = startblkno; level = startlevel; for (;;) { ItemId iid; IndexTuple idxtuple, newtup; Page page; OffsetNumber childoffnum; /* Have we reached a level with buffers? */ if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel) break; /* Have we reached a leaf page? */ if (level == 0) break; /* * Nope. Descend down to the next level then. Choose a child to * descend down to. */ buffer = ReadBuffer(indexrel, blkno); LockBuffer(buffer, GIST_EXCLUSIVE); page = (Page) BufferGetPage(buffer); childoffnum = gistchoose(indexrel, page, itup, giststate); iid = PageGetItemId(page, childoffnum); idxtuple = (IndexTuple) PageGetItem(page, iid); childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); if (level > 1) gistMemorizeParent(buildstate, childblkno, blkno); /* * Check that the key representing the target child node is consistent * with the key we're inserting. Update it if it's not. */ newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate); if (newtup) { blkno = gistbufferinginserttuples(buildstate, buffer, level, &newtup, 1, childoffnum, InvalidBlockNumber, InvalidOffsetNumber); /* gistbufferinginserttuples() released the buffer */ } else UnlockReleaseBuffer(buffer); /* Descend to the child */ parentblkno = blkno; blkno = childblkno; downlinkoffnum = childoffnum; Assert(level > 0); level--; } if (LEVEL_HAS_BUFFERS(level, gfbb)) { /* * We've reached level with buffers. Place the index tuple to the * buffer, and add the buffer to the emptying queue if it overflows. */ GISTNodeBuffer *childNodeBuffer; /* Find the buffer or create a new one */ childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level); /* Add index tuple to it */ gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup); if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb)) result = true; } else { /* * We've reached a leaf page. Place the tuple here. */ Assert(level == 0); buffer = ReadBuffer(indexrel, blkno); LockBuffer(buffer, GIST_EXCLUSIVE); gistbufferinginserttuples(buildstate, buffer, level, &itup, 1, InvalidOffsetNumber, parentblkno, downlinkoffnum); /* gistbufferinginserttuples() released the buffer */ } return result; }
/* * Process an index tuple. Runs the tuple down the tree until we reach a leaf * page or node buffer, and inserts the tuple there. Returns true if we have * to stop buffer emptying process (because one of child buffers can't take * index tuples anymore). */ static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, GISTBufferingInsertStack *startparent) { GISTSTATE *giststate = buildstate->giststate; GISTBuildBuffers *gfbb = buildstate->gfbb; Relation indexrel = buildstate->indexrel; GISTBufferingInsertStack *path; BlockNumber childblkno; Buffer buffer; bool result = false; /* * NULL passed in startparent means that we start index tuple processing * from the root. */ if (!startparent) path = gfbb->rootitem; else path = startparent; /* * Loop until we reach a leaf page (level == 0) or a level with buffers * (not including the level we start at, because we would otherwise make * no progress). */ for (;;) { ItemId iid; IndexTuple idxtuple, newtup; Page page; OffsetNumber childoffnum; GISTBufferingInsertStack *parent; /* Have we reached a level with buffers? */ if (LEVEL_HAS_BUFFERS(path->level, gfbb) && path != startparent) break; /* Have we reached a leaf page? */ if (path->level == 0) break; /* * Nope. Descend down to the next level then. Choose a child to * descend down to. */ buffer = ReadBuffer(indexrel, path->blkno); LockBuffer(buffer, GIST_EXCLUSIVE); page = (Page) BufferGetPage(buffer); childoffnum = gistchoose(indexrel, page, itup, giststate); iid = PageGetItemId(page, childoffnum); idxtuple = (IndexTuple) PageGetItem(page, iid); childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); /* * Check that the key representing the target child node is consistent * with the key we're inserting. Update it if it's not. */ newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate); if (newtup) gistbufferinginserttuples(buildstate, buffer, &newtup, 1, childoffnum, path); UnlockReleaseBuffer(buffer); /* Create new path item representing current page */ parent = path; path = (GISTBufferingInsertStack *) MemoryContextAlloc(gfbb->context, sizeof(GISTBufferingInsertStack)); path->parent = parent; path->level = parent->level - 1; path->blkno = childblkno; path->downlinkoffnum = childoffnum; path->refCount = 0; /* it's unreferenced for now */ /* Adjust reference count of parent */ if (parent) parent->refCount++; } if (LEVEL_HAS_BUFFERS(path->level, gfbb)) { /* * We've reached level with buffers. Place the index tuple to the * buffer, and add the buffer to the emptying queue if it overflows. */ GISTNodeBuffer *childNodeBuffer; /* Find the buffer or create a new one */ childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, path->blkno, path->downlinkoffnum, path->parent); /* Add index tuple to it */ gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup); if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb)) result = true; } else { /* * We've reached a leaf page. Place the tuple here. */ buffer = ReadBuffer(indexrel, path->blkno); LockBuffer(buffer, GIST_EXCLUSIVE); gistbufferinginserttuples(buildstate, buffer, &itup, 1, InvalidOffsetNumber, path); UnlockReleaseBuffer(buffer); } /* * Free unreferenced path items, if any. Path item may be referenced by * node buffer. */ gistFreeUnreferencedPath(path); return result; }
static void gistfindleaf(GISTInsertState *state, GISTSTATE *giststate) { ItemId iid; IndexTuple idxtuple; GISTPageOpaque opaque; MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD; /* * walk down, We don't lock page for a long time, but so we should be * ready to recheck path in a bad case... We remember, that page->lsn * should never be invalid. */ for (;;) { if (XLogRecPtrIsInvalid(state->stack->lsn)) state->stack->buffer = ReadBuffer(state->r, state->stack->blkno); LockBuffer(state->stack->buffer, GIST_SHARE); gistcheckpage(state->r, state->stack->buffer); state->stack->page = (Page) BufferGetPage(state->stack->buffer); opaque = GistPageGetOpaque(state->stack->page); state->stack->lsn = PageGetLSN(state->stack->page); Assert(state->r->rd_istemp || !XLogRecPtrIsInvalid(state->stack->lsn)); if (state->stack->blkno != GIST_ROOT_BLKNO && XLByteLT(state->stack->parent->lsn, opaque->nsn)) { /* * caused split non-root page is detected, go up to parent to * choose best child */ UnlockReleaseBuffer(state->stack->buffer); state->stack = state->stack->parent; continue; } if (!GistPageIsLeaf(state->stack->page)) { /* * This is an internal page, so continue to walk down the tree. We * find the child node that has the minimum insertion penalty and * recursively invoke ourselves to modify that node. Once the * recursive call returns, we may need to adjust the parent node * for two reasons: the child node split, or the key in this node * needs to be adjusted for the newly inserted key below us. */ GISTInsertStack *item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack)); state->stack->childoffnum = gistchoose(state->r, state->stack->page, state->itup[0], giststate); iid = PageGetItemId(state->stack->page, state->stack->childoffnum); idxtuple = (IndexTuple) PageGetItem(state->stack->page, iid); item->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); LockBuffer(state->stack->buffer, GIST_UNLOCK); item->parent = state->stack; item->child = NULL; if (state->stack) state->stack->child = item; state->stack = item; } else { /* be carefull, during unlock/lock page may be changed... */ LockBuffer(state->stack->buffer, GIST_UNLOCK); LockBuffer(state->stack->buffer, GIST_EXCLUSIVE); state->stack->page = (Page) BufferGetPage(state->stack->buffer); opaque = GistPageGetOpaque(state->stack->page); if (state->stack->blkno == GIST_ROOT_BLKNO) { /* * the only page can become inner instead of leaf is a root * page, so for root we should recheck it */ if (!GistPageIsLeaf(state->stack->page)) { /* * very rarely situation: during unlock/lock index with * number of pages = 1 was increased */ LockBuffer(state->stack->buffer, GIST_UNLOCK); continue; } /* * we don't need to check root split, because checking * leaf/inner is enough to recognize split for root */ } else if (XLByteLT(state->stack->parent->lsn, opaque->nsn)) { /* * detecting split during unlock/lock, so we should find * better child on parent */ /* forget buffer */ UnlockReleaseBuffer(state->stack->buffer); state->stack = state->stack->parent; continue; } state->stack->lsn = PageGetLSN(state->stack->page); /* ok we found a leaf page and it X-locked */ break; } } /* now state->stack->(page, buffer and blkno) points to leaf page */ }
/* * Workhouse routine for doing insertion into a GiST index. Note that * this routine assumes it is invoked in a short-lived memory context, * so it does not bother releasing palloc'd allocations. */ void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) { ItemId iid; IndexTuple idxtuple; GISTInsertStack firststack; GISTInsertStack *stack; GISTInsertState state; bool xlocked = false; memset(&state, 0, sizeof(GISTInsertState)); state.freespace = freespace; state.r = r; /* Start from the root */ firststack.blkno = GIST_ROOT_BLKNO; firststack.lsn = 0; firststack.parent = NULL; firststack.downlinkoffnum = InvalidOffsetNumber; state.stack = stack = &firststack; /* * Walk down along the path of smallest penalty, updating the parent * pointers with the key we're inserting as we go. If we crash in the * middle, the tree is consistent, although the possible parent updates * were a waste. */ for (;;) { if (XLogRecPtrIsInvalid(stack->lsn)) stack->buffer = ReadBuffer(state.r, stack->blkno); /* * Be optimistic and grab shared lock first. Swap it for an exclusive * lock later if we need to update the page. */ if (!xlocked) { LockBuffer(stack->buffer, GIST_SHARE); gistcheckpage(state.r, stack->buffer); } stack->page = (Page) BufferGetPage(stack->buffer); stack->lsn = PageGetLSN(stack->page); Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn)); /* * If this page was split but the downlink was never inserted to the * parent because the inserting backend crashed before doing that, fix * that now. */ if (GistFollowRight(stack->page)) { if (!xlocked) { LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; /* someone might've completed the split when we unlocked */ if (!GistFollowRight(stack->page)) continue; } gistfixsplit(&state, giststate); UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; continue; } if (stack->blkno != GIST_ROOT_BLKNO && stack->parent->lsn < GistPageGetNSN(stack->page)) { /* * Concurrent split detected. There's no guarantee that the * downlink for this page is consistent with the tuple we're * inserting anymore, so go back to parent and rechoose the best * child. */ UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; continue; } if (!GistPageIsLeaf(stack->page)) { /* * This is an internal page so continue to walk down the tree. * Find the child node that has the minimum insertion penalty. */ BlockNumber childblkno; IndexTuple newtup; GISTInsertStack *item; OffsetNumber downlinkoffnum; downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate); iid = PageGetItemId(stack->page, downlinkoffnum); idxtuple = (IndexTuple) PageGetItem(stack->page, iid); childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); /* * Check that it's not a leftover invalid tuple from pre-9.1 */ if (GistTupleIsInvalid(idxtuple)) ereport(ERROR, (errmsg("index \"%s\" contains an inner tuple marked as invalid", RelationGetRelationName(r)), errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), errhint("Please REINDEX it."))); /* * Check that the key representing the target child node is * consistent with the key we're inserting. Update it if it's not. */ newtup = gistgetadjusted(state.r, idxtuple, itup, giststate); if (newtup) { /* * Swap shared lock for an exclusive one. Beware, the page may * change while we unlock/lock the page... */ if (!xlocked) { LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; stack->page = (Page) BufferGetPage(stack->buffer); if (PageGetLSN(stack->page) != stack->lsn) { /* the page was changed while we unlocked it, retry */ continue; } } /* * Update the tuple. * * We still hold the lock after gistinserttuple(), but it * might have to split the page to make the updated tuple fit. * In that case the updated tuple might migrate to the other * half of the split, so we have to go back to the parent and * descend back to the half that's a better fit for the new * tuple. */ if (gistinserttuple(&state, stack, giststate, newtup, downlinkoffnum)) { /* * If this was a root split, the root page continues to be * the parent and the updated tuple went to one of the * child pages, so we just need to retry from the root * page. */ if (stack->blkno != GIST_ROOT_BLKNO) { UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; } continue; } } LockBuffer(stack->buffer, GIST_UNLOCK); xlocked = false; /* descend to the chosen child */ item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack)); item->blkno = childblkno; item->parent = stack; item->downlinkoffnum = downlinkoffnum; state.stack = stack = item; } else { /* * Leaf page. Insert the new key. We've already updated all the * parents on the way down, but we might have to split the page if * it doesn't fit. gistinserthere() will take care of that. */ /* * Swap shared lock for an exclusive one. Be careful, the page may * change while we unlock/lock the page... */ if (!xlocked) { LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; stack->page = (Page) BufferGetPage(stack->buffer); stack->lsn = PageGetLSN(stack->page); if (stack->blkno == GIST_ROOT_BLKNO) { /* * the only page that can become inner instead of leaf is * the root page, so for root we should recheck it */ if (!GistPageIsLeaf(stack->page)) { /* * very rare situation: during unlock/lock index with * number of pages = 1 was increased */ LockBuffer(stack->buffer, GIST_UNLOCK); xlocked = false; continue; } /* * we don't need to check root split, because checking * leaf/inner is enough to recognize split for root */ } else if (GistFollowRight(stack->page) || stack->parent->lsn < GistPageGetNSN(stack->page)) { /* * The page was split while we momentarily unlocked the * page. Go back to parent. */ UnlockReleaseBuffer(stack->buffer); xlocked = false; state.stack = stack = stack->parent; continue; } } /* now state.stack->(page, buffer and blkno) points to leaf page */ gistinserttuple(&state, stack, giststate, itup, InvalidOffsetNumber); LockBuffer(stack->buffer, GIST_UNLOCK); /* Release any pins we might still hold before exiting */ for (; stack; stack = stack->parent) ReleaseBuffer(stack->buffer); break; } } }
static int gistlayerinsert(Relation r, BlockNumber blkno, IndexTuple **itup, /* in - out, has compressed entry */ int *len, /* in - out */ InsertIndexResult *res, /* out */ GISTSTATE *giststate) { Buffer buffer; Page page; OffsetNumber child; int ret; GISTPageOpaque opaque; buffer = ReadBuffer(r, blkno); page = (Page) BufferGetPage(buffer); opaque = (GISTPageOpaque) PageGetSpecialPointer(page); if (!(opaque->flags & F_LEAF)) { /* internal page, so we must walk on tree */ /* len IS equal 1 */ ItemId iid; BlockNumber nblkno; ItemPointerData oldtid; IndexTuple oldtup; child = gistchoose(r, page, *(*itup), giststate); iid = PageGetItemId(page, child); oldtup = (IndexTuple) PageGetItem(page, iid); nblkno = ItemPointerGetBlockNumber(&(oldtup->t_tid)); /* * After this call: 1. if child page was splited, then itup * contains keys for each page 2. if child page wasn't splited, * then itup contains additional for adjustment of current key */ ret = gistlayerinsert(r, nblkno, itup, len, res, giststate); /* nothing inserted in child */ if (!(ret & INSERTED)) { ReleaseBuffer(buffer); return 0x00; } /* child does not splited */ if (!(ret & SPLITED)) { IndexTuple newtup = gistgetadjusted(r, oldtup, (*itup)[0], giststate); if (!newtup) { /* not need to update key */ ReleaseBuffer(buffer); return 0x00; } pfree((*itup)[0]); /* !!! */ (*itup)[0] = newtup; } /* key is modified, so old version must be deleted */ ItemPointerSet(&oldtid, blkno, child); gistdelete(r, &oldtid); /* * if child was splitted, new key for child will be inserted in * the end list of child, so we must say to any scans that page is * changed beginning from 'child' offset */ if (ret & SPLITED) gistadjscans(r, GISTOP_SPLIT, blkno, child); } ret = INSERTED; if (gistnospace(page, (*itup), *len)) { /* no space for insertion */ IndexTuple *itvec, *newitup; int tlen, oldlen; ret |= SPLITED; itvec = gistreadbuffer(buffer, &tlen); itvec = gistjoinvector(itvec, &tlen, (*itup), *len); oldlen = *len; newitup = gistSplit(r, buffer, itvec, &tlen, giststate, (opaque->flags & F_LEAF) ? res : NULL); /* res only for * inserting in leaf */ ReleaseBuffer(buffer); do pfree((*itup)[oldlen - 1]); while ((--oldlen) > 0); pfree((*itup)); pfree(itvec); *itup = newitup; *len = tlen; /* now tlen >= 2 */ } else { /* enogth space */ OffsetNumber off, l; off = (PageIsEmpty(page)) ? FirstOffsetNumber : OffsetNumberNext(PageGetMaxOffsetNumber(page)); l = gistwritebuffer(r, page, (*itup), *len, off); WriteBuffer(buffer); /* * set res if insert into leaf page, in this case, len = 1 always */ if (res && (opaque->flags & F_LEAF)) ItemPointerSet(&((*res)->pointerData), blkno, l); if (*len > 1) { /* previous insert ret & SPLITED != 0 */ int i; /* * child was splited, so we must form union for insertion in * parent */ IndexTuple newtup = gistunion(r, (*itup), *len, giststate); ItemPointerSet(&(newtup->t_tid), blkno, 1); for (i = 0; i < *len; i++) pfree((*itup)[i]); (*itup)[0] = newtup; *len = 1; } } return ret; }