/*
 * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
 * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
 * 'stack' represents the path from the root to the page being updated.
 *
 * The caller must hold an exclusive lock on stack->buffer. The lock is still
 * held on return, but the page might not contain the inserted tuple if the
 * page was split. The function returns true if the page was split, false
 * otherwise.
 */
static bool
gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
				GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
{
	return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
							InvalidBuffer, InvalidBuffer, false, false);
}
/*
 * Workhorse routine for doing insertion into a GiST index. Note that
 * this routine assumes it is invoked in a short-lived memory context,
 * so it does not bother releasing palloc'd allocations.
 */
static void
gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
{
	ItemId		iid;
	IndexTuple	idxtuple;
	GISTInsertStack firststack;
	GISTInsertStack *stack;
	GISTInsertState state;
	bool		xlocked = false;

	memset(&state, 0, sizeof(GISTInsertState));
	state.freespace = freespace;
	state.r = r;

	/* Start from the root */
	firststack.blkno = GIST_ROOT_BLKNO;
	firststack.lsn.xrecoff = 0;
	firststack.parent = NULL;
	state.stack = stack = &firststack;

	/*
	 * Walk down along the path of smallest penalty, updating the parent
	 * pointers with the key we're inserting as we go. If we crash in the
	 * middle, the tree is consistent, although the possible parent updates
	 * were a waste.
	 */
	for (;;)
	{
		if (XLogRecPtrIsInvalid(stack->lsn))
			stack->buffer = ReadBuffer(state.r, stack->blkno);

		/*
		 * Be optimistic and grab shared lock first. Swap it for an
		 * exclusive lock later if we need to update the page.
		 */
		if (!xlocked)
		{
			LockBuffer(stack->buffer, GIST_SHARE);
			gistcheckpage(state.r, stack->buffer);
		}

		stack->page = (Page) BufferGetPage(stack->buffer);
		stack->lsn = PageGetLSN(stack->page);
		Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));

		/*
		 * If this page was split but the downlink was never inserted to
		 * the parent because the inserting backend crashed before doing
		 * that, fix that now.
		 */
		if (GistFollowRight(stack->page))
		{
			if (!xlocked)
			{
				LockBuffer(stack->buffer, GIST_UNLOCK);
				LockBuffer(stack->buffer, GIST_EXCLUSIVE);
				xlocked = true;
				/* someone might've completed the split when we unlocked */
				if (!GistFollowRight(stack->page))
					continue;
			}
			gistfixsplit(&state, giststate);
			UnlockReleaseBuffer(stack->buffer);
			xlocked = false;
			state.stack = stack = stack->parent;
			continue;
		}

		if (stack->blkno != GIST_ROOT_BLKNO &&
			XLByteLT(stack->parent->lsn,
					 GistPageGetOpaque(stack->page)->nsn))
		{
			/*
			 * Concurrent split detected. There's no guarantee that the
			 * downlink for this page is consistent with the tuple we're
			 * inserting anymore, so go back to parent and rechoose the
			 * best child.
			 */
			UnlockReleaseBuffer(stack->buffer);
			xlocked = false;
			state.stack = stack = stack->parent;
			continue;
		}

		if (!GistPageIsLeaf(stack->page))
		{
			/*
			 * This is an internal page so continue to walk down the tree.
			 * Find the child node that has the minimum insertion penalty.
			 */
			BlockNumber childblkno;
			IndexTuple	newtup;
			GISTInsertStack *item;

			stack->childoffnum = gistchoose(state.r, stack->page, itup, giststate);
			iid = PageGetItemId(stack->page, stack->childoffnum);
			idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
			childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

			/*
			 * Check that it's not a leftover invalid tuple from pre-9.1
			 */
			if (GistTupleIsInvalid(idxtuple))
				ereport(ERROR,
						(errmsg("index \"%s\" contains an inner tuple marked as invalid",
								RelationGetRelationName(r)),
						 errdetail("This is caused by an incomplete page split at crash recovery before upgrading to 9.1."),
						 errhint("Please REINDEX it.")));

			/*
			 * Check that the key representing the target child node is
			 * consistent with the key we're inserting. Update it if it's
			 * not.
			 */
			newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
			if (newtup)
			{
				/*
				 * Swap shared lock for an exclusive one. Beware, the page
				 * may change while we unlock/lock the page...
				 */
				if (!xlocked)
				{
					LockBuffer(stack->buffer, GIST_UNLOCK);
					LockBuffer(stack->buffer, GIST_EXCLUSIVE);
					xlocked = true;
					stack->page = (Page) BufferGetPage(stack->buffer);

					if (!XLByteEQ(PageGetLSN(stack->page), stack->lsn))
					{
						/* the page was changed while we unlocked it, retry */
						continue;
					}
				}

				/*
				 * Update the tuple.
				 *
				 * gistinserttuple() might have to split the page to make the
				 * updated tuple fit. It will adjust the stack so that after
				 * the call, we'll be holding a lock on the page containing
				 * the tuple, which might have moved right.
				 *
				 * The exception is a root split: in that case
				 * gistinserttuple() returns 'true', stack only holds the new
				 * root, and the child page has been released, so we have to
				 * start all over.
				 */
				if (gistinserttuple(&state, stack, giststate, newtup,
									stack->childoffnum))
				{
					UnlockReleaseBuffer(stack->buffer);
					xlocked = false;
					state.stack = stack = stack->parent;
					continue;
				}
			}
			LockBuffer(stack->buffer, GIST_UNLOCK);
			xlocked = false;

			/* descend to the chosen child */
			item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
			item->blkno = childblkno;
			item->parent = stack;
			state.stack = stack = item;
		}
		else
		{
			/*
			 * Leaf page. Insert the new key. We've already updated all the
			 * parents on the way down, but we might have to split the page
			 * if it doesn't fit. gistinserttuple() will take care of that.
			 */

			/*
			 * Swap shared lock for an exclusive one. Be careful, the page
			 * may change while we unlock/lock the page...
			 */
			if (!xlocked)
			{
				LockBuffer(stack->buffer, GIST_UNLOCK);
				LockBuffer(stack->buffer, GIST_EXCLUSIVE);
				xlocked = true;
				stack->page = (Page) BufferGetPage(stack->buffer);
				stack->lsn = PageGetLSN(stack->page);

				if (stack->blkno == GIST_ROOT_BLKNO)
				{
					/*
					 * the only page that can become inner instead of leaf
					 * is the root page, so for root we should recheck it
					 */
					if (!GistPageIsLeaf(stack->page))
					{
						/*
						 * very rare situation: while we were unlocked the
						 * index grew beyond a single page, so the root is no
						 * longer a leaf
						 */
						LockBuffer(stack->buffer, GIST_UNLOCK);
						xlocked = false;
						continue;
					}

					/*
					 * we don't need to check root split, because checking
					 * leaf/inner is enough to recognize split for root
					 */
				}
				else if (GistFollowRight(stack->page) ||
						 XLByteLT(stack->parent->lsn,
								  GistPageGetOpaque(stack->page)->nsn))
				{
					/*
					 * The page was split while we momentarily unlocked the
					 * page. Go back to the parent.
					 */
					UnlockReleaseBuffer(stack->buffer);
					xlocked = false;
					state.stack = stack = stack->parent;
					continue;
				}
			}

			/* now state.stack->(page, buffer and blkno) points to leaf page */

			gistinserttuple(&state, stack, giststate, itup,
							InvalidOffsetNumber);
			LockBuffer(stack->buffer, GIST_UNLOCK);

			/* Release any pins we might still hold before exiting */
			for (; stack; stack = stack->parent)
				ReleaseBuffer(stack->buffer);
			break;
		}
	}
}
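
/*
 * Minimal sketch of how a caller is expected to drive gistdoinsert(),
 * illustrating the "short-lived memory context" contract noted above: all
 * allocations go into a temporary context that is dropped wholesale, so
 * nothing inside gistdoinsert() needs to be pfree'd individually.  The
 * helper below is illustrative only (hence NOT_USED); its name and argument
 * list are hypothetical, and it assumes the GiST support routines
 * createTempGistContext(), initGISTstate(), gistFormTuple() and
 * freeGISTstate() with their pre-9.5 signatures.
 */
#ifdef NOT_USED
static void
gist_insert_example(Relation r, Datum *values, bool *isnull,
					ItemPointer ht_ctid)
{
	GISTSTATE	giststate;
	IndexTuple	itup;
	MemoryContext tempCxt;
	MemoryContext oldCxt;

	/* All palloc's below land in a context we can throw away at the end. */
	tempCxt = createTempGistContext();
	oldCxt = MemoryContextSwitchTo(tempCxt);

	initGISTstate(&giststate, r);

	/* Build the index tuple and stamp it with the heap TID it points to. */
	itup = gistFormTuple(&giststate, r, values, isnull, true);
	itup->t_tid = *ht_ctid;

	/* Descend from the root and insert, splitting pages as needed. */
	gistdoinsert(r, itup, 0, &giststate);

	/* No per-allocation cleanup: deleting the context releases everything. */
	freeGISTstate(&giststate);
	MemoryContextSwitchTo(oldCxt);
	MemoryContextDelete(tempCxt);
}
#endif   /* NOT_USED */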