Beispiel #1
0
/*
 * Insert a new item to a page.
 *
 * Returns true if the insertion was finished. On false, the page was split and
 * the parent needs to be updated. (a root split returns true as it doesn't
 * need any further action by the caller to complete)
 *
 * When inserting a downlink to a internal page, 'childbuf' contains the
 * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
 * atomically with the insert. Also, the existing item at the given location
 * is updated to point to 'updateblkno'.
 *
 * stack->buffer is locked on entry, and is kept locked.
 */
static bool
ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
			   void *insertdata, BlockNumber updateblkno,
			   Buffer childbuf, GinStatsData *buildStats)
{
	Page		page = BufferGetPage(stack->buffer);
	XLogRecData *payloadrdata;
	bool		fit;
	uint16		xlflags = 0;
	Page		childpage = NULL;

	if (GinPageIsData(page))
		xlflags |= GIN_INSERT_ISDATA;
	if (GinPageIsLeaf(page))
	{
		xlflags |= GIN_INSERT_ISLEAF;
		Assert(!BufferIsValid(childbuf));
		Assert(updateblkno == InvalidBlockNumber);
	}
	else
	{
		Assert(BufferIsValid(childbuf));
		Assert(updateblkno != InvalidBlockNumber);
		childpage = BufferGetPage(childbuf);
	}

	/*
	 * Try to put the incoming tuple on the page. If it doesn't fit,
	 * placeToPage method will return false and leave the page unmodified, and
	 * we'll have to split the page.
	 */
	START_CRIT_SECTION();
	fit = btree->placeToPage(btree, stack->buffer, stack->off,
							 insertdata, updateblkno,
							 &payloadrdata);
	if (fit)
	{
		MarkBufferDirty(stack->buffer);

		/* An insert to an internal page finishes the split of the child. */
		if (childbuf != InvalidBuffer)
		{
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
			MarkBufferDirty(childbuf);
		}

		if (RelationNeedsWAL(btree->index))
		{
			XLogRecPtr	recptr;
			XLogRecData rdata[3];
			ginxlogInsert xlrec;
			BlockIdData	childblknos[2];

			xlrec.node = btree->index->rd_node;
			xlrec.blkno = BufferGetBlockNumber(stack->buffer);
			xlrec.offset = stack->off;
			xlrec.flags = xlflags;

			rdata[0].buffer = InvalidBuffer;
			rdata[0].data = (char *) &xlrec;
			rdata[0].len = sizeof(ginxlogInsert);

			/*
			 * Log information about child if this was an insertion of a
			 * downlink.
			 */
			if (childbuf != InvalidBuffer)
			{
				rdata[0].next = &rdata[1];

				BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
				BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);

				rdata[1].buffer = InvalidBuffer;
				rdata[1].data = (char *) childblknos;
				rdata[1].len = sizeof(BlockIdData) * 2;
				rdata[1].next = &rdata[2];

				rdata[2].buffer = childbuf;
				rdata[2].buffer_std = false;
				rdata[2].data = NULL;
				rdata[2].len = 0;
				rdata[2].next = payloadrdata;
			}
			else
				rdata[0].next = payloadrdata;

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
			PageSetLSN(page, recptr);
			if (childbuf != InvalidBuffer)
				PageSetLSN(childpage, recptr);
		}

		END_CRIT_SECTION();

		return true;
	}
	else
	{
		/* Didn't fit, have to split */
		Buffer		rbuffer;
		Page		newlpage;
		BlockNumber savedRightLink;
		Page		rpage;
		XLogRecData rdata[2];
		ginxlogSplit data;
		Buffer		lbuffer = InvalidBuffer;
		Page		newrootpg = NULL;

		END_CRIT_SECTION();

		rbuffer = GinNewBuffer(btree->index);

		/* During index build, count the new page */
		if (buildStats)
		{
			if (btree->isData)
				buildStats->nDataPages++;
			else
				buildStats->nEntryPages++;
		}

		savedRightLink = GinPageGetOpaque(page)->rightlink;

		/*
		 * newlpage is a pointer to memory page, it is not associated with a
		 * buffer. stack->buffer is not touched yet.
		 */
		newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off,
									insertdata, updateblkno,
									&payloadrdata);

		data.node = btree->index->rd_node;
		data.rblkno = BufferGetBlockNumber(rbuffer);
		data.flags = xlflags;
		if (childbuf != InvalidBuffer)
		{
			Page childpage = BufferGetPage(childbuf);
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;

			data.leftChildBlkno = BufferGetBlockNumber(childbuf);
			data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
		}
		else
			data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;

		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char *) &data;
		rdata[0].len = sizeof(ginxlogSplit);

		if (childbuf != InvalidBuffer)
		{
			rdata[0].next = &rdata[1];

			rdata[1].buffer = childbuf;
			rdata[1].buffer_std = false;
			rdata[1].data = NULL;
			rdata[1].len = 0;
			rdata[1].next = payloadrdata;
		}
		else
			rdata[0].next = payloadrdata;

		rpage = BufferGetPage(rbuffer);

		if (stack->parent == NULL)
		{
			/*
			 * split root, so we need to allocate new left page and place
			 * pointer on root to left and right page
			 */
			lbuffer = GinNewBuffer(btree->index);

			/* During index build, count the newly-added root page */
			if (buildStats)
			{
				if (btree->isData)
					buildStats->nDataPages++;
				else
					buildStats->nEntryPages++;
			}

			/*
			 * root never has a right-link, so we borrow the rrlink field to
			 * store the root block number.
			 */
			data.rrlink = BufferGetBlockNumber(stack->buffer);
			data.lblkno = BufferGetBlockNumber(lbuffer);
			data.flags |= GIN_SPLIT_ROOT;

			GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

			/*
			 * Construct a new root page containing downlinks to the new left
			 * and right pages. (do this in a temporary copy first rather
			 * than overwriting the original page directly, so that we can still
			 * abort gracefully if this fails.)
			 */
			newrootpg = PageGetTempPage(rpage);
			GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF, BLCKSZ);

			btree->fillRoot(btree, newrootpg,
							BufferGetBlockNumber(lbuffer), newlpage,
							BufferGetBlockNumber(rbuffer), rpage);
		}
		else
		{
			/* split non-root page */
			data.rrlink = savedRightLink;
			data.lblkno = BufferGetBlockNumber(stack->buffer);

			GinPageGetOpaque(rpage)->rightlink = savedRightLink;
			GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
		}

		/*
		 * Ok, we have the new contents of the left page in a temporary copy
		 * now (newlpage), and the newly-allocated right block has been filled
		 * in. The original page is still unchanged.
		 *
		 * If this is a root split, we also have a temporary page containing
		 * the new contents of the root. Copy the new left page to a
		 * newly-allocated block, and initialize the (original) root page the
		 * new copy. Otherwise, copy over the temporary copy of the new left
		 * page over the old left page.
		 */

		START_CRIT_SECTION();

		MarkBufferDirty(rbuffer);

		if (stack->parent == NULL)
		{
			PageRestoreTempPage(newlpage, BufferGetPage(lbuffer));
			MarkBufferDirty(lbuffer);
			newlpage = newrootpg;
		}

		PageRestoreTempPage(newlpage, BufferGetPage(stack->buffer));
		MarkBufferDirty(stack->buffer);

		/* write WAL record */
		if (RelationNeedsWAL(btree->index))
		{
			XLogRecPtr	recptr;

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
			PageSetLSN(BufferGetPage(stack->buffer), recptr);
			PageSetLSN(rpage, recptr);
			if (stack->parent == NULL)
				PageSetLSN(BufferGetPage(lbuffer), recptr);
		}
		END_CRIT_SECTION();

		/*
		 * We can release the lock on the right page now, but keep the
		 * original buffer locked.
		 */
		UnlockReleaseBuffer(rbuffer);
		if (stack->parent == NULL)
			UnlockReleaseBuffer(lbuffer);

		/*
		 * If we split the root, we're done. Otherwise the split is not
		 * complete until the downlink for the new page has been inserted to
		 * the parent.
		 */
		if (stack->parent == NULL)
			return true;
		else
			return false;
	}
}
Beispiel #2
0
/*
 *	rtdosplit -- split a page in the tree.
 *
 *	  rtpicksplit does the interesting work of choosing the split.
 *	  This routine just does the bit-pushing.
 */
static void
rtdosplit(Relation r,
		  Buffer buffer,
		  RTSTACK *stack,
		  IndexTuple itup,
		  RTSTATE *rtstate)
{
	Page		p;
	Buffer		leftbuf,
				rightbuf;
	Page		left,
				right;
	ItemId		itemid;
	IndexTuple	item;
	IndexTuple	ltup,
				rtup;
	OffsetNumber maxoff;
	OffsetNumber i;
	OffsetNumber leftoff,
				rightoff;
	BlockNumber lbknum,
				rbknum;
	BlockNumber bufblock;
	RTreePageOpaque opaque;
	bool	   *isnull;
	SPLITVEC	v;
	OffsetNumber *spl_left,
			   *spl_right;
	TupleDesc	tupDesc;
	int			n;
	OffsetNumber newitemoff;

	p = (Page) BufferGetPage(buffer);
	opaque = (RTreePageOpaque) PageGetSpecialPointer(p);

	rtpicksplit(r, p, &v, itup, rtstate);

	/*
	 * The root of the tree is the first block in the relation.  If we're
	 * about to split the root, we need to do some hocus-pocus to enforce this
	 * guarantee.
	 */

	if (BufferGetBlockNumber(buffer) == P_ROOT)
	{
		leftbuf = ReadBuffer(r, P_NEW);
		RTInitBuffer(leftbuf, opaque->flags);
		lbknum = BufferGetBlockNumber(leftbuf);
		left = (Page) BufferGetPage(leftbuf);
	}
	else
	{
		leftbuf = buffer;
		IncrBufferRefCount(buffer);
		lbknum = BufferGetBlockNumber(buffer);
		left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData));
	}

	rightbuf = ReadBuffer(r, P_NEW);
	RTInitBuffer(rightbuf, opaque->flags);
	rbknum = BufferGetBlockNumber(rightbuf);
	right = (Page) BufferGetPage(rightbuf);

	spl_left = v.spl_left;
	spl_right = v.spl_right;
	leftoff = rightoff = FirstOffsetNumber;
	maxoff = PageGetMaxOffsetNumber(p);
	newitemoff = OffsetNumberNext(maxoff);

	/*
	 * spl_left contains a list of the offset numbers of the tuples that will
	 * go to the left page.  For each offset number, get the tuple item, then
	 * add the item to the left page.  Similarly for the right side.
	 */

	/* fill left node */
	for (n = 0; n < v.spl_nleft; n++)
	{
		i = *spl_left;
		if (i == newitemoff)
			item = itup;
		else
		{
			itemid = PageGetItemId(p, i);
			item = (IndexTuple) PageGetItem(p, itemid);
		}

		if (PageAddItem(left, (Item) item, IndexTupleSize(item),
						leftoff, LP_USED) == InvalidOffsetNumber)
			elog(ERROR, "failed to add index item to \"%s\"",
				 RelationGetRelationName(r));
		leftoff = OffsetNumberNext(leftoff);

		spl_left++;				/* advance in left split vector */
	}

	/* fill right node */
	for (n = 0; n < v.spl_nright; n++)
	{
		i = *spl_right;
		if (i == newitemoff)
			item = itup;
		else
		{
			itemid = PageGetItemId(p, i);
			item = (IndexTuple) PageGetItem(p, itemid);
		}

		if (PageAddItem(right, (Item) item, IndexTupleSize(item),
						rightoff, LP_USED) == InvalidOffsetNumber)
			elog(ERROR, "failed to add index item to \"%s\"",
				 RelationGetRelationName(r));
		rightoff = OffsetNumberNext(rightoff);

		spl_right++;			/* advance in right split vector */
	}

	/* Make sure we consumed all of the split vectors, and release 'em */
	Assert(*spl_left == InvalidOffsetNumber);
	Assert(*spl_right == InvalidOffsetNumber);
	pfree(v.spl_left);
	pfree(v.spl_right);

	if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT)
		PageRestoreTempPage(left, p);
	WriteBuffer(leftbuf);
	WriteBuffer(rightbuf);

	/*
	 * Okay, the page is split.  We have three things left to do:
	 *
	 * 1)  Adjust any active scans on this index to cope with changes we
	 * introduced in its structure by splitting this page.
	 *
	 * 2)  "Tighten" the bounding box of the pointer to the left page in the
	 * parent node in the tree, if any.  Since we moved a bunch of stuff off
	 * the left page, we expect it to get smaller.	This happens in the
	 * internal insertion routine.
	 *
	 * 3)  Insert a pointer to the right page in the parent.  This may cause
	 * the parent to split.  If it does, we need to repeat steps one and two
	 * for each split node in the tree.
	 */

	/* adjust active scans */
	rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber);

	tupDesc = r->rd_att;
	isnull = (bool *) palloc(r->rd_rel->relnatts * sizeof(bool));
	memset(isnull, false, r->rd_rel->relnatts * sizeof(bool));

	ltup = index_form_tuple(tupDesc, &(v.spl_ldatum), isnull);
	rtup = index_form_tuple(tupDesc, &(v.spl_rdatum), isnull);

	pfree(isnull);
	pfree(DatumGetPointer(v.spl_ldatum));
	pfree(DatumGetPointer(v.spl_rdatum));

	/* set pointers to new child pages in the internal index tuples */
	ItemPointerSet(&(ltup->t_tid), lbknum, 1);
	ItemPointerSet(&(rtup->t_tid), rbknum, 1);

	rtintinsert(r, stack, ltup, rtup, rtstate);

	pfree(ltup);
	pfree(rtup);
}
Beispiel #3
0
static bool
gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
{
	bool		is_splitted = false;
	bool		is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	/*
	 * if (!is_leaf) remove old key: This node's key has been modified, either
	 * because a child split occurred or because we needed to adjust our key
	 * for an insert in a child node. Therefore, remove the old version of
	 * this node's key.
	 *
	 * for WAL replay, in the non-split case we handle this by setting up a
	 * one-element todelete array; in the split case, it's handled implicitly
	 * because the tuple vector passed to gistSplit won't include this tuple.
	 *
	 * XXX: If we want to change fillfactors between node and leaf, fillfactor
	 * = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
	 */
	if (gistnospace(state->stack->page, state->itup, state->ituplen,
					is_leaf ? InvalidOffsetNumber : state->stack->childoffnum,
					state->freespace))
	{
		/* no space for insertion */
		IndexTuple *itvec;
		int			tlen;
		SplitedPageLayout *dist = NULL,
				   *ptr;
		BlockNumber rrlink = InvalidBlockNumber;
		GistNSN		oldnsn;

		is_splitted = true;

		/*
		 * Form index tuples vector to split: remove old tuple if t's needed
		 * and add new tuples to vector
		 */
		itvec = gistextractpage(state->stack->page, &tlen);
		if (!is_leaf)
		{
			/* on inner page we should remove old tuple */
			int			pos = state->stack->childoffnum - FirstOffsetNumber;

			tlen--;
			if (pos != tlen)
				memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
		}
		itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
		dist = gistSplit(state->r, state->stack->page, itvec, tlen, giststate);

		state->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * tlen);
		state->ituplen = 0;

		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			/*
			 * if non-root split then we should not allocate new buffer, but
			 * we must create temporary page to operate
			 */
			dist->buffer = state->stack->buffer;
			dist->page = PageGetTempPage(BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData));

			/* clean all flags except F_LEAF */
			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
		}

		/* make new pages and fills them */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			int			i;
			char	   *data;

			/* get new page */
			if (ptr->buffer == InvalidBuffer)
			{
				ptr->buffer = gistNewBuffer(state->r);
				GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
				ptr->page = BufferGetPage(ptr->buffer);
			}
			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);

			/*
			 * fill page, we can do it because all these pages are new
			 * (ie not linked in tree or masked by temp page
			 */
			data = (char *) (ptr->list);
			for (i = 0; i < ptr->block.num; i++)
			{
				if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
					elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->r));
				data += IndexTupleSize((IndexTuple) data);
			}

			/* set up ItemPointer and remember it for parent */
			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
			state->itup[state->ituplen] = ptr->itup;
			state->ituplen++;
		}

		/* saves old rightlink */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
			rrlink = GistPageGetOpaque(dist->page)->rightlink;

		START_CRIT_SECTION();

		/*
		 * must mark buffers dirty before XLogInsert, even though we'll still
		 * be changing their opaque fields below. set up right links.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			MarkBufferDirty(ptr->buffer);
			GistPageGetOpaque(ptr->page)->rightlink = (ptr->next) ?
				ptr->next->block.blkno : rrlink;
		}

		/* restore splitted non-root page */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
			dist->page = BufferGetPage(dist->buffer);
		}

		if (!state->r->rd_istemp)
		{
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			rdata = formSplitRdata(state->r, state->stack->blkno,
								   is_leaf, &(state->key), dist);

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);

			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, recptr);
				PageSetTLI(ptr->page, ThisTimeLineID);
			}
		}
		else
		{
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, XLogRecPtrForTemp);
			}
		}

		/* set up NSN */
		oldnsn = GistPageGetOpaque(dist->page)->nsn;
		if (state->stack->blkno == GIST_ROOT_BLKNO)
			/* if root split we should put initial value */
			oldnsn = PageGetLSN(dist->page);

		for (ptr = dist; ptr; ptr = ptr->next)
		{
			/* only for last set oldnsn */
			GistPageGetOpaque(ptr->page)->nsn = (ptr->next) ?
				PageGetLSN(ptr->page) : oldnsn;
		}

		/*
		 * release buffers, if it was a root split then release all buffers
		 * because we create all buffers
		 */
		ptr = (state->stack->blkno == GIST_ROOT_BLKNO) ? dist : dist->next;
		for (; ptr; ptr = ptr->next)
			UnlockReleaseBuffer(ptr->buffer);

		if (state->stack->blkno == GIST_ROOT_BLKNO)
		{
			gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
			state->needInsertComplete = false;
		}

		END_CRIT_SECTION();
	}
	else
	{
		/* enough space */
		START_CRIT_SECTION();

		if (!is_leaf)
			PageIndexTupleDelete(state->stack->page, state->stack->childoffnum);
		gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber);

		MarkBufferDirty(state->stack->buffer);

		if (!state->r->rd_istemp)
		{
			OffsetNumber noffs = 0,
						offs[1];
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			if (!is_leaf)
			{
				/* only on inner page we should delete previous version */
				offs[0] = state->stack->childoffnum;
				noffs = 1;
			}

			rdata = formUpdateRdata(state->r, state->stack->buffer,
									offs, noffs,
									state->itup, state->ituplen,
									&(state->key));

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
			PageSetLSN(state->stack->page, recptr);
			PageSetTLI(state->stack->page, ThisTimeLineID);
		}
		else
			PageSetLSN(state->stack->page, XLogRecPtrForTemp);

		if (state->stack->blkno == GIST_ROOT_BLKNO)
			state->needInsertComplete = false;

		END_CRIT_SECTION();

		if (state->ituplen > 1)
		{						/* previous is_splitted==true */

			/*
			 * child was splited, so we must form union for insertion in
			 * parent
			 */
			IndexTuple	newtup = gistunion(state->r, state->itup, state->ituplen, giststate);

			ItemPointerSetBlockNumber(&(newtup->t_tid), state->stack->blkno);
			state->itup[0] = newtup;
			state->ituplen = 1;
		}
		else if (is_leaf)
		{
			/*
			 * itup[0] store key to adjust parent, we set it to valid to
			 * correct check by GistTupleIsInvalid macro in gistgetadjusted()
			 */
			ItemPointerSetBlockNumber(&(state->itup[0]->t_tid), state->stack->blkno);
			GistTupleSetValid(state->itup[0]);
		}
	}
	return is_splitted;
}
Beispiel #4
0
/*
 * Insert a new item to a page.
 *
 * Returns true if the insertion was finished. On false, the page was split and
 * the parent needs to be updated. (A root split returns true as it doesn't
 * need any further action by the caller to complete.)
 *
 * When inserting a downlink to an internal page, 'childbuf' contains the
 * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
 * atomically with the insert. Also, the existing item at offset stack->off
 * in the target page is updated to point to updateblkno.
 *
 * stack->buffer is locked on entry, and is kept locked.
 * Likewise for childbuf, if given.
 */
static bool
ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
			   void *insertdata, BlockNumber updateblkno,
			   Buffer childbuf, GinStatsData *buildStats)
{
	Page		page = BufferGetPage(stack->buffer);
	bool		result;
	GinPlaceToPageRC rc;
	uint16		xlflags = 0;
	Page		childpage = NULL;
	Page		newlpage = NULL,
				newrpage = NULL;
	void	   *ptp_workspace = NULL;
	XLogRecData payloadrdata[10];
	MemoryContext tmpCxt;
	MemoryContext oldCxt;

	/*
	 * We do all the work of this function and its subfunctions in a temporary
	 * memory context.  This avoids leakages and simplifies APIs, since some
	 * subfunctions allocate storage that has to survive until we've finished
	 * the WAL insertion.
	 */
	tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
								   "ginPlaceToPage temporary context",
								   ALLOCSET_DEFAULT_MINSIZE,
								   ALLOCSET_DEFAULT_INITSIZE,
								   ALLOCSET_DEFAULT_MAXSIZE);
	oldCxt = MemoryContextSwitchTo(tmpCxt);

	if (GinPageIsData(page))
		xlflags |= GIN_INSERT_ISDATA;
	if (GinPageIsLeaf(page))
	{
		xlflags |= GIN_INSERT_ISLEAF;
		Assert(!BufferIsValid(childbuf));
		Assert(updateblkno == InvalidBlockNumber);
	}
	else
	{
		Assert(BufferIsValid(childbuf));
		Assert(updateblkno != InvalidBlockNumber);
		childpage = BufferGetPage(childbuf);
	}

	/*
	 * See if the incoming tuple will fit on the page.  beginPlaceToPage will
	 * decide if the page needs to be split, and will compute the split
	 * contents if so.  See comments for beginPlaceToPage and execPlaceToPage
	 * functions for more details of the API here.
	 */
	rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
								 insertdata, updateblkno,
								 &ptp_workspace,
								 &newlpage, &newrpage,
								 payloadrdata);

	if (rc == GPTP_NO_WORK)
	{
		/* Nothing to do */
		result = true;
	}
	else if (rc == GPTP_INSERT)
	{
		/* It will fit, perform the insertion */
		START_CRIT_SECTION();

		/* Perform the page update, and set up WAL data about it */
		btree->execPlaceToPage(btree, stack->buffer, stack,
							   insertdata, updateblkno,
							   ptp_workspace, payloadrdata);

		MarkBufferDirty(stack->buffer);

		/* An insert to an internal page finishes the split of the child. */
		if (BufferIsValid(childbuf))
		{
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
			MarkBufferDirty(childbuf);
		}

		if (RelationNeedsWAL(btree->index))
		{
			XLogRecPtr	recptr;
			XLogRecData rdata[3];
			ginxlogInsert xlrec;
			BlockIdData childblknos[2];

			xlrec.node = btree->index->rd_node;
			xlrec.blkno = BufferGetBlockNumber(stack->buffer);
			xlrec.flags = xlflags;

			rdata[0].buffer = InvalidBuffer;
			rdata[0].data = (char *) &xlrec;
			rdata[0].len = sizeof(ginxlogInsert);

			/*
			 * Log information about child if this was an insertion of a
			 * downlink.
			 */
			if (BufferIsValid(childbuf))
			{
				rdata[0].next = &rdata[1];

				BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
				BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);

				rdata[1].buffer = InvalidBuffer;
				rdata[1].data = (char *) childblknos;
				rdata[1].len = sizeof(BlockIdData) * 2;
				rdata[1].next = &rdata[2];

				rdata[2].buffer = childbuf;
				rdata[2].buffer_std = true;
				rdata[2].data = NULL;
				rdata[2].len = 0;
				rdata[2].next = payloadrdata;
			}
			else
				rdata[0].next = payloadrdata;

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);

			PageSetLSN(page, recptr);
			if (BufferIsValid(childbuf))
				PageSetLSN(childpage, recptr);
		}

		END_CRIT_SECTION();

		/* Insertion is complete. */
		result = true;
	}
	else if (rc == GPTP_SPLIT)
	{
		/*
		 * Didn't fit, need to split.  The split has been computed in newlpage
		 * and newrpage, which are pointers to palloc'd pages, not associated
		 * with buffers.  stack->buffer is not touched yet.
		 */
		Buffer		rbuffer;
		BlockNumber savedRightLink;
		ginxlogSplit data;
		Buffer		lbuffer = InvalidBuffer;
		Page		newrootpg = NULL;

		/* Get a new index page to become the right page */
		rbuffer = GinNewBuffer(btree->index);

		/* During index build, count the new page */
		if (buildStats)
		{
			if (btree->isData)
				buildStats->nDataPages++;
			else
				buildStats->nEntryPages++;
		}

		savedRightLink = GinPageGetOpaque(page)->rightlink;

		/* Begin setting up WAL record (which we might not use) */
		data.node = btree->index->rd_node;
		data.rblkno = BufferGetBlockNumber(rbuffer);
		data.flags = xlflags;
		if (BufferIsValid(childbuf))
		{
			data.leftChildBlkno = BufferGetBlockNumber(childbuf);
			data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
		}
		else
			data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;

		if (stack->parent == NULL)
		{
			/*
			 * splitting the root, so we need to allocate new left page and
			 * place pointers to left and right page on root page.
			 */
			lbuffer = GinNewBuffer(btree->index);

			/* During index build, count the new left page */
			if (buildStats)
			{
				if (btree->isData)
					buildStats->nDataPages++;
				else
					buildStats->nEntryPages++;
			}

			/*
			 * root never has a right-link, so we borrow the rrlink field to
			 * store the root block number.
			 */
			data.rrlink = BufferGetBlockNumber(stack->buffer);
			data.lblkno = BufferGetBlockNumber(lbuffer);
			data.flags |= GIN_SPLIT_ROOT;

			GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

			/*
			 * Construct a new root page containing downlinks to the new left
			 * and right pages.  (Do this in a temporary copy rather than
			 * overwriting the original page directly, since we're not in the
			 * critical section yet.)
			 */
			newrootpg = PageGetTempPage(newrpage);
			GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);

			btree->fillRoot(btree, newrootpg,
							BufferGetBlockNumber(lbuffer), newlpage,
							BufferGetBlockNumber(rbuffer), newrpage);
		}
		else
		{
			/* splitting a non-root page */
			data.rrlink = savedRightLink;
			data.lblkno = BufferGetBlockNumber(stack->buffer);

			GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
			GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
		}

		/*
		 * OK, we have the new contents of the left page in a temporary copy
		 * now (newlpage), and likewise for the new contents of the
		 * newly-allocated right block. The original page is still unchanged.
		 *
		 * If this is a root split, we also have a temporary page containing
		 * the new contents of the root.
		 */

		START_CRIT_SECTION();

		MarkBufferDirty(rbuffer);
		MarkBufferDirty(stack->buffer);

		/*
		 * Restore the temporary copies over the real buffers.
		 */
		if (stack->parent == NULL)
		{
			/* Splitting the root, three pages to update */
			MarkBufferDirty(lbuffer);
			memcpy(page, newrootpg, BLCKSZ);
			memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
			memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
		}
		else
		{
			/* Normal split, only two pages to update */
			memcpy(page, newlpage, BLCKSZ);
			memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
		}

		/* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
		if (BufferIsValid(childbuf))
		{
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
			MarkBufferDirty(childbuf);
		}

		/* write WAL record */
		if (RelationNeedsWAL(btree->index))
		{
			XLogRecData rdata[2];
			XLogRecPtr	recptr;

			rdata[0].buffer = InvalidBuffer;
			rdata[0].data = (char *) &data;
			rdata[0].len = sizeof(ginxlogSplit);

			if (BufferIsValid(childbuf))
			{
				rdata[0].next = &rdata[1];

				rdata[1].buffer = childbuf;
				rdata[1].buffer_std = true;
				rdata[1].data = NULL;
				rdata[1].len = 0;
				rdata[1].next = payloadrdata;
			}
			else
				rdata[0].next = payloadrdata;

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);

			PageSetLSN(page, recptr);
			PageSetLSN(BufferGetPage(rbuffer), recptr);
			if (stack->parent == NULL)
				PageSetLSN(BufferGetPage(lbuffer), recptr);
			if (BufferIsValid(childbuf))
				PageSetLSN(childpage, recptr);
		}
		END_CRIT_SECTION();

		/*
		 * We can release the locks/pins on the new pages now, but keep
		 * stack->buffer locked.  childbuf doesn't get unlocked either.
		 */
		UnlockReleaseBuffer(rbuffer);
		if (stack->parent == NULL)
			UnlockReleaseBuffer(lbuffer);

		/*
		 * If we split the root, we're done. Otherwise the split is not
		 * complete until the downlink for the new page has been inserted to
		 * the parent.
		 */
		result = (stack->parent == NULL);
	}
	else
	{
		elog(ERROR, "invalid return code from GIN placeToPage method: %d", rc);
		result = false;			/* keep compiler quiet */
	}

	/* Clean up temp context */
	MemoryContextSwitchTo(oldCxt);
	MemoryContextDelete(tmpCxt);

	return result;
}
Beispiel #5
0
/*
 *	gistSplit -- split a page in the tree.
 */
static IndexTuple *
gistSplit(Relation r,
		  Buffer buffer,
		  IndexTuple *itup,		/* contains compressed entry */
		  int *len,
		  GISTSTATE *giststate,
		  InsertIndexResult *res)
{
	Page		p;
	Buffer		leftbuf,
				rightbuf;
	Page		left,
				right;
	IndexTuple *lvectup,
			   *rvectup,
			   *newtup;
	BlockNumber lbknum,
				rbknum;
	GISTPageOpaque opaque;
	GIST_SPLITVEC v;
	GistEntryVector *entryvec;
	bool	   *decompvec;
	int			i,
				j,
				nlen;
	int			MaxGrpId = 1;
	Datum		datum;
	bool		IsNull;

	p = (Page) BufferGetPage(buffer);
	opaque = (GISTPageOpaque) PageGetSpecialPointer(p);

	/*
	 * The root of the tree is the first block in the relation.  If we're
	 * about to split the root, we need to do some hocus-pocus to enforce
	 * this guarantee.
	 */

	if (BufferGetBlockNumber(buffer) == GISTP_ROOT)
	{
		leftbuf = ReadBuffer(r, P_NEW);
		GISTInitBuffer(leftbuf, opaque->flags);
		lbknum = BufferGetBlockNumber(leftbuf);
		left = (Page) BufferGetPage(leftbuf);
	}
	else
	{
		leftbuf = buffer;
		IncrBufferRefCount(buffer);
		lbknum = BufferGetBlockNumber(buffer);
		left = (Page) PageGetTempPage(p, sizeof(GISTPageOpaqueData));
	}

	rightbuf = ReadBuffer(r, P_NEW);
	GISTInitBuffer(rightbuf, opaque->flags);
	rbknum = BufferGetBlockNumber(rightbuf);
	right = (Page) BufferGetPage(rightbuf);

	/* generate the item array */
	entryvec = palloc(GEVHDRSZ + (*len + 1) * sizeof(GISTENTRY));
	entryvec->n = *len + 1;
	decompvec = (bool *) palloc((*len + 1) * sizeof(bool));
	for (i = 1; i <= *len; i++)
	{
		datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
		gistdentryinit(giststate, 0, &(entryvec->vector[i]),
					   datum, r, p, i,
		   ATTSIZE(datum, giststate->tupdesc, 1, IsNull), FALSE, IsNull);
		if ((!isAttByVal(giststate, 0)) && entryvec->vector[i].key != datum)
			decompvec[i] = TRUE;
		else
			decompvec[i] = FALSE;
	}

	/*
	 * now let the user-defined picksplit function set up the split
	 * vector; in entryvec have no null value!!
	 */
	FunctionCall2(&giststate->picksplitFn[0],
				  PointerGetDatum(entryvec),
				  PointerGetDatum(&v));

	/* compatibility with old code */
	if (v.spl_left[v.spl_nleft - 1] == InvalidOffsetNumber)
		v.spl_left[v.spl_nleft - 1] = (OffsetNumber) *len;
	if (v.spl_right[v.spl_nright - 1] == InvalidOffsetNumber)
		v.spl_right[v.spl_nright - 1] = (OffsetNumber) *len;

	v.spl_lattr[0] = v.spl_ldatum;
	v.spl_rattr[0] = v.spl_rdatum;
	v.spl_lisnull[0] = false;
	v.spl_risnull[0] = false;

	/*
	 * if index is multikey, then we must to try get smaller bounding box
	 * for subkey(s)
	 */
	if (r->rd_att->natts > 1)
	{
		v.spl_idgrp = (int *) palloc0(sizeof(int) * (*len + 1));
		v.spl_grpflag = (char *) palloc0(sizeof(char) * (*len + 1));
		v.spl_ngrp = (int *) palloc(sizeof(int) * (*len + 1));

		MaxGrpId = gistfindgroup(giststate, entryvec->vector, &v);

		/* form union of sub keys for each page (l,p) */
		gistunionsubkey(r, giststate, itup, &v);

		/*
		 * if possible, we insert equivalent tuples with control by
		 * penalty for a subkey(s)
		 */
		if (MaxGrpId > 1)
			gistadjsubkey(r, itup, len, &v, giststate);

		pfree(v.spl_idgrp);
		pfree(v.spl_grpflag);
		pfree(v.spl_ngrp);
	}

	/* clean up the entry vector: its keys need to be deleted, too */
	for (i = 1; i <= *len; i++)
		if (decompvec[i])
			pfree(DatumGetPointer(entryvec->vector[i].key));
	pfree(entryvec);
	pfree(decompvec);

	/* form left and right vector */
	lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nleft);
	rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nright);

	for (i = 0; i < v.spl_nleft; i++)
		lvectup[i] = itup[v.spl_left[i] - 1];

	for (i = 0; i < v.spl_nright; i++)
		rvectup[i] = itup[v.spl_right[i] - 1];


	/* write on disk (may be need another split) */
	if (gistnospace(right, rvectup, v.spl_nright))
	{
		nlen = v.spl_nright;
		newtup = gistSplit(r, rightbuf, rvectup, &nlen, giststate,
			  (res && rvectup[nlen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(rightbuf);
		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_risnull[j])
				pfree(DatumGetPointer(v.spl_rattr[j]));
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber);
		WriteBuffer(rightbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), rbknum, l);

		nlen = 1;
		newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1);
		newtup[0] = gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull);
		ItemPointerSet(&(newtup[0]->t_tid), rbknum, 1);
	}


	if (gistnospace(left, lvectup, v.spl_nleft))
	{
		int			llen = v.spl_nleft;
		IndexTuple *lntup;

		lntup = gistSplit(r, leftbuf, lvectup, &llen, giststate,
			  (res && lvectup[llen - 1] == itup[*len - 1]) ? res : NULL);
		ReleaseBuffer(leftbuf);

		for (j = 1; j < r->rd_att->natts; j++)
			if ((!isAttByVal(giststate, j)) && !v.spl_lisnull[j])
				pfree(DatumGetPointer(v.spl_lattr[j]));

		newtup = gistjoinvector(newtup, &nlen, lntup, llen);
		pfree(lntup);
	}
	else
	{
		OffsetNumber l;

		l = gistwritebuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber);
		if (BufferGetBlockNumber(buffer) != GISTP_ROOT)
			PageRestoreTempPage(left, p);

		WriteBuffer(leftbuf);

		if (res)
			ItemPointerSet(&((*res)->pointerData), lbknum, l);

		nlen += 1;
		newtup = (IndexTuple *) repalloc((void *) newtup, sizeof(IndexTuple) * nlen);
		newtup[nlen - 1] = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull);
		ItemPointerSet(&(newtup[nlen - 1]->t_tid), lbknum, 1);
	}

	/* !!! pfree */
	pfree(rvectup);
	pfree(lvectup);
	pfree(v.spl_left);
	pfree(v.spl_right);

	*len = nlen;
	return newtup;
}