示例#1
0
文件: gistxlog.c 项目: GisKook/Gis
/*
 * Replay the clearing of F_FOLLOW_RIGHT flag.
 */
static void
gistRedoClearFollowRight(RelFileNode node, XLogRecPtr lsn,
						 BlockNumber leftblkno)
{
	Buffer		buffer;

	buffer = XLogReadBuffer(node, leftblkno, false);
	if (BufferIsValid(buffer))
	{
		Page		page = (Page) BufferGetPage(buffer);

		/*
		 * Note that we still update the page even if page LSN is equal to the
		 * LSN of this record, because the updated NSN is not included in the
		 * full page image.
		 */
		if (!XLByteLT(lsn, PageGetLSN(page)))
		{
			GistPageGetOpaque(page)->nsn = lsn;
			GistClearFollowRight(page);

			PageSetLSN(page, lsn);
			PageSetTLI(page, ThisTimeLineID);
			MarkBufferDirty(buffer);
		}
		UnlockReleaseBuffer(buffer);
	}
}
示例#2
0
文件: gist.c 项目: AlexHill/postgres
/*
 * Complete the incomplete split of state->stack->page.
 */
static void
gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
{
	GISTInsertStack *stack = state->stack;
	Buffer		buf;
	Page		page;
	List	   *splitinfo = NIL;

	elog(LOG, "fixing incomplete split in index \"%s\", block %u",
		 RelationGetRelationName(state->r), stack->blkno);

	Assert(GistFollowRight(stack->page));
	Assert(OffsetNumberIsValid(stack->downlinkoffnum));

	buf = stack->buffer;

	/*
	 * Read the chain of split pages, following the rightlinks. Construct a
	 * downlink tuple for each page.
	 */
	for (;;)
	{
		GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
		IndexTuple	downlink;

		page = BufferGetPage(buf);

		/* Form the new downlink tuples to insert to parent */
		downlink = gistformdownlink(state->r, buf, giststate, stack);

		si->buf = buf;
		si->downlink = downlink;

		splitinfo = lappend(splitinfo, si);

		if (GistFollowRight(page))
		{
			/* lock next page */
			buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
			LockBuffer(buf, GIST_EXCLUSIVE);
		}
		else
			break;
	}

	/* Insert the downlinks */
	gistfinishsplit(state, stack, giststate, splitinfo, false);
}
/*
 * Initialize a new index page
 */
void
GISTInitBuffer(Buffer b, uint32 f)
{
	GISTPageOpaque opaque;
	Page		page;
	Size		pageSize;

	pageSize = BufferGetPageSize(b);
	page = BufferGetPage(b);
	PageInit(page, pageSize, sizeof(GISTPageOpaqueData));

	opaque = GistPageGetOpaque(page);
	opaque->flags = f;
	opaque->rightlink = InvalidBlockNumber;
	/* page was already zeroed by PageInit, so this is not needed: */
	/* memset(&(opaque->nsn), 0, sizeof(GistNSN)); */
}
示例#4
0
static void
pushStackIfSplited(Page page, GistBDItem *stack)
{
	GISTPageOpaque opaque = GistPageGetOpaque(page);

	if (stack->blkno != GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid(stack->parentlsn) &&
		(GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page)) &&
		opaque->rightlink != InvalidBlockNumber /* sanity check */ )
	{
		/* split page detected, install right link to the stack */

		GistBDItem *ptr = (GistBDItem *) palloc(sizeof(GistBDItem));

		ptr->blkno = opaque->rightlink;
		ptr->parentlsn = stack->parentlsn;
		ptr->next = stack->next;
		stack->next = ptr;
	}
}
示例#5
0
文件: gist.c 项目: AlexHill/postgres
/*
 * Traverse the tree to find path from root page to specified "child" block.
 *
 * returns a new insertion stack, starting from the parent of "child", up
 * to the root. *downlinkoffnum is set to the offset of the downlink in the
 * direct parent of child.
 *
 * To prevent deadlocks, this should lock only one page at a time.
 */
static GISTInsertStack *
gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
{
	Page		page;
	Buffer		buffer;
	OffsetNumber i,
				maxoff;
	ItemId		iid;
	IndexTuple	idxtuple;
	List	   *fifo;
	GISTInsertStack *top,
			   *ptr;
	BlockNumber blkno;

	top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
	top->blkno = GIST_ROOT_BLKNO;
	top->downlinkoffnum = InvalidOffsetNumber;

	fifo = list_make1(top);
	while (fifo != NIL)
	{
		/* Get next page to visit */
		top = linitial(fifo);
		fifo = list_delete_first(fifo);

		buffer = ReadBuffer(r, top->blkno);
		LockBuffer(buffer, GIST_SHARE);
		gistcheckpage(r, buffer);
		page = (Page) BufferGetPage(buffer);

		if (GistPageIsLeaf(page))
		{
			/*
			 * Because we scan the index top-down, all the rest of the pages
			 * in the queue must be leaf pages as well.
			 */
			UnlockReleaseBuffer(buffer);
			break;
		}

		top->lsn = PageGetLSN(page);

		/*
		 * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
		 * downlink. This should not normally happen..
		 */
		if (GistFollowRight(page))
			elog(ERROR, "concurrent GiST page split was incomplete");

		if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
			GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
		{
			/*
			 * Page was split while we looked elsewhere. We didn't see the
			 * downlink to the right page when we scanned the parent, so add
			 * it to the queue now.
			 *
			 * Put the right page ahead of the queue, so that we visit it
			 * next. That's important, because if this is the lowest internal
			 * level, just above leaves, we might already have queued up some
			 * leaf pages, and we assume that there can't be any non-leaf
			 * pages behind leaf pages.
			 */
			ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
			ptr->blkno = GistPageGetOpaque(page)->rightlink;
			ptr->downlinkoffnum = InvalidOffsetNumber;
			ptr->parent = top->parent;

			fifo = lcons(ptr, fifo);
		}

		maxoff = PageGetMaxOffsetNumber(page);

		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			iid = PageGetItemId(page, i);
			idxtuple = (IndexTuple) PageGetItem(page, iid);
			blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
			if (blkno == child)
			{
				/* Found it! */
				UnlockReleaseBuffer(buffer);
				*downlinkoffnum = i;
				return top;
			}
			else
			{
				/* Append this child to the list of pages to visit later */
				ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
				ptr->blkno = blkno;
				ptr->downlinkoffnum = i;
				ptr->parent = top;

				fifo = lappend(fifo, ptr);
			}
		}

		UnlockReleaseBuffer(buffer);
	}

	elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
		 RelationGetRelationName(r), child);
	return NULL;				/* keep compiler quiet */
}
示例#6
0
文件: gist.c 项目: AlexHill/postgres
/*
 * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
 * at that offset is atomically removed along with inserting the new tuples.
 * This is used to replace a tuple with a new one.
 *
 * If 'leftchildbuf' is valid, we're inserting the downlink for the page
 * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
 * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
 *
 * If 'markfollowright' is true and the page is split, the left child is
 * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
 * index build, however, there is no concurrent access and the page splitting
 * is done in a slightly simpler fashion, and false is passed.
 *
 * If there is not enough room on the page, it is split. All the split
 * pages are kept pinned and locked and returned in *splitinfo, the caller
 * is responsible for inserting the downlinks for them. However, if
 * 'buffer' is the root page and it needs to be split, gistplacetopage()
 * performs the split as one atomic operation, and *splitinfo is set to NIL.
 * In that case, we continue to hold the root page locked, and the child
 * pages are released; note that new tuple(s) are *not* on the root page
 * but in one of the new child pages.
 *
 * If 'newblkno' is not NULL, returns the block number of page the first
 * new/updated tuple was inserted to. Usually it's the given page, but could
 * be its right sibling if the page was split.
 *
 * Returns 'true' if the page was split, 'false' otherwise.
 */
bool
gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
				Buffer buffer,
				IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
				BlockNumber *newblkno,
				Buffer leftchildbuf,
				List **splitinfo,
				bool markfollowright)
{
	BlockNumber blkno = BufferGetBlockNumber(buffer);
	Page		page = BufferGetPage(buffer);
	bool		is_leaf = (GistPageIsLeaf(page)) ? true : false;
	XLogRecPtr	recptr;
	int			i;
	bool		is_split;

	/*
	 * Refuse to modify a page that's incompletely split. This should not
	 * happen because we finish any incomplete splits while we walk down the
	 * tree. However, it's remotely possible that another concurrent inserter
	 * splits a parent page, and errors out before completing the split. We
	 * will just throw an error in that case, and leave any split we had in
	 * progress unfinished too. The next insert that comes along will clean up
	 * the mess.
	 */
	if (GistFollowRight(page))
		elog(ERROR, "concurrent GiST page split was incomplete");

	*splitinfo = NIL;

	/*
	 * if isupdate, remove old key: This node's key has been modified, either
	 * because a child split occurred or because we needed to adjust our key
	 * for an insert in a child node. Therefore, remove the old version of
	 * this node's key.
	 *
	 * for WAL replay, in the non-split case we handle this by setting up a
	 * one-element todelete array; in the split case, it's handled implicitly
	 * because the tuple vector passed to gistSplit won't include this tuple.
	 */
	is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
	if (is_split)
	{
		/* no space for insertion */
		IndexTuple *itvec;
		int			tlen;
		SplitedPageLayout *dist = NULL,
				   *ptr;
		BlockNumber oldrlink = InvalidBlockNumber;
		GistNSN		oldnsn = 0;
		SplitedPageLayout rootpg;
		bool		is_rootsplit;

		is_rootsplit = (blkno == GIST_ROOT_BLKNO);

		/*
		 * Form index tuples vector to split. If we're replacing an old tuple,
		 * remove the old version from the vector.
		 */
		itvec = gistextractpage(page, &tlen);
		if (OffsetNumberIsValid(oldoffnum))
		{
			/* on inner page we should remove old tuple */
			int			pos = oldoffnum - FirstOffsetNumber;

			tlen--;
			if (pos != tlen)
				memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
		}
		itvec = gistjoinvector(itvec, &tlen, itup, ntup);
		dist = gistSplit(rel, page, itvec, tlen, giststate);

		/*
		 * Set up pages to work with. Allocate new buffers for all but the
		 * leftmost page. The original page becomes the new leftmost page, and
		 * is just replaced with the new contents.
		 *
		 * For a root-split, allocate new buffers for all child pages, the
		 * original page is overwritten with new root page containing
		 * downlinks to the new child pages.
		 */
		ptr = dist;
		if (!is_rootsplit)
		{
			/* save old rightlink and NSN */
			oldrlink = GistPageGetOpaque(page)->rightlink;
			oldnsn = GistPageGetNSN(page);

			dist->buffer = buffer;
			dist->block.blkno = BufferGetBlockNumber(buffer);
			dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));

			/* clean all flags except F_LEAF */
			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;

			ptr = ptr->next;
		}
		for (; ptr; ptr = ptr->next)
		{
			/* Allocate new page */
			ptr->buffer = gistNewBuffer(rel);
			GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
			ptr->page = BufferGetPage(ptr->buffer);
			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
		}

		/*
		 * Now that we know which blocks the new pages go to, set up downlink
		 * tuples to point to them.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
			GistTupleSetValid(ptr->itup);
		}

		/*
		 * If this is a root split, we construct the new root page with the
		 * downlinks here directly, instead of requiring the caller to insert
		 * them. Add the new root page to the list along with the child pages.
		 */
		if (is_rootsplit)
		{
			IndexTuple *downlinks;
			int			ndownlinks = 0;
			int			i;

			rootpg.buffer = buffer;
			rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
			GistPageGetOpaque(rootpg.page)->flags = 0;

			/* Prepare a vector of all the downlinks */
			for (ptr = dist; ptr; ptr = ptr->next)
				ndownlinks++;
			downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
			for (i = 0, ptr = dist; ptr; ptr = ptr->next)
				downlinks[i++] = ptr->itup;

			rootpg.block.blkno = GIST_ROOT_BLKNO;
			rootpg.block.num = ndownlinks;
			rootpg.list = gistfillitupvec(downlinks, ndownlinks,
										  &(rootpg.lenlist));
			rootpg.itup = NULL;

			rootpg.next = dist;
			dist = &rootpg;
		}
		else
		{
			/* Prepare split-info to be returned to caller */
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));

				si->buf = ptr->buffer;
				si->downlink = ptr->itup;
				*splitinfo = lappend(*splitinfo, si);
			}
		}

		/*
		 * Fill all pages. All the pages are new, ie. freshly allocated empty
		 * pages, or a temporary copy of the old page.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			char	   *data = (char *) (ptr->list);

			for (i = 0; i < ptr->block.num; i++)
			{
				IndexTuple	thistup = (IndexTuple) data;

				if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
					elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));

				/*
				 * If this is the first inserted/updated tuple, let the caller
				 * know which page it landed on.
				 */
				if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
					*newblkno = ptr->block.blkno;

				data += IndexTupleSize(thistup);
			}

			/* Set up rightlinks */
			if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
				GistPageGetOpaque(ptr->page)->rightlink =
					ptr->next->block.blkno;
			else
				GistPageGetOpaque(ptr->page)->rightlink = oldrlink;

			/*
			 * Mark the all but the right-most page with the follow-right
			 * flag. It will be cleared as soon as the downlink is inserted
			 * into the parent, but this ensures that if we error out before
			 * that, the index is still consistent. (in buffering build mode,
			 * any error will abort the index build anyway, so this is not
			 * needed.)
			 */
			if (ptr->next && !is_rootsplit && markfollowright)
				GistMarkFollowRight(ptr->page);
			else
				GistClearFollowRight(ptr->page);

			/*
			 * Copy the NSN of the original page to all pages. The
			 * F_FOLLOW_RIGHT flags ensure that scans will follow the
			 * rightlinks until the downlinks are inserted.
			 */
			GistPageSetNSN(ptr->page, oldnsn);
		}

		START_CRIT_SECTION();

		/*
		 * Must mark buffers dirty before XLogInsert, even though we'll still
		 * be changing their opaque fields below.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
			MarkBufferDirty(ptr->buffer);
		if (BufferIsValid(leftchildbuf))
			MarkBufferDirty(leftchildbuf);

		/*
		 * The first page in the chain was a temporary working copy meant to
		 * replace the old page. Copy it over the old page.
		 */
		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
		dist->page = BufferGetPage(dist->buffer);

		/* Write the WAL record */
		if (RelationNeedsWAL(rel))
			recptr = gistXLogSplit(rel->rd_node, blkno, is_leaf,
								   dist, oldrlink, oldnsn, leftchildbuf,
								   markfollowright);
		else
			recptr = gistGetFakeLSN(rel);

		for (ptr = dist; ptr; ptr = ptr->next)
		{
			PageSetLSN(ptr->page, recptr);
		}

		/*
		 * Return the new child buffers to the caller.
		 *
		 * If this was a root split, we've already inserted the downlink
		 * pointers, in the form of a new root page. Therefore we can release
		 * all the new buffers, and keep just the root page locked.
		 */
		if (is_rootsplit)
		{
			for (ptr = dist->next; ptr; ptr = ptr->next)
				UnlockReleaseBuffer(ptr->buffer);
		}
	}
	else
	{
		/*
		 * Enough space. We also get here if ntuples==0.
		 */
		START_CRIT_SECTION();

		if (OffsetNumberIsValid(oldoffnum))
			PageIndexTupleDelete(page, oldoffnum);
		gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);

		MarkBufferDirty(buffer);

		if (BufferIsValid(leftchildbuf))
			MarkBufferDirty(leftchildbuf);

		if (RelationNeedsWAL(rel))
		{
			OffsetNumber ndeloffs = 0,
						deloffs[1];

			if (OffsetNumberIsValid(oldoffnum))
			{
				deloffs[0] = oldoffnum;
				ndeloffs = 1;
			}

			recptr = gistXLogUpdate(rel->rd_node, buffer,
									deloffs, ndeloffs, itup, ntup,
									leftchildbuf);

			PageSetLSN(page, recptr);
		}
		else
		{
			recptr = gistGetFakeLSN(rel);
			PageSetLSN(page, recptr);
		}

		if (newblkno)
			*newblkno = blkno;
	}

	/*
	 * If we inserted the downlink for a child page, set NSN and clear
	 * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
	 * follow the rightlink if and only if they looked at the parent page
	 * before we inserted the downlink.
	 *
	 * Note that we do this *after* writing the WAL record. That means that
	 * the possible full page image in the WAL record does not include these
	 * changes, and they must be replayed even if the page is restored from
	 * the full page image. There's a chicken-and-egg problem: if we updated
	 * the child pages first, we wouldn't know the recptr of the WAL record
	 * we're about to write.
	 */
	if (BufferIsValid(leftchildbuf))
	{
		Page		leftpg = BufferGetPage(leftchildbuf);

		GistPageSetNSN(leftpg, recptr);
		GistClearFollowRight(leftpg);

		PageSetLSN(leftpg, recptr);
	}

	END_CRIT_SECTION();

	return is_split;
}
示例#7
0
static ArrayTuple
vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon, int curlenaddon)
{
	ArrayTuple	res = {NULL, 0, false};
	IndexTuple *vec;
	SplitedPageLayout *dist = NULL,
			   *ptr;
	int			i,
				veclen = 0;
	BlockNumber blkno = BufferGetBlockNumber(buffer);
	MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);

	vec = gistextractpage(tempPage, &veclen);
	vec = gistjoinvector(vec, &veclen, addon, curlenaddon);
	dist = gistSplit(gv->index, tempPage, vec, veclen, &(gv->giststate));

	MemoryContextSwitchTo(oldCtx);

	if (blkno != GIST_ROOT_BLKNO)
	{
		/* if non-root split then we should not allocate new buffer */
		dist->buffer = buffer;
		dist->page = tempPage;
		/* during vacuum we never split leaf page */
		GistPageGetOpaque(dist->page)->flags = 0;
	}
	else
		pfree(tempPage);

	res.itup = (IndexTuple *) palloc(sizeof(IndexTuple) * veclen);
	res.ituplen = 0;

	/* make new pages and fills them */
	for (ptr = dist; ptr; ptr = ptr->next)
	{
		char	   *data;

		if (ptr->buffer == InvalidBuffer)
		{
			ptr->buffer = gistNewBuffer(gv->index);
			GISTInitBuffer(ptr->buffer, 0);
			ptr->page = BufferGetPage(ptr->buffer);
		}
		ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);

		data = (char *) (ptr->list);
		for (i = 0; i < ptr->block.num; i++)
		{
			if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
				elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gv->index));
			data += IndexTupleSize((IndexTuple) data);
		}

		ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
		res.itup[res.ituplen] = (IndexTuple) palloc(IndexTupleSize(ptr->itup));
		memcpy(res.itup[res.ituplen], ptr->itup, IndexTupleSize(ptr->itup));
		res.ituplen++;
	}

	START_CRIT_SECTION();

	for (ptr = dist; ptr; ptr = ptr->next)
	{
		MarkBufferDirty(ptr->buffer);
		GistPageGetOpaque(ptr->page)->rightlink = InvalidBlockNumber;
	}

	/* restore splitted non-root page */
	if (blkno != GIST_ROOT_BLKNO)
	{
		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
		dist->page = BufferGetPage(dist->buffer);
	}

	if (!gv->index->rd_istemp)
	{
		XLogRecPtr	recptr;
		XLogRecData *rdata;
		ItemPointerData key;	/* set key for incomplete insert */
		char	   *xlinfo;

		ItemPointerSet(&key, blkno, TUPLE_IS_VALID);

		rdata = formSplitRdata(gv->index->rd_node, blkno,
							   false, &key, dist);
		xlinfo = rdata->data;

		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			PageSetLSN(BufferGetPage(ptr->buffer), recptr);
			PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
		}

		pfree(xlinfo);
		pfree(rdata);
	}
	else
	{
		for (ptr = dist; ptr; ptr = ptr->next)
			PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
	}

	for (ptr = dist; ptr; ptr = ptr->next)
	{
		/* we must keep the buffer pin on the head page */
		if (BufferGetBlockNumber(ptr->buffer) != blkno)
			UnlockReleaseBuffer(ptr->buffer);
	}

	if (blkno == GIST_ROOT_BLKNO)
	{
		ItemPointerData key;	/* set key for incomplete insert */

		ItemPointerSet(&key, blkno, TUPLE_IS_VALID);

		gistnewroot(gv->index, buffer, res.itup, res.ituplen, &key);
	}

	END_CRIT_SECTION();

	MemoryContextReset(gv->opCtx);

	return res;
}
示例#8
0
文件: gistget.c 项目: Brar/postgres
/*
 * Scan all items on the GiST index page identified by *pageItem, and insert
 * them into the queue (or directly to output areas)
 *
 * scan: index scan we are executing
 * pageItem: search queue item identifying an index page to scan
 * myDistances: distances array associated with pageItem, or NULL at the root
 * tbm: if not NULL, gistgetbitmap's output bitmap
 * ntids: if not NULL, gistgetbitmap's output tuple counter
 *
 * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
 * tuples should be reported directly into the bitmap.  If they are NULL,
 * we're doing a plain or ordered indexscan.  For a plain indexscan, heap
 * tuple TIDs are returned into so->pageData[].  For an ordered indexscan,
 * heap tuple TIDs are pushed into individual search queue items.  In an
 * index-only scan, reconstructed index tuples are returned along with the
 * TIDs.
 *
 * If we detect that the index page has split since we saw its downlink
 * in the parent, we push its new right sibling onto the queue so the
 * sibling will be processed next.
 */
static void
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
			 TIDBitmap *tbm, int64 *ntids)
{
	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
	GISTSTATE  *giststate = so->giststate;
	Relation	r = scan->indexRelation;
	Buffer		buffer;
	Page		page;
	GISTPageOpaque opaque;
	OffsetNumber maxoff;
	OffsetNumber i;
	MemoryContext oldcxt;

	Assert(!GISTSearchItemIsHeap(*pageItem));

	buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
	LockBuffer(buffer, GIST_SHARE);
	PredicateLockPage(r, BufferGetBlockNumber(buffer), scan->xs_snapshot);
	gistcheckpage(scan->indexRelation, buffer);
	page = BufferGetPage(buffer);
	TestForOldSnapshot(scan->xs_snapshot, r, page);
	opaque = GistPageGetOpaque(page);

	/*
	 * Check if we need to follow the rightlink. We need to follow it if the
	 * page was concurrently split since we visited the parent (in which case
	 * parentlsn < nsn), or if the system crashed after a page split but
	 * before the downlink was inserted into the parent.
	 */
	if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) &&
		(GistFollowRight(page) ||
		 pageItem->data.parentlsn < GistPageGetNSN(page)) &&
		opaque->rightlink != InvalidBlockNumber /* sanity check */ )
	{
		/* There was a page split, follow right link to add pages */
		GISTSearchItem *item;

		/* This can't happen when starting at the root */
		Assert(myDistances != NULL);

		oldcxt = MemoryContextSwitchTo(so->queueCxt);

		/* Create new GISTSearchItem for the right sibling index page */
		item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));
		item->blkno = opaque->rightlink;
		item->data.parentlsn = pageItem->data.parentlsn;

		/* Insert it into the queue using same distances as for this page */
		memcpy(item->distances, myDistances,
			   sizeof(double) * scan->numberOfOrderBys);

		pairingheap_add(so->queue, &item->phNode);

		MemoryContextSwitchTo(oldcxt);
	}

	so->nPageData = so->curPageData = 0;
	scan->xs_hitup = NULL;		/* might point into pageDataCxt */
	if (so->pageDataCxt)
		MemoryContextReset(so->pageDataCxt);

	/*
	 * We save the LSN of the page as we read it, so that we know whether it
	 * safe to apply LP_DEAD hints to the page later. This allows us to drop
	 * the pin for MVCC scans, which allows vacuum to avoid blocking.
	 */
	so->curPageLSN = BufferGetLSNAtomic(buffer);

	/*
	 * check all tuples on page
	 */
	maxoff = PageGetMaxOffsetNumber(page);
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		ItemId		iid = PageGetItemId(page, i);
		IndexTuple	it;
		bool		match;
		bool		recheck;
		bool		recheck_distances;

		/*
		 * If the scan specifies not to return killed tuples, then we treat a
		 * killed tuple as not passing the qual.
		 */
		if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
			continue;

		it = (IndexTuple) PageGetItem(page, iid);

		/*
		 * Must call gistindex_keytest in tempCxt, and clean up any leftover
		 * junk afterward.
		 */
		oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt);

		match = gistindex_keytest(scan, it, page, i,
								  &recheck, &recheck_distances);

		MemoryContextSwitchTo(oldcxt);
		MemoryContextReset(so->giststate->tempCxt);

		/* Ignore tuple if it doesn't match */
		if (!match)
			continue;

		if (tbm && GistPageIsLeaf(page))
		{
			/*
			 * getbitmap scan, so just push heap tuple TIDs into the bitmap
			 * without worrying about ordering
			 */
			tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
			(*ntids)++;
		}
		else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
		{
			/*
			 * Non-ordered scan, so report tuples in so->pageData[]
			 */
			so->pageData[so->nPageData].heapPtr = it->t_tid;
			so->pageData[so->nPageData].recheck = recheck;
			so->pageData[so->nPageData].offnum = i;

			/*
			 * In an index-only scan, also fetch the data from the tuple.  The
			 * reconstructed tuples are stored in pageDataCxt.
			 */
			if (scan->xs_want_itup)
			{
				oldcxt = MemoryContextSwitchTo(so->pageDataCxt);
				so->pageData[so->nPageData].recontup =
					gistFetchTuple(giststate, r, it);
				MemoryContextSwitchTo(oldcxt);
			}
			so->nPageData++;
		}
		else
		{
			/*
			 * Must push item into search queue.  We get here for any lower
			 * index page, and also for heap tuples if doing an ordered
			 * search.
			 */
			GISTSearchItem *item;

			oldcxt = MemoryContextSwitchTo(so->queueCxt);

			/* Create new GISTSearchItem for this item */
			item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));

			if (GistPageIsLeaf(page))
			{
				/* Creating heap-tuple GISTSearchItem */
				item->blkno = InvalidBlockNumber;
				item->data.heap.heapPtr = it->t_tid;
				item->data.heap.recheck = recheck;
				item->data.heap.recheckDistances = recheck_distances;

				/*
				 * In an index-only scan, also fetch the data from the tuple.
				 */
				if (scan->xs_want_itup)
					item->data.heap.recontup = gistFetchTuple(giststate, r, it);
			}
			else
			{
				/* Creating index-page GISTSearchItem */
				item->blkno = ItemPointerGetBlockNumber(&it->t_tid);

				/*
				 * LSN of current page is lsn of parent page for child. We
				 * only have a shared lock, so we need to get the LSN
				 * atomically.
				 */
				item->data.parentlsn = BufferGetLSNAtomic(buffer);
			}

			/* Insert it into the queue using new distance data */
			memcpy(item->distances, so->distances,
				   sizeof(double) * scan->numberOfOrderBys);

			pairingheap_add(so->queue, &item->phNode);

			MemoryContextSwitchTo(oldcxt);
		}
	}

	UnlockReleaseBuffer(buffer);
}
示例#9
0
文件: gistget.c 项目: adam8157/gpdb
/*
 * Scan all items on the GiST index page identified by *pageItem, and insert
 * them into the queue (or directly to output areas)
 *
 * scan: index scan we are executing
 * pageItem: search queue item identifying an index page to scan
 * myDistances: distances array associated with pageItem, or NULL at the root
 * tbm: if not NULL, gistgetbitmap's output bitmap
 * ntids: if not NULL, gistgetbitmap's output tuple counter
 *
 * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
 * tuples should be reported directly into the bitmap.  If they are NULL,
 * we're doing a plain or ordered indexscan.  For a plain indexscan, heap
 * tuple TIDs are returned into so->pageData[].  For an ordered indexscan,
 * heap tuple TIDs are pushed into individual search queue items.
 *
 * If we detect that the index page has split since we saw its downlink
 * in the parent, we push its new right sibling onto the queue so the
 * sibling will be processed next.
 */
static void
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
			 TIDBitmap *tbm, int64 *ntids)
{
	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
	Buffer		buffer;
	Page		page;
	GISTPageOpaque opaque;
	OffsetNumber maxoff;
	OffsetNumber i;
	GISTSearchTreeItem *tmpItem = so->tmpTreeItem;
	bool		isNew;
	MemoryContext oldcxt;

	Assert(!GISTSearchItemIsHeap(*pageItem));

	buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
	LockBuffer(buffer, GIST_SHARE);
	gistcheckpage(scan->indexRelation, buffer);
	page = BufferGetPage(buffer);
	opaque = GistPageGetOpaque(page);

	/*
	 * Check if we need to follow the rightlink. We need to follow it if the
	 * page was concurrently split since we visited the parent (in which case
	 * parentlsn < nsn), or if the system crashed after a page split but
	 * before the downlink was inserted into the parent.
	 */
	if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) &&
		(GistFollowRight(page) ||
		 pageItem->data.parentlsn < GistPageGetNSN(page)) &&
		opaque->rightlink != InvalidBlockNumber /* sanity check */ )
	{
		/* There was a page split, follow right link to add pages */
		GISTSearchItem *item;

		/* This can't happen when starting at the root */
		Assert(myDistances != NULL);

		oldcxt = MemoryContextSwitchTo(so->queueCxt);

		/* Create new GISTSearchItem for the right sibling index page */
		item = palloc(sizeof(GISTSearchItem));
		item->next = NULL;
		item->blkno = opaque->rightlink;
		item->data.parentlsn = pageItem->data.parentlsn;

		/* Insert it into the queue using same distances as for this page */
		tmpItem->head = item;
		tmpItem->lastHeap = NULL;
		memcpy(tmpItem->distances, myDistances,
			   sizeof(double) * scan->numberOfOrderBys);

		(void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);

		MemoryContextSwitchTo(oldcxt);
	}

	so->nPageData = so->curPageData = 0;

	/*
	 * check all tuples on page
	 */
	maxoff = PageGetMaxOffsetNumber(page);
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		IndexTuple	it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
		bool		match;
		bool		recheck;

		/*
		 * Must call gistindex_keytest in tempCxt, and clean up any leftover
		 * junk afterward.
		 */
		oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt);

		match = gistindex_keytest(scan, it, page, i, &recheck);

		MemoryContextSwitchTo(oldcxt);
		MemoryContextReset(so->giststate->tempCxt);

		/* Ignore tuple if it doesn't match */
		if (!match)
			continue;

		if (tbm && GistPageIsLeaf(page))
		{
			/*
			 * getbitmap scan, so just push heap tuple TIDs into the bitmap
			 * without worrying about ordering
			 */
			tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
			(*ntids)++;
		}
		else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
		{
			/*
			 * Non-ordered scan, so report heap tuples in so->pageData[]
			 */
			so->pageData[so->nPageData].heapPtr = it->t_tid;
			so->pageData[so->nPageData].recheck = recheck;
			so->nPageData++;
		}
		else
		{
			/*
			 * Must push item into search queue.  We get here for any lower
			 * index page, and also for heap tuples if doing an ordered
			 * search.
			 */
			GISTSearchItem *item;

			oldcxt = MemoryContextSwitchTo(so->queueCxt);

			/* Create new GISTSearchItem for this item */
			item = palloc(sizeof(GISTSearchItem));
			item->next = NULL;

			if (GistPageIsLeaf(page))
			{
				/* Creating heap-tuple GISTSearchItem */
				item->blkno = InvalidBlockNumber;
				item->data.heap.heapPtr = it->t_tid;
				item->data.heap.recheck = recheck;
			}
			else
			{
				/* Creating index-page GISTSearchItem */
				item->blkno = ItemPointerGetBlockNumber(&it->t_tid);

				/*
				 * LSN of current page is lsn of parent page for child. We
				 * only have a shared lock, so we need to get the LSN
				 * atomically.
				 */
				item->data.parentlsn = BufferGetLSNAtomic(buffer);
			}

			/* Insert it into the queue using new distance data */
			tmpItem->head = item;
			tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? item : NULL;
			memcpy(tmpItem->distances, so->distances,
				   sizeof(double) * scan->numberOfOrderBys);

			(void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);

			MemoryContextSwitchTo(oldcxt);
		}
	}

	UnlockReleaseBuffer(buffer);
}
示例#10
0
static bool
gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
{
	bool		is_splitted = false;
	bool		is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	/*
	 * if (!is_leaf) remove old key: This node's key has been modified, either
	 * because a child split occurred or because we needed to adjust our key
	 * for an insert in a child node. Therefore, remove the old version of
	 * this node's key.
	 *
	 * for WAL replay, in the non-split case we handle this by setting up a
	 * one-element todelete array; in the split case, it's handled implicitly
	 * because the tuple vector passed to gistSplit won't include this tuple.
	 *
	 * XXX: If we want to change fillfactors between node and leaf, fillfactor
	 * = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
	 */
	if (gistnospace(state->stack->page, state->itup, state->ituplen,
					is_leaf ? InvalidOffsetNumber : state->stack->childoffnum,
					state->freespace))
	{
		/* no space for insertion */
		IndexTuple *itvec;
		int			tlen;
		SplitedPageLayout *dist = NULL,
				   *ptr;
		BlockNumber rrlink = InvalidBlockNumber;
		GistNSN		oldnsn;

		is_splitted = true;

		/*
		 * Form index tuples vector to split: remove old tuple if t's needed
		 * and add new tuples to vector
		 */
		itvec = gistextractpage(state->stack->page, &tlen);
		if (!is_leaf)
		{
			/* on inner page we should remove old tuple */
			int			pos = state->stack->childoffnum - FirstOffsetNumber;

			tlen--;
			if (pos != tlen)
				memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
		}
		itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
		dist = gistSplit(state->r, state->stack->page, itvec, tlen, giststate);

		state->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * tlen);
		state->ituplen = 0;

		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			/*
			 * if non-root split then we should not allocate new buffer, but
			 * we must create temporary page to operate
			 */
			dist->buffer = state->stack->buffer;
			dist->page = PageGetTempPage(BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData));

			/* clean all flags except F_LEAF */
			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
		}

		/* make new pages and fills them */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			int			i;
			char	   *data;

			/* get new page */
			if (ptr->buffer == InvalidBuffer)
			{
				ptr->buffer = gistNewBuffer(state->r);
				GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
				ptr->page = BufferGetPage(ptr->buffer);
			}
			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);

			/*
			 * fill page, we can do it because all these pages are new
			 * (ie not linked in tree or masked by temp page
			 */
			data = (char *) (ptr->list);
			for (i = 0; i < ptr->block.num; i++)
			{
				if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
					elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->r));
				data += IndexTupleSize((IndexTuple) data);
			}

			/* set up ItemPointer and remember it for parent */
			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
			state->itup[state->ituplen] = ptr->itup;
			state->ituplen++;
		}

		/* saves old rightlink */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
			rrlink = GistPageGetOpaque(dist->page)->rightlink;

		START_CRIT_SECTION();

		/*
		 * must mark buffers dirty before XLogInsert, even though we'll still
		 * be changing their opaque fields below. set up right links.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			MarkBufferDirty(ptr->buffer);
			GistPageGetOpaque(ptr->page)->rightlink = (ptr->next) ?
				ptr->next->block.blkno : rrlink;
		}

		/* restore splitted non-root page */
		if (state->stack->blkno != GIST_ROOT_BLKNO)
		{
			PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
			dist->page = BufferGetPage(dist->buffer);
		}

		if (!state->r->rd_istemp)
		{
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			rdata = formSplitRdata(state->r, state->stack->blkno,
								   is_leaf, &(state->key), dist);

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);

			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, recptr);
				PageSetTLI(ptr->page, ThisTimeLineID);
			}
		}
		else
		{
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				PageSetLSN(ptr->page, XLogRecPtrForTemp);
			}
		}

		/* set up NSN */
		oldnsn = GistPageGetOpaque(dist->page)->nsn;
		if (state->stack->blkno == GIST_ROOT_BLKNO)
			/* if root split we should put initial value */
			oldnsn = PageGetLSN(dist->page);

		for (ptr = dist; ptr; ptr = ptr->next)
		{
			/* only for last set oldnsn */
			GistPageGetOpaque(ptr->page)->nsn = (ptr->next) ?
				PageGetLSN(ptr->page) : oldnsn;
		}

		/*
		 * release buffers, if it was a root split then release all buffers
		 * because we create all buffers
		 */
		ptr = (state->stack->blkno == GIST_ROOT_BLKNO) ? dist : dist->next;
		for (; ptr; ptr = ptr->next)
			UnlockReleaseBuffer(ptr->buffer);

		if (state->stack->blkno == GIST_ROOT_BLKNO)
		{
			gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
			state->needInsertComplete = false;
		}

		END_CRIT_SECTION();
	}
	else
	{
		/* enough space */
		START_CRIT_SECTION();

		if (!is_leaf)
			PageIndexTupleDelete(state->stack->page, state->stack->childoffnum);
		gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber);

		MarkBufferDirty(state->stack->buffer);

		if (!state->r->rd_istemp)
		{
			OffsetNumber noffs = 0,
						offs[1];
			XLogRecPtr	recptr;
			XLogRecData *rdata;

			if (!is_leaf)
			{
				/* only on inner page we should delete previous version */
				offs[0] = state->stack->childoffnum;
				noffs = 1;
			}

			rdata = formUpdateRdata(state->r, state->stack->buffer,
									offs, noffs,
									state->itup, state->ituplen,
									&(state->key));

			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
			PageSetLSN(state->stack->page, recptr);
			PageSetTLI(state->stack->page, ThisTimeLineID);
		}
		else
			PageSetLSN(state->stack->page, XLogRecPtrForTemp);

		if (state->stack->blkno == GIST_ROOT_BLKNO)
			state->needInsertComplete = false;

		END_CRIT_SECTION();

		if (state->ituplen > 1)
		{						/* previous is_splitted==true */

			/*
			 * child was splited, so we must form union for insertion in
			 * parent
			 */
			IndexTuple	newtup = gistunion(state->r, state->itup, state->ituplen, giststate);

			ItemPointerSetBlockNumber(&(newtup->t_tid), state->stack->blkno);
			state->itup[0] = newtup;
			state->ituplen = 1;
		}
		else if (is_leaf)
		{
			/*
			 * itup[0] store key to adjust parent, we set it to valid to
			 * correct check by GistTupleIsInvalid macro in gistgetadjusted()
			 */
			ItemPointerSetBlockNumber(&(state->itup[0]->t_tid), state->stack->blkno);
			GistTupleSetValid(state->itup[0]);
		}
	}
	return is_splitted;
}
示例#11
0
文件: gistxlog.c 项目: GisKook/Gis
/*
 * redo any page update (except page split)
 */
static void
gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *begin = XLogRecGetData(record);
	gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin;
	Buffer		buffer;
	Page		page;
	char	   *data;

	if (BlockNumberIsValid(xldata->leftchild))
		gistRedoClearFollowRight(xldata->node, lsn, xldata->leftchild);

	/* nothing more to do if page was backed up (and no info to do it with) */
	if (record->xl_info & XLR_BKP_BLOCK_1)
		return;

	buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);
	if (!BufferIsValid(buffer))
		return;
	page = (Page) BufferGetPage(buffer);

	if (XLByteLE(lsn, PageGetLSN(page)))
	{
		UnlockReleaseBuffer(buffer);
		return;
	}

	data = begin + sizeof(gistxlogPageUpdate);

	/* Delete old tuples */
	if (xldata->ntodelete > 0)
	{
		int			i;
		OffsetNumber *todelete = (OffsetNumber *) data;

		data += sizeof(OffsetNumber) * xldata->ntodelete;

		for (i = 0; i < xldata->ntodelete; i++)
			PageIndexTupleDelete(page, todelete[i]);
		if (GistPageIsLeaf(page))
			GistMarkTuplesDeleted(page);
	}

	/* add tuples */
	if (data - begin < record->xl_len)
	{
		OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
		OffsetNumberNext(PageGetMaxOffsetNumber(page));

		while (data - begin < record->xl_len)
		{
			IndexTuple	itup = (IndexTuple) data;
			Size		sz = IndexTupleSize(itup);
			OffsetNumber l;

			data += sz;

			l = PageAddItem(page, (Item) itup, sz, off, false, false);
			if (l == InvalidOffsetNumber)
				elog(ERROR, "failed to add item to GiST index page, size %d bytes",
					 (int) sz);
			off++;
		}
	}
	else
	{
		/*
		 * special case: leafpage, nothing to insert, nothing to delete, then
		 * vacuum marks page
		 */
		if (GistPageIsLeaf(page) && xldata->ntodelete == 0)
			GistClearTuplesDeleted(page);
	}

	if (!GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO)

		/*
		 * all links on non-leaf root page was deleted by vacuum full, so root
		 * page becomes a leaf
		 */
		GistPageSetLeaf(page);

	GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
示例#12
0
文件: gistxlog.c 项目: GisKook/Gis
static void
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
{
	gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
	PageSplitRecord xlrec;
	Buffer		buffer;
	Page		page;
	int			i;
	bool		isrootsplit = false;

	if (BlockNumberIsValid(xldata->leftchild))
		gistRedoClearFollowRight(xldata->node, lsn, xldata->leftchild);
	decodePageSplitRecord(&xlrec, record);

	/* loop around all pages */
	for (i = 0; i < xlrec.data->npage; i++)
	{
		NewPage    *newpage = xlrec.page + i;
		int			flags;

		if (newpage->header->blkno == GIST_ROOT_BLKNO)
		{
			Assert(i == 0);
			isrootsplit = true;
		}

		buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
		Assert(BufferIsValid(buffer));
		page = (Page) BufferGetPage(buffer);

		/* ok, clear buffer */
		if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO)
			flags = F_LEAF;
		else
			flags = 0;
		GISTInitBuffer(buffer, flags);

		/* and fill it */
		gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);

		if (newpage->header->blkno == GIST_ROOT_BLKNO)
		{
			GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
			GistPageGetOpaque(page)->nsn = xldata->orignsn;
			GistClearFollowRight(page);
		}
		else
		{
			if (i < xlrec.data->npage - 1)
				GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno;
			else
				GistPageGetOpaque(page)->rightlink = xldata->origrlink;
			GistPageGetOpaque(page)->nsn = xldata->orignsn;
			if (i < xlrec.data->npage - 1 && !isrootsplit)
				GistMarkFollowRight(page);
			else
				GistClearFollowRight(page);
		}

		PageSetLSN(page, lsn);
		PageSetTLI(page, ThisTimeLineID);
		MarkBufferDirty(buffer);
		UnlockReleaseBuffer(buffer);
	}
}
示例#13
0
文件: gist.c 项目: gurjeet/postgres
/*
 * Workhouse routine for doing insertion into a GiST index. Note that
 * this routine assumes it is invoked in a short-lived memory context,
 * so it does not bother releasing palloc'd allocations.
 */
static void
gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
{
	ItemId		iid;
	IndexTuple	idxtuple;
	GISTInsertStack firststack;
	GISTInsertStack *stack;
	GISTInsertState state;
	bool		xlocked = false;

	memset(&state, 0, sizeof(GISTInsertState));
	state.freespace = freespace;
	state.r = r;

	/* Start from the root */
	firststack.blkno = GIST_ROOT_BLKNO;
	firststack.lsn.xrecoff = 0;
	firststack.parent = NULL;
	state.stack = stack = &firststack;

	/*
	 * Walk down along the path of smallest penalty, updating the parent
	 * pointers with the key we're inserting as we go. If we crash in the
	 * middle, the tree is consistent, although the possible parent updates
	 * were a waste.
	 */
	for (;;)
	{
		if (XLogRecPtrIsInvalid(stack->lsn))
			stack->buffer = ReadBuffer(state.r, stack->blkno);

		/*
		 * Be optimistic and grab shared lock first. Swap it for an
		 * exclusive lock later if we need to update the page.
		 */
		if (!xlocked)
		{
			LockBuffer(stack->buffer, GIST_SHARE);
			gistcheckpage(state.r, stack->buffer);
		}

		stack->page = (Page) BufferGetPage(stack->buffer);
		stack->lsn = PageGetLSN(stack->page);
		Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));

		/*
		 * If this page was split but the downlink was never inserted to
		 * the parent because the inserting backend crashed before doing
		 * that, fix that now.
		 */
		if (GistFollowRight(stack->page))
		{
			if (!xlocked)
			{
				LockBuffer(stack->buffer, GIST_UNLOCK);
				LockBuffer(stack->buffer, GIST_EXCLUSIVE);
				xlocked = true;
				/* someone might've completed the split when we unlocked */
				if (!GistFollowRight(stack->page))
					continue;
			}
			gistfixsplit(&state, giststate);

			UnlockReleaseBuffer(stack->buffer);
			xlocked = false;
			state.stack = stack = stack->parent;
			continue;
		}

		if (stack->blkno != GIST_ROOT_BLKNO &&
			XLByteLT(stack->parent->lsn,
					 GistPageGetOpaque(stack->page)->nsn))
		{
			/*
			 * Concurrent split detected. There's no guarantee that the
			 * downlink for this page is consistent with the tuple we're
			 * inserting anymore, so go back to parent and rechoose the
			 * best child.
			 */
			UnlockReleaseBuffer(stack->buffer);
			xlocked = false;
			state.stack = stack = stack->parent;
			continue;
		}

		if (!GistPageIsLeaf(stack->page))
		{
			/*
			 * This is an internal page so continue to walk down the tree.
			 * Find the child node that has the minimum insertion penalty.
			 */
			BlockNumber childblkno;
			IndexTuple newtup;
			GISTInsertStack *item;

			stack->childoffnum = gistchoose(state.r, stack->page, itup, giststate);
			iid = PageGetItemId(stack->page, stack->childoffnum);
			idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
			childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

			/*
			 * Check that it's not a leftover invalid tuple from pre-9.1
			 */
			if (GistTupleIsInvalid(idxtuple))
				ereport(ERROR,
						(errmsg("index \"%s\" contains an inner tuple marked as invalid",
								RelationGetRelationName(r)),
						 errdetail("This is caused by an incomplete page split at crash recovery before upgrading to 9.1."),
						 errhint("Please REINDEX it.")));

			/*
			 * Check that the key representing the target child node is
			 * consistent with the key we're inserting. Update it if it's not.
			 */
			newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
			if (newtup)
			{
				/*
				 * Swap shared lock for an exclusive one. Beware, the page
				 * may change while we unlock/lock the page...
				 */
				if (!xlocked)
				{
					LockBuffer(stack->buffer, GIST_UNLOCK);
					LockBuffer(stack->buffer, GIST_EXCLUSIVE);
					xlocked = true;
					stack->page = (Page) BufferGetPage(stack->buffer);

					if (!XLByteEQ(PageGetLSN(stack->page), stack->lsn))
					{
						/* the page was changed while we unlocked it, retry */
						continue;
					}
				}
				/*
				 * Update the tuple.
				 *
				 * gistinserthere() might have to split the page to make the
				 * updated tuple fit. It will adjust the stack so that after
				 * the call, we'll be holding a lock on the page containing
				 * the tuple, which might have moved right.
				 *
				 * Except if this causes a root split, gistinserthere()
				 * returns 'true'. In that case, stack only holds the new
				 * root, and the child page was released. Have to start
				 * all over.
				 */
				if (gistinserttuples(&state, stack, giststate, &newtup, 1,
									 stack->childoffnum, InvalidBuffer))
				{
					UnlockReleaseBuffer(stack->buffer);
					xlocked = false;
					state.stack = stack = stack->parent;
					continue;
				}
			}
			LockBuffer(stack->buffer, GIST_UNLOCK);
			xlocked = false;

			/* descend to the chosen child */
			item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
			item->blkno = childblkno;
			item->parent = stack;
			state.stack = stack = item;
		}
		else
		{
			/*
			 * Leaf page. Insert the new key. We've already updated all the
			 * parents on the way down, but we might have to split the page
			 * if it doesn't fit. gistinserthere() will take care of that.
			 */

			/*
			 * Swap shared lock for an exclusive one. Be careful, the page
			 * may change while we unlock/lock the page...
			 */
			if (!xlocked)
			{
				LockBuffer(stack->buffer, GIST_UNLOCK);
				LockBuffer(stack->buffer, GIST_EXCLUSIVE);
				xlocked = true;
				stack->page = (Page) BufferGetPage(stack->buffer);
				stack->lsn = PageGetLSN(stack->page);

				if (stack->blkno == GIST_ROOT_BLKNO)
				{
					/*
					 * the only page that can become inner instead of leaf
					 * is the root page, so for root we should recheck it
					 */
					if (!GistPageIsLeaf(stack->page))
					{
						/*
						 * very rare situation: during unlock/lock index with
						 * number of pages = 1 was increased
						 */
						LockBuffer(stack->buffer, GIST_UNLOCK);
						xlocked = false;
						continue;
					}

					/*
					 * we don't need to check root split, because checking
					 * leaf/inner is enough to recognize split for root
					 */
				}
				else if (GistFollowRight(stack->page) ||
						 XLByteLT(stack->parent->lsn,
								  GistPageGetOpaque(stack->page)->nsn))
				{
					/*
					 * The page was split while we momentarily unlocked the
					 * page. Go back to parent.
					 */
					UnlockReleaseBuffer(stack->buffer);
					xlocked = false;
					state.stack = stack = stack->parent;
					continue;
				}
			}

			/* now state.stack->(page, buffer and blkno) points to leaf page */

			gistinserttuples(&state, stack, giststate, &itup, 1,
							 InvalidOffsetNumber, InvalidBuffer);
			LockBuffer(stack->buffer, GIST_UNLOCK);

			/* Release any pins we might still hold before exiting */
			for (; stack; stack = stack->parent)
				ReleaseBuffer(stack->buffer);
			break;
		}
	}
}
示例#14
0
/*
 * redo any page update (except page split)
 */
static void
gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *begin = XLogRecGetData(record);
	gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin;
	Buffer		buffer;
	Page		page;
	char	   *data;

	/*
	 * We need to acquire and hold lock on target page while updating the left
	 * child page.	If we have a full-page image of target page, getting the
	 * lock is a side-effect of restoring that image.  Note that even if the
	 * target page no longer exists, we'll still attempt to replay the change
	 * on the child page.
	 */
	if (record->xl_info & XLR_BKP_BLOCK(0))
		buffer = RestoreBackupBlock(lsn, record, 0, false, true);
	else
		buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);

	/* Fix follow-right data on left child page */
	if (BlockNumberIsValid(xldata->leftchild))
		gistRedoClearFollowRight(lsn, record, 1,
								 xldata->node, xldata->leftchild);

	/* Done if target page no longer exists */
	if (!BufferIsValid(buffer))
		return;

	/* nothing more to do if page was backed up (and no info to do it with) */
	if (record->xl_info & XLR_BKP_BLOCK(0))
	{
		UnlockReleaseBuffer(buffer);
		return;
	}

	page = (Page) BufferGetPage(buffer);

	/* nothing more to do if change already applied */
	if (lsn <= PageGetLSN(page))
	{
		UnlockReleaseBuffer(buffer);
		return;
	}

	data = begin + sizeof(gistxlogPageUpdate);

	/* Delete old tuples */
	if (xldata->ntodelete > 0)
	{
		int			i;
		OffsetNumber *todelete = (OffsetNumber *) data;

		data += sizeof(OffsetNumber) * xldata->ntodelete;

		for (i = 0; i < xldata->ntodelete; i++)
			PageIndexTupleDelete(page, todelete[i]);
		if (GistPageIsLeaf(page))
			GistMarkTuplesDeleted(page);
	}

	/* add tuples */
	if (data - begin < record->xl_len)
	{
		OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
		OffsetNumberNext(PageGetMaxOffsetNumber(page));

		while (data - begin < record->xl_len)
		{
			IndexTuple	itup = (IndexTuple) data;
			Size		sz = IndexTupleSize(itup);
			OffsetNumber l;

			data += sz;

			l = PageAddItem(page, (Item) itup, sz, off, false, false);
			if (l == InvalidOffsetNumber)
				elog(ERROR, "failed to add item to GiST index page, size %d bytes",
					 (int) sz);
			off++;
		}
	}
	else
	{
		/*
		 * special case: leafpage, nothing to insert, nothing to delete, then
		 * vacuum marks page
		 */
		if (GistPageIsLeaf(page) && xldata->ntodelete == 0)
			GistClearTuplesDeleted(page);
	}

	if (!GistPageIsLeaf(page) &&
		PageGetMaxOffsetNumber(page) == InvalidOffsetNumber &&
		xldata->blkno == GIST_ROOT_BLKNO)
	{
		/*
		 * all links on non-leaf root page was deleted by vacuum full, so root
		 * page becomes a leaf
		 */
		GistPageSetLeaf(page);
	}

	GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
	PageSetLSN(page, lsn);
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
示例#15
0
static void
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
{
	gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
	PageSplitRecord xlrec;
	Buffer		firstbuffer = InvalidBuffer;
	Buffer		buffer;
	Page		page;
	int			i;
	bool		isrootsplit = false;

	decodePageSplitRecord(&xlrec, record);

	/*
	 * We must hold lock on the first-listed page throughout the action,
	 * including while updating the left child page (if any).  We can unlock
	 * remaining pages in the list as soon as they've been written, because
	 * there is no path for concurrent queries to reach those pages without
	 * first visiting the first-listed page.
	 */

	/* loop around all pages */
	for (i = 0; i < xlrec.data->npage; i++)
	{
		NewPage    *newpage = xlrec.page + i;
		int			flags;

		if (newpage->header->blkno == GIST_ROOT_BLKNO)
		{
			Assert(i == 0);
			isrootsplit = true;
		}

		buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
		Assert(BufferIsValid(buffer));
		page = (Page) BufferGetPage(buffer);

		/* ok, clear buffer */
		if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO)
			flags = F_LEAF;
		else
			flags = 0;
		GISTInitBuffer(buffer, flags);

		/* and fill it */
		gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);

		if (newpage->header->blkno == GIST_ROOT_BLKNO)
		{
			GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
			GistPageSetNSN(page, xldata->orignsn);
			GistClearFollowRight(page);
		}
		else
		{
			if (i < xlrec.data->npage - 1)
				GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno;
			else
				GistPageGetOpaque(page)->rightlink = xldata->origrlink;
			GistPageSetNSN(page, xldata->orignsn);
			if (i < xlrec.data->npage - 1 && !isrootsplit &&
				xldata->markfollowright)
				GistMarkFollowRight(page);
			else
				GistClearFollowRight(page);
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);

		if (i == 0)
			firstbuffer = buffer;
		else
			UnlockReleaseBuffer(buffer);
	}

	/* Fix follow-right data on left child page, if any */
	if (BlockNumberIsValid(xldata->leftchild))
		gistRedoClearFollowRight(lsn, record, 0,
								 xldata->node, xldata->leftchild);

	/* Finally, release lock on the first page */
	UnlockReleaseBuffer(firstbuffer);
}
示例#16
0
文件: gist.c 项目: AlexHill/postgres
/*
 * Updates the stack so that child->parent is the correct parent of the
 * child. child->parent must be exclusively locked on entry, and will
 * remain so at exit, but it might not be the same page anymore.
 */
static void
gistFindCorrectParent(Relation r, GISTInsertStack *child)
{
	GISTInsertStack *parent = child->parent;

	gistcheckpage(r, parent->buffer);
	parent->page = (Page) BufferGetPage(parent->buffer);

	/* here we don't need to distinguish between split and page update */
	if (child->downlinkoffnum == InvalidOffsetNumber ||
		parent->lsn != PageGetLSN(parent->page))
	{
		/* parent is changed, look child in right links until found */
		OffsetNumber i,
					maxoff;
		ItemId		iid;
		IndexTuple	idxtuple;
		GISTInsertStack *ptr;

		while (true)
		{
			maxoff = PageGetMaxOffsetNumber(parent->page);
			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
			{
				iid = PageGetItemId(parent->page, i);
				idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
				if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
				{
					/* yes!!, found */
					child->downlinkoffnum = i;
					return;
				}
			}

			parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
			UnlockReleaseBuffer(parent->buffer);
			if (parent->blkno == InvalidBlockNumber)
			{
				/*
				 * End of chain and still didn't find parent. It's a very-very
				 * rare situation when root splited.
				 */
				break;
			}
			parent->buffer = ReadBuffer(r, parent->blkno);
			LockBuffer(parent->buffer, GIST_EXCLUSIVE);
			gistcheckpage(r, parent->buffer);
			parent->page = (Page) BufferGetPage(parent->buffer);
		}

		/*
		 * awful!!, we need search tree to find parent ... , but before we
		 * should release all old parent
		 */

		ptr = child->parent->parent;	/* child->parent already released
										 * above */
		while (ptr)
		{
			ReleaseBuffer(ptr->buffer);
			ptr = ptr->parent;
		}

		/* ok, find new path */
		ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);

		/* read all buffers as expected by caller */
		/* note we don't lock them or gistcheckpage them here! */
		while (ptr)
		{
			ptr->buffer = ReadBuffer(r, ptr->blkno);
			ptr->page = (Page) BufferGetPage(ptr->buffer);
			ptr = ptr->parent;
		}

		/* install new chain of parents to stack */
		child->parent = parent;

		/* make recursive call to normal processing */
		LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
		gistFindCorrectParent(r, child);
	}

	return;
}
示例#17
0
static void
gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
{
	ItemId		iid;
	IndexTuple	idxtuple;
	GISTPageOpaque opaque;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	/*
	 * walk down, We don't lock page for a long time, but so we should be
	 * ready to recheck path in a bad case... We remember, that page->lsn
	 * should never be invalid.
	 */
	for (;;)
	{
		if (XLogRecPtrIsInvalid(state->stack->lsn))
			state->stack->buffer = ReadBuffer(state->r, state->stack->blkno);
		LockBuffer(state->stack->buffer, GIST_SHARE);
		gistcheckpage(state->r, state->stack->buffer);

		state->stack->page = (Page) BufferGetPage(state->stack->buffer);
		opaque = GistPageGetOpaque(state->stack->page);

		state->stack->lsn = PageGetLSN(state->stack->page);
		Assert(state->r->rd_istemp || !XLogRecPtrIsInvalid(state->stack->lsn));

		if (state->stack->blkno != GIST_ROOT_BLKNO &&
			XLByteLT(state->stack->parent->lsn, opaque->nsn))
		{
			/*
			 * caused split non-root page is detected, go up to parent to
			 * choose best child
			 */
			UnlockReleaseBuffer(state->stack->buffer);
			state->stack = state->stack->parent;
			continue;
		}

		if (!GistPageIsLeaf(state->stack->page))
		{
			/*
			 * This is an internal page, so continue to walk down the tree. We
			 * find the child node that has the minimum insertion penalty and
			 * recursively invoke ourselves to modify that node. Once the
			 * recursive call returns, we may need to adjust the parent node
			 * for two reasons: the child node split, or the key in this node
			 * needs to be adjusted for the newly inserted key below us.
			 */
			GISTInsertStack *item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));

			state->stack->childoffnum = gistchoose(state->r, state->stack->page, state->itup[0], giststate);

			iid = PageGetItemId(state->stack->page, state->stack->childoffnum);
			idxtuple = (IndexTuple) PageGetItem(state->stack->page, iid);
			item->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
			LockBuffer(state->stack->buffer, GIST_UNLOCK);

			item->parent = state->stack;
			item->child = NULL;
			if (state->stack)
				state->stack->child = item;
			state->stack = item;
		}
		else
		{
			/* be carefull, during unlock/lock page may be changed... */
			LockBuffer(state->stack->buffer, GIST_UNLOCK);
			LockBuffer(state->stack->buffer, GIST_EXCLUSIVE);
			state->stack->page = (Page) BufferGetPage(state->stack->buffer);
			opaque = GistPageGetOpaque(state->stack->page);

			if (state->stack->blkno == GIST_ROOT_BLKNO)
			{
				/*
				 * the only page can become inner instead of leaf is a root
				 * page, so for root we should recheck it
				 */
				if (!GistPageIsLeaf(state->stack->page))
				{
					/*
					 * very rarely situation: during unlock/lock index with
					 * number of pages = 1 was increased
					 */
					LockBuffer(state->stack->buffer, GIST_UNLOCK);
					continue;
				}

				/*
				 * we don't need to check root split, because checking
				 * leaf/inner is enough to recognize split for root
				 */

			}
			else if (XLByteLT(state->stack->parent->lsn, opaque->nsn))
			{
				/*
				 * detecting split during unlock/lock, so we should find
				 * better child on parent
				 */

				/* forget buffer */
				UnlockReleaseBuffer(state->stack->buffer);

				state->stack = state->stack->parent;
				continue;
			}

			state->stack->lsn = PageGetLSN(state->stack->page);

			/* ok we found a leaf page and it X-locked */
			break;
		}
	}

	/* now state->stack->(page, buffer and blkno) points to leaf page */
}
示例#18
0
/*
 * Traverse the tree to find path from root page to specified "child" block.
 *
 * returns from the beginning of closest parent;
 *
 * To prevent deadlocks, this should lock only one page simultaneously.
 */
GISTInsertStack *
gistFindPath(Relation r, BlockNumber child)
{
	Page		page;
	Buffer		buffer;
	OffsetNumber i,
				maxoff;
	ItemId		iid;
	IndexTuple	idxtuple;
	GISTInsertStack *top,
			   *tail,
			   *ptr;
	BlockNumber blkno;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	top = tail = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
	top->blkno = GIST_ROOT_BLKNO;

	while (top && top->blkno != child)
	{
		buffer = ReadBuffer(r, top->blkno);
		LockBuffer(buffer, GIST_SHARE);
		gistcheckpage(r, buffer);
		page = (Page) BufferGetPage(buffer);

		if (GistPageIsLeaf(page))
		{
			/* we can safety go away, follows only leaf pages */
			UnlockReleaseBuffer(buffer);
			return NULL;
		}

		top->lsn = PageGetLSN(page);

		if (top->parent && XLByteLT(top->parent->lsn, GistPageGetOpaque(page)->nsn) &&
			GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
		{
			/* page splited while we thinking of... */
			ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
			ptr->blkno = GistPageGetOpaque(page)->rightlink;
			ptr->childoffnum = InvalidOffsetNumber;
			ptr->parent = top;
			ptr->next = NULL;
			tail->next = ptr;
			tail = ptr;
		}

		maxoff = PageGetMaxOffsetNumber(page);

		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			iid = PageGetItemId(page, i);
			idxtuple = (IndexTuple) PageGetItem(page, iid);
			blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
			if (blkno == child)
			{
				OffsetNumber poff = InvalidOffsetNumber;

				/* make childs links */
				ptr = top;
				while (ptr->parent)
				{
					/* set child link */
					ptr->parent->child = ptr;
					/* move childoffnum.. */
					if (ptr == top)
					{
						/* first iteration */
						poff = ptr->parent->childoffnum;
						ptr->parent->childoffnum = ptr->childoffnum;
					}
					else
					{
						OffsetNumber tmp = ptr->parent->childoffnum;

						ptr->parent->childoffnum = poff;
						poff = tmp;
					}
					ptr = ptr->parent;
				}
				top->childoffnum = i;
				UnlockReleaseBuffer(buffer);
				return top;
			}
			else
			{
				/* Install next inner page to the end of stack */
				ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
				ptr->blkno = blkno;
				ptr->childoffnum = i;	/* set offsetnumber of child to child
										 * !!! */
				ptr->parent = top;
				ptr->next = NULL;
				tail->next = ptr;
				tail = ptr;
			}
		}

		UnlockReleaseBuffer(buffer);
		top = top->next;
	}

	return NULL;
}
示例#19
0
static void
gistRedoPageSplitRecord(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
	Buffer		firstbuffer = InvalidBuffer;
	Buffer		buffer;
	Page		page;
	int			i;
	bool		isrootsplit = false;

	/*
	 * We must hold lock on the first-listed page throughout the action,
	 * including while updating the left child page (if any).  We can unlock
	 * remaining pages in the list as soon as they've been written, because
	 * there is no path for concurrent queries to reach those pages without
	 * first visiting the first-listed page.
	 */

	/* loop around all pages */
	for (i = 0; i < xldata->npage; i++)
	{
		int			flags;
		char	   *data;
		Size		datalen;
		int			num;
		BlockNumber blkno;
		IndexTuple *tuples;

		XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
		if (blkno == GIST_ROOT_BLKNO)
		{
			Assert(i == 0);
			isrootsplit = true;
		}

		buffer = XLogInitBufferForRedo(record, i + 1);
		page = (Page) BufferGetPage(buffer);
		data = XLogRecGetBlockData(record, i + 1, &datalen);

		tuples = decodePageSplitRecord(data, datalen, &num);

		/* ok, clear buffer */
		if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
			flags = F_LEAF;
		else
			flags = 0;
		GISTInitBuffer(buffer, flags);

		/* and fill it */
		gistfillbuffer(page, tuples, num, FirstOffsetNumber);

		if (blkno == GIST_ROOT_BLKNO)
		{
			GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
			GistPageSetNSN(page, xldata->orignsn);
			GistClearFollowRight(page);
		}
		else
		{
			if (i < xldata->npage - 1)
			{
				BlockNumber nextblkno;

				XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
				GistPageGetOpaque(page)->rightlink = nextblkno;
			}
			else
				GistPageGetOpaque(page)->rightlink = xldata->origrlink;
			GistPageSetNSN(page, xldata->orignsn);
			if (i < xldata->npage - 1 && !isrootsplit &&
				xldata->markfollowright)
				GistMarkFollowRight(page);
			else
				GistClearFollowRight(page);
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);

		if (i == 0)
			firstbuffer = buffer;
		else
			UnlockReleaseBuffer(buffer);
	}

	/* Fix follow-right data on left child page, if any */
	if (XLogRecHasBlockRef(record, 0))
		gistRedoClearFollowRight(record, 0);

	/* Finally, release lock on the first page */
	UnlockReleaseBuffer(firstbuffer);
}
示例#20
0
文件: gistget.c 项目: 50wu/gpdb
/*
 * Fetch a tuples that matchs the search key; this can be invoked
 * either to fetch the first such tuple or subsequent matching
 * tuples. Returns true iff a matching tuple was found.
 */
static int
gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids,
		 int maxtids, bool ignore_killed_tuples)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	Page		p;
	OffsetNumber n;
	GISTScanOpaque so;
	GISTSearchStack *stk;
	IndexTuple	it;
	GISTPageOpaque opaque;
	int			ntids = 0;

	so = (GISTScanOpaque) scan->opaque;

	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;

	if ( so->qual_ok == false )
		return 0;

	if (ItemPointerIsValid(&so->curpos) == false)
	{
		/* Being asked to fetch the first entry, so start at the root */
		Assert(so->curbuf == InvalidBuffer);
		Assert(so->stack == NULL);

		so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);

		stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack));

		stk->next = NULL;
		stk->block = GIST_ROOT_BLKNO;

		pgstat_count_index_scan(scan->indexRelation);
	}
	else if (so->curbuf == InvalidBuffer)
	{
		MIRROREDLOCK_BUFMGR_UNLOCK;
		// -------- MirroredLock ----------

		return 0;
	}

	/*
	 * check stored pointers from last visit 
	 */
	if ( so->nPageData > 0 ) 
	{
		while( ntids < maxtids && so->curPageData < so->nPageData )
		{
			tids[ ntids ] = scan->xs_ctup.t_self = so->pageData[ so->curPageData ].heapPtr;
			ItemPointerSet(&(so->curpos),
							   BufferGetBlockNumber(so->curbuf), 
							   so->pageData[ so->curPageData ].pageOffset);

				
			so->curPageData ++;
			ntids++;
		}

		if ( ntids == maxtids )
		{
			MIRROREDLOCK_BUFMGR_UNLOCK;
			// -------- MirroredLock ----------

			return ntids;
		}
		
		/*
		 * Go to the next page
		 */
		stk = so->stack->next;
		pfree(so->stack);
		so->stack = stk;

		/* If we're out of stack entries, we're done */
		if (so->stack == NULL)
		{
			ReleaseBuffer(so->curbuf);
			so->curbuf = InvalidBuffer;

			MIRROREDLOCK_BUFMGR_UNLOCK;
			// -------- MirroredLock ----------

			return ntids;
		}

		so->curbuf = ReleaseAndReadBuffer(so->curbuf,
										  scan->indexRelation,
										  stk->block);
	}

	for (;;)
	{
		/* First of all, we need lock buffer */
		Assert(so->curbuf != InvalidBuffer);
		LockBuffer(so->curbuf, GIST_SHARE);
		gistcheckpage(scan->indexRelation, so->curbuf);
		p = BufferGetPage(so->curbuf);
		opaque = GistPageGetOpaque(p);

		/* remember lsn to identify page changed for tuple's killing */
		so->stack->lsn = PageGetLSN(p);

		/* check page split, occured from last visit or visit to parent */
		if (!XLogRecPtrIsInvalid(so->stack->parentlsn) &&
			XLByteLT(so->stack->parentlsn, opaque->nsn) &&
			opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
			(so->stack->next == NULL || so->stack->next->block != opaque->rightlink)		/* check if already
					added */ )
		{
			/* detect page split, follow right link to add pages */

			stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
			stk->next = so->stack->next;
			stk->block = opaque->rightlink;
			stk->parentlsn = so->stack->parentlsn;
			memset(&(stk->lsn), 0, sizeof(GistNSN));
			so->stack->next = stk;
		}

		/* if page is empty, then just skip it */
		if (PageIsEmpty(p))
		{
			LockBuffer(so->curbuf, GIST_UNLOCK);
			stk = so->stack->next;
			pfree(so->stack);
			so->stack = stk;

			if (so->stack == NULL)
			{
				ReleaseBuffer(so->curbuf);
				so->curbuf = InvalidBuffer;

				MIRROREDLOCK_BUFMGR_UNLOCK;
				// -------- MirroredLock ----------

				return ntids;
			}

			so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
											  stk->block);
			continue;
		}

		if (ScanDirectionIsBackward(dir))
			n = PageGetMaxOffsetNumber(p);
		else
			n = FirstOffsetNumber;

		/* wonderful, we can look at page */
		so->nPageData = so->curPageData = 0;

		for (;;)
		{
			n = gistfindnext(scan, n, dir);

			if (!OffsetNumberIsValid(n))
			{
				while( ntids < maxtids && so->curPageData < so->nPageData )
				{
					tids[ ntids ] = scan->xs_ctup.t_self = 
						so->pageData[ so->curPageData ].heapPtr;
				
					ItemPointerSet(&(so->curpos),
								   BufferGetBlockNumber(so->curbuf), 
								   so->pageData[ so->curPageData ].pageOffset);

					so->curPageData ++;
					ntids++;
				}

				if ( ntids == maxtids )
				{
					LockBuffer(so->curbuf, GIST_UNLOCK);
					
					MIRROREDLOCK_BUFMGR_UNLOCK;
					// -------- MirroredLock ----------
					
					return ntids;
				}

				/*
				 * We ran out of matching index entries on the current page,
				 * so pop the top stack entry and use it to continue the
				 * search.
				 */
				LockBuffer(so->curbuf, GIST_UNLOCK);
				stk = so->stack->next;
				pfree(so->stack);
				so->stack = stk;

				/* If we're out of stack entries, we're done */

				if (so->stack == NULL)
				{
					ReleaseBuffer(so->curbuf);
					so->curbuf = InvalidBuffer;
					
					MIRROREDLOCK_BUFMGR_UNLOCK;
					// -------- MirroredLock ----------
					
					return ntids;
				}

				so->curbuf = ReleaseAndReadBuffer(so->curbuf,
												  scan->indexRelation,
												  stk->block);
				/* XXX	go up */
				break;
			}

			if (GistPageIsLeaf(p))
			{
				/*
				 * We've found a matching index entry in a leaf page, so
				 * return success. Note that we keep "curbuf" pinned so that
				 * we can efficiently resume the index scan later.
				 */

				if (!(ignore_killed_tuples && ItemIdIsDead(PageGetItemId(p, n))))
				{
					it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
					so->pageData[ so->nPageData ].heapPtr = it->t_tid;
					so->pageData[ so->nPageData ].pageOffset = n;
					so->nPageData ++;
				}
			}
			else
			{
				/*
				 * We've found an entry in an internal node whose key is
				 * consistent with the search key, so push it to stack
				 */

				stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));

				it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
				stk->block = ItemPointerGetBlockNumber(&(it->t_tid));
				memset(&(stk->lsn), 0, sizeof(GistNSN));
				stk->parentlsn = so->stack->lsn;

				stk->next = so->stack->next;
				so->stack->next = stk;

			}

			if (ScanDirectionIsBackward(dir))
				n = OffsetNumberPrev(n);
			else
				n = OffsetNumberNext(n);
		}
	}

	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------

	return ntids;
}