Ejemplo n.º 1
0
/*
 * Insert a new item to a page.
 *
 * Returns true if the insertion was finished. On false, the page was split and
 * the parent needs to be updated. (A root split returns true as it doesn't
 * need any further action by the caller to complete.)
 *
 * When inserting a downlink to an internal page, 'childbuf' contains the
 * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
 * atomically with the insert. Also, the existing item at offset stack->off
 * in the target page is updated to point to updateblkno.
 *
 * stack->buffer is locked on entry, and is kept locked.
 * Likewise for childbuf, if given.
 */
static bool
ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
			   void *insertdata, BlockNumber updateblkno,
			   Buffer childbuf, GinStatsData *buildStats)
{
	Page		page = BufferGetPage(stack->buffer);
	bool		result;
	GinPlaceToPageRC rc;
	uint16		xlflags = 0;
	Page		childpage = NULL;
	Page		newlpage = NULL,
				newrpage = NULL;
	void	   *ptp_workspace = NULL;
	XLogRecData payloadrdata[10];
	MemoryContext tmpCxt;
	MemoryContext oldCxt;

	/*
	 * We do all the work of this function and its subfunctions in a temporary
	 * memory context.  This avoids leakages and simplifies APIs, since some
	 * subfunctions allocate storage that has to survive until we've finished
	 * the WAL insertion.
	 */
	tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
								   "ginPlaceToPage temporary context",
								   ALLOCSET_DEFAULT_MINSIZE,
								   ALLOCSET_DEFAULT_INITSIZE,
								   ALLOCSET_DEFAULT_MAXSIZE);
	oldCxt = MemoryContextSwitchTo(tmpCxt);

	if (GinPageIsData(page))
		xlflags |= GIN_INSERT_ISDATA;
	if (GinPageIsLeaf(page))
	{
		xlflags |= GIN_INSERT_ISLEAF;
		Assert(!BufferIsValid(childbuf));
		Assert(updateblkno == InvalidBlockNumber);
	}
	else
	{
		Assert(BufferIsValid(childbuf));
		Assert(updateblkno != InvalidBlockNumber);
		childpage = BufferGetPage(childbuf);
	}

	/*
	 * See if the incoming tuple will fit on the page.  beginPlaceToPage will
	 * decide if the page needs to be split, and will compute the split
	 * contents if so.  See comments for beginPlaceToPage and execPlaceToPage
	 * functions for more details of the API here.
	 */
	rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
								 insertdata, updateblkno,
								 &ptp_workspace,
								 &newlpage, &newrpage,
								 payloadrdata);

	if (rc == GPTP_NO_WORK)
	{
		/* Nothing to do */
		result = true;
	}
	else if (rc == GPTP_INSERT)
	{
		/* It will fit, perform the insertion */
		START_CRIT_SECTION();

		/* Perform the page update, and set up WAL data about it */
		btree->execPlaceToPage(btree, stack->buffer, stack,
							   insertdata, updateblkno,
							   ptp_workspace, payloadrdata);

		MarkBufferDirty(stack->buffer);

		/* An insert to an internal page finishes the split of the child. */
		if (BufferIsValid(childbuf))
		{
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
			MarkBufferDirty(childbuf);
		}

		if (RelationNeedsWAL(btree->index))
		{
			XLogRecPtr	recptr;
			XLogRecData rdata[3];
			ginxlogInsert xlrec;
			BlockIdData childblknos[2];

			xlrec.node = btree->index->rd_node;
			xlrec.blkno = BufferGetBlockNumber(stack->buffer);
			xlrec.flags = xlflags;

			rdata[0].buffer = InvalidBuffer;
			rdata[0].data = (char *) &xlrec;
			rdata[0].len = sizeof(ginxlogInsert);

			/*
			 * Log information about child if this was an insertion of a
			 * downlink.
			 */
			if (BufferIsValid(childbuf))
			{
				rdata[0].next = &rdata[1];

				BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
				BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);

				rdata[1].buffer = InvalidBuffer;
				rdata[1].data = (char *) childblknos;
				rdata[1].len = sizeof(BlockIdData) * 2;
				rdata[1].next = &rdata[2];

				rdata[2].buffer = childbuf;
				rdata[2].buffer_std = true;
				rdata[2].data = NULL;
				rdata[2].len = 0;
				rdata[2].next = payloadrdata;
			}
			else
				rdata[0].next = payloadrdata;

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);

			PageSetLSN(page, recptr);
			if (BufferIsValid(childbuf))
				PageSetLSN(childpage, recptr);
		}

		END_CRIT_SECTION();

		/* Insertion is complete. */
		result = true;
	}
	else if (rc == GPTP_SPLIT)
	{
		/*
		 * Didn't fit, need to split.  The split has been computed in newlpage
		 * and newrpage, which are pointers to palloc'd pages, not associated
		 * with buffers.  stack->buffer is not touched yet.
		 */
		Buffer		rbuffer;
		BlockNumber savedRightLink;
		ginxlogSplit data;
		Buffer		lbuffer = InvalidBuffer;
		Page		newrootpg = NULL;

		/* Get a new index page to become the right page */
		rbuffer = GinNewBuffer(btree->index);

		/* During index build, count the new page */
		if (buildStats)
		{
			if (btree->isData)
				buildStats->nDataPages++;
			else
				buildStats->nEntryPages++;
		}

		savedRightLink = GinPageGetOpaque(page)->rightlink;

		/* Begin setting up WAL record (which we might not use) */
		data.node = btree->index->rd_node;
		data.rblkno = BufferGetBlockNumber(rbuffer);
		data.flags = xlflags;
		if (BufferIsValid(childbuf))
		{
			data.leftChildBlkno = BufferGetBlockNumber(childbuf);
			data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
		}
		else
			data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;

		if (stack->parent == NULL)
		{
			/*
			 * splitting the root, so we need to allocate new left page and
			 * place pointers to left and right page on root page.
			 */
			lbuffer = GinNewBuffer(btree->index);

			/* During index build, count the new left page */
			if (buildStats)
			{
				if (btree->isData)
					buildStats->nDataPages++;
				else
					buildStats->nEntryPages++;
			}

			/*
			 * root never has a right-link, so we borrow the rrlink field to
			 * store the root block number.
			 */
			data.rrlink = BufferGetBlockNumber(stack->buffer);
			data.lblkno = BufferGetBlockNumber(lbuffer);
			data.flags |= GIN_SPLIT_ROOT;

			GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

			/*
			 * Construct a new root page containing downlinks to the new left
			 * and right pages.  (Do this in a temporary copy rather than
			 * overwriting the original page directly, since we're not in the
			 * critical section yet.)
			 */
			newrootpg = PageGetTempPage(newrpage);
			GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);

			btree->fillRoot(btree, newrootpg,
							BufferGetBlockNumber(lbuffer), newlpage,
							BufferGetBlockNumber(rbuffer), newrpage);
		}
		else
		{
			/* splitting a non-root page */
			data.rrlink = savedRightLink;
			data.lblkno = BufferGetBlockNumber(stack->buffer);

			GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
			GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
		}

		/*
		 * OK, we have the new contents of the left page in a temporary copy
		 * now (newlpage), and likewise for the new contents of the
		 * newly-allocated right block. The original page is still unchanged.
		 *
		 * If this is a root split, we also have a temporary page containing
		 * the new contents of the root.
		 */

		START_CRIT_SECTION();

		MarkBufferDirty(rbuffer);
		MarkBufferDirty(stack->buffer);

		/*
		 * Restore the temporary copies over the real buffers.
		 */
		if (stack->parent == NULL)
		{
			/* Splitting the root, three pages to update */
			MarkBufferDirty(lbuffer);
			memcpy(page, newrootpg, BLCKSZ);
			memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
			memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
		}
		else
		{
			/* Normal split, only two pages to update */
			memcpy(page, newlpage, BLCKSZ);
			memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
		}

		/* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
		if (BufferIsValid(childbuf))
		{
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
			MarkBufferDirty(childbuf);
		}

		/* write WAL record */
		if (RelationNeedsWAL(btree->index))
		{
			XLogRecData rdata[2];
			XLogRecPtr	recptr;

			rdata[0].buffer = InvalidBuffer;
			rdata[0].data = (char *) &data;
			rdata[0].len = sizeof(ginxlogSplit);

			if (BufferIsValid(childbuf))
			{
				rdata[0].next = &rdata[1];

				rdata[1].buffer = childbuf;
				rdata[1].buffer_std = true;
				rdata[1].data = NULL;
				rdata[1].len = 0;
				rdata[1].next = payloadrdata;
			}
			else
				rdata[0].next = payloadrdata;

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);

			PageSetLSN(page, recptr);
			PageSetLSN(BufferGetPage(rbuffer), recptr);
			if (stack->parent == NULL)
				PageSetLSN(BufferGetPage(lbuffer), recptr);
			if (BufferIsValid(childbuf))
				PageSetLSN(childpage, recptr);
		}
		END_CRIT_SECTION();

		/*
		 * We can release the locks/pins on the new pages now, but keep
		 * stack->buffer locked.  childbuf doesn't get unlocked either.
		 */
		UnlockReleaseBuffer(rbuffer);
		if (stack->parent == NULL)
			UnlockReleaseBuffer(lbuffer);

		/*
		 * If we split the root, we're done. Otherwise the split is not
		 * complete until the downlink for the new page has been inserted to
		 * the parent.
		 */
		result = (stack->parent == NULL);
	}
	else
	{
		elog(ERROR, "invalid return code from GIN placeToPage method: %d", rc);
		result = false;			/* keep compiler quiet */
	}

	/* Clean up temp context */
	MemoryContextSwitchTo(oldCxt);
	MemoryContextDelete(tmpCxt);

	return result;
}