/*
 * Insert an array of item pointers into the posting tree described by gdi.
 *
 * Normally one descent is enough, but the loop keeps re-descending from the
 * root for as long as items remain to be processed (very rare).
 *
 * buildStats is handed through unchanged to ginInsertValue.
 */
void
ginInsertItemPointers(GinPostingTreeScan *gdi,
                      ItemPointerData *items, uint32 nitem,
                      GinStatsData *buildStats)
{
    /* block the caller's stack entry points at; used for every re-descent */
    BlockNumber rootBlkno = gdi->stack->blkno;

    /* publish the work list through the btree descriptor */
    gdi->btree.curitem = 0;
    gdi->btree.nitem = nitem;
    gdi->btree.items = items;

    while (gdi->btree.curitem < gdi->btree.nitem)
    {
        /*
         * The first pass uses the stack supplied by the caller; every later
         * pass finds it reset to NULL and starts over from the root.
         */
        if (gdi->stack == NULL)
            gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);

        gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);

        if (!gdi->btree.findItem(&gdi->btree, gdi->stack))
        {
            /*
             * Not present yet: insert it.  NOTE(review): the loop relies on
             * the insertion path advancing btree.curitem -- confirm in the
             * page callbacks.
             */
            ginInsertValue(&gdi->btree, gdi->stack, buildStats);
        }
        else
        {
            /* items[curitem] is already in the index: skip it */
            gdi->btree.curitem++;
            LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
            freeGinBtreeStack(gdi->stack);
        }

        /* either way the stack is no longer ours to use */
        gdi->stack = NULL;
    }
}
Esempio n. 2
0
/*
 * Insert an array of item pointers into the posting tree rooted at rootBlkno.
 * Several tree descents may be needed to get through the array (very rare).
 *
 * btree.isBuild is set exactly when buildStats is non-NULL; buildStats itself
 * is passed on to ginInsertValue.
 */
void
ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
					  ItemPointerData *items, uint32 nitem,
					  GinStatsData *buildStats)
{
	GinBtreeData btree;
	GinBtreeDataLeafInsertData insertdata;

	ginPrepareDataScan(&btree, index, rootBlkno);
	btree.isBuild = (buildStats != NULL);

	/* describe the pending work for the leaf-insert callbacks */
	insertdata.curitem = 0;
	insertdata.nitem = nitem;
	insertdata.items = items;

	while (insertdata.curitem < insertdata.nitem)
	{
		GinBtreeStack *leaf;

		/* descend to the leaf page where the next pending item belongs */
		btree.itemptr = insertdata.items[insertdata.curitem];
		leaf = ginFindLeafPage(&btree, false);

		if (!btree.findItem(&btree, leaf))
		{
			/*
			 * NOTE(review): the loop relies on the insertion path advancing
			 * insertdata.curitem -- confirm in the leaf callbacks.
			 */
			ginInsertValue(&btree, leaf, &insertdata, buildStats);
			continue;
		}

		/* Current item already exists in index: step over it. */
		insertdata.curitem++;
		LockBuffer(leaf->buffer, GIN_UNLOCK);
		freeGinBtreeStack(leaf);
	}
}
Esempio n. 3
0
/*
 * Inserts only one entry to the index, but it can add more than 1 ItemPointer.
 *
 *	index, ginstate: target index and its GIN state
 *	value:			 key to insert
 *	items, nitem:	 item pointers to associate with the key
 *	isBuild:		 copied into the posting-tree scan's btree.isBuild and
 *					 forwarded to addItemPointersToTuple
 *
 * Three cases are handled:
 *	- the key exists and its tuple refers to a posting tree: the items are
 *	  inserted into that tree and we are done;
 *	- the key exists with the items stored in the tuple itself: a new tuple is
 *	  obtained from addItemPointersToTuple and inserted with isDelete set;
 *	- the key does not exist: a tuple is formed around the first item and the
 *	  remaining items, if any, are added via addItemPointersToTuple.
 */
static void
ginEntryInsert(Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild)
{
	GinBtreeData btree;
	GinBtreeStack *stack;
	IndexTuple	itup;
	Page		page;

	prepareEntryScan(&btree, index, value, ginstate);

	stack = ginFindLeafPage(&btree, NULL);
	page = BufferGetPage(stack->buffer);

	if (btree.findItem(&btree, stack))
	{
		/* found entry */
		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));

		if (GinIsPostingTree(itup))
		{
			/* lock root of posting tree */
			GinPostingTreeScan *gdi;
			BlockNumber rootPostingTree = GinGetPostingTree(itup);

			/* release all stack */
			LockBuffer(stack->buffer, GIN_UNLOCK);
			freeGinBtreeStack(stack);

			/* insert into posting tree */
			gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
			gdi->btree.isBuild = isBuild;
			insertItemPointer(gdi, items, nitem);

			/*
			 * Release the scan descriptor, which would otherwise be leaked.
			 * NOTE(review): assumes insertItemPointer has already released
			 * gdi->stack -- confirm.
			 */
			pfree(gdi);

			return;
		}

		itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild);

		/*
		 * NOTE(review): isDelete presumably makes the insertion replace the
		 * tuple found above rather than add a second one -- confirm.
		 */
		btree.isDelete = TRUE;
	}
	else
	{
		/* We suppose that a tuple can store at least one itempointer */
		itup = GinFormTuple(ginstate, value, items, 1);
		if (itup == NULL || IndexTupleSize(itup) >= GinMaxItemSize)
			elog(ERROR, "huge tuple");

		if (nitem > 1)
		{
			/* add the remaining items to the single-item tuple */
			IndexTuple	previtup = itup;

			itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild);
			pfree(previtup);
		}
	}

	/* insert the new or replacement tuple at the position found above */
	btree.entry = itup;
	ginInsertValue(&btree, stack);
	pfree(itup);
}
Esempio n. 4
0
/*
 * Attach one or more heap TIDs to the given key value.  The key either gets
 * a brand-new entry or has its pre-existing entry enlarged.
 *
 * buildStats is non-null during an index build; the counters it contains are
 * incremented as needed.
 */
void
ginEntryInsert(GinState *ginstate,
			   OffsetNumber attnum, Datum key, GinNullCategory category,
			   ItemPointerData *items, uint32 nitem,
			   GinStatsData *buildStats)
{
	GinBtreeData btree;
	GinBtreeStack *leafStack;
	IndexTuple	leafTuple;
	Page		leafPage;

	/* An index build counts every entry it is about to insert */
	if (buildStats)
		buildStats->nEntries++;

	ginPrepareEntryScan(&btree, attnum, key, category, ginstate);

	leafStack = ginFindLeafPage(&btree, GIN_ROOT_BLKNO, false);
	leafPage = BufferGetPage(leafStack->buffer);

	if (!btree.findItem(&btree, leafStack))
	{
		/* key not present: build a fresh leaf entry for it */
		leafTuple = buildFreshLeafTuple(ginstate, attnum, key, category,
										items, nitem, buildStats);
	}
	else
	{
		/* key already present: fetch its current leaf tuple */
		leafTuple = (IndexTuple) PageGetItem(leafPage,
											 PageGetItemId(leafPage, leafStack->off));

		if (GinIsPostingTree(leafTuple))
		{
			/* the TIDs live in a posting tree; add the new ones there */
			BlockNumber postingRoot = GinGetPostingTree(leafTuple);

			/* the entry-tree stack is not needed any more */
			LockBuffer(leafStack->buffer, GIN_UNLOCK);
			freeGinBtreeStack(leafStack);

			ginInsertItemPointers(ginstate->index, postingRoot,
								  items, nitem,
								  buildStats);
			return;
		}

		/* otherwise produce an enlarged version of the existing tuple */
		leafTuple = addItemPointersToLeafTuple(ginstate, leafTuple,
											   items, nitem, buildStats);

		btree.isDelete = TRUE;
	}

	/* Put the new or modified leaf tuple into the entry tree */
	btree.entry = leafTuple;
	ginInsertValue(&btree, leafStack, buildStats);
	pfree(leafTuple);
}
Esempio n. 5
0
/*
 * Insert a value into the tree described by stack.
 *
 * 'insertdata' carries the value.  Its layout differs between entry and data
 * trees; this function does not look inside it and simply hands it on to the
 * tree-specific code.
 *
 * buildStats is non-null during an index build, and the counters it contains
 * are incremented as needed.
 *
 * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
 */
void
ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
			   GinStatsData *buildStats)
{
	/* A leaf left behind by an incomplete split must be completed first */
	if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
		ginFinishSplit(btree, stack, false, buildStats);

	if (!ginPlaceToPage(btree, stack,
						insertdata, InvalidBlockNumber,
						InvalidBuffer, buildStats))
	{
		/*
		 * The insertion is not complete yet; ginFinishSplit carries on up
		 * the tree and, with freestack = true, disposes of the stack.
		 */
		ginFinishSplit(btree, stack, true, buildStats);
		return;
	}

	/* Insertion finished on this page: unlock it and drop the stack */
	LockBuffer(stack->buffer, GIN_UNLOCK);
	freeGinBtreeStack(stack);
}
Esempio n. 6
0
/*
 * Insert value (stored in GinBtree) to tree described by stack
 *
 * Starting with the page at the bottom of 'stack', the value is placed on
 * the page if there is enough space.  Otherwise the page is split; for a
 * non-root page the loop then locks the parent and repeats the insertion
 * there, while a root split is handled in place and ends the insertion.
 *
 * During an index build, buildStats is non-null and the counters
 * it contains should be incremented as needed.
 *
 * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
 */
void
ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
{
	GinBtreeStack *parent = stack;

	/*
	 * Use the block-number sentinel here (not a Buffer one); the walk below
	 * overwrites it whenever stack is non-NULL.
	 */
	BlockNumber rootBlkno = InvalidBlockNumber;
	Page		page,
				rpage,
				lpage;

	/* remember root BlockNumber: the topmost stack entry's block */
	while (parent)
	{
		rootBlkno = parent->blkno;
		parent = parent->parent;
	}

	while (stack)
	{
		XLogRecData *rdata;
		BlockNumber savedRightLink;

		page = BufferGetPage(stack->buffer);
		/* saved before any split so it can become the new right page's link */
		savedRightLink = GinPageGetOpaque(page)->rightlink;

		if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
		{
			/* simple case: the value fits on this page */
			START_CRIT_SECTION();
			btree->placeToPage(btree, stack->buffer, stack->off, &rdata);

			MarkBufferDirty(stack->buffer);

			if (RelationNeedsWAL(btree->index))
			{
				XLogRecPtr	recptr;

				recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
				PageSetLSN(page, recptr);
				PageSetTLI(page, ThisTimeLineID);
			}

			LockBuffer(stack->buffer, GIN_UNLOCK);
			END_CRIT_SECTION();

			freeGinBtreeStack(stack);

			return;
		}
		else
		{
			/* no room: split the page into stack->buffer and a new right page */
			Buffer		rbuffer = GinNewBuffer(btree->index);
			Page		newlpage;

			/*
			 * newlpage is a pointer to memory page, it doesn't associate with
			 * buffer, stack->buffer should be untouched
			 */
			newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);

			/* rdata->data is treated as the ginxlogSplit record being built */
			((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;

			/* During index build, count the newly-split page */
			if (buildStats)
			{
				if (btree->isData)
					buildStats->nDataPages++;
				else
					buildStats->nEntryPages++;
			}

			parent = stack->parent;

			if (parent == NULL)
			{
				/*
				 * split root, so we need to allocate new left page and place
				 * pointer on root to left and right page
				 */
				Buffer		lbuffer = GinNewBuffer(btree->index);

				((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
				((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;

				page = BufferGetPage(stack->buffer);
				lpage = BufferGetPage(lbuffer);
				rpage = BufferGetPage(rbuffer);

				/* the two halves become siblings: left -> right -> nothing */
				GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
				GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
				((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);

				START_CRIT_SECTION();

				/*
				 * Re-initialize the root buffer with the left half's flags
				 * minus GIN_LEAF, move the left half into the new left
				 * buffer, and let fillRoot populate the root.
				 */
				GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF);
				PageRestoreTempPage(newlpage, lpage);
				btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer);

				MarkBufferDirty(rbuffer);
				MarkBufferDirty(lbuffer);
				MarkBufferDirty(stack->buffer);

				if (RelationNeedsWAL(btree->index))
				{
					XLogRecPtr	recptr;

					recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
					PageSetLSN(page, recptr);
					PageSetTLI(page, ThisTimeLineID);
					PageSetLSN(lpage, recptr);
					PageSetTLI(lpage, ThisTimeLineID);
					PageSetLSN(rpage, recptr);
					PageSetTLI(rpage, ThisTimeLineID);
				}

				UnlockReleaseBuffer(rbuffer);
				UnlockReleaseBuffer(lbuffer);
				LockBuffer(stack->buffer, GIN_UNLOCK);
				END_CRIT_SECTION();

				freeGinBtreeStack(stack);

				/* During index build, count the newly-added root page */
				if (buildStats)
				{
					if (btree->isData)
						buildStats->nDataPages++;
					else
						buildStats->nEntryPages++;
				}

				return;
			}
			else
			{
				/* split non-root page */
				((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE;
				((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink;

				lpage = BufferGetPage(stack->buffer);
				rpage = BufferGetPage(rbuffer);

				/* splice the new right page in between this page and its old right sibling */
				GinPageGetOpaque(rpage)->rightlink = savedRightLink;
				GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

				START_CRIT_SECTION();
				PageRestoreTempPage(newlpage, lpage);

				MarkBufferDirty(rbuffer);
				MarkBufferDirty(stack->buffer);

				if (RelationNeedsWAL(btree->index))
				{
					XLogRecPtr	recptr;

					recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
					PageSetLSN(lpage, recptr);
					PageSetTLI(lpage, ThisTimeLineID);
					PageSetLSN(rpage, recptr);
					PageSetTLI(rpage, ThisTimeLineID);
				}
				UnlockReleaseBuffer(rbuffer);
				END_CRIT_SECTION();
			}
		}

		/*
		 * Only a non-root split reaches this point.  NOTE(review): splitPage
		 * presumably left the value to insert into the parent in btree --
		 * confirm in the tree-specific callbacks.
		 */
		btree->isDelete = FALSE;

		/* search parent to lock */
		LockBuffer(parent->buffer, GIN_EXCLUSIVE);

		/* move right if it's needed */
		page = BufferGetPage(parent->buffer);
		while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
		{
			BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;

			LockBuffer(parent->buffer, GIN_UNLOCK);

			if (rightlink == InvalidBlockNumber)
			{
				/*
				 * rightmost page, but we don't find parent, we should use
				 * plain search...
				 */
				ginFindParents(btree, stack, rootBlkno);
				parent = stack->parent;
				page = BufferGetPage(parent->buffer);
				break;
			}

			/* step to the right sibling and look for the downlink there */
			parent->blkno = rightlink;
			parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno);
			LockBuffer(parent->buffer, GIN_EXCLUSIVE);
			page = BufferGetPage(parent->buffer);
		}

		/* done with the child level: release it and continue one level up */
		UnlockReleaseBuffer(stack->buffer);
		pfree(stack);
		stack = parent;
	}
}
Esempio n. 7
0
/*
 * Finish a split by inserting the downlink for the new page to parent.
 *
 * On entry, stack->buffer is exclusively locked.
 *
 * If freestack is true, all the buffers are released and unlocked as we
 * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
 * locked, and stack is unmodified, except for possibly moving right to find
 * the correct parent of page.
 *
 * buildStats is passed through to ginPlaceToPage and to the recursive calls
 * that finish other incomplete splits met on the way.
 */
static void
ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
			   GinStatsData *buildStats)
{
	Page		page;
	bool		done;
	bool		first = true;	/* true only while handling the level we were called on */

	/*
	 * freestack == false when we encounter an incompletely split page during a
	 * scan, while freestack == true is used in the normal scenario that a
	 * split is finished right after the initial insert.
	 */
	if (!freestack)
		elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
			 stack->blkno, RelationGetRelationName(btree->index));

	/* this loop crawls up the stack until the insertion is complete */
	do
	{
		GinBtreeStack *parent = stack->parent;
		void	   *insertdata;
		BlockNumber updateblkno;

		/* search parent to lock */
		LockBuffer(parent->buffer, GIN_EXCLUSIVE);

		/*
		 * If the parent page was incompletely split, finish that split first,
		 * then continue with the current one.
		 *
		 * Note: we have to finish *all* incomplete splits we encounter, even
		 * if we have to move right. Otherwise we might choose as the target
		 * a page that has no downlink in the parent, and splitting it further
		 * would fail.
		 */
		if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
			ginFinishSplit(btree, parent, false, buildStats);

		/* move right if it's needed */
		page = BufferGetPage(parent->buffer);
		while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
		{
			if (GinPageRightMost(page))
			{
				/*
				 * rightmost page, but we don't find parent, we should use
				 * plain search...
				 *
				 * NOTE(review): ginFindParents presumably rebuilds
				 * stack->parent and leaves the page it found locked, since
				 * the downlink is inserted into it below -- confirm.
				 */
				LockBuffer(parent->buffer, GIN_UNLOCK);
				ginFindParents(btree, stack);
				parent = stack->parent;
				Assert(parent != NULL);
				break;
			}

			/* step to the right sibling, requesting an exclusive lock on it */
			parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
			parent->blkno = BufferGetBlockNumber(parent->buffer);
			page = BufferGetPage(parent->buffer);

			/* as noted above, incomplete splits met while moving right are finished too */
			if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
				ginFinishSplit(btree, parent, false, buildStats);
		}

		/* insert the downlink */
		insertdata = btree->prepareDownlink(btree, stack->buffer);
		/* the child's right sibling is passed along as the block to update */
		updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
		done = ginPlaceToPage(btree, parent,
							  insertdata, updateblkno,
							  stack->buffer, buildStats);
		pfree(insertdata);

		/*
		 * If the caller requested to free the stack, unlock and release the
		 * child buffer now. Otherwise keep it pinned and locked, but if we
		 * have to recurse up the tree, we can unlock the upper pages, only
		 * keeping the page at the bottom of the stack locked.
		 */
		if (!first || freestack)
			LockBuffer(stack->buffer, GIN_UNLOCK);
		if (freestack)
		{
			ReleaseBuffer(stack->buffer);
			pfree(stack);
		}
		/* continue one level up; if !done the parent itself needs a downlink inserted */
		stack = parent;

		first = false;
	} while (!done);

	/* unlock the parent */
	LockBuffer(stack->buffer, GIN_UNLOCK);

	/* whatever remains of the stack (this page and above) goes too */
	if (freestack)
		freeGinBtreeStack(stack);
}
Esempio n. 8
0
/*
 * Start* functions set up the state of a search: they find the correct buffer
 * and lock it.  Stop* functions unlock the buffer (but don't release it!).
 *
 * startScanEntry looks the entry's key up in the entry tree.  If the key has
 * a posting tree, the items of the page returned by scanBeginPostingTree are
 * copied into entry->list and that buffer stays pinned in entry->buffer; if
 * the key has an in-tuple posting list, that list is copied into entry->list.
 * An entry with a master only inherits the master's isFinished flag.
 */
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
{
	GinBtreeData entryTree;
	GinBtreeStack *leafStack;
	Page		leafPage;
	bool		leafLocked = TRUE;

	/* a slave entry does no search of its own */
	if (entry->master != NULL)
	{
		entry->isFinished = entry->master->isFinished;
		return;
	}

	/*
	 * Find the entry, then either begin a scan of its posting tree or just
	 * keep its posting list in memory.
	 */
	prepareEntryScan(&entryTree, index, entry->entry, ginstate);
	entryTree.searchMode = TRUE;
	leafStack = ginFindLeafPage(&entryTree, NULL);
	leafPage = BufferGetPage(leafStack->buffer);

	/* start from "nothing found"; the branches below override as needed */
	entry->buffer = InvalidBuffer;
	entry->offset = InvalidOffsetNumber;
	entry->list = NULL;
	entry->nlist = 0;
	entry->predictNumberResult = 0;
	entry->reduceResult = FALSE;
	entry->isFinished = TRUE;

	if (entryTree.findItem(&entryTree, leafStack))
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(leafPage,
													PageGetItemId(leafPage, leafStack->off));

		if (!GinIsPostingTree(itup))
		{
			/* posting list stored in the tuple: copy it out if non-empty */
			if (GinGetNPosting(itup) > 0)
			{
				entry->nlist = GinGetNPosting(itup);
				entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
				memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
				entry->isFinished = FALSE;
			}
		}
		else
		{
			BlockNumber rootPostingTree = GinGetPostingTree(itup);
			GinPostingTreeScan *gdi;
			Page		dataPage;

			/* the entry-tree leaf is unlocked before the posting tree is entered */
			LockBuffer(leafStack->buffer, GIN_UNLOCK);
			leafLocked = FALSE;
			gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);

			entry->buffer = scanBeginPostingTree(gdi);

			/*
			 * We keep the buffer pinned because we need to prevent deletion
			 * of the page during the scan. See GIN's vacuum implementation.
			 * The RefCount is increased so that the buffer stays pinned after
			 * the freeGinBtreeStack() call below.
			 */
			IncrBufferRefCount(entry->buffer);

			dataPage = BufferGetPage(entry->buffer);
			entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(dataPage)->maxoff;

			/*
			 * Keep the page content in memory so the page need not stay
			 * locked for long
			 */
			entry->list = (ItemPointerData *) palloc(BLCKSZ);
			entry->nlist = GinPageGetOpaque(dataPage)->maxoff;
			memcpy(entry->list, GinDataPageGetItem(dataPage, FirstOffsetNumber),
				   GinPageGetOpaque(dataPage)->maxoff * sizeof(ItemPointerData));

			LockBuffer(entry->buffer, GIN_UNLOCK);
			freeGinBtreeStack(gdi->stack);
			pfree(gdi);
			entry->isFinished = FALSE;
		}
	}

	/* unlock the leaf unless the posting-tree branch already did */
	if (leafLocked)
		LockBuffer(leafStack->buffer, GIN_UNLOCK);
	freeGinBtreeStack(leafStack);
}
Esempio n. 9
0
/*
 * Insert a single entry (key) into the index, attaching one or more
 * ItemPointers to it.
 *
 * buildStats is non-null during an index build; the counters it contains are
 * incremented as needed.
 */
void
ginEntryInsert(Relation index, GinState *ginstate,
			   OffsetNumber attnum, Datum value,
			   ItemPointerData *items, uint32 nitem,
			   GinStatsData *buildStats)
{
	GinBtreeData btree;
	GinBtreeStack *leafStack;
	IndexTuple	tuple;
	Page		leafPage;

	/* An index build counts every entry it is about to insert */
	if (buildStats)
		buildStats->nEntries++;

	ginPrepareEntryScan(&btree, index, attnum, value, ginstate);

	leafStack = ginFindLeafPage(&btree, NULL);
	leafPage = BufferGetPage(leafStack->buffer);

	if (!btree.findItem(&btree, leafStack))
	{
		/* We suppose that tuple can store at least one itempointer */
		tuple = GinFormTuple(index, ginstate, attnum, value, items, 1, true);

		if (nitem > 1)
		{
			/* Add the rest, making a posting tree if necessary */
			IndexTuple	singleItemTuple = tuple;

			tuple = addItemPointersToTuple(index, ginstate, leafStack,
										   singleItemTuple,
										   items + 1, nitem - 1, buildStats);
			pfree(singleItemTuple);
		}
	}
	else
	{
		/* the key is already indexed: fetch its tuple */
		tuple = (IndexTuple) PageGetItem(leafPage,
										 PageGetItemId(leafPage, leafStack->off));

		if (GinIsPostingTree(tuple))
		{
			/* the items belong in the key's posting tree */
			GinPostingTreeScan *treeScan;
			BlockNumber postingRoot = GinGetPostingTree(tuple);

			/* the entry-tree stack is finished with */
			LockBuffer(leafStack->buffer, GIN_UNLOCK);
			freeGinBtreeStack(leafStack);

			treeScan = ginPrepareScanPostingTree(index, postingRoot, FALSE);
			treeScan->btree.isBuild = (buildStats != NULL);
			ginInsertItemPointer(treeScan, items, nitem, buildStats);
			pfree(treeScan);

			return;
		}

		tuple = addItemPointersToTuple(index, ginstate, leafStack, tuple,
									   items, nitem, buildStats);

		btree.isDelete = TRUE;
	}

	/* hand the new or rebuilt tuple to the entry tree */
	btree.entry = tuple;
	ginInsertValue(&btree, leafStack, buildStats);
	pfree(tuple);
}