Exemple #1
0
/*----------
 * Add an item to a disk page from the sort output.
 *
 * We must be careful to observe the page layout conventions of nbtsearch.c:
 * - rightmost pages start data items at P_HIKEY instead of at P_FIRSTKEY.
 * - on non-leaf pages, the key portion of the first item need not be
 *	 stored, we should store only the link.
 *
 * A leaf page being built looks like:
 *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp0 linp1 linp2 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |									  |
 * +-----------+--------------------------------------+
 * |	 ^ last										  |
 * |												  |
 * +-------------+------------------------------------+
 * |			 | itemN ...                          |
 * +-------------+------------------+-----------------+
 * |		  ... item3 item2 item1 | "special space" |
 * +--------------------------------+-----------------+
 *
 * Contrast this with the diagram in bufpage.h; note the mismatch
 * between linps and items.  This is because we reserve linp0 as a
 * placeholder for the pointer to the "high key" item; when we have
 * filled up the page, we will set linp0 to point to itemN and clear
 * linpN.  On the other hand, if we find this is the last (rightmost)
 * page, we leave the items alone and slide the linp array over.
 *
 * 'last' pointer indicates the last offset added to the page.
 *----------
 */
static void
_bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
{
	Page		curpage;
	BlockNumber curblkno;
	OffsetNumber lastoff;
	Size		freespace;
	Size		tupsize;
	Size		maxtupsize;
	bool		wont_fit;
	bool		past_fill_target;

	/*
	 * Index builds can run for a long time; every call into this routine is
	 * a convenient point at which to honor a pending cancel request.
	 */
	CHECK_FOR_INTERRUPTS();

	/* Work on local copies of this level's state; stored back at the end. */
	curpage = state->btps_page;
	curblkno = state->btps_blkno;
	lastoff = state->btps_lastoff;

	freespace = PageGetFreeSpace(curpage);
	tupsize = MAXALIGN(IndexTupleDSize(*itup));
	maxtupsize = BTMaxItemSize(curpage);

	/*
	 * Reject tuples that can never be stored on a btree page.  Every page
	 * has to be able to hold three items, so a single item is limited to
	 * one third of the usable page space.  tupsize does not yet account for
	 * the item's ItemId.
	 *
	 * _bt_insertonpg() has an equivalent guard for insertions into an
	 * existing index, but an index build never passes through that code, so
	 * the test has to be repeated here.  (Someday TOAST-like methods might
	 * be applied instead of failing.)
	 */
	if (tupsize > maxtupsize)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
						tupsize, maxtupsize,
						RelationGetRelationName(wstate->index)),
				 errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
						 "Consider a function index of an MD5 hash of the value, "
						 "or use full text indexing."),
				 errtableconstraint(wstate->heap,
									RelationGetRelationName(wstate->index))));

	/*
	 * Decide whether the current page counts as "full".  It certainly is if
	 * the tuple does not fit.  Otherwise it is full once free space drops
	 * below the fillfactor-derived target -- but the fillfactor is only
	 * honored once the page already holds at least two items.
	 */
	wont_fit = (freespace < tupsize);
	past_fill_target = (freespace < state->btps_full && lastoff > P_FIRSTKEY);

	if (wont_fit || past_fill_target)
	{
		/* The page we are about to finish and write out. */
		Page		donepage = curpage;
		BlockNumber doneblkno = curblkno;
		ItemId		lastid;
		ItemId		hikeyid;
		IndexTuple	lasttup;
		BTPageOpaque doneopaque;
		BTPageOpaque curopaque;

		/* Start a fresh page on the same level and give it a block number. */
		curpage = _bt_blnewpage(state->btps_level);
		curblkno = wstate->btws_pages_alloced++;

		/*
		 * The final item of the finished page is duplicated onto the new
		 * page as its first item; on the finished page that same item then
		 * stops being a data item and becomes the high key.  This requires
		 * at least two items on the finished page, otherwise nothing useful
		 * would remain on it.
		 */
		Assert(lastoff > P_FIRSTKEY);
		lastid = PageGetItemId(donepage, lastoff);
		lasttup = (IndexTuple) PageGetItem(donepage, lastid);
		_bt_sortaddtup(curpage, ItemIdGetLength(lastid), lasttup, P_FIRSTKEY);

		/*
		 * Point the high-key line pointer at the last item, then drop the
		 * now-surplus trailing line pointer by shrinking pd_lower.
		 */
		hikeyid = PageGetItemId(donepage, P_HIKEY);
		*hikeyid = *lastid;
		ItemIdSetUnused(lastid);	/* redundant */
		((PageHeader) donepage)->pd_lower -= sizeof(ItemIdData);

		/*
		 * Register the finished page with the parent level under its
		 * minimum key.  A missing parent level is created on demand, which
		 * makes the tree one level taller.
		 */
		if (state->btps_next == NULL)
			state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);

		Assert(state->btps_minkey != NULL);
		ItemPointerSet(&(state->btps_minkey->t_tid), doneblkno, P_HIKEY);
		_bt_buildadd(wstate, state->btps_next, state->btps_minkey);
		pfree(state->btps_minkey);

		/*
		 * Remember the minimum key of the new page.  The copy is taken from
		 * the finished page's item rather than from the new page, in case
		 * we are not at leaf level.
		 */
		state->btps_minkey = CopyIndexTuple(lasttup);

		/* Chain the two pages together as siblings. */
		doneopaque = (BTPageOpaque) PageGetSpecialPointer(donepage);
		curopaque = (BTPageOpaque) PageGetSpecialPointer(curpage);

		doneopaque->btpo_next = curblkno;
		curopaque->btpo_prev = doneblkno;
		curopaque->btpo_next = P_NONE;	/* redundant */

		/*
		 * The finished page is never touched again: write it out, which
		 * also lets its workspace be released.
		 */
		_bt_blwritepage(wstate, donepage, doneblkno);

		/* The new page currently holds exactly the one copied item. */
		lastoff = P_FIRSTKEY;
	}

	/*
	 * A page with nothing on it yet can only be the very first page of its
	 * level (later pages receive their first item from the code above), so
	 * this is where that level's initial minimum key gets recorded.
	 */
	if (lastoff == P_HIKEY)
	{
		Assert(state->btps_minkey == NULL);
		state->btps_minkey = CopyIndexTuple(itup);
	}

	/* Finally, append the incoming tuple after the last item on the page. */
	lastoff = OffsetNumberNext(lastoff);
	_bt_sortaddtup(curpage, tupsize, itup, lastoff);

	/* Publish the (possibly new) current page back into the level state. */
	state->btps_page = curpage;
	state->btps_blkno = curblkno;
	state->btps_lastoff = lastoff;
}
Exemple #2
0
/*
 * Report a unique-index violation for the key held by the current heap top.
 *
 * The index tuple stored at the heap top is deformed so that the duplicated
 * key can be shown in the error detail, and the heap relation and index name
 * are attached via errtableconstraint().  Both duplicate-detection sites in
 * mkheap_putAndGet_impl() go through here so that they raise an identical
 * error.  Control does not come back to the caller, because ereport(ERROR)
 * does not return.
 */
static void
mkheap_report_duplicate_key(MKHeap *mkheap)
{
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];

	index_deform_tuple((IndexTuple) mkheap->p->ptr, mkheap->mkctxt->tupdesc,
					   values, isnull);
	ereport(ERROR,
			(errcode(ERRCODE_UNIQUE_VIOLATION),
			 errmsg("could not create unique index \"%s\"",
					RelationGetRelationName(mkheap->mkctxt->indexRel)),
			 errdetail("Key %s is duplicated.",
					   BuildIndexValueDescription(mkheap->mkctxt->indexRel,
												  values, isnull)),
			 errtableconstraint(mkheap->mkctxt->heapRel,
							 RelationGetRelationName(mkheap->mkctxt->indexRel))));
}

/*
 * Insert an entry and perhaps return the top element of the heap in *e
 *
 * The heap must not be empty.
 *
 * When e belongs to the same run as the heap top, it is compared against the
 * top one level at a time, starting at level 0 and expanding further levels
 * only as needed:
 *	 Return < 0 if smaller than heap top; *e is not replaced
 *	 Return = 0 if eq to heap top; *e is not replaced (its key equals the heap top's)
 *	 Return > 0 if successfully inserted; *e is populated with the removed heap top
 *
 * In the "< 0" and "= 0" cases *e still describes the caller's entry, but its
 * level (and whatever tupsort_prepare() fills in) may have been advanced by
 * the comparison.
 *
 * An empty (sentinel) entry, or an entry belonging to a different run than
 * the heap top, is not compared at all: the heap top is handed back and the
 * result is > 0.  Passing an empty entry also decrements the heap count.
 *
 * If 0 would be returned but the heap is marked as needing uniqueness
 * enforcement, error is generated instead.  With uniqueness enforcement on,
 * an error is also raised up front when mke_has_duplicates_with_root()
 * reports a duplicate.
 */
static int
mkheap_putAndGet_impl(MKHeap *mkheap, MKEntry *e)
{
	int			c = 0;
	MKEntry		tmp;

	/* can't put+get from an empty heap */
	Assert(mkheap->count > 0);

	/*
	 * See NOTE ON UNIQUENESS CHECKING in the comment at the top of the file
	 * for information about why we check for duplicates here
	 */
	if (mkheap->mkctxt->enforceUnique &&
		mke_has_duplicates_with_root(mkheap))
		mkheap_report_duplicate_key(mkheap);

	if (mke_is_empty(e))
	{
		/*
		 * adding an empty (sentinel): just remove from count and fallthrough
		 * to where top is removed
		 */
		--mkheap->count;
	}
	else if (mke_get_run(e) != mke_get_run(mkheap->p))
	{
		/*
		 * An entry whose run differs from the heap top's is expected to
		 * belong to a later (larger-numbered) run; no key comparison against
		 * the top is made for it.
		 */
		Assert(mke_get_run(e) > mke_get_run(mkheap->p));

		/*
		 * when the runs differ it is because we attempted once with the runs
		 * equal. So if level is zero then:  the level was zero AND validly
		 * prepared for the previous run -- and there is no need to prep again
		 */
		if (mke_get_lv(e) != 0)
		{
			/* Not same run, at least prepare lv 0 */
			if (mkheap->mkctxt->fetchForPrep)
				tupsort_prepare(e, mkheap->mkctxt, 0);
			mke_set_lv(e, 0);
		}

		/*
		 * now fall through and let top be returned, new one is also inserted
		 * so no change to count
		 */
	}
	else
	{
		/* same run so figure out where it fits in relation to the heap top */
		int			lv = 0;
		int			toplv = mke_get_lv(mkheap->p);

		mke_set_lv(e, lv);

		/*
		 * Compare e against the saved per-level tops for every level below
		 * the heap top's level, stopping at the first level where they
		 * differ.
		 */
		while (lv < toplv)
		{
			if (mkheap->mkctxt->fetchForPrep)
				tupsort_prepare(e, mkheap->mkctxt, lv);
			c = mkheap_compare(mkheap, e, mkheap->lvtops + lv);
			if (c != 0)
				break;

			mke_set_lv(e, ++lv);
		}

		/* smaller than top */
		if (c < 0)
			return -1;

		/*
		 * The loop above stops before level toplv itself, so if every lower
		 * level compared equal, compare against the heap top at its own
		 * level now.
		 */
		Assert(mke_get_lv(e) == lv);
		if (lv == toplv)
		{
			if (mkheap->mkctxt->fetchForPrep)
				tupsort_prepare(e, mkheap->mkctxt, lv);
			c = mkheap_compare(mkheap, e, mkheap->p);
			if (c < 0)
				return -1;
		}

		if (c == 0)
		{
			/*
			 * Equal and at top level.
			 *
			 * This means that e is less-than/equal to all entries except the
			 * heap top.
			 */
			Assert(mke_get_lv(e) == lv);
			Assert(lv == mke_get_lv(mkheap->p));

			/*
			 * Expand more levels of lvtop in the current top and the new one
			 * until we detect a difference.
			 */
			while (lv < mkheap->mkctxt->total_lv - 1)
			{
				mkheap_save_lvtop(mkheap);

				++lv;

				/* expand top */
				if (mkheap->mkctxt->fetchForPrep)
					tupsort_prepare(mkheap->p, mkheap->mkctxt, lv);

				/* expand new element */
				if (mkheap->mkctxt->fetchForPrep)
					tupsort_prepare(e, mkheap->mkctxt, lv);

				mke_set_lv(mkheap->p, lv);
				mke_set_lv(e, lv);

				c = mkheap_compare(mkheap, e, mkheap->p);
				if (c != 0)
					break;
			}

			if (c <= 0)
			{
				/*
				 * if new one is less than current top then we just return
				 * that negative comparison
				 *
				 * if new one equals the current top then we could do an
				 * insert and immediate removal -- but it won't matter so we
				 * simply return right away, without replacing *e
				 */

				/* enforce uniqueness first */
				if (c == 0 && mkheap->mkctxt->enforceUnique)
					mkheap_report_duplicate_key(mkheap);

				return c;
			}
		}
	}

	/*
	 * Now, I am bigger than top but not definitely smaller/equal to all other
	 * entries
	 *
	 * So we will: return top as *e, and do heap shuffling to restore heap
	 * ordering
	 */
	tmp = *e;
	*e = mkheap->p[0];

	/* Sift down a hole to bottom of (current or next) run, depends on tmp.run */
	mkheap_siftdown(mkheap, 0, &tmp);

	if (mkheap_need_heapify(mkheap))
		mkheap_heapify(mkheap, false);

	if (mkheap->count > 0)
	{
		mkheap_update_lvtops(mkheap);
	}

#ifdef USE_ASSERT_CHECKING
	if (gp_mk_sort_check)
		mkheap_verify_heap(mkheap, 0);
#endif

	return 1;
}