/* ----------------------------------------------------------------
 *		WorkTableScanNext
 *
 *		This is a workhorse for ExecWorkTableScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
WorkTableScanNext(WorkTableScanState *node)
{
	TupleTableSlot *slot;
	Tuplestorestate *tuplestorestate;

	/*
	 * get information from the estate and scan state
	 *
	 * Note: we intentionally do not support backward scan.  Although it would
	 * take only a couple more lines here, it would force nodeRecursiveunion.c
	 * to create the tuplestore with backward scan enabled, which has a
	 * performance cost.  In practice backward scan is never useful for a
	 * worktable plan node, since it cannot appear high enough in the plan
	 * tree of a scrollable cursor to be exposed to a backward-scan
	 * requirement.  So it's not worth expending effort to support it.
	 *
	 * Note: we are also assuming that this node is the only reader of the
 * worktable.  Therefore, we don't need a private read pointer for the
	 * tuplestore, nor do we need to tell tuplestore_gettupleslot to copy.
	 */
	Assert(ScanDirectionIsForward(node->ss.ps.state->es_direction));

	tuplestorestate = node->rustate->working_table;

	/*
	 * Get the next tuple from tuplestore. Return NULL if no more tuples.
	 */
	slot = node->ss.ss_ScanTupleSlot;
	(void) tuplestore_gettupleslot(tuplestorestate, true, false, slot);
	return slot;
}
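The note above pins down the tuplestore contract this node relies on: created without random access, read forward only by a single reader, with no copying on fetch. A minimal sketch of that contract outside the executor (the helper name and loop body are illustrative, not from the source):

#include "postgres.h"
#include "miscadmin.h"
#include "executor/tuptable.h"
#include "utils/tuplestore.h"

/*
 * Sketch of the forward-only, single-reader tuplestore usage that
 * WorkTableScanNext depends on.
 */
static void
worktable_contract_sketch(TupleTableSlot *writeslot, TupleTableSlot *readslot)
{
	/* randomAccess = false: cheaper, but forecloses backward scan */
	Tuplestorestate *ts = tuplestore_begin_heap(false, false, work_mem);

	tuplestore_puttupleslot(ts, writeslot);

	/* forward = true, copy = false: safe only because we are the sole reader */
	while (tuplestore_gettupleslot(ts, true, false, readslot))
	{
		/* ... consume readslot ... */
	}

	tuplestore_end(ts);
}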
Example #2
void
parquet_getnext(ParquetScanDesc scan, ScanDirection direction,
				TupleTableSlot *slot)
{
	AOTupleId aoTupleId;
	Assert(ScanDirectionIsForward(direction));

	for(;;)
	{
		if(scan->bufferDone)
		{
			/*
			 * Get the next row group. We call this function until we
			 * successfully get a block to process, or finished reading
			 * all the data (all 'segment' files) for this relation.
			 */
			while(!getNextRowGroup(scan))
			{
				/* have we read all this relation's data? if so, we're done */
				if(scan->pqs_done_all_splits)
				{
					ExecClearTuple(slot);
					return /*NULL*/;
				}
			}

			scan->bufferDone = false;
		}

		bool tupleExist = ParquetRowGroupReader_ScanNextTuple(
												scan->pqs_tupDesc,
												&scan->rowGroupReader,
												scan->hawqAttrToParquetColChunks,
												scan->proj,
												slot);

		if(tupleExist)
		{
			int segno = ((FileSplitNode *)list_nth(scan->splits, scan->pqs_splits_processed - 1))->segno;
			AOTupleIdInit_Init(&aoTupleId);
			AOTupleIdInit_segmentFileNum(&aoTupleId,
										 segno);

			scan->cur_seg_row++;
			AOTupleIdInit_rowNum(&aoTupleId, scan->cur_seg_row);

			scan->cdb_fake_ctid = *((ItemPointer)&aoTupleId);

			slot_set_ctid(slot, &(scan->cdb_fake_ctid));

			return;
		}
		/* no more items in the row group, get new buffer */
		scan->bufferDone = true;
	}
}
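The assignment to cdb_fake_ctid above casts &aoTupleId straight to ItemPointer, which only works because the append-only tuple ID is deliberately laid out to occupy the same six bytes as ItemPointerData. A sketch of that layout (field names follow my reading of Greenplum's AOTupleId declaration and should be treated as illustrative):

/*
 * AOTupleId packs a segment file number plus a row number into three
 * uint16s, mirroring ItemPointerData's size so the executor can carry
 * it anywhere a ctid is expected.
 */
typedef struct AOTupleId
{
	uint16		bytes_0_1;		/* segment file number and high row bits */
	uint16		bytes_2_3;		/* middle row bits */
	uint16		bytes_4_5;		/* low row bits */
} AOTupleId;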
Example #3
/* ------------------------------------------------------------------
 * 	ExecShareInputScan
 * 	Retrieve a tuple from the ShareInputScan
 * ------------------------------------------------------------------
 */
TupleTableSlot *
ExecShareInputScan(ShareInputScanState *node)
{
	EState *estate;
	ScanDirection dir;
	bool forward;
	TupleTableSlot *slot;

	ShareInputScan * sisc = (ShareInputScan *) node->ss.ps.plan;

	ShareType share_type = sisc->share_type;

	/* 
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);


	/* On the first call, we need to initialize the tuplestore state. */
	if(node->ts_state == NULL)
	{
		elog(DEBUG1, "SISC (shareid=%d, slice=%d): No tuplestore yet, initializing tuplestore",
				sisc->share_id, currentSliceId);
		init_tuplestore_state(node);
	}

	slot = node->ss.ps.ps_ResultTupleSlot;

	while(1)
	{
		bool gotOK = false;

		if(share_type == SHARE_MATERIAL || share_type == SHARE_MATERIAL_XSLICE) 
		{
			ntuplestore_acc_advance((NTupleStoreAccessor *) node->ts_pos, forward ? 1 : -1);
			gotOK = ntuplestore_acc_current_tupleslot((NTupleStoreAccessor *) node->ts_pos, slot);
		}
		else
		{
			gotOK = tuplesort_gettupleslot_pos(node->ts_state->sortstore, (TuplesortPos *)node->ts_pos, forward, slot, CurrentMemoryContext);
		}

		if(!gotOK)
			return NULL;

		SIMPLE_FAULT_INJECTOR(ExecShareInputNext);

		return slot;
	}

	Assert(!"should not be here");
	return NULL;
}
Example #4
/* ----------------------------------------------------------------
 *		FunctionNext
 *
 *		This is a workhorse for ExecFunctionScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
FunctionNext(FunctionScanState *node)
{
	TupleTableSlot *slot;
	EState	   *estate;
	ScanDirection direction;
	Tuplestorestate *tuplestorestate;
	bool		should_free;
	HeapTuple	heapTuple;

	/*
	 * get information from the estate and scan state
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;

	tuplestorestate = node->tuplestorestate;

	/*
	 * If first time through, read all tuples from function and put them in a
	 * tuplestore. Subsequent calls just fetch tuples from tuplestore.
	 */
	if (tuplestorestate == NULL)
	{
		ExprContext *econtext = node->ss.ps.ps_ExprContext;
		TupleDesc	funcTupdesc;

		node->tuplestorestate = tuplestorestate =
			ExecMakeTableFunctionResult(node->funcexpr,
										econtext,
										node->tupdesc,
										&funcTupdesc);

		/*
		 * If function provided a tupdesc, cross-check it.	We only really
		 * need to do this for functions returning RECORD, but might as well
		 * do it always.
		 */
		if (funcTupdesc)
			tupledesc_match(node->tupdesc, funcTupdesc);
	}

	/*
	 * Get the next tuple from tuplestore. Return NULL if no more tuples.
	 */
	heapTuple = tuplestore_getheaptuple(tuplestorestate,
										ScanDirectionIsForward(direction),
										&should_free);
	slot = node->ss.ss_ScanTupleSlot;
	if (heapTuple)
		return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free);
	else
		return ExecClearTuple(slot);
}
Example #5
/* ----------------------------------------------------------------
 *		FunctionNext
 *
 *		This is a workhorse for ExecFunctionScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
FunctionNext(FunctionScanState *node)
{
	TupleTableSlot *slot;
	EState	   *estate;
	ScanDirection direction;
	Tuplestorestate *tuplestorestate;
	bool		should_free;
	HeapTuple	heapTuple;

	/*
	 * get information from the estate and scan state
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;

	tuplestorestate = node->tuplestorestate;

	/*
	 * If first time through, read all tuples from function and put them
	 * in a tuplestore. Subsequent calls just fetch tuples from
	 * tuplestore.
	 */
	if (tuplestorestate == NULL)
	{
		ExprContext *econtext = node->ss.ps.ps_ExprContext;
		TupleDesc	funcTupdesc;

		node->tuplestorestate = tuplestorestate =
			ExecMakeTableFunctionResult(node->funcexpr,
										econtext,
										node->tupdesc,
										&funcTupdesc);

		/*
		 * If function provided a tupdesc, cross-check it.	We only really
		 * need to do this for functions returning RECORD, but might as
		 * well do it always.
		 */
		if (funcTupdesc && !tupledesc_match(node->tupdesc, funcTupdesc))
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("query-specified return row and actual function return row do not match")));
	}

	/*
	 * Get the next tuple from tuplestore. Return NULL if no more tuples.
	 */
	heapTuple = tuplestore_getheaptuple(tuplestorestate,
										ScanDirectionIsForward(direction),
										&should_free);
	slot = node->ss.ss_ScanTupleSlot;
	if (heapTuple)
		return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free);
	else
		return ExecClearTuple(slot);
}
Example #6
/*
 * Step from current page.
 */
Buffer
rumStep(Buffer buffer, Relation index, int lockmode,
			 ScanDirection scanDirection)
{
	Buffer		nextbuffer;
	Page		page = BufferGetPage(buffer);
	bool		isLeaf = RumPageIsLeaf(page);
	bool		isData = RumPageIsData(page);
	BlockNumber blkno;

	blkno = (ScanDirectionIsForward(scanDirection)) ?
		RumPageGetOpaque(page)->rightlink :
		RumPageGetOpaque(page)->leftlink;

	if (blkno == InvalidBlockNumber)
	{
		UnlockReleaseBuffer(buffer);
		return InvalidBuffer;
	}

	nextbuffer = ReadBuffer(index, blkno);
	UnlockReleaseBuffer(buffer);
	LockBuffer(nextbuffer, lockmode);

	/* Sanity check that the page we stepped to is of similar kind. */
	page = BufferGetPage(nextbuffer);
	if (isLeaf != RumPageIsLeaf(page) || isData != RumPageIsData(page))
		elog(ERROR, "%s sibling of RUM page is of different type",
			 ScanDirectionIsForward(scanDirection) ? "right" : "left");

	/*
	 * Given the proper lock sequence above, we should never land on a deleted
	 * page.
	 */
	if (RumPageIsDeleted(page))
		elog(ERROR, "%s sibling of RUM page was deleted",
			 ScanDirectionIsForward(scanDirection) ? "right" : "left");

	return nextbuffer;
}
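rumStep hands back the next sibling already pinned and locked, or InvalidBuffer at the end of the chain (after releasing the caller's buffer), so a whole leaf chain can be walked with one loop. A hypothetical usage sketch, assuming buf starts out pinned and locked and that the core lock modes are acceptable for the lockmode argument:

#include "postgres.h"
#include "access/sdir.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

static void
walk_right_sketch(Buffer buf, Relation index)
{
	while (BufferIsValid(buf))
	{
		Page		page = BufferGetPage(buf);

		/* ... examine page ... */

		/* releases our pin/lock and returns the locked right sibling */
		buf = rumStep(buf, index, BUFFER_LOCK_SHARE, ForwardScanDirection);
	}
	/* nothing to release here: rumStep dropped the last buffer itself */
}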
Example #7
/* ----------------------------------------------------------------
 *	   ExecAppend
 *
 *		Handles iteration over multiple subplans.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecAppend(AppendState *node)
{
	for (;;)
	{
		PlanState  *subnode;
		TupleTableSlot *result;

		/*
		 * figure out which subplan we are currently processing
		 */
		subnode = node->appendplans[node->as_whichplan];

		/*
		 * get a tuple from the subplan
		 */
		result = ExecProcNode(subnode);

		if (!TupIsNull(result))
		{
			/*
			 * If the subplan gave us something then return it as-is. We do
			 * NOT make use of the result slot that was set up in
			 * ExecInitAppend, first because there's no reason to and second
			 * because it may have the wrong tuple descriptor in
			 * inherited-UPDATE cases.
			 */
			return result;
		}

		/*
		 * Go on to the "next" subplan in the appropriate direction. If no
		 * more subplans, return the empty slot set up for us by
		 * ExecInitAppend.
		 */
		if (ScanDirectionIsForward(node->ps.state->es_direction))
			node->as_whichplan++;
		else
			node->as_whichplan--;
		if (!exec_append_initialize_next(node))
			return ExecClearTuple(node->ps.ps_ResultTupleSlot);

		/* Else loop back and try to get a tuple from the new subplan */
	}
}
Example #8
/*
 *	_bt_next() -- Get the next item in a scan.
 *
 *		On entry, so->currPos describes the current page, which is pinned
 *		but not locked, and so->currPos.itemIndex identifies which item was
 *		previously returned.
 *
 *		On successful exit, scan->xs_ctup.t_self is set to the TID of the
 *		next heap tuple, and if requested, scan->xs_itup points to a copy of
 *		the index tuple.  so->currPos is updated as needed.
 *
 *		On failure exit (no more tuples), we release pin and set
 *		so->currPos.buf to InvalidBuffer.
 */
bool
_bt_next(IndexScanDesc scan, ScanDirection dir)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	BTScanPosItem *currItem;

	/*
	 * Advance to next tuple on current page; or if there's no more, try to
	 * step to the next page with data.
	 */
	if (ScanDirectionIsForward(dir))
	{
		if (++so->currPos.itemIndex > so->currPos.lastItem)
		{
			/* We must acquire lock before applying _bt_steppage */
			Assert(BufferIsValid(so->currPos.buf));
			LockBuffer(so->currPos.buf, BT_READ);
			if (!_bt_steppage(scan, dir))
				return false;
			/* Drop the lock, but not pin, on the new page */
			LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
		}
	}
	else
	{
		if (--so->currPos.itemIndex < so->currPos.firstItem)
		{
			/* We must acquire lock before applying _bt_steppage */
			Assert(BufferIsValid(so->currPos.buf));
			LockBuffer(so->currPos.buf, BT_READ);
			if (!_bt_steppage(scan, dir))
				return false;
			/* Drop the lock, but not pin, on the new page */
			LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
		}
	}

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_ctup.t_self = currItem->heapTid;
	if (scan->xs_want_itup)
		scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);

	return true;
}
Example #9
/* ----------------------------------------------------------------
 *		FunctionNext
 *
 *		This is a workhorse for ExecFunctionScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
FunctionNext(FunctionScanState *node)
{
	TupleTableSlot *slot;
	EState	   *estate;
	ScanDirection direction;
	Tuplestorestate *tuplestorestate;

	/*
	 * get information from the estate and scan state
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;

	tuplestorestate = node->tuplestorestate;

	/*
	 * If first time through, read all tuples from function and put them in a
	 * tuplestore. Subsequent calls just fetch tuples from tuplestore.
	 */
	if (tuplestorestate == NULL)
	{
		node->tuplestorestate = tuplestorestate =
			ExecMakeTableFunctionResult(node->funcexpr,
										node->ss.ps.ps_ExprContext,
										node->tupdesc,
										node->eflags & EXEC_FLAG_BACKWARD);
	}

	/*
	 * Get the next tuple from tuplestore. Return NULL if no more tuples.
	 */
	slot = node->ss.ss_ScanTupleSlot;
	(void) tuplestore_gettupleslot(tuplestorestate,
								   ScanDirectionIsForward(direction),
								   false,
								   slot);
	return slot;
}
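What distinguishes this version is the final argument to ExecMakeTableFunctionResult: whether the executor raised EXEC_FLAG_BACKWARD at init time (saved in node->eflags) decides whether the function's tuplestore is built with random access. A sketch of the mapping presumably applied inside, not the function's actual body:

	/* random access only if someone may read the store backward */
	bool		randomAccess = (node->eflags & EXEC_FLAG_BACKWARD) != 0;
	Tuplestorestate *ts = tuplestore_begin_heap(randomAccess, false, work_mem);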
Example #10
/*
 *	_bt_first() -- Find the first item in a scan.
 *
 *		We need to be clever about the type of scan, the operation it's
 *		performing, and the tree ordering.	We find the
 *		first item in the tree that satisfies the qualification
 *		associated with the scan descriptor.  On exit, the page containing
 *		the current index tuple is read locked and pinned, and the scan's
 *		opaque data entry is updated to include the buffer.
 */
bool
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf;
	Page		page;
	BTStack		stack;
	OffsetNumber offnum;
	BTItem		btitem;
	IndexTuple	itup;
	ItemPointer current;
	BlockNumber blkno;
	StrategyNumber strat;
	bool		res;
	int32		result;
	bool		scanFromEnd;
	bool		continuescan;
	ScanKey		scankeys = NULL;
	int			keysCount = 0;
	int		   *nKeyIs = NULL;
	int			i,
				j;
	StrategyNumber strat_total;

	/*
	 * Order the scan keys in our canonical fashion and eliminate any
	 * redundant keys.
	 */
	_bt_orderkeys(scan);

	/*
	 * Quit now if _bt_orderkeys() discovered that the scan keys can never
	 * be satisfied (eg, x == 1 AND x > 2).
	 */
	if (!so->qual_ok)
		return false;

	/*
	 * Examine the scan keys to discover where we need to start the scan.
	 */
	scanFromEnd = false;
	strat_total = BTEqualStrategyNumber;
	if (so->numberOfKeys > 0)
	{
		nKeyIs = (int *) palloc(so->numberOfKeys * sizeof(int));
		for (i = 0; i < so->numberOfKeys; i++)
		{
			AttrNumber	attno = so->keyData[i].sk_attno;

			/* ignore keys for already-determined attrs */
			if (attno <= keysCount)
				continue;
			/* if we didn't find a boundary for the preceding attr, quit */
			if (attno > keysCount + 1)
				break;
			strat = _bt_getstrat(rel, attno,
								 so->keyData[i].sk_procedure);

			/*
			 * Can we use this key as a starting boundary for this attr?
			 *
			 * We can use multiple keys if they look like, say, = >= = but we
			 * have to stop after accepting a > or < boundary.
			 */
			if (strat == strat_total ||
				strat == BTEqualStrategyNumber)
				nKeyIs[keysCount++] = i;
			else if (ScanDirectionIsBackward(dir) &&
					 (strat == BTLessStrategyNumber ||
					  strat == BTLessEqualStrategyNumber))
			{
				nKeyIs[keysCount++] = i;
				strat_total = strat;
				if (strat == BTLessStrategyNumber)
					break;
			}
			else if (ScanDirectionIsForward(dir) &&
					 (strat == BTGreaterStrategyNumber ||
					  strat == BTGreaterEqualStrategyNumber))
			{
				nKeyIs[keysCount++] = i;
				strat_total = strat;
				if (strat == BTGreaterStrategyNumber)
					break;
			}
		}
		if (keysCount == 0)
			scanFromEnd = true;
	}
	else
		scanFromEnd = true;

	/* if we just need to walk down one edge of the tree, do that */
	if (scanFromEnd)
	{
		if (nKeyIs)
			pfree(nKeyIs);
		return _bt_endpoint(scan, dir);
	}

	/*
	 * We want to start the scan somewhere within the index.  Set up a
	 * scankey we can use to search for the correct starting point.
	 */
	scankeys = (ScanKey) palloc(keysCount * sizeof(ScanKeyData));
	for (i = 0; i < keysCount; i++)
	{
		FmgrInfo   *procinfo;

		j = nKeyIs[i];

		/*
		 * _bt_orderkeys disallows it, but this is the place to add some
		 * code later
		 */
		if (so->keyData[j].sk_flags & SK_ISNULL)
		{
			pfree(nKeyIs);
			pfree(scankeys);
			elog(ERROR, "btree doesn't support is(not)null, yet");
			return false;
		}
		procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
		ScanKeyEntryInitializeWithInfo(scankeys + i,
									   so->keyData[j].sk_flags,
									   i + 1,
									   procinfo,
									   CurrentMemoryContext,
									   so->keyData[j].sk_argument);
	}
	if (nKeyIs)
		pfree(nKeyIs);

	current = &(scan->currentItemData);

	/*
	 * Use the manufactured scan key to descend the tree and position
	 * ourselves on the target leaf page.
	 */
	stack = _bt_search(rel, keysCount, scankeys, &buf, BT_READ);

	/* don't need to keep the stack around... */
	_bt_freestack(stack);

	if (!BufferIsValid(buf))
	{
		/* Only get here if index is completely empty */
		ItemPointerSetInvalid(current);
		so->btso_curbuf = InvalidBuffer;
		pfree(scankeys);
		return false;
	}

	/* remember which buffer we have pinned */
	so->btso_curbuf = buf;
	blkno = BufferGetBlockNumber(buf);
	page = BufferGetPage(buf);

	/* position to the precise item on the page */
	offnum = _bt_binsrch(rel, buf, keysCount, scankeys);

	ItemPointerSet(current, blkno, offnum);

	/*
	 * At this point we are positioned at the first item >= scan key, or
	 * possibly at the end of a page on which all the existing items are
	 * less than the scan key and we know that everything on later pages
	 * is greater than or equal to scan key.
	 *
	 * We could step forward in the latter case, but that'd be a waste of
	 * time if we want to scan backwards.  So, it's now time to examine
	 * the scan strategy to find the exact place to start the scan.
	 *
	 * Note: if _bt_step fails (meaning we fell off the end of the index in
	 * one direction or the other), we either return false (no matches) or
	 * call _bt_endpoint() to set up a scan starting at that index
	 * endpoint, as appropriate for the desired scan type.
	 *
	 * this is yet another place to add some code later for is(not)null ...
	 */

	switch (strat_total)
	{
		case BTLessStrategyNumber:

			/*
			 * Back up one to arrive at last item < scankey
			 */
			if (!_bt_step(scan, &buf, BackwardScanDirection))
			{
				pfree(scankeys);
				return false;
			}
			break;

		case BTLessEqualStrategyNumber:

			/*
			 * We need to find the last item <= scankey, so step forward
			 * till we find one > scankey, then step back one.
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return _bt_endpoint(scan, dir);
				}
			}
			for (;;)
			{
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
				result = _bt_compare(rel, keysCount, scankeys, page, offnum);
				if (result < 0)
					break;
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return _bt_endpoint(scan, dir);
				}
			}
			if (!_bt_step(scan, &buf, BackwardScanDirection))
			{
				pfree(scankeys);
				return false;
			}
			break;

		case BTEqualStrategyNumber:

			/*
			 * Make sure we are on the first equal item; might have to
			 * step forward if currently at end of page.
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
			}
			result = _bt_compare(rel, keysCount, scankeys, page, offnum);
			if (result != 0)
				goto nomatches; /* no equal items! */

			/*
			 * If a backward scan was specified, need to start with last
			 * equal item not first one.
			 */
			if (ScanDirectionIsBackward(dir))
			{
				do
				{
					if (!_bt_step(scan, &buf, ForwardScanDirection))
					{
						pfree(scankeys);
						return _bt_endpoint(scan, dir);
					}
					offnum = ItemPointerGetOffsetNumber(current);
					page = BufferGetPage(buf);
					result = _bt_compare(rel, keysCount, scankeys, page, offnum);
				} while (result == 0);
				if (!_bt_step(scan, &buf, BackwardScanDirection))
					elog(ERROR, "equal items disappeared?");
			}
			break;

		case BTGreaterEqualStrategyNumber:

			/*
			 * We want the first item >= scankey, which is where we are...
			 * unless we're not anywhere at all...
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
			}
			break;

		case BTGreaterStrategyNumber:

			/*
			 * We want the first item > scankey, so make sure we are on an
			 * item and then step over any equal items.
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
			}
			result = _bt_compare(rel, keysCount, scankeys, page, offnum);
			while (result == 0)
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
				result = _bt_compare(rel, keysCount, scankeys, page, offnum);
			}
			break;
	}

	/* okay, current item pointer for the scan is right */
	offnum = ItemPointerGetOffsetNumber(current);
	page = BufferGetPage(buf);
	btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
	itup = &btitem->bti_itup;

	/* is the first item actually acceptable? */
	if (_bt_checkkeys(scan, itup, dir, &continuescan))
	{
		/* yes, return it */
		scan->xs_ctup.t_self = itup->t_tid;
		res = true;
	}
	else if (continuescan)
	{
		/* no, but there might be another one that is */
		res = _bt_next(scan, dir);
	}
	else
	{
		/* no tuples in the index match this scan key */
nomatches:
		ItemPointerSetInvalid(current);
		so->btso_curbuf = InvalidBuffer;
		_bt_relbuf(rel, buf);
		res = false;
	}

	pfree(scankeys);

	return res;
}
Example #11
0
/*
 *	_bt_endpoint() -- Find the first or last key in the index.
 *
 * This is used by _bt_first() to set up a scan when we've determined
 * that the scan must start at the beginning or end of the index (for
 * a forward or backward scan respectively).
 */
static bool
_bt_endpoint(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel;
	Buffer		buf;
	Page		page;
	BTPageOpaque opaque;
	ItemPointer current;
	OffsetNumber maxoff;
	OffsetNumber start;
	BlockNumber blkno;
	BTItem		btitem;
	IndexTuple	itup;
	BTScanOpaque so;
	bool		res;
	bool		continuescan;

	rel = scan->indexRelation;
	current = &(scan->currentItemData);
	so = (BTScanOpaque) scan->opaque;

	/*
	 * Scan down to the leftmost or rightmost leaf page.  This is a
	 * simplified version of _bt_search().	We don't maintain a stack
	 * since we know we won't need it.
	 */
	buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));

	if (!BufferIsValid(buf))
	{
		/* empty index... */
		ItemPointerSetInvalid(current);
		so->btso_curbuf = InvalidBuffer;
		return false;
	}

	blkno = BufferGetBlockNumber(buf);
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	Assert(P_ISLEAF(opaque));

	maxoff = PageGetMaxOffsetNumber(page);

	if (ScanDirectionIsForward(dir))
	{
		/* There could be dead pages to the left, so not this: */
		/* Assert(P_LEFTMOST(opaque)); */

		start = P_FIRSTDATAKEY(opaque);
	}
	else if (ScanDirectionIsBackward(dir))
	{
		Assert(P_RIGHTMOST(opaque));

		start = PageGetMaxOffsetNumber(page);
		if (start < P_FIRSTDATAKEY(opaque))		/* watch out for empty
												 * page */
			start = P_FIRSTDATAKEY(opaque);
	}
	else
	{
		elog(ERROR, "invalid scan direction: %d", (int) dir);
		start = 0;				/* keep compiler quiet */
	}

	ItemPointerSet(current, blkno, start);
	/* remember which buffer we have pinned */
	so->btso_curbuf = buf;

	/*
	 * Left/rightmost page could be empty due to deletions, if so step
	 * till we find a nonempty page.
	 */
	if (start > maxoff)
	{
		if (!_bt_step(scan, &buf, dir))
			return false;
		start = ItemPointerGetOffsetNumber(current);
		page = BufferGetPage(buf);
	}

	btitem = (BTItem) PageGetItem(page, PageGetItemId(page, start));
	itup = &(btitem->bti_itup);

	/* see if we picked a winner */
	if (_bt_checkkeys(scan, itup, dir, &continuescan))
	{
		/* yes, return it */
		scan->xs_ctup.t_self = itup->t_tid;
		res = true;
	}
	else if (continuescan)
	{
		/* no, but there might be another one that is */
		res = _bt_next(scan, dir);
	}
	else
	{
		/* no tuples in the index match this scan key */
		ItemPointerSetInvalid(current);
		so->btso_curbuf = InvalidBuffer;
		_bt_relbuf(rel, buf);
		res = false;
	}

	return res;
}
Example #12
/* ----------------------------------------------------------------
 *		ExecMaterial
 *
 *		As long as we are at the end of the data collected in the tuplestore,
 *		we collect one new row from the subplan on each call, and stash it
 *		aside in the tuplestore before returning it.  The tuplestore is
 *		only read if we are asked to scan backwards, rescan, or mark/restore.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* result tuple from subplan */
ExecMaterial(MaterialState *node)
{
	EState	   *estate;
	ScanDirection dir;
	bool		forward;
	Tuplestorestate *tuplestorestate;
	HeapTuple	heapTuple = NULL;
	bool		should_free = false;
	bool		eof_tuplestore;
	TupleTableSlot *slot;

	/*
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);
	tuplestorestate = (Tuplestorestate *) node->tuplestorestate;

	/*
	 * If first time through, and we need a tuplestore, initialize it.
	 */
	if (tuplestorestate == NULL && node->randomAccess)
	{
		tuplestorestate = tuplestore_begin_heap(true, false, work_mem);

		node->tuplestorestate = (void *) tuplestorestate;
	}

	/*
	 * If we are not at the end of the tuplestore, or are going backwards, try
	 * to fetch a tuple from tuplestore.
	 */
	eof_tuplestore = (tuplestorestate == NULL) ||
		tuplestore_ateof(tuplestorestate);

	if (!forward && eof_tuplestore)
	{
		if (!node->eof_underlying)
		{
			/*
			 * When reversing direction at tuplestore EOF, the first
			 * getheaptuple call will fetch the last-added tuple; but we want
			 * to return the one before that, if possible. So do an extra
			 * fetch.
			 */
			heapTuple = tuplestore_getheaptuple(tuplestorestate,
												forward,
												&should_free);
			if (heapTuple == NULL)
				return NULL;	/* the tuplestore must be empty */
			if (should_free)
				heap_freetuple(heapTuple);
		}
		eof_tuplestore = false;
	}

	if (!eof_tuplestore)
	{
		heapTuple = tuplestore_getheaptuple(tuplestorestate,
											forward,
											&should_free);
		if (heapTuple == NULL && forward)
			eof_tuplestore = true;
	}

	/*
	 * If necessary, try to fetch another row from the subplan.
	 *
	 * Note: the eof_underlying state variable exists to short-circuit further
	 * subplan calls.  It's not optional, unfortunately, because some plan
	 * node types are not robust about being called again when they've already
	 * returned NULL.
	 */
	if (eof_tuplestore && !node->eof_underlying)
	{
		PlanState  *outerNode;
		TupleTableSlot *outerslot;

		/*
		 * We can only get here with forward==true, so no need to worry about
		 * which direction the subplan will go.
		 */
		outerNode = outerPlanState(node);
		outerslot = ExecProcNode(outerNode);
		if (TupIsNull(outerslot))
		{
			node->eof_underlying = true;
			return NULL;
		}
		heapTuple = ExecFetchSlotTuple(outerslot);
		should_free = false;

		/*
		 * Append returned tuple to tuplestore, too.  NOTE: because the
		 * tuplestore is certainly in EOF state, its read position will move
		 * forward over the added tuple.  This is what we want.
		 */
		if (tuplestorestate)
			tuplestore_puttuple(tuplestorestate, (void *) heapTuple);
	}

	/*
	 * Return the obtained tuple, if any.
	 */
	slot = (TupleTableSlot *) node->ss.ps.ps_ResultTupleSlot;
	if (heapTuple)
		return ExecStoreTuple(heapTuple, slot, InvalidBuffer, should_free);
	else
		return ExecClearTuple(slot);
}
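The extra fetch when reversing at EOF is easiest to see with concrete tuples. Suppose the store holds t1, t2, t3, the last forward call returned t3, and the read pointer now sits at EOF; a caller switching to a backward scan expects t2 next. A sketch of the two fetches involved (same tuplestore_getheaptuple interface as above):

	/* first backward fetch re-yields t3, the tuple already returned */
	tuple = tuplestore_getheaptuple(ts, false, &should_free);	/* t3: discard */
	/* the fetch that follows is the one whose result we hand back */
	tuple = tuplestore_getheaptuple(ts, false, &should_free);	/* t2 */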
Example #13
/* ----------------------------------------------------------------
 *		CteScanNext
 *
 *		This is a workhorse for ExecCteScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
CteScanNext(CteScanState *node)
{
	EState	   *estate;
	ScanDirection dir;
	bool		forward;
	Tuplestorestate *tuplestorestate;
	bool		eof_tuplestore;
	TupleTableSlot *slot;

	/*
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);
	tuplestorestate = node->leader->cte_table;
	tuplestore_select_read_pointer(tuplestorestate, node->readptr);
	slot = node->ss.ss_ScanTupleSlot;

	/*
	 * If we are not at the end of the tuplestore, or are going backwards, try
	 * to fetch a tuple from tuplestore.
	 */
	eof_tuplestore = tuplestore_ateof(tuplestorestate);

	if (!forward && eof_tuplestore)
	{
		if (!node->leader->eof_cte)
		{
			/*
			 * When reversing direction at tuplestore EOF, the first
			 * gettupleslot call will fetch the last-added tuple; but we want
			 * to return the one before that, if possible. So do an extra
			 * fetch.
			 */
			if (!tuplestore_advance(tuplestorestate, forward))
				return NULL;	/* the tuplestore must be empty */
		}
		eof_tuplestore = false;
	}

	/*
	 * If we can fetch another tuple from the tuplestore, return it.
	 *
	 * Note: we have to use copy=true in the tuplestore_gettupleslot call,
	 * because we are sharing the tuplestore with other nodes that might write
	 * into the tuplestore before we get called again.
	 */
	if (!eof_tuplestore)
	{
		if (tuplestore_gettupleslot(tuplestorestate, forward, true, slot))
			return slot;
		if (forward)
			eof_tuplestore = true;
	}

	/*
	 * If necessary, try to fetch another row from the CTE query.
	 *
	 * Note: the eof_cte state variable exists to short-circuit further calls
	 * of the CTE plan.  It's not optional, unfortunately, because some plan
	 * node types are not robust about being called again when they've already
	 * returned NULL.
	 */
	if (eof_tuplestore && !node->leader->eof_cte)
	{
		TupleTableSlot *cteslot;

		/*
		 * We can only get here with forward==true, so no need to worry about
		 * which direction the subplan will go.
		 */
		cteslot = ExecProcNode(node->cteplanstate);
		if (TupIsNull(cteslot))
		{
			node->leader->eof_cte = true;
			return NULL;
		}

		/*
		 * There are corner cases where the subplan could change which
		 * tuplestore read pointer is active, so be sure to reselect ours
		 * before storing the tuple we got.
		 */
		tuplestore_select_read_pointer(tuplestorestate, node->readptr);

		/*
		 * Append a copy of the returned tuple to tuplestore.  NOTE: because
		 * our read pointer is certainly in EOF state, its read position will
		 * move forward over the added tuple.  This is what we want.  Also,
		 * any other readers will *not* move past the new tuple, which is what
		 * they want.
		 */
		tuplestore_puttupleslot(tuplestorestate, cteslot);

		/*
		 * We MUST copy the CTE query's output tuple into our own slot. This
		 * is because other CteScan nodes might advance the CTE query before
		 * we are called again, and our output tuple must stay stable over
		 * that.
		 */
		return ExecCopySlot(slot, cteslot);
	}

	/*
	 * Nothing left ...
	 */
	return ExecClearTuple(slot);
}
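Every CteScan node owns a private read pointer into the shared tuplestore, which is why the function begins by reselecting node->readptr and why the gettupleslot call must pass copy = true. A sketch of how such a pointer might be obtained at node initialization (simplified from what nodeCtescan.c's init code would have to do):

	/* the leader created cte_table; each scan claims its own pointer */
	node->readptr = tuplestore_alloc_read_pointer(node->leader->cte_table,
												  node->eflags);
	tuplestore_select_read_pointer(node->leader->cte_table, node->readptr);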
Example #14
/*
 *	_bt_steppage() -- Step to next page containing valid data for scan
 *
 * On entry, so->currPos.buf must be pinned and read-locked.  We'll drop
 * the lock and pin before moving to next page.
 *
 * On success exit, we hold pin and read-lock on the next interesting page,
 * and so->currPos is updated to contain data from that page.
 *
 * If there are no more matching records in the given direction, we drop all
 * locks and pins, set so->currPos.buf to InvalidBuffer, and return FALSE.
 */
static bool
_bt_steppage(IndexScanDesc scan, ScanDirection dir)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Relation	rel;
	Page		page;
	BTPageOpaque opaque;

	/* we must have the buffer pinned and locked */
	Assert(BufferIsValid(so->currPos.buf));

	/* Before leaving current page, deal with any killed items */
	if (so->numKilled > 0)
		_bt_killitems(scan, true);

	/*
	 * Before we modify currPos, make a copy of the page data if there was a
	 * mark position that needs it.
	 */
	if (so->markItemIndex >= 0)
	{
		/* bump pin on current buffer for assignment to mark buffer */
		IncrBufferRefCount(so->currPos.buf);
		memcpy(&so->markPos, &so->currPos,
			   offsetof(BTScanPosData, items[1]) +
			   so->currPos.lastItem * sizeof(BTScanPosItem));
		if (so->markTuples)
			memcpy(so->markTuples, so->currTuples,
				   so->currPos.nextTupleOffset);
		so->markPos.itemIndex = so->markItemIndex;
		so->markItemIndex = -1;
	}

	rel = scan->indexRelation;

	if (ScanDirectionIsForward(dir))
	{
		/* Walk right to the next page with data */
		/* We must rely on the previously saved nextPage link! */
		BlockNumber blkno = so->currPos.nextPage;

		/* Remember we left a page with data */
		so->currPos.moreLeft = true;

		for (;;)
		{
			/* release the previous buffer */
			_bt_relbuf(rel, so->currPos.buf);
			so->currPos.buf = InvalidBuffer;
			/* if we're at end of scan, give up */
			if (blkno == P_NONE || !so->currPos.moreRight)
				return false;
			/* check for interrupts while we're not holding any buffer lock */
			CHECK_FOR_INTERRUPTS();
			/* step right one page */
			so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
			/* check for deleted page */
			page = BufferGetPage(so->currPos.buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
			{
				PredicateLockPage(rel, blkno, scan->xs_snapshot);
				/* see if there are any matches on this page */
				/* note that this will clear moreRight if we can stop */
				if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
					break;
			}
			/* nope, keep going */
			blkno = opaque->btpo_next;
		}
	}
	else
	{
		/* Remember we left a page with data */
		so->currPos.moreRight = true;

		/*
		 * Walk left to the next page with data.  This is much more complex
		 * than the walk-right case because of the possibility that the page
		 * to our left splits while we are in flight to it, plus the
		 * possibility that the page we were on gets deleted after we leave
		 * it.	See nbtree/README for details.
		 */
		for (;;)
		{
			/* Done if we know there are no matching keys to the left */
			if (!so->currPos.moreLeft)
			{
				_bt_relbuf(rel, so->currPos.buf);
				so->currPos.buf = InvalidBuffer;
				return false;
			}

			/* Step to next physical page */
			so->currPos.buf = _bt_walk_left(rel, so->currPos.buf);

			/* if we're physically at end of index, return failure */
			if (so->currPos.buf == InvalidBuffer)
				return false;

			/*
			 * Okay, we managed to move left to a non-deleted page. Done if
			 * it's not half-dead and contains matching tuples. Else loop back
			 * and do it all again.
			 */
			page = BufferGetPage(so->currPos.buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
			{
				PredicateLockPage(rel, BufferGetBlockNumber(so->currPos.buf), scan->xs_snapshot);
				/* see if there are any matches on this page */
				/* note that this will clear moreLeft if we can stop */
				if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
					break;
			}
		}
	}

	return true;
}
Example #15
/* ----------------------------------------------------------------
 *		IndexOnlyNext
 *
 *		Retrieve a tuple from the IndexOnlyScan node's index.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	IndexScanDesc scandesc;
	TupleTableSlot *slot;
	ItemPointer tid;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;
	/* flip direction if this is an overall backward scan */
	if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
	{
		if (ScanDirectionIsForward(direction))
			direction = BackwardScanDirection;
		else if (ScanDirectionIsBackward(direction))
			direction = ForwardScanDirection;
	}
	scandesc = node->ioss_ScanDesc;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	/*
	 * OK, now that we have what we need, fetch the next tuple.
	 */
	while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
	{
		HeapTuple	tuple = NULL;

		/*
		 * We can skip the heap fetch if the TID references a heap page on
		 * which all tuples are known visible to everybody.  In any case,
		 * we'll use the index tuple not the heap tuple as the data source.
		 *
		 * Note on Memory Ordering Effects: visibilitymap_test does not lock
		 * the visibility map buffer, and therefore the result we read here
		 * could be slightly stale.  However, it can't be stale enough to
		 * matter.
		 *
		 * We need to detect clearing a VM bit due to an insert right away,
		 * because the tuple is present in the index page but not visible. The
		 * reading of the TID by this scan (using a shared lock on the index
		 * buffer) is serialized with the insert of the TID into the index
		 * (using an exclusive lock on the index buffer). Because the VM bit
		 * is cleared before updating the index, and locking/unlocking of the
		 * index page acts as a full memory barrier, we are sure to see the
		 * cleared bit if we see a recently-inserted TID.
		 *
		 * Deletes do not update the index page (only VACUUM will clear out
		 * the TID), so the clearing of the VM bit by a delete is not
		 * serialized with this test below, and we may see a value that is
		 * significantly stale. However, we don't care about the delete right
		 * away, because the tuple is still visible until the deleting
		 * transaction commits or the statement ends (if it's our
		 * transaction). In either case, the lock on the VM buffer will have
		 * been released (acting as a write barrier) after clearing the
		 * bit. And for us to have a snapshot that includes the deleting
		 * transaction (making the tuple invisible), we must have acquired
		 * ProcArrayLock after that time, acting as a read barrier.
		 *
		 * It's worth going through this complexity to avoid needing to lock
		 * the VM buffer, which could cause significant contention.
		 */
		if (!visibilitymap_test(scandesc->heapRelation,
								ItemPointerGetBlockNumber(tid),
								&node->ioss_VMBuffer))
		{
			/*
			 * Rats, we have to visit the heap to check visibility.
			 */
			node->ioss_HeapFetches++;
			tuple = index_fetch_heap(scandesc);
			if (tuple == NULL)
				continue;		/* no visible tuple, try next index entry */

			/*
			 * Only MVCC snapshots are supported here, so there should be no
			 * need to keep following the HOT chain once a visible entry has
			 * been found.  If we did want to allow that, we'd need to keep
			 * more state to remember not to call index_getnext_tid next time.
			 */
			if (scandesc->xs_continue_hot)
				elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

			/*
			 * Note: at this point we are holding a pin on the heap page, as
			 * recorded in scandesc->xs_cbuf.  We could release that pin now,
			 * but it's not clear whether it's a win to do so.  The next index
			 * entry might require a visit to the same heap page.
			 */
		}

		/*
		 * Fill the scan tuple slot with data from the index.
		 */
		StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);

		/*
		 * If the index was lossy, we have to recheck the index quals.
		 * (Currently, this can never happen, but we should support the case
		 * for possible future use, eg with GiST indexes.)
		 */
		if (scandesc->xs_recheck)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);
			if (!ExecQual(node->indexqual, econtext, false))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				continue;
			}
		}

		/*
		 * We don't currently support rechecking ORDER BY distances.  (In
		 * principle, if the index can support retrieval of the originally
		 * indexed value, it should be able to produce an exact distance
		 * calculation too.  So it's not clear that adding code here for
		 * recheck/re-sort would be worth the trouble.  But we should at least
		 * throw an error if someone tries it.)
		 */
		if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("lossy distance functions are not supported in index-only scans")));

		/*
		 * Predicate locks for index-only scans must be acquired at the page
		 * level when the heap is not accessed, since tuple-level predicate
		 * locks need the tuple's xmin value.  If we had to visit the tuple
		 * anyway, then we already have the tuple-level lock and can skip the
		 * page lock.
		 */
		if (tuple == NULL)
			PredicateLockPage(scandesc->heapRelation,
							  ItemPointerGetBlockNumber(tid),
							  estate->es_snapshot);

		return slot;
	}

	/*
	 * if we get here it means the index scan failed so we are at the end of
	 * the scan..
	 */
	return ExecClearTuple(slot);
}
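Both this node and the plain IndexNext below open with the same direction-flip dance for descending-order indexes. Since ScanDirection is just the values -1, 0, and +1, the flip can also be written as a negation; a hypothetical helper (not in the source) makes the composition explicit:

#include "access/sdir.h"

/* compose the executor's direction with the index's declared order */
static inline ScanDirection
compose_scan_direction(ScanDirection es_dir, ScanDirection indexorderdir)
{
	if (ScanDirectionIsBackward(indexorderdir))
		return (ScanDirection) -((int) es_dir); /* flips fwd/back, keeps NoMovement */
	return es_dir;
}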
Example #16
/* ----------------------------------------------------------------
 *		IndexNext
 *
 *		Retrieve a tuple from the IndexScan node's currentRelation
 *		using the index specified in the IndexScanState information.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
IndexNext(IndexScanState *node)
{
    EState	   *estate;
    ExprContext *econtext;
    ScanDirection direction;
    IndexScanDesc scandesc;
    Index		scanrelid;
    HeapTuple	tuple;
    TupleTableSlot *slot;

    /*
     * extract necessary information from index scan node
     */
    estate = node->ss.ps.state;
    direction = estate->es_direction;

    initScanDesc(node);

    /* flip direction if this is an overall backward scan */
    if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
    {
        if (ScanDirectionIsForward(direction))
            direction = BackwardScanDirection;
        else if (ScanDirectionIsBackward(direction))
            direction = ForwardScanDirection;
    }
    scandesc = node->iss_ScanDesc;
    econtext = node->ss.ps.ps_ExprContext;
    slot = node->ss.ss_ScanTupleSlot;
    scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;

    /*
     * Check if we are evaluating PlanQual for a tuple of this relation.
     * This extra check is not ideal, but there is no other way for now.
     * We could introduce new nodes for this case and handle IndexScan -->
     * NewNode switching in Init/ReScan plan...
     */
    if (estate->es_evTuple != NULL &&
            estate->es_evTuple[scanrelid - 1] != NULL)
    {
        if (estate->es_evTupleNull[scanrelid - 1])
        {
            if (!node->ss.ps.delayEagerFree)
            {
                ExecEagerFreeIndexScan(node);
            }

            return ExecClearTuple(slot);
        }

        ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1], slot, false);

        /* Does the tuple meet the indexqual condition? */
        econtext->ecxt_scantuple = slot;

        ResetExprContext(econtext);

        if (!ExecQual(node->indexqualorig, econtext, false))
        {
            if (!node->ss.ps.delayEagerFree)
            {
                ExecEagerFreeIndexScan(node);
            }

            ExecClearTuple(slot);		/* would not be returned by scan */
        }

        /* Flag for the next call that no more tuples */
        estate->es_evTupleNull[scanrelid - 1] = true;

        Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
        CheckSendPlanStateGpmonPkt(&node->ss.ps);
        return slot;
    }

    /*
     * ok, now that we have what we need, fetch the next tuple.
     */
    if ((tuple = index_getnext(scandesc, direction)) != NULL)
    {
        /*
         * Store the scanned tuple in the scan tuple slot of the scan state.
         * Note: we pass 'false' because tuples returned by amgetnext are
         * pointers onto disk pages and must not be pfree()'d.
         */
        ExecStoreHeapTuple(tuple,	/* tuple to store */
                           slot,	/* slot to store in */
                           scandesc->xs_cbuf,		/* buffer containing tuple */
                           false);	/* don't pfree */

        Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
        CheckSendPlanStateGpmonPkt(&node->ss.ps);
        return slot;
    }

    if (!node->ss.ps.delayEagerFree)
    {
        ExecEagerFreeIndexScan(node);
    }

    /*
     * if we get here it means the index scan failed so we are at the end of
     * the scan..
     */
    return ExecClearTuple(slot);
}
Example #17
/* ----------------------------------------------------------------
 *		FunctionNext
 *
 *		This is a workhorse for ExecFunctionScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
FunctionNext(FunctionScanState *node)
{
	TupleTableSlot *slot;
	EState	   *estate;
	ScanDirection direction;
	Tuplestorestate *tuplestorestate;

	/*
	 * get information from the estate and scan state
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;

	tuplestorestate = node->tuplestorestate;

	/*
	 * If first time through, read all tuples from function and put them in a
	 * tuplestore. Subsequent calls just fetch tuples from tuplestore.
	 */
	if (tuplestorestate == NULL)
	{
		tuplestorestate = ExecMakeTableFunctionResult(
				node->funcexpr,
				node->ss.ps.ps_ExprContext,
				node->tupdesc,
				PlanStateOperatorMemKB( (PlanState *) node));
		node->tuplestorestate = tuplestorestate;

		/* CDB: Offer extra info for EXPLAIN ANALYZE. */
		if (node->ss.ps.instrument)
		{
			/* Let the tuplestore share our Instrumentation object. */
			tuplestore_set_instrument(tuplestorestate, node->ss.ps.instrument);

			/* Request a callback at end of query. */
			node->ss.ps.cdbexplainfun = ExecFunctionScanExplainEnd;
		}

	}

	/*
	 * Get the next tuple from tuplestore. Return NULL if no more tuples.
	 */
	slot = node->ss.ss_ScanTupleSlot;
	if (tuplestore_gettupleslot(tuplestorestate, 
				ScanDirectionIsForward(direction),
				slot))
	{
		/* CDB: Label each row with a synthetic ctid for subquery dedup. */
		if (node->cdb_want_ctid)
		{
			HeapTuple   tuple = ExecFetchSlotHeapTuple(slot); 

			/* Increment 48-bit row count */
			node->cdb_fake_ctid.ip_posid++;
			if (node->cdb_fake_ctid.ip_posid == 0)
				ItemPointerSetBlockNumber(&node->cdb_fake_ctid,
						1 + ItemPointerGetBlockNumber(&node->cdb_fake_ctid));

			tuple->t_self = node->cdb_fake_ctid;
		}
	}

	if (!TupIsNull(slot))
	{
		Gpmon_M_Incr_Rows_Out(GpmonPktFromFuncScanState(node));
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
	}

	else if (!node->ss.ps.delayEagerFree)
	{
		ExecEagerFreeFunctionScan((FunctionScanState *)(&node->ss.ps));
	}
	
	return slot;
}
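The synthetic-ctid bookkeeping above treats an ItemPointerData as a 48-bit counter: ip_posid supplies the low 16 bits and the block number the high 32, with a carry when the offset wraps. Pulled out as a standalone helper (hypothetical name, same arithmetic):

#include "storage/itemptr.h"

/* advance a fake ctid used only for row identity, never for heap access */
static void
advance_fake_ctid(ItemPointerData *ctid)
{
	ctid->ip_posid++;
	if (ctid->ip_posid == 0)	/* low 16 bits wrapped: carry into block */
		ItemPointerSetBlockNumber(ctid,
								  1 + ItemPointerGetBlockNumber(ctid));
}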
Example #18
/* ----------------------------------------------------------------
 *		ExecMaterial
 *
 *		As long as we are at the end of the data collected in the tuplestore,
 *		we collect one new row from the subplan on each call, and stash it
 *		aside in the tuplestore before returning it.  The tuplestore is
 *		only read if we are asked to scan backwards, rescan, or mark/restore.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* result tuple from subplan */
ExecMaterial(MaterialState *node)
{
	EState	   *estate;
	ScanDirection dir;
	bool		forward;
	Tuplestorestate *tuplestorestate;
	bool		eof_tuplestore;
	TupleTableSlot *slot;

	/*
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);
	tuplestorestate = node->tuplestorestate;

	/*
	 * If first time through, and we need a tuplestore, initialize it.
	 */
	if (tuplestorestate == NULL && node->eflags != 0)
	{
		tuplestorestate = tuplestore_begin_heap(true, false, work_mem);
		tuplestore_set_eflags(tuplestorestate, node->eflags);
		if (node->eflags & EXEC_FLAG_MARK)
		{
			/*
			 * Allocate a second read pointer to serve as the mark. We know it
			 * must have index 1, so needn't store that.
			 */
			int ptrno	PG_USED_FOR_ASSERTS_ONLY;

			ptrno = tuplestore_alloc_read_pointer(tuplestorestate,
												  node->eflags);
			Assert(ptrno == 1);
		}
		node->tuplestorestate = tuplestorestate;
	}

	/*
	 * If we are not at the end of the tuplestore, or are going backwards, try
	 * to fetch a tuple from tuplestore.
	 */
	eof_tuplestore = (tuplestorestate == NULL) ||
		tuplestore_ateof(tuplestorestate);

	if (!forward && eof_tuplestore)
	{
		if (!node->eof_underlying)
		{
			/*
			 * When reversing direction at tuplestore EOF, the first
			 * gettupleslot call will fetch the last-added tuple; but we want
			 * to return the one before that, if possible. So do an extra
			 * fetch.
			 */
			if (!tuplestore_advance(tuplestorestate, forward))
				return NULL;	/* the tuplestore must be empty */
		}
		eof_tuplestore = false;
	}

	/*
	 * If we can fetch another tuple from the tuplestore, return it.
	 */
	slot = node->ss.ps.ps_ResultTupleSlot;
	if (!eof_tuplestore)
	{
		if (tuplestore_gettupleslot(tuplestorestate, forward, false, slot))
			return slot;
		if (forward)
			eof_tuplestore = true;
	}

	/*
	 * If necessary, try to fetch another row from the subplan.
	 *
	 * Note: the eof_underlying state variable exists to short-circuit further
	 * subplan calls.  It's not optional, unfortunately, because some plan
	 * node types are not robust about being called again when they've already
	 * returned NULL.
	 */
	if (eof_tuplestore && !node->eof_underlying)
	{
		PlanState  *outerNode;
		TupleTableSlot *outerslot;

		/*
		 * We can only get here with forward==true, so no need to worry about
		 * which direction the subplan will go.
		 */
		outerNode = outerPlanState(node);
		outerslot = ExecProcNode(outerNode);
		if (TupIsNull(outerslot))
		{
			node->eof_underlying = true;
			return NULL;
		}

		/*
		 * Append a copy of the returned tuple to tuplestore.  NOTE: because
		 * the tuplestore is certainly in EOF state, its read position will
		 * move forward over the added tuple.  This is what we want.
		 */
		if (tuplestorestate)
			tuplestore_puttupleslot(tuplestorestate, outerslot);

		/*
		 * We can just return the subplan's returned tuple, without copying.
		 */
		return outerslot;
	}

	/*
	 * Nothing left ...
	 */
	return ExecClearTuple(slot);
}
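This Material variant only builds a tuplestore when node->eflags is nonzero, i.e. when rescan, backward scan, or mark/restore support was requested of it. A sketch of where such flags conventionally come from at init time (illustrative, not quoted from ExecInitMaterial):

	/* remember only the capabilities the caller actually asked for */
	node->eflags = eflags & (EXEC_FLAG_REWIND |
							 EXEC_FLAG_BACKWARD |
							 EXEC_FLAG_MARK);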
Example #19
/* ----------------------------------------------------------------
 *	   ExecProcAppend
 *
 *		Handles the iteration over the multiple scans.
 *
 *	   NOTE: Can't call this ExecAppend, that name is used in execMain.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecProcAppend(AppendState *node)
{
	EState	   *estate;
	int			whichplan;
	PlanState  *subnode;
	TupleTableSlot *result;
	TupleTableSlot *result_slot;
	ScanDirection direction;

	/*
	 * get information from the node
	 */
	estate = node->ps.state;
	direction = estate->es_direction;
	whichplan = node->as_whichplan;
	result_slot = node->ps.ps_ResultTupleSlot;

	/*
	 * figure out which subplan we are currently processing
	 */
	subnode = node->appendplans[whichplan];

	/*
	 * get a tuple from the subplan
	 */
	result = ExecProcNode(subnode);

	if (!TupIsNull(result))
	{
		/*
		 * if the subplan gave us something then place a copy of whatever
		 * we get into our result slot and return it.
		 *
		 * Note we rely on the subplan to retain ownership of the tuple for
		 * as long as we need it --- we don't copy it.
		 */
		return ExecStoreTuple(result->val, result_slot, InvalidBuffer, false);
	}
	else
	{
		/*
		 * .. go on to the "next" subplan in the appropriate direction and
		 * try processing again (recursively)
		 */
		if (ScanDirectionIsForward(direction))
			node->as_whichplan++;
		else
			node->as_whichplan--;

		/*
		 * return something from next node or an empty slot if all of our
		 * subplans have been exhausted.
		 */
		if (exec_append_initialize_next(node))
		{
			ExecSetSlotDescriptorIsNew(result_slot, true);
			return ExecProcAppend(node);
		}
		else
			return ExecClearTuple(result_slot);
	}
}
Example #20
/*
 *	_bt_step() -- Step one item in the requested direction in a scan on
 *				  the tree.
 *
 *		*bufP is the current buffer (read-locked and pinned).  If we change
 *		pages, it's updated appropriately.
 *
 *		If successful, update scan's currentItemData and return true.
 *		If no adjacent record exists in the requested direction,
 *		release buffer pin/locks and return false.
 */
bool
_bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	ItemPointer current = &(scan->currentItemData);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Page		page;
	BTPageOpaque opaque;
	OffsetNumber offnum,
				maxoff;
	BlockNumber blkno;

	/*
	 * Don't use ItemPointerGetOffsetNumber here, or you risk an assertion
	 * failure, because ip_posid can legitimately be 0.
	 */
	offnum = current->ip_posid;

	page = BufferGetPage(*bufP);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	maxoff = PageGetMaxOffsetNumber(page);

	if (ScanDirectionIsForward(dir))
	{
		if (!PageIsEmpty(page) && offnum < maxoff)
			offnum = OffsetNumberNext(offnum);
		else
		{
			/* Walk right to the next page with data */
			for (;;)
			{
				/* if we're at end of scan, release the buffer and return */
				if (P_RIGHTMOST(opaque))
				{
					_bt_relbuf(rel, *bufP);
					ItemPointerSetInvalid(current);
					*bufP = so->btso_curbuf = InvalidBuffer;
					return false;
				}
				/* step right one page */
				blkno = opaque->btpo_next;
				_bt_relbuf(rel, *bufP);
				*bufP = _bt_getbuf(rel, blkno, BT_READ);
				page = BufferGetPage(*bufP);
				opaque = (BTPageOpaque) PageGetSpecialPointer(page);
				if (!P_IGNORE(opaque))
				{
					maxoff = PageGetMaxOffsetNumber(page);
					/* done if it's not empty */
					offnum = P_FIRSTDATAKEY(opaque);
					if (!PageIsEmpty(page) && offnum <= maxoff)
						break;
				}
			}
		}
	}
	else
	{
		/* backwards scan */
		if (offnum > P_FIRSTDATAKEY(opaque))
			offnum = OffsetNumberPrev(offnum);
		else
		{
			/*
			 * Walk left to the next page with data.  This is much more
			 * complex than the walk-right case because of the possibility
			 * that the page to our left splits while we are in flight to
			 * it, plus the possibility that the page we were on gets
			 * deleted after we leave it.  See nbtree/README for details.
			 */
			for (;;)
			{
				*bufP = _bt_walk_left(rel, *bufP);

				/* if we're at end of scan, return failure */
				if (*bufP == InvalidBuffer)
				{
					ItemPointerSetInvalid(current);
					so->btso_curbuf = InvalidBuffer;
					return false;
				}
				page = BufferGetPage(*bufP);
				opaque = (BTPageOpaque) PageGetSpecialPointer(page);

				/*
				 * Okay, we managed to move left to a non-deleted page.
				 * Done if it's not half-dead and not empty.  Else loop
				 * back and do it all again.
				 */
				if (!P_IGNORE(opaque))
				{
					maxoff = PageGetMaxOffsetNumber(page);
					offnum = maxoff;
					if (!PageIsEmpty(page) &&
						maxoff >= P_FIRSTDATAKEY(opaque))
						break;
				}
			}
		}
	}

	/* Update scan state */
	so->btso_curbuf = *bufP;
	blkno = BufferGetBlockNumber(*bufP);
	ItemPointerSet(current, blkno, offnum);

	return true;
}
Example #21
/*
 * hidden_getnext
 *
 * Returns the next tuple from the hidden data.  This has to be called after
 * the heap/index scan has been exhausted.
 */
HeapTuple
hidden_getnext(HiddenScanDesc hscan, ScanDirection direction)
{
	if (ScanDirectionIsForward(direction))
	{
		if (hscan->hdn_idx < 0)
			hscan->hdn_idx = 0;

		while (hscan->hdn_idx < hscan->hdn_len)
		{
			HeapTuple	tuple;
			bool		valid = true;

			/* fetch */
			tuple = hscan->hdn_tuples[hscan->hdn_idx++];

			/* key test */
			if (hscan->hdn_nkeys > 0)
				HeapKeyTest(tuple, RelationGetDescr(hscan->hdn_rel),
							hscan->hdn_nkeys, hscan->hdn_key, valid);

			if (valid)
			{
				/* save the last tuple for the sake of "no movement" */
				hscan->hdn_lasttuple = tuple;
				return tuple;
			}
		}
	}
	else if (ScanDirectionIsBackward(direction))
	{
		if (hscan->hdn_idx >= hscan->hdn_len)
			hscan->hdn_idx = hscan->hdn_len - 1;

		while (hscan->hdn_idx >= 0)
		{
			HeapTuple	tuple;
			bool		valid = true;

			/* fetch */
			tuple = hscan->hdn_tuples[hscan->hdn_idx--];

			/* key test */
			if (hscan->hdn_nkeys > 0)
				HeapKeyTest(tuple, RelationGetDescr(hscan->hdn_rel),
							hscan->hdn_nkeys, hscan->hdn_key, valid);

			if (valid)
			{
				/* save the last tuple for the sake of "no movement" */
				hscan->hdn_lasttuple = tuple;
				return tuple;
			}
		}
	}
	else
	{
		/*
		 * ``no movement'' scan direction: refetch prior tuple
		 */
		return hscan->hdn_lasttuple;
	}

	hscan->hdn_lasttuple = NULL;
	return NULL;
}
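Two details worth calling out: HeapKeyTest is a macro that assigns its result to the last argument by name, which is why `valid` is passed without an address-of; and NoMovementScanDirection simply re-returns hdn_lasttuple. A minimal forward-drain driver might look like the sketch below (the setup function producing hscan and the consumer are hypothetical):

/*
 * Sketch only: drain all hidden tuples in the forward direction,
 * assuming hscan was produced by a (hypothetical) hidden_beginscan().
 */
static void
hidden_drain_forward(HiddenScanDesc hscan)
{
	HeapTuple	tuple;

	/* walk forward until the key-filtered array is exhausted */
	while ((tuple = hidden_getnext(hscan, ForwardScanDirection)) != NULL)
	{
		/* consume tuple here */
	}

	/*
	 * hdn_lasttuple is now NULL, so a NoMovementScanDirection refetch
	 * would also return NULL.
	 */
}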
Example #22
/*
 *	_bt_first() -- Find the first item in a scan.
 *
 *		We need to be clever about the direction of scan, the search
 *		conditions, and the tree ordering.	We find the first item (or,
 *		if backwards scan, the last item) in the tree that satisfies the
 *		qualifications in the scan key.  On success exit, the page containing
 *		the current index tuple is pinned but not locked, and data about
 *		the matching tuple(s) on the page has been loaded into so->currPos.
 *		scan->xs_ctup.t_self is set to the heap TID of the current tuple,
 *		and if requested, scan->xs_itup points to a copy of the index tuple.
 *
 * If there are no matching items in the index, we return FALSE, with no
 * pins or locks held.
 *
 * Note that scan->keyData[], and the so->keyData[] scankey built from it,
 * are both search-type scankeys (see nbtree/README for more about this).
 * Within this routine, we build a temporary insertion-type scankey to use
 * in locating the scan start position.
 */
bool
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf;
	BTStack		stack;
	OffsetNumber offnum;
	StrategyNumber strat;
	bool		nextkey;
	bool		goback;
	ScanKey		startKeys[INDEX_MAX_KEYS];
	ScanKeyData scankeys[INDEX_MAX_KEYS];
	ScanKeyData notnullkeys[INDEX_MAX_KEYS];
	int			keysCount = 0;
	int			i;
	StrategyNumber strat_total;
	BTScanPosItem *currItem;

	pgstat_count_index_scan(rel);

	/*
	 * Examine the scan keys and eliminate any redundant keys; also mark the
	 * keys that must be matched to continue the scan.
	 */
	_bt_preprocess_keys(scan);

	/*
	 * Quit now if _bt_preprocess_keys() discovered that the scan keys can
	 * never be satisfied (eg, x == 1 AND x > 2).
	 */
	if (!so->qual_ok)
		return false;

	/*----------
	 * Examine the scan keys to discover where we need to start the scan.
	 *
	 * We want to identify the keys that can be used as starting boundaries;
	 * these are =, >, or >= keys for a forward scan or =, <, <= keys for
	 * a backwards scan.  We can use keys for multiple attributes so long as
	 * the prior attributes had only =, >= (resp. =, <=) keys.	Once we accept
	 * a > or < boundary or find an attribute with no boundary (which can be
	 * thought of as the same as "> -infinity"), we can't use keys for any
	 * attributes to its right, because it would break our simplistic notion
	 * of what initial positioning strategy to use.
	 *
	 * When the scan keys include cross-type operators, _bt_preprocess_keys
	 * may not be able to eliminate redundant keys; in such cases we will
	 * arbitrarily pick a usable one for each attribute.  This is correct
	 * but possibly not optimal behavior.  (For example, with keys like
	 * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
	 * x=5 would be more efficient.)  Since the situation only arises given
	 * a poorly-worded query plus an incomplete opfamily, live with it.
	 *
	 * When both equality and inequality keys appear for a single attribute
	 * (again, only possible when cross-type operators appear), we *must*
	 * select one of the equality keys for the starting point, because
	 * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
	 * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
	 * start at x=4, we will fail and stop before reaching x=10.  If multiple
	 * equality quals survive preprocessing, however, it doesn't matter which
	 * one we use --- by definition, they are either redundant or
	 * contradictory.
	 *
	 * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier.
	 * If the index stores nulls at the end of the index we'll be starting
	 * from, and we have no boundary key for the column (which means the key
	 * we deduced NOT NULL from is an inequality key that constrains the other
	 * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to
	 * use as a boundary key.  If we didn't do this, we might find ourselves
	 * traversing a lot of null entries at the start of the scan.
	 *
	 * In this loop, row-comparison keys are treated the same as keys on their
	 * first (leftmost) columns.  We'll add on lower-order columns of the row
	 * comparison below, if possible.
	 *
	 * The selected scan keys (at most one per index column) are remembered by
	 * storing their addresses into the local startKeys[] array.
	 *----------
	 */
	strat_total = BTEqualStrategyNumber;
	if (so->numberOfKeys > 0)
	{
		AttrNumber	curattr;
		ScanKey		chosen;
		ScanKey		impliesNN;
		ScanKey		cur;

		/*
		 * chosen is the so-far-chosen key for the current attribute, if any.
		 * We don't cast the decision in stone until we reach keys for the
		 * next attribute.
		 */
		curattr = 1;
		chosen = NULL;
		/* Also remember any scankey that implies a NOT NULL constraint */
		impliesNN = NULL;

		/*
		 * Loop iterates from 0 to numberOfKeys inclusive; we use the last
		 * pass to handle after-last-key processing.  Actual exit from the
		 * loop is at one of the "break" statements below.
		 */
		for (cur = so->keyData, i = 0;; cur++, i++)
		{
			if (i >= so->numberOfKeys || cur->sk_attno != curattr)
			{
				/*
				 * Done looking at keys for curattr.  If we didn't find a
				 * usable boundary key, see if we can deduce a NOT NULL key.
				 */
				if (chosen == NULL && impliesNN != NULL &&
					((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
					 ScanDirectionIsForward(dir) :
					 ScanDirectionIsBackward(dir)))
				{
					/* Yes, so build the key in notnullkeys[keysCount] */
					chosen = &notnullkeys[keysCount];
					ScanKeyEntryInitialize(chosen,
										   (SK_SEARCHNOTNULL | SK_ISNULL |
											(impliesNN->sk_flags &
										  (SK_BT_DESC | SK_BT_NULLS_FIRST))),
										   curattr,
								 ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
								  BTGreaterStrategyNumber :
								  BTLessStrategyNumber),
										   InvalidOid,
										   InvalidOid,
										   InvalidOid,
										   (Datum) 0);
				}

				/*
				 * If we still didn't find a usable boundary key, quit; else
				 * save the boundary key pointer in startKeys.
				 */
				if (chosen == NULL)
					break;
				startKeys[keysCount++] = chosen;

				/*
				 * Adjust strat_total, and quit if we have stored a > or <
				 * key.
				 */
				strat = chosen->sk_strategy;
				if (strat != BTEqualStrategyNumber)
				{
					strat_total = strat;
					if (strat == BTGreaterStrategyNumber ||
						strat == BTLessStrategyNumber)
						break;
				}

				/*
				 * Done if that was the last attribute, or if next key is not
				 * in sequence (implying no boundary key is available for the
				 * next attribute).
				 */
				if (i >= so->numberOfKeys ||
					cur->sk_attno != curattr + 1)
					break;

				/*
				 * Reset for next attr.
				 */
				curattr = cur->sk_attno;
				chosen = NULL;
				impliesNN = NULL;
			}

			/*
			 * Can we use this key as a starting boundary for this attr?
			 *
			 * If not, does it imply a NOT NULL constraint?  (Because
			 * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
			 * *any* inequality key works for that; we need not test.)
			 */
			switch (cur->sk_strategy)
			{
				case BTLessStrategyNumber:
				case BTLessEqualStrategyNumber:
					if (chosen == NULL)
					{
						if (ScanDirectionIsBackward(dir))
							chosen = cur;
						else
							impliesNN = cur;
					}
					break;
				case BTEqualStrategyNumber:
					/* override any non-equality choice */
					chosen = cur;
					break;
				case BTGreaterEqualStrategyNumber:
				case BTGreaterStrategyNumber:
					if (chosen == NULL)
					{
						if (ScanDirectionIsForward(dir))
							chosen = cur;
						else
							impliesNN = cur;
					}
					break;
			}
		}
	}

	/*
	 * If we found no usable boundary keys, we have to start from one end of
	 * the tree.  Walk down that edge to the first or last key, and scan from
	 * there.
	 */
	if (keysCount == 0)
		return _bt_endpoint(scan, dir);

	/*
	 * We want to start the scan somewhere within the index.  Set up an
	 * insertion scankey we can use to search for the boundary point we
	 * identified above.  The insertion scankey is built in the local
	 * scankeys[] array, using the keys identified by startKeys[].
	 */
	Assert(keysCount <= INDEX_MAX_KEYS);
	for (i = 0; i < keysCount; i++)
	{
		ScanKey		cur = startKeys[i];

		Assert(cur->sk_attno == i + 1);

		if (cur->sk_flags & SK_ROW_HEADER)
		{
			/*
			 * Row comparison header: look to the first row member instead.
			 *
			 * The member scankeys are already in insertion format (ie, they
			 * have sk_func = 3-way-comparison function), but we have to watch
			 * out for nulls, which _bt_preprocess_keys didn't check. A null
			 * in the first row member makes the condition unmatchable, just
			 * like qual_ok = false.
			 */
			ScanKey		subkey = (ScanKey) DatumGetPointer(cur->sk_argument);

			Assert(subkey->sk_flags & SK_ROW_MEMBER);
			if (subkey->sk_flags & SK_ISNULL)
				return false;
			memcpy(scankeys + i, subkey, sizeof(ScanKeyData));

			/*
			 * If the row comparison is the last positioning key we accepted,
			 * try to add additional keys from the lower-order row members.
			 * (If we accepted independent conditions on additional index
			 * columns, we use those instead --- doesn't seem worth trying to
			 * determine which is more restrictive.)  Note that this is OK
			 * even if the row comparison is of ">" or "<" type, because the
			 * condition applied to all but the last row member is effectively
			 * ">=" or "<=", and so the extra keys don't break the positioning
			 * scheme.	But, by the same token, if we aren't able to use all
			 * the row members, then the part of the row comparison that we
			 * did use has to be treated as just a ">=" or "<=" condition, and
			 * so we'd better adjust strat_total accordingly.
			 */
			if (i == keysCount - 1)
			{
				bool		used_all_subkeys = false;

				Assert(!(subkey->sk_flags & SK_ROW_END));
				for (;;)
				{
					subkey++;
					Assert(subkey->sk_flags & SK_ROW_MEMBER);
					if (subkey->sk_attno != keysCount + 1)
						break;	/* out-of-sequence, can't use it */
					if (subkey->sk_strategy != cur->sk_strategy)
						break;	/* wrong direction, can't use it */
					if (subkey->sk_flags & SK_ISNULL)
						break;	/* can't use null keys */
					Assert(keysCount < INDEX_MAX_KEYS);
					memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
					keysCount++;
					if (subkey->sk_flags & SK_ROW_END)
					{
						used_all_subkeys = true;
						break;
					}
				}
				if (!used_all_subkeys)
				{
					switch (strat_total)
					{
						case BTLessStrategyNumber:
							strat_total = BTLessEqualStrategyNumber;
							break;
						case BTGreaterStrategyNumber:
							strat_total = BTGreaterEqualStrategyNumber;
							break;
					}
				}
				break;			/* done with outer loop */
			}
		}
		else
		{
			/*
			 * Ordinary comparison key.  Transform the search-style scan key
			 * to an insertion scan key by replacing the sk_func with the
			 * appropriate btree comparison function.
			 *
			 * If scankey operator is not a cross-type comparison, we can use
			 * the cached comparison function; otherwise gotta look it up in
			 * the catalogs.  (That can't lead to infinite recursion, since no
			 * indexscan initiated by syscache lookup will use cross-data-type
			 * operators.)
			 *
			 * We support the convention that sk_subtype == InvalidOid means
			 * the opclass input type; this is a hack to simplify life for
			 * ScanKeyInit().
			 */
			if (cur->sk_subtype == rel->rd_opcintype[i] ||
				cur->sk_subtype == InvalidOid)
			{
				FmgrInfo   *procinfo;

				procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
				ScanKeyEntryInitializeWithInfo(scankeys + i,
											   cur->sk_flags,
											   cur->sk_attno,
											   InvalidStrategy,
											   cur->sk_subtype,
											   cur->sk_collation,
											   procinfo,
											   cur->sk_argument);
			}
			else
			{
				RegProcedure cmp_proc;

				cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
											 rel->rd_opcintype[i],
											 cur->sk_subtype,
											 BTORDER_PROC);
				if (!RegProcedureIsValid(cmp_proc))
					elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
						 BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
						 cur->sk_attno, RelationGetRelationName(rel));
				ScanKeyEntryInitialize(scankeys + i,
									   cur->sk_flags,
									   cur->sk_attno,
									   InvalidStrategy,
									   cur->sk_subtype,
									   cur->sk_collation,
									   cmp_proc,
									   cur->sk_argument);
			}
		}
	}

	/*----------
	 * Examine the selected initial-positioning strategy to determine exactly
	 * where we need to start the scan, and set flag variables to control the
	 * code below.
	 *
	 * If nextkey = false, _bt_search and _bt_binsrch will locate the first
	 * item >= scan key.  If nextkey = true, they will locate the first
	 * item > scan key.
	 *
	 * If goback = true, we will then step back one item, while if
	 * goback = false, we will start the scan on the located item.
	 *----------
	 */
	switch (strat_total)
	{
		case BTLessStrategyNumber:

			/*
			 * Find first item >= scankey, then back up one to arrive at last
			 * item < scankey.	(Note: this positioning strategy is only used
			 * for a backward scan, so that is always the correct starting
			 * position.)
			 */
			nextkey = false;
			goback = true;
			break;

		case BTLessEqualStrategyNumber:

			/*
			 * Find first item > scankey, then back up one to arrive at last
			 * item <= scankey.  (Note: this positioning strategy is only used
			 * for a backward scan, so that is always the correct starting
			 * position.)
			 */
			nextkey = true;
			goback = true;
			break;

		case BTEqualStrategyNumber:

			/*
			 * If a backward scan was specified, need to start with last equal
			 * item not first one.
			 */
			if (ScanDirectionIsBackward(dir))
			{
				/*
				 * This is the same as the <= strategy.  We will check at the
				 * end whether the found item is actually =.
				 */
				nextkey = true;
				goback = true;
			}
			else
			{
				/*
				 * This is the same as the >= strategy.  We will check at the
				 * end whether the found item is actually =.
				 */
				nextkey = false;
				goback = false;
			}
			break;

		case BTGreaterEqualStrategyNumber:

			/*
			 * Find first item >= scankey.	(This is only used for forward
			 * scans.)
			 */
			nextkey = false;
			goback = false;
			break;

		case BTGreaterStrategyNumber:

			/*
			 * Find first item > scankey.  (This is only used for forward
			 * scans.)
			 */
			nextkey = true;
			goback = false;
			break;

		default:
			/* can't get here, but keep compiler quiet */
			elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
			return false;
	}

	/*
	 * Use the manufactured insertion scan key to descend the tree and
	 * position ourselves on the target leaf page.
	 */
	stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ);

	/* don't need to keep the stack around... */
	_bt_freestack(stack);

	/* remember which buffer we have pinned, if any */
	so->currPos.buf = buf;

	if (!BufferIsValid(buf))
	{
		/*
		 * We only get here if the index is completely empty. Lock relation
		 * because nothing finer to lock exists.
		 */
		PredicateLockRelation(rel, scan->xs_snapshot);
		return false;
	}
	else
		PredicateLockPage(rel, BufferGetBlockNumber(buf),
						  scan->xs_snapshot);

	/* initialize moreLeft/moreRight appropriately for scan direction */
	if (ScanDirectionIsForward(dir))
	{
		so->currPos.moreLeft = false;
		so->currPos.moreRight = true;
	}
	else
	{
		so->currPos.moreLeft = true;
		so->currPos.moreRight = false;
	}
	so->numKilled = 0;			/* just paranoia */
	so->markItemIndex = -1;		/* ditto */

	/* position to the precise item on the page */
	offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);

	/*
	 * If nextkey = false, we are positioned at the first item >= scan key, or
	 * possibly at the end of a page on which all the existing items are less
	 * than the scan key and we know that everything on later pages is greater
	 * than or equal to scan key.
	 *
	 * If nextkey = true, we are positioned at the first item > scan key, or
	 * possibly at the end of a page on which all the existing items are less
	 * than or equal to the scan key and we know that everything on later
	 * pages is greater than scan key.
	 *
	 * The actually desired starting point is either this item or the prior
	 * one, or in the end-of-page case it's the first item on the next page or
	 * the last item on this page.	Adjust the starting offset if needed. (If
	 * this results in an offset before the first item or after the last one,
	 * _bt_readpage will report no items found, and then we'll step to the
	 * next page as needed.)
	 */
	if (goback)
		offnum = OffsetNumberPrev(offnum);

	/*
	 * Now load data from the first page of the scan.
	 */
	if (!_bt_readpage(scan, dir, offnum))
	{
		/*
		 * There's no actually-matching data on this page.  Try to advance to
		 * the next page.  Return false if there's no matching data at all.
		 */
		if (!_bt_steppage(scan, dir))
			return false;
	}

	/* Drop the lock, but not pin, on the current page */
	LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_ctup.t_self = currItem->heapTid;
	if (scan->xs_want_itup)
		scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);

	return true;
}
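For a concrete feel of the nextkey/goback machinery, consider the qual "x > 5" on a forward scan: the chosen strategy is BTGreaterStrategyNumber, so nextkey = true and goback = false, and _bt_search/_bt_binsrch land on the first item greater than 5. A search-style key for that qual could be built as in this sketch (the column number and the int4 comparison procedure are assumptions for illustration):

	ScanKeyData skey;

	ScanKeyInit(&skey,
				(AttrNumber) 1,				/* first index column */
				BTGreaterStrategyNumber,	/* "x > ..." */
				F_INT4GT,					/* int4 ">" procedure */
				Int32GetDatum(5));			/* comparison value */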
Example #23
/*
 *	_bt_readpage() -- Load data from current index page into so->currPos
 *
 * Caller must have pinned and read-locked so->currPos.buf; the buffer's state
 * is not changed here.  Also, currPos.moreLeft and moreRight must be valid;
 * they are updated as appropriate.  All other fields of so->currPos are
 * initialized from scratch here.
 *
 * We scan the current page starting at offnum and moving in the indicated
 * direction.  All items matching the scan keys are loaded into currPos.items.
 * moreLeft or moreRight (as appropriate) is cleared if _bt_checkkeys reports
 * that there can be no more matching tuples in the current scan direction.
 *
 * Returns true if any matching items found on the page, false if none.
 */
static bool
_bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Page		page;
	BTPageOpaque opaque;
	OffsetNumber minoff;
	OffsetNumber maxoff;
	int			itemIndex;
	IndexTuple	itup;
	bool		continuescan;

	/* we must have the buffer pinned and locked */
	Assert(BufferIsValid(so->currPos.buf));

	page = BufferGetPage(so->currPos.buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	minoff = P_FIRSTDATAKEY(opaque);
	maxoff = PageGetMaxOffsetNumber(page);

	/*
	 * we must save the page's right-link while scanning it; this tells us
	 * where to step right to after we're done with these items.  There is no
	 * corresponding need for the left-link, since splits always go right.
	 */
	so->currPos.nextPage = opaque->btpo_next;

	/* initialize tuple workspace to empty */
	so->currPos.nextTupleOffset = 0;

	if (ScanDirectionIsForward(dir))
	{
		/* load items[] in ascending order */
		itemIndex = 0;

		offnum = Max(offnum, minoff);

		while (offnum <= maxoff)
		{
			itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
			if (itup != NULL)
			{
				/* tuple passes all scan key conditions, so remember it */
				_bt_saveitem(so, itemIndex, offnum, itup);
				itemIndex++;
			}
			if (!continuescan)
			{
				/* there can't be any more matches, so stop */
				so->currPos.moreRight = false;
				break;
			}

			offnum = OffsetNumberNext(offnum);
		}

		Assert(itemIndex <= MaxIndexTuplesPerPage);
		so->currPos.firstItem = 0;
		so->currPos.lastItem = itemIndex - 1;
		so->currPos.itemIndex = 0;
	}
	else
	{
		/* load items[] in descending order */
		itemIndex = MaxIndexTuplesPerPage;

		offnum = Min(offnum, maxoff);

		while (offnum >= minoff)
		{
			itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
			if (itup != NULL)
			{
				/* tuple passes all scan key conditions, so remember it */
				itemIndex--;
				_bt_saveitem(so, itemIndex, offnum, itup);
			}
			if (!continuescan)
			{
				/* there can't be any more matches, so stop */
				so->currPos.moreLeft = false;
				break;
			}

			offnum = OffsetNumberPrev(offnum);
		}

		Assert(itemIndex >= 0);
		so->currPos.firstItem = itemIndex;
		so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
		so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
	}

	return (so->currPos.firstItem <= so->currPos.lastItem);
}
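The window filled here is consumed without touching the buffer again: the caller steps itemIndex between firstItem and lastItem, and only when the window is exhausted does it fetch another page. Distilled into a helper (modeled on _bt_next; this function itself is not in the source):

/*
 * Returns false when the in-memory window is exhausted and the caller
 * must advance to another page with _bt_steppage().
 */
static bool
bt_advance_within_page(BTScanOpaque so, ScanDirection dir)
{
	if (ScanDirectionIsForward(dir))
		return ++so->currPos.itemIndex <= so->currPos.lastItem;
	else
		return --so->currPos.itemIndex >= so->currPos.firstItem;
}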
Example #24
/* ----------------------------------------------------------------
 *		ValuesNext
 *
 *		This is a workhorse for ExecValuesScan
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ValuesNext(ValuesScanState *node)
{
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	List	   *exprlist;

	/*
	 * get information from the estate and scan state
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;
	slot = node->ss.ss_ScanTupleSlot;
	econtext = node->rowcontext;

	/*
	 * Get the next tuple. Return NULL if no more tuples.
	 */
	if (ScanDirectionIsForward(direction))
	{
		if (node->curr_idx < node->array_len)
			node->curr_idx++;
		if (node->curr_idx < node->array_len)
			exprlist = node->exprlists[node->curr_idx];
		else
			exprlist = NIL;
	}
	else
	{
		if (node->curr_idx >= 0)
			node->curr_idx--;
		if (node->curr_idx >= 0)
			exprlist = node->exprlists[node->curr_idx];
		else
			exprlist = NIL;
	}

	/*
	 * Always clear the result slot; this is appropriate if we are at the end
	 * of the data, and if we're not, we still need it as the first step of
	 * the store-virtual-tuple protocol.  It seems wise to clear the slot
	 * before we reset the context it might have pointers into.
	 */
	ExecClearTuple(slot);

	if (exprlist)
	{
		MemoryContext oldContext;
		List	   *exprstatelist;
		Datum	   *values;
		bool	   *isnull;
		ListCell   *lc;
		int			resind;

		/*
		 * Get rid of any prior cycle's leftovers.  We use ReScanExprContext
		 * not just ResetExprContext because we want any registered shutdown
		 * callbacks to be called.
		 */
		ReScanExprContext(econtext);

		/*
		 * Build the expression eval state in the econtext's per-tuple memory.
		 * This is a tad unusual, but we want to delete the eval state again
		 * when we move to the next row, to avoid growth of memory
		 * requirements over a long values list.
		 */
		oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

		/*
		 * Pass NULL, not my plan node, because we don't want anything in this
		 * transient state linking into permanent state.  The only possibility
		 * is a SubPlan, and there shouldn't be any (any subselects in the
		 * VALUES list should be InitPlans).
		 */
		exprstatelist = (List *) ExecInitExpr((Expr *) exprlist, NULL);

		/* parser should have checked all sublists are the same length */
		Assert(list_length(exprstatelist) == slot->tts_tupleDescriptor->natts);

		/*
		 * Compute the expressions and build a virtual result tuple. We
		 * already did ExecClearTuple(slot).
		 */
		values = slot->tts_values;
		isnull = slot->tts_isnull;

		resind = 0;
		foreach(lc, exprstatelist)
		{
			ExprState  *estate = (ExprState *) lfirst(lc);

			values[resind] = ExecEvalExpr(estate,
										  econtext,
										  &isnull[resind],
										  NULL);
			resind++;
		}

		MemoryContextSwitchTo(oldContext);

		/*
		 * And return the virtual tuple.
		 */
		ExecStoreVirtualTuple(slot);
	}

	return slot;
}
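The memory discipline here is the interesting part: everything built for one row lives in the econtext's per-tuple context and is wiped by the next ReScanExprContext call, so a long VALUES list never accumulates eval state. The idiom in isolation (a fragment; econtext and the per-column evaluation are assumed from the surrounding code):

	MemoryContext oldcxt;

	ReScanExprContext(econtext);	/* frees the previous row's eval state,
									 * running any shutdown callbacks */
	oldcxt = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
	/* ... ExecInitExpr() + ExecEvalExpr() for this row's columns ... */
	MemoryContextSwitchTo(oldcxt);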
Example #25
/*
 *	_bt_endpoint() -- Find the first or last page in the index, and scan
 * from there to the first key satisfying all the quals.
 *
 * This is used by _bt_first() to set up a scan when we've determined
 * that the scan must start at the beginning or end of the index (for
 * a forward or backward scan respectively).  Exit conditions are the
 * same as for _bt_first().
 */
static bool
_bt_endpoint(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf;
	Page		page;
	BTPageOpaque opaque;
	OffsetNumber start;
	BTScanPosItem *currItem;

	/*
	 * Scan down to the leftmost or rightmost leaf page.  This is a simplified
	 * version of _bt_search().  We don't maintain a stack since we know we
	 * won't need it.
	 */
	buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));

	if (!BufferIsValid(buf))
	{
		/*
		 * Empty index. Lock the whole relation, as nothing finer to lock
		 * exists.
		 */
		PredicateLockRelation(rel, scan->xs_snapshot);
		so->currPos.buf = InvalidBuffer;
		return false;
	}

	PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	Assert(P_ISLEAF(opaque));

	if (ScanDirectionIsForward(dir))
	{
		/* There could be dead pages to the left, so not this: */
		/* Assert(P_LEFTMOST(opaque)); */

		start = P_FIRSTDATAKEY(opaque);
	}
	else if (ScanDirectionIsBackward(dir))
	{
		Assert(P_RIGHTMOST(opaque));

		start = PageGetMaxOffsetNumber(page);
	}
	else
	{
		elog(ERROR, "invalid scan direction: %d", (int) dir);
		start = 0;				/* keep compiler quiet */
	}

	/* remember which buffer we have pinned */
	so->currPos.buf = buf;

	/* initialize moreLeft/moreRight appropriately for scan direction */
	if (ScanDirectionIsForward(dir))
	{
		so->currPos.moreLeft = false;
		so->currPos.moreRight = true;
	}
	else
	{
		so->currPos.moreLeft = true;
		so->currPos.moreRight = false;
	}
	so->numKilled = 0;			/* just paranoia */
	so->markItemIndex = -1;		/* ditto */

	/*
	 * Now load data from the first page of the scan.
	 */
	if (!_bt_readpage(scan, dir, start))
	{
		/*
		 * There's no actually-matching data on this page.  Try to advance to
		 * the next page.  Return false if there's no matching data at all.
		 */
		if (!_bt_steppage(scan, dir))
			return false;
	}

	/* Drop the lock, but not pin, on the current page */
	LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_ctup.t_self = currItem->heapTid;
	if (scan->xs_want_itup)
		scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);

	return true;
}
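P_FIRSTDATAKEY, used for the forward-scan start above, skips the "high key" that every non-rightmost page stores in its first line pointer; its definition in nbtree.h is roughly:

#define P_HIKEY					((OffsetNumber) 1)
#define P_FIRSTKEY				((OffsetNumber) 2)
#define P_FIRSTDATAKEY(opaque)	(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)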
Example #26
/* ----------------------------------------------------------------
 *		ExecSort
 *
 *		Sorts tuples from the outer subtree of the node using tuplesort,
 *		which saves the results in a temporary file or memory. After the
 *		initial call, returns a tuple from the file with each call.
 *
 *		Conditions:
 *		  -- none.
 *
 *		Initial States:
 *		  -- the outer child is prepared to return the first tuple.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecSort(SortState *node)
{
	EState	   *estate;
	ScanDirection dir;
	Tuplesortstate *tuplesortstate;
	TupleTableSlot *slot;

	/*
	 * get state info from node
	 */
	SO1_printf("ExecSort: %s\n",
			   "entering routine");

	estate = node->ss.ps.state;
	dir = estate->es_direction;
	tuplesortstate = (Tuplesortstate *) node->tuplesortstate;

	/*
	 * If first time through, read all tuples from outer plan and pass them to
	 * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
	 */

	if (!node->sort_Done)
	{
		Sort	   *plannode = (Sort *) node->ss.ps.plan;
		PlanState  *outerNode;
		TupleDesc	tupDesc;

		SO1_printf("ExecSort: %s\n",
				   "sorting subplan");

		/*
		 * Want to scan subplan in the forward direction while creating the
		 * sorted data.
		 */
		estate->es_direction = ForwardScanDirection;

		/*
		 * Initialize tuplesort module.
		 */
		SO1_printf("ExecSort: %s\n",
				   "calling tuplesort_begin");

		outerNode = outerPlanState(node);
		tupDesc = ExecGetResultType(outerNode);

		tuplesortstate = tuplesort_begin_heap(tupDesc,
											  plannode->numCols,
											  plannode->sortColIdx,
											  plannode->sortOperators,
											  plannode->collations,
											  plannode->nullsFirst,
											  work_mem,
											  node->randomAccess);
		if (node->bounded)
			tuplesort_set_bound(tuplesortstate, node->bound);
		node->tuplesortstate = (void *) tuplesortstate;

		/*
		 * Scan the subplan and feed all the tuples to tuplesort.
		 */

		for (;;)
		{
			slot = ExecProcNode(outerNode);

			if (TupIsNull(slot))
				break;

			tuplesort_puttupleslot(tuplesortstate, slot);
		}

		/*
		 * Complete the sort.
		 */
		tuplesort_performsort(tuplesortstate);

		/*
		 * restore to user specified direction
		 */
		estate->es_direction = dir;

		/*
		 * finally set the sorted flag to true
		 */
		node->sort_Done = true;
		node->bounded_Done = node->bounded;
		node->bound_Done = node->bound;
		SO1_printf("ExecSort: %s\n", "sorting done");
	}

	SO1_printf("ExecSort: %s\n",
			   "retrieving tuple from tuplesort");

	/*
	 * Get the first or next tuple from tuplesort. Returns NULL if no more
	 * tuples.
	 */
	slot = node->ss.ps.ps_ResultTupleSlot;
	(void) tuplesort_gettupleslot(tuplesortstate,
								  ScanDirectionIsForward(dir),
								  slot);

	return slot;
}
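Stripped of executor plumbing, the tuplesort lifecycle ExecSort drives is: begin, feed, sort, drain, end. A condensed sketch follows; the tuple descriptor, sort-key arrays, and the input/output slots are assumed to exist in the caller:

	Tuplesortstate *state;

	state = tuplesort_begin_heap(tupDesc, nkeys, attNums,
								 sortOperators, collations, nullsFirst,
								 work_mem, false);	/* no random access */

	/* feed phase: once per input row */
	tuplesort_puttupleslot(state, inslot);

	/* sort phase: after the last input row */
	tuplesort_performsort(state);

	/* drain phase: returns false when no tuples remain */
	while (tuplesort_gettupleslot(state, true, outslot))
	{
		/* consume outslot */
	}

	tuplesort_end(state);	/* release memory and temp files */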
Example #27
/* ----------------------------------------------------------------
 *		ExecMaterial
 *
 *		As long as we are at the end of the data collected in the tuplestore,
 *		we collect one new row from the subplan on each call, and stash it
 *		aside in the tuplestore before returning it.  The tuplestore is
 *		only read if we are asked to scan backwards, rescan, or mark/restore.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* result tuple from subplan */
ExecMaterial(MaterialState *node)
{
	EState	   *estate;
	ScanDirection dir;
	bool		forward;

	NTupleStore *ts;
	NTupleStoreAccessor *tsa;

	bool		eof_tuplestore;
	TupleTableSlot *slot;
	Material *ma;
	
	/*
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);

	ts = node->ts_state->matstore;
	tsa = (NTupleStoreAccessor *) node->ts_pos;

	ma = (Material *) node->ss.ps.plan;
	Assert(IsA(ma, Material));

	/*
	 * If first time through, and we need a tuplestore, initialize it.
	 */
	if (ts == NULL && (ma->share_type != SHARE_NOTSHARED || node->randomAccess))
	{
		/* 
		 * For cross slice material, we only run ExecMaterial on DriverSlice 
		 */
		if(ma->share_type == SHARE_MATERIAL_XSLICE)
		{
			char rwfile_prefix[100];

			if(ma->driver_slice != currentSliceId)
			{
				elog(LOG, "Material Exec on CrossSlice, current slice %d", currentSliceId);
				return NULL;
			}
			
			shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), ma->share_id); 
			elog(LOG, "Material node creates shareinput rwfile %s", rwfile_prefix);

			ts = ntuplestore_create_readerwriter(rwfile_prefix, PlanStateOperatorMemKB((PlanState *)node) * 1024, true);
			tsa = ntuplestore_create_accessor(ts, true);
		}
		else
		{
			/* Non-shared Materialize node */
			bool isWriter = true;
			workfile_set *work_set = NULL;

			if (gp_workfile_caching)
			{
				work_set = workfile_mgr_find_set( &node->ss.ps);

				if (NULL != work_set)
				{
					/* Reusing cached workfiles. Tell subplan we won't be needing any tuples */
					elog(gp_workfile_caching_loglevel, "Materialize reusing cached workfiles, initiating Squelch walker");

					isWriter = false;
					ExecSquelchNode(outerPlanState(node));
					node->eof_underlying = true;
					node->cached_workfiles_found = true;

					if (node->ss.ps.instrument)
					{
						node->ss.ps.instrument->workfileReused = true;
					}
				}
			}

			if (NULL == work_set)
			{
				/*
				 * No work_set found, this is because:
				 *  a. workfile caching is enabled but we didn't find any reusable set
				 *  b. workfile caching is disabled
				 * Creating new empty workset
				 */
				Assert(!node->cached_workfiles_found);

				/* Don't try to cache when running under a ShareInputScan node */
				bool can_reuse = (ma->share_type == SHARE_NOTSHARED);

				work_set = workfile_mgr_create_set(BUFFILE, can_reuse, &node->ss.ps, NULL_SNAPSHOT);
				isWriter = true;
			}

			Assert(NULL != work_set);
			AssertEquivalent(node->cached_workfiles_found, !isWriter);

			ts = ntuplestore_create_workset(work_set, node->cached_workfiles_found,
					PlanStateOperatorMemKB((PlanState *) node) * 1024);
			tsa = ntuplestore_create_accessor(ts, isWriter);
		}
		
		Assert(ts && tsa);
		node->ts_state->matstore = ts;
		node->ts_pos = (void *) tsa;

		/* CDB: Offer extra info for EXPLAIN ANALYZE. */
		if (node->ss.ps.instrument)
		{
			/* Let the tuplestore share our Instrumentation object. */
			ntuplestore_setinstrument(ts, node->ss.ps.instrument);

			/* Request a callback at end of query. */
			node->ss.ps.cdbexplainfun = ExecMaterialExplainEnd;
		}

		/*
		 * MPP: If requested, fetch all rows from subplan and put them
		 * in the tuplestore.  This decouples a middle slice's receiving
		 * and sending Motion operators to neutralize a deadlock hazard.
		 * MPP TODO: Remove when a better solution is implemented.
		 *
		 * ShareInput: if the material node
		 * is used to share input, we will need to fetch all rows and put
		 * them in tuple store
		 */
		while (((Material *) node->ss.ps.plan)->cdb_strict
				|| ma->share_type != SHARE_NOTSHARED)
		{
			/*
			 * When reusing cached workfiles, we already have all the tuples,
			 * and we don't need to read anything from subplan.
			 */
			if (node->cached_workfiles_found)
			{
				break;
			}
			TupleTableSlot *outerslot = ExecProcNode(outerPlanState(node));

			if (TupIsNull(outerslot))
			{
				node->eof_underlying = true;

				if (ntuplestore_created_reusable_workfiles(ts))
				{
					ntuplestore_flush(ts);
					ntuplestore_mark_workset_complete(ts);
				}

				ntuplestore_acc_seek_bof(tsa);

				break;
			}
			Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN); 

			ntuplestore_acc_put_tupleslot(tsa, outerslot);
		}
	
		CheckSendPlanStateGpmonPkt(&node->ss.ps);

		if(forward)
			ntuplestore_acc_seek_bof(tsa);
		else
			ntuplestore_acc_seek_eof(tsa);

		/* for share input, material do not need to return any tuple */
		if(ma->share_type != SHARE_NOTSHARED)
		{
			Assert(ma->share_type == SHARE_MATERIAL || ma->share_type == SHARE_MATERIAL_XSLICE);
			/* 
			 * if the material is shared across slice, notify consumers that
			 * it is ready.
			 */
			if(ma->share_type == SHARE_MATERIAL_XSLICE) 
			{
				if (ma->driver_slice == currentSliceId)
				{
					ntuplestore_flush(ts);

					node->share_lk_ctxt = shareinput_writer_notifyready(ma->share_id, ma->nsharer_xslice,
							estate->es_plannedstmt->planGen);
				}
			}
			return NULL;
		}
	}

	if(ma->share_type != SHARE_NOTSHARED)
		return NULL;

	/*
	 * If we can fetch another tuple from the tuplestore, return it.
	 */
	slot = node->ss.ps.ps_ResultTupleSlot;

	if(forward)
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, 1);
	else
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, -1);

	if(tsa!=NULL && ntuplestore_acc_tell(tsa, NULL))
	{
		ntuplestore_acc_current_tupleslot(tsa, slot);
		if (!TupIsNull(slot))
		{
			Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node));
			CheckSendPlanStateGpmonPkt(&node->ss.ps);
		}
		return slot;
	}

	/*
	 * If necessary, try to fetch another row from the subplan.
	 *
	 * Note: the eof_underlying state variable exists to short-circuit further
	 * subplan calls.  It's not optional, unfortunately, because some plan
	 * node types are not robust about being called again when they've already
	 * returned NULL.
	 * If reusing cached workfiles, there is no need to execute subplan at all.
	 */
	if (eof_tuplestore && !node->eof_underlying)
	{
		PlanState  *outerNode;
		TupleTableSlot *outerslot;

		Assert(!node->cached_workfiles_found && "we shouldn't get here when using cached workfiles");

		/*
		 * We can only get here with forward==true, so no need to worry about
		 * which direction the subplan will go.
		 */
		outerNode = outerPlanState(node);
		outerslot = ExecProcNode(outerNode);
		if (TupIsNull(outerslot))
		{
			node->eof_underlying = true;
			if (ntuplestore_created_reusable_workfiles(ts))
			{
				ntuplestore_flush(ts);
				ntuplestore_mark_workset_complete(ts);
			}

			if (!node->ss.ps.delayEagerFree)
			{
				ExecEagerFreeMaterial(node);
			}

			return NULL;
		}

		Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN); 

		if (tsa)
			ntuplestore_acc_put_tupleslot(tsa, outerslot);

		/*
		 * And return a copy of the tuple.	(XXX couldn't we just return the
		 * outerslot?)
		 */
		Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node));
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
		return ExecCopySlot(slot, outerslot);
	}


	if (!node->ss.ps.delayEagerFree)
	{
		ExecEagerFreeMaterial(node);
	}

	/*
	 * Nothing left ...
	 */
	return NULL;
}
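This Greenplum variant layers workfile caching and cross-slice sharing on an NTupleStore, but the control flow mirrors upstream PostgreSQL's ExecMaterial over the plain tuplestore API. A condensed and simplified sketch of that upstream shape (field names and the cast are assumptions; error paths omitted):

	Tuplestorestate *ts = (Tuplestorestate *) node->tuplestorestate;
	TupleTableSlot *outerslot;

	/* try the store first */
	if (ts != NULL &&
		tuplestore_gettupleslot(ts, forward, false, slot))
		return slot;			/* served from the store */

	/* otherwise pull one row from the subplan, stash it, return a copy */
	outerslot = ExecProcNode(outerPlanState(node));
	if (TupIsNull(outerslot))
	{
		node->eof_underlying = true;
		return NULL;
	}
	if (ts != NULL)
		tuplestore_puttupleslot(ts, outerslot);
	return ExecCopySlot(slot, outerslot);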
Example #28
/*
 * Test whether an indextuple satisfies all the scankey conditions.
 *
 * If so, copy its TID into scan->xs_ctup.t_self, and return TRUE.
 * If not, return FALSE (xs_ctup is not changed).
 *
 * If the tuple fails to pass the qual, we also determine whether there's
 * any need to continue the scan beyond this tuple, and set *continuescan
 * accordingly.  See comments for _bt_preprocess_keys(), above, about how
 * this is done.
 *
 * scan: index scan descriptor (containing a search-type scankey)
 * page: buffer page containing index tuple
 * offnum: offset number of index tuple (must be a valid item!)
 * dir: direction we are scanning in
 * continuescan: output parameter (will be set correctly in all cases)
 */
bool
_bt_checkkeys(IndexScanDesc scan,
			  Page page, OffsetNumber offnum,
			  ScanDirection dir, bool *continuescan)
{
	ItemId		iid = PageGetItemId(page, offnum);
	bool		tuple_valid;
	IndexTuple	tuple;
	TupleDesc	tupdesc;
	BTScanOpaque so;
	int			keysz;
	int			ikey;
	ScanKey		key;

	*continuescan = true;		/* default assumption */

	/*
	 * If the scan specifies not to return killed tuples, then we treat a
	 * killed tuple as not passing the qual.  Most of the time, it's a win to
	 * not bother examining the tuple's index keys, but just return
	 * immediately with continuescan = true to proceed to the next tuple.
	 * However, if this is the last tuple on the page, we should check the
	 * index keys to prevent uselessly advancing to the next page.
	 */
	if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
	{
		/* return immediately if there are more tuples on the page */
		if (ScanDirectionIsForward(dir))
		{
			if (offnum < PageGetMaxOffsetNumber(page))
				return false;
		}
		else
		{
			BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);

			if (offnum > P_FIRSTDATAKEY(opaque))
				return false;
		}

		/*
		 * OK, we want to check the keys, but we'll return FALSE even if the
		 * tuple passes the key tests.
		 */
		tuple_valid = false;
	}
	else
		tuple_valid = true;

	tuple = (IndexTuple) PageGetItem(page, iid);

	IncrIndexProcessed();

	tupdesc = RelationGetDescr(scan->indexRelation);
	so = (BTScanOpaque) scan->opaque;
	keysz = so->numberOfKeys;

	for (key = so->keyData, ikey = 0; ikey < keysz; key++, ikey++)
	{
		Datum		datum;
		bool		isNull;
		Datum		test;

		/* row-comparison keys need special processing */
		if (key->sk_flags & SK_ROW_HEADER)
		{
			if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan))
				continue;
			return false;
		}

		datum = index_getattr(tuple,
							  key->sk_attno,
							  tupdesc,
							  &isNull);

		if (key->sk_flags & SK_ISNULL)
		{
			/* Handle IS NULL tests */
			Assert(key->sk_flags & SK_SEARCHNULL);

			if (isNull)
				continue;		/* tuple satisfies this qual */

			/*
			 * Tuple fails this qual.  If it's a required qual for the current
			 * scan direction, then we can conclude no further tuples will
			 * pass, either.
			 */
			if ((key->sk_flags & SK_BT_REQFWD) &&
				ScanDirectionIsForward(dir))
				*continuescan = false;
			else if ((key->sk_flags & SK_BT_REQBKWD) &&
					 ScanDirectionIsBackward(dir))
				*continuescan = false;

			/*
			 * In any case, this indextuple doesn't match the qual.
			 */
			return false;
		}

		if (isNull)
		{
			if (key->sk_flags & SK_BT_NULLS_FIRST)
			{
				/*
				 * Since NULLs are sorted before non-NULLs, we know we have
				 * reached the lower limit of the range of values for this
				 * index attr.	On a backward scan, we can stop if this qual
				 * is one of the "must match" subset.  On a forward scan,
				 * however, we should keep going.
				 */
				if ((key->sk_flags & SK_BT_REQBKWD) &&
					ScanDirectionIsBackward(dir))
					*continuescan = false;
			}
			else
			{
				/*
				 * Since NULLs are sorted after non-NULLs, we know we have
				 * reached the upper limit of the range of values for this
				 * index attr.	On a forward scan, we can stop if this qual is
				 * one of the "must match" subset.	On a backward scan,
				 * however, we should keep going.
				 */
				if ((key->sk_flags & SK_BT_REQFWD) &&
					ScanDirectionIsForward(dir))
					*continuescan = false;
			}

			/*
			 * In any case, this indextuple doesn't match the qual.
			 */
			return false;
		}

		test = FunctionCall2(&key->sk_func, datum, key->sk_argument);

		if (!DatumGetBool(test))
		{
			/*
			 * Tuple fails this qual.  If it's a required qual for the current
			 * scan direction, then we can conclude no further tuples will
			 * pass, either.
			 *
			 * Note: because we stop the scan as soon as any required equality
			 * qual fails, it is critical that equality quals be used for the
			 * initial positioning in _bt_first() when they are available. See
			 * comments in _bt_first().
			 */
			if ((key->sk_flags & SK_BT_REQFWD) &&
				ScanDirectionIsForward(dir))
				*continuescan = false;
			else if ((key->sk_flags & SK_BT_REQBKWD) &&
					 ScanDirectionIsBackward(dir))
				*continuescan = false;

			/*
			 * In any case, this indextuple doesn't match the qual.
			 */
			return false;
		}
	}

	/* If we get here, the tuple passes all index quals. */
	if (tuple_valid)
		scan->xs_ctup.t_self = tuple->t_tid;

	return tuple_valid;
}
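The same four-line "required key" test appears three times above; a helper like this (hypothetical, not in the source) captures it. For example, with an ascending index and the forward-scan qual "x < 7", the key carries SK_BT_REQFWD, so the first tuple with x >= 7 fails the qual and terminates the whole scan rather than just skipping one tuple:

static void
maybe_stop_scan(ScanKey key, ScanDirection dir, bool *continuescan)
{
	/* a failed qual only ends the scan when the key is "required"
	 * in the direction we are moving */
	if ((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir))
		*continuescan = false;
	else if ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir))
		*continuescan = false;
}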
Example #29
/* ----------------------------------------------------------------
 *		IndexNext
 *
 *		Retrieve a tuple from the IndexScan node's currentRelation
 *		using the index specified in the IndexScanState information.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexNext(IndexScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	IndexScanDesc scandesc;
	HeapTuple	tuple;
	TupleTableSlot *slot;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;
	/* flip direction if this is an overall backward scan */
	if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
	{
		if (ScanDirectionIsForward(direction))
			direction = BackwardScanDirection;
		else if (ScanDirectionIsBackward(direction))
			direction = ForwardScanDirection;
	}
	scandesc = node->iss_ScanDesc;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	/*
	 * ok, now that we have what we need, fetch the next tuple.
	 */
	while ((tuple = index_getnext(scandesc, direction)) != NULL)
	{
		/*
		 * Store the scanned tuple in the scan tuple slot of the scan state.
		 * Note: we pass 'false' because tuples returned by amgetnext are
		 * pointers onto disk pages and must not be pfree()'d.
		 */
		ExecStoreTuple(tuple,	/* tuple to store */
					   slot,	/* slot to store in */
					   scandesc->xs_cbuf,		/* buffer containing tuple */
					   false);	/* don't pfree */

		/*
		 * If the index was lossy, we have to recheck the index quals using
		 * the real tuple.
		 */
		if (scandesc->xs_recheck)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);
			if (!ExecQual(node->indexqualorig, econtext, false))
				continue;		/* nope, so ask index for another one */
		}

		return slot;
	}

	/*
	 * if we get here it means the index scan failed so we are at the end of
	 * the scan..
	 */
	return ExecClearTuple(slot);
}
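One aside on the direction flip at the top of IndexNext: ScanDirection is encoded in sdir.h as -1/0/+1 (Backward/NoMovement/Forward), so the if/else above is equivalent to a simple negation. A sketch of that alternative:

static ScanDirection
flip_direction(ScanDirection dir)
{
	/* NoMovement (0) stays NoMovement; Forward and Backward swap */
	return (ScanDirection) -((int) dir);
}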
Example #30
/*
 * Test whether an indextuple satisfies a row-comparison scan condition.
 *
 * Return true if so, false if not.  If not, also clear *continuescan if
 * it's not possible for any future tuples in the current scan direction
 * to pass the qual.
 *
 * This is a subroutine for _bt_checkkeys, which see for more info.
 */
static bool
_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
					 ScanDirection dir, bool *continuescan)
{
	ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
	int32		cmpresult = 0;
	bool		result;

	/* First subkey should be same as the header says */
	Assert(subkey->sk_attno == skey->sk_attno);

	/* Loop over columns of the row condition */
	for (;;)
	{
		Datum		datum;
		bool		isNull;

		Assert(subkey->sk_flags & SK_ROW_MEMBER);

		datum = index_getattr(tuple,
							  subkey->sk_attno,
							  tupdesc,
							  &isNull);

		if (isNull)
		{
			if (subkey->sk_flags & SK_BT_NULLS_FIRST)
			{
				/*
				 * Since NULLs are sorted before non-NULLs, we know we have
				 * reached the lower limit of the range of values for this
				 * index attr. On a backward scan, we can stop if this qual is
				 * one of the "must match" subset.	On a forward scan,
				 * however, we should keep going.
				 */
				if ((subkey->sk_flags & SK_BT_REQBKWD) &&
					ScanDirectionIsBackward(dir))
					*continuescan = false;
			}
			else
			{
				/*
				 * Since NULLs are sorted after non-NULLs, we know we have
				 * reached the upper limit of the range of values for this
				 * index attr. On a forward scan, we can stop if this qual is
				 * one of the "must match" subset.	On a backward scan,
				 * however, we should keep going.
				 */
				if ((subkey->sk_flags & SK_BT_REQFWD) &&
					ScanDirectionIsForward(dir))
					*continuescan = false;
			}

			/*
			 * In any case, this indextuple doesn't match the qual.
			 */
			return false;
		}

		if (subkey->sk_flags & SK_ISNULL)
		{
			/*
			 * Unlike the simple-scankey case, this isn't a disallowed case.
			 * But it can never match.	If all the earlier row comparison
			 * columns are required for the scan direction, we can stop the
			 * scan, because there can't be another tuple that will succeed.
			 */
			if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument))
				subkey--;
			if ((subkey->sk_flags & SK_BT_REQFWD) &&
				ScanDirectionIsForward(dir))
				*continuescan = false;
			else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
					 ScanDirectionIsBackward(dir))
				*continuescan = false;
			return false;
		}

		/* Perform the test --- three-way comparison not bool operator */
		cmpresult = DatumGetInt32(FunctionCall2(&subkey->sk_func,
												datum,
												subkey->sk_argument));

		if (subkey->sk_flags & SK_BT_DESC)
			cmpresult = -cmpresult;

		/* Done comparing if unequal, else advance to next column */
		if (cmpresult != 0)
			break;

		if (subkey->sk_flags & SK_ROW_END)
			break;
		subkey++;
	}

	/*
	 * At this point cmpresult indicates the overall result of the row
	 * comparison, and subkey points to the deciding column (or the last
	 * column if the result is "=").
	 */
	switch (subkey->sk_strategy)
	{
			/* EQ and NE cases aren't allowed here */
		case BTLessStrategyNumber:
			result = (cmpresult < 0);
			break;
		case BTLessEqualStrategyNumber:
			result = (cmpresult <= 0);
			break;
		case BTGreaterEqualStrategyNumber:
			result = (cmpresult >= 0);
			break;
		case BTGreaterStrategyNumber:
			result = (cmpresult > 0);
			break;
		default:
			elog(ERROR, "unrecognized RowCompareType: %d",
				 (int) subkey->sk_strategy);
			result = 0;			/* keep compiler quiet */
			break;
	}

	if (!result)
	{
		/*
		 * Tuple fails this qual.  If it's a required qual for the current
		 * scan direction, then we can conclude no further tuples will pass,
		 * either.	Note we have to look at the deciding column, not
		 * necessarily the first or last column of the row condition.
		 */
		if ((subkey->sk_flags & SK_BT_REQFWD) &&
			ScanDirectionIsForward(dir))
			*continuescan = false;
		else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
				 ScanDirectionIsBackward(dir))
			*continuescan = false;
	}

	return result;
}
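A worked example of the column loop above, for the row qual (a, b) > (1, 2) with both columns ascending:

/*
 *   tuple (0, 9): a: 0 vs 1 -> cmpresult < 0, deciding column is a,
 *                 strategy ">" -> result false
 *   tuple (1, 1): a equal, advance; b: 1 vs 2 -> cmpresult < 0 -> false
 *   tuple (1, 3): a equal, advance; b: 3 vs 2 -> cmpresult > 0 -> true
 *   tuple (2, 0): a: 2 vs 1 -> cmpresult > 0 -> true (b never examined)
 */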