Example #1
void
ExecSquelchShareInputScan(ShareInputScanState *node)
{
	ShareType share_type = ((ShareInputScan *) node->ss.ps.plan)->share_type;
	bool isWriter = outerPlanState(node) != NULL;
	bool tuplestoreInitialized = node->ts_state != NULL;

	/*
	 * If this ShareInputScan is shared within the same slice, then its
	 * subtree may still need to be executed and the Motions in the subtree
	 * cannot yet be stopped. Thus, don't recurse in this case.
	 *
	 * In squelching a cross-slice ShareInputScan writer, we need to ensure
	 * we don't block any readers on other slices as a result of not
	 * materializing the shared plan.
	 *
	 * Note that we emphatically can't "fake" an empty tuple store and just
	 * go ahead waking up the readers, because that can lead to wrong results.
	 */
	switch (share_type)
	{
		case SHARE_MATERIAL:
		case SHARE_SORT:
			/* don't recurse into child */
			return;

		case SHARE_MATERIAL_XSLICE:
		case SHARE_SORT_XSLICE:
			if (isWriter && !tuplestoreInitialized)
				ExecProcNode((PlanState *) node);
			break;
		case SHARE_NOTSHARED:
			break;
	}
	ExecSquelchNode(outerPlanState(node));

	/* Free any resources that we can. */
	ExecEagerFreeShareInputScan(node);
}
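
The recursion cutoff above is the heart of squelching: stop the walk wherever a shared subtree must stay runnable. A rough, self-contained sketch of that recursive-squelch pattern follows; PlanNode, stop_early, and squelch_tree are hypothetical stand-ins (the real ExecSquelchNode walks PlanState trees via a generic walker), so treat this as an illustration, not GPDB code.

/*
 * Minimal sketch of a recursive squelch walker over a toy plan tree.
 * A node flagged stop_early plays the role of the same-slice
 * SHARE_MATERIAL/SHARE_SORT cases above: the walk ends there.
 */
#include <stdio.h>

typedef struct PlanNode
{
	const char *name;
	int			stop_early;		/* like the SHARE_MATERIAL early return */
	struct PlanNode *left;
	struct PlanNode *right;
} PlanNode;

static void
squelch_tree(PlanNode *node)
{
	if (node == NULL)
		return;
	printf("squelch %s\n", node->name);
	if (node->stop_early)
		return;					/* don't recurse into the shared subtree */
	squelch_tree(node->left);
	squelch_tree(node->right);
}

int
main(void)
{
	PlanNode	scan = {"SeqScan", 0, NULL, NULL};
	PlanNode	share = {"ShareInputScan", 1, &scan, NULL};
	PlanNode	limit = {"Limit", 0, &share, NULL};

	squelch_tree(&limit);		/* stops at ShareInputScan, as above */
	return 0;
}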
Example #2
/* ----------------------------------------------------------------
 *		ExecMaterial
 *
 *		As long as we are at the end of the data collected in the tuplestore,
 *		we collect one new row from the subplan on each call, and stash it
 *		aside in the tuplestore before returning it.  The tuplestore is
 *		only read if we are asked to scan backwards, rescan, or mark/restore.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* result tuple from subplan */
ExecMaterial(MaterialState *node)
{
	EState	   *estate;
	ScanDirection dir;
	bool		forward;

	NTupleStore *ts;
	NTupleStoreAccessor *tsa;

	bool		eof_tuplestore;
	TupleTableSlot *slot;
	Material *ma;
	
	/*
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);

	ts = node->ts_state->matstore;
	tsa = (NTupleStoreAccessor *) node->ts_pos;

	ma = (Material *) node->ss.ps.plan;
	Assert(IsA(ma, Material));

	/*
	 * If first time through, and we need a tuplestore, initialize it.
	 */
	if (ts == NULL && (ma->share_type != SHARE_NOTSHARED || node->randomAccess))
	{
		/*
		 * For a cross-slice Material, we only run ExecMaterial on the
		 * driver slice.
		 */
		if(ma->share_type == SHARE_MATERIAL_XSLICE)
		{
			char rwfile_prefix[100];

			if(ma->driver_slice != currentSliceId)
			{
				elog(LOG, "Material Exec on CrossSlice, current slice %d", currentSliceId);
				return NULL;
			}
			
			shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), ma->share_id); 
			elog(LOG, "Material node creates shareinput rwfile %s", rwfile_prefix);

			ts = ntuplestore_create_readerwriter(rwfile_prefix, PlanStateOperatorMemKB((PlanState *)node) * 1024, true);
			tsa = ntuplestore_create_accessor(ts, true);
		}
		else
		{
			/* Non-shared Materialize node */
			bool isWriter = true;
			workfile_set *work_set = NULL;

			if (gp_workfile_caching)
			{
				work_set = workfile_mgr_find_set( &node->ss.ps);

				if (NULL != work_set)
				{
					/* Reusing cached workfiles. Tell subplan we won't be needing any tuples */
					elog(gp_workfile_caching_loglevel, "Materialize reusing cached workfiles, initiating Squelch walker");

					isWriter = false;
					ExecSquelchNode(outerPlanState(node));
					node->eof_underlying = true;
					node->cached_workfiles_found = true;

					if (node->ss.ps.instrument)
					{
						node->ss.ps.instrument->workfileReused = true;
					}
				}
			}

			if (NULL == work_set)
			{
				/*
				 * No work_set was found, either because:
				 *  a. workfile caching is enabled but no reusable set exists, or
				 *  b. workfile caching is disabled.
				 * Create a new empty work set.
				 */
				Assert(!node->cached_workfiles_found);

				/* Don't try to cache when running under a ShareInputScan node */
				bool can_reuse = (ma->share_type == SHARE_NOTSHARED);

				work_set = workfile_mgr_create_set(BUFFILE, can_reuse, &node->ss.ps, NULL_SNAPSHOT);
				isWriter = true;
			}

			Assert(NULL != work_set);
			AssertEquivalent(node->cached_workfiles_found, !isWriter);

			ts = ntuplestore_create_workset(work_set, node->cached_workfiles_found,
					PlanStateOperatorMemKB((PlanState *) node) * 1024);
			tsa = ntuplestore_create_accessor(ts, isWriter);
		}
		
		Assert(ts && tsa);
		node->ts_state->matstore = ts;
		node->ts_pos = (void *) tsa;

		/* CDB: Offer extra info for EXPLAIN ANALYZE. */
		if (node->ss.ps.instrument)
		{
			/* Let the tuplestore share our Instrumentation object. */
			ntuplestore_setinstrument(ts, node->ss.ps.instrument);

			/* Request a callback at end of query. */
			node->ss.ps.cdbexplainfun = ExecMaterialExplainEnd;
		}

		/*
		 * MPP: If requested, fetch all rows from the subplan and put them
		 * in the tuplestore.  This decouples a middle slice's receiving
		 * and sending Motion operators to neutralize a deadlock hazard.
		 * MPP TODO: Remove when a better solution is implemented.
		 *
		 * ShareInput: if the Material node is used to share input, we
		 * need to fetch all rows and put them in the tuplestore.
		 */
		while (((Material *) node->ss.ps.plan)->cdb_strict
				|| ma->share_type != SHARE_NOTSHARED)
		{
			/*
			 * When reusing cached workfiles, we already have all the tuples,
			 * and we don't need to read anything from subplan.
			 */
			if (node->cached_workfiles_found)
			{
				break;
			}
			TupleTableSlot *outerslot = ExecProcNode(outerPlanState(node));

			if (TupIsNull(outerslot))
			{
				node->eof_underlying = true;

				if (ntuplestore_created_reusable_workfiles(ts))
				{
					ntuplestore_flush(ts);
					ntuplestore_mark_workset_complete(ts);
				}

				ntuplestore_acc_seek_bof(tsa);

				break;
			}
			Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN); 

			ntuplestore_acc_put_tupleslot(tsa, outerslot);
		}
	
		CheckSendPlanStateGpmonPkt(&node->ss.ps);

		if(forward)
			ntuplestore_acc_seek_bof(tsa);
		else
			ntuplestore_acc_seek_eof(tsa);

		/* For share input, the Material node does not need to return any tuples. */
		if(ma->share_type != SHARE_NOTSHARED)
		{
			Assert(ma->share_type == SHARE_MATERIAL || ma->share_type == SHARE_MATERIAL_XSLICE);
			/*
			 * If the Material is shared across slices, notify consumers
			 * that it is ready.
			 */
			if(ma->share_type == SHARE_MATERIAL_XSLICE) 
			{
				if (ma->driver_slice == currentSliceId)
				{
					ntuplestore_flush(ts);

					node->share_lk_ctxt = shareinput_writer_notifyready(ma->share_id, ma->nsharer_xslice,
							estate->es_plannedstmt->planGen);
				}
			}
			return NULL;
		}
	}

	if(ma->share_type != SHARE_NOTSHARED)
		return NULL;

	/*
	 * If we can fetch another tuple from the tuplestore, return it.
	 */
	slot = node->ss.ps.ps_ResultTupleSlot;

	if(forward)
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, 1);
	else
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, -1);

	if (tsa != NULL && ntuplestore_acc_tell(tsa, NULL))
	{
		ntuplestore_acc_current_tupleslot(tsa, slot);
		if (!TupIsNull(slot))
		{
			Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node));
			CheckSendPlanStateGpmonPkt(&node->ss.ps);
		}
		return slot;
	}

	/*
	 * If necessary, try to fetch another row from the subplan.
	 *
	 * Note: the eof_underlying state variable exists to short-circuit further
	 * subplan calls.  It's not optional, unfortunately, because some plan
	 * node types are not robust about being called again when they've already
	 * returned NULL.
	 * If reusing cached workfiles, there is no need to execute the subplan at all.
	 */
	if (eof_tuplestore && !node->eof_underlying)
	{
		PlanState  *outerNode;
		TupleTableSlot *outerslot;

		Assert(!node->cached_workfiles_found && "we shouldn't get here when using cached workfiles");

		/*
		 * We can only get here with forward==true, so no need to worry about
		 * which direction the subplan will go.
		 */
		outerNode = outerPlanState(node);
		outerslot = ExecProcNode(outerNode);
		if (TupIsNull(outerslot))
		{
			node->eof_underlying = true;
			if (ntuplestore_created_reusable_workfiles(ts))
			{
				ntuplestore_flush(ts);
				ntuplestore_mark_workset_complete(ts);
			}

			if (!node->ss.ps.delayEagerFree)
			{
				ExecEagerFreeMaterial(node);
			}

			return NULL;
		}

		Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN); 

		if (tsa)
			ntuplestore_acc_put_tupleslot(tsa, outerslot);

		/*
		 * And return a copy of the tuple.	(XXX couldn't we just return the
		 * outerslot?)
		 */
		Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node));
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
		return ExecCopySlot(slot, outerslot); 
	}


	if (!node->ss.ps.delayEagerFree)
	{
		ExecEagerFreeMaterial(node);
	}

	/*
	 * Nothing left ...
	 */
	return NULL;
}
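
Stripped of the tuplestore, workfile-caching, and cross-slice machinery, the cdb_strict path of ExecMaterial reduces to: drain the subplan into a store once, then serve every fetch from the store. A minimal sketch of that pattern, assuming a toy integer "subplan" (ToyMatState, subplan_next, and mat_next are illustrative names, not GPDB APIs):

/*
 * Toy model of the materialize-once pattern: drain a "subplan"
 * (here, an integer generator) into an in-memory store on the first
 * fetch, then serve all subsequent fetches from the store.
 */
#include <stdio.h>

#define NROWS 5

typedef struct ToyMatState
{
	int			store[NROWS];
	int			nstored;
	int			pos;			/* read cursor into the store */
	int			materialized;
} ToyMatState;

static int
subplan_next(int call)			/* stand-in for ExecProcNode(outerPlan) */
{
	return call < NROWS ? call * 10 : -1;	/* -1 == end of stream */
}

static int
mat_next(ToyMatState *node)
{
	if (!node->materialized)
	{
		int			v,
					call = 0;

		while ((v = subplan_next(call++)) != -1)
			node->store[node->nstored++] = v;	/* like ntuplestore_acc_put_tupleslot */
		node->materialized = 1;
	}
	if (node->pos >= node->nstored)
		return -1;
	return node->store[node->pos++];
}

int
main(void)
{
	ToyMatState node = {{0}, 0, 0, 0};
	int			v;

	while ((v = mat_next(&node)) != -1)
		printf("%d\n", v);		/* prints 0 10 20 30 40 */
	return 0;
}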
Example #3
/* ----------------------------------------------------------------
 *		ExecLimit
 *
 *		This is a very simple node which just performs LIMIT/OFFSET
 *		filtering on the stream of tuples returned by a subplan.
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* return: a tuple or NULL */
ExecLimit(LimitState *node)
{
	ScanDirection direction;
	TupleTableSlot *slot;
	PlanState  *outerPlan;

	/*
	 * get information from the node
	 */
	direction = node->ps.state->es_direction;
	outerPlan = outerPlanState(node);

	/*
	 * The main logic is a simple state machine.
	 */
	switch (node->lstate)
	{
		case LIMIT_INITIAL:

			/*
			 * First call for this node, so compute limit/offset. (We can't do
			 * this any earlier, because parameters from upper nodes will not
			 * be set during ExecInitLimit.)  This also sets position = 0 and
			 * changes the state to LIMIT_RESCAN.
			 */
			recompute_limits(node);

			/* FALL THRU */

		case LIMIT_RESCAN:

			/*
			 * If backwards scan, just return NULL without changing state.
			 */
			if (!ScanDirectionIsForward(direction))
				return NULL;

			/*
			 * Check for empty window; if so, treat like empty subplan.
			 */
			if (node->count <= 0 && !node->noCount)
			{
				node->lstate = LIMIT_EMPTY;

				/*
				 * CDB: We'll read no more from outer subtree. To keep our
				 * sibling QEs from being starved, tell source QEs not to clog
				 * up the pipeline with our never-to-be-consumed data.
				 */
				ExecSquelchNode(outerPlan);

				return NULL;
			}

			/*
			 * Fetch rows from subplan until we reach position > offset.
			 */
			for (;;)
			{
				slot = ExecProcNode(outerPlan);
				if (TupIsNull(slot))
				{
					/*
					 * The subplan returns too few tuples for us to produce
					 * any output at all.
					 */
					node->lstate = LIMIT_EMPTY;
					return NULL;
				}
				node->subSlot = slot;
				if (++node->position > node->offset)
					break;
			}

			/*
			 * Okay, we have the first tuple of the window.
			 */
			node->lstate = LIMIT_INWINDOW;
			break;

		case LIMIT_EMPTY:

			/*
			 * The subplan is known to return no tuples (or not more than
			 * OFFSET tuples, in general).	So we return no tuples.
			 */
			ExecSquelchNode(outerPlan); /* CDB */
			return NULL;

		case LIMIT_INWINDOW:
			if (ScanDirectionIsForward(direction))
			{
				/*
				 * Forwards scan, so check for stepping off end of window. If
				 * we are at the end of the window, return NULL without
				 * advancing the subplan or the position variable; but change
				 * the state machine state to record having done so.
				 */
				if (!node->noCount &&
					node->position - node->offset >= node->count)
				{
					node->lstate = LIMIT_WINDOWEND;
					ExecSquelchNode(outerPlan); /* CDB */
					return NULL;
				}

				/*
				 * Get next tuple from subplan, if any.
				 */
				slot = ExecProcNode(outerPlan);
				if (TupIsNull(slot))
				{
					node->lstate = LIMIT_SUBPLANEOF;
					return NULL;
				}
				node->subSlot = slot;
				node->position++;
			}
			else
			{
				/*
				 * Backwards scan, so check for stepping off start of window.
				 * As above, change only state-machine status if so.
				 */
				if (node->position <= node->offset + 1)
				{
					node->lstate = LIMIT_WINDOWSTART;
					ExecSquelchNode(outerPlan); /* CDB */
					return NULL;
				}

				/*
				 * Get previous tuple from subplan; there should be one!
				 */
				slot = ExecProcNode(outerPlan);
				if (TupIsNull(slot))
					elog(ERROR, "LIMIT subplan failed to run backwards");
				node->subSlot = slot;
				node->position--;
			}
			break;

		case LIMIT_SUBPLANEOF:
			if (ScanDirectionIsForward(direction))
				return NULL;

			/*
			 * Backing up from subplan EOF, so re-fetch previous tuple; there
			 * should be one!  Note previous tuple must be in window.
			 */
			slot = ExecProcNode(outerPlan);
			if (TupIsNull(slot))
				elog(ERROR, "LIMIT subplan failed to run backwards");
			node->subSlot = slot;
			node->lstate = LIMIT_INWINDOW;
			/* position does not change 'cause we didn't advance it before */
			break;

		case LIMIT_WINDOWEND:
			if (ScanDirectionIsForward(direction))
				return NULL;

			/*
			 * Backing up from window end: simply re-return the last tuple
			 * fetched from the subplan.
			 */
			slot = node->subSlot;
			node->lstate = LIMIT_INWINDOW;
			/* position does not change 'cause we didn't advance it before */
			break;

		case LIMIT_WINDOWSTART:
			if (!ScanDirectionIsForward(direction))
				return NULL;

			/*
			 * Advancing after having backed off window start: simply
			 * re-return the last tuple fetched from the subplan.
			 */
			slot = node->subSlot;
			node->lstate = LIMIT_INWINDOW;
			/* position does not change 'cause we didn't change it before */
			break;

		default:
			elog(ERROR, "impossible LIMIT state: %d",
				 (int) node->lstate);
			slot = NULL;		/* keep compiler quiet */
			break;
	}

	/* Return the current tuple */
	Assert(!TupIsNull(slot));

	if (!TupIsNull(slot))
	{
		Gpmon_M_Incr_Rows_Out(GpmonPktFromLimitState(node));
		CheckSendPlanStateGpmonPkt(&node->ps);
	}
	return slot;
}
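
For forward scans, the state machine above boils down to: skip OFFSET rows, emit up to COUNT rows, then stop pulling from the subplan (which is what the CDB squelch calls exploit). A stripped-down sketch of just that forward path, with hypothetical names (ToyLimit, TOY_START, subplan_next) and the backward-scan states omitted:

#include <stdio.h>

typedef enum
{
	TOY_START,					/* like LIMIT_INITIAL/LIMIT_RESCAN */
	TOY_INWINDOW,
	TOY_DONE					/* like LIMIT_EMPTY/LIMIT_WINDOWEND */
} ToyPhase;

typedef struct ToyLimit
{
	ToyPhase	phase;
	long		offset;
	long		count;
	long		position;		/* rows pulled from the subplan */
} ToyLimit;

static int
subplan_next(void)				/* stand-in for ExecProcNode(outerPlan) */
{
	static int	row = 0;

	return row < 10 ? ++row : -1;	/* rows 1..10, then EOF */
}

static int
toy_limit_next(ToyLimit *node)
{
	int			row;

	if (node->phase == TOY_DONE)
		return -1;
	if (node->phase == TOY_START)
	{
		/* skip rows until we've consumed OFFSET of them */
		while (node->position < node->offset)
		{
			if (subplan_next() == -1)
			{
				node->phase = TOY_DONE;
				return -1;
			}
			node->position++;
		}
		node->phase = TOY_INWINDOW;
	}
	if (node->position - node->offset >= node->count)
	{
		node->phase = TOY_DONE;	/* window end: stop pulling */
		return -1;
	}
	row = subplan_next();
	if (row == -1)
	{
		node->phase = TOY_DONE;
		return -1;
	}
	node->position++;
	return row;
}

int
main(void)
{
	ToyLimit	node = {TOY_START, 2, 3, 0};	/* OFFSET 2 LIMIT 3 */
	int			v;

	while ((v = toy_limit_next(&node)) != -1)
		printf("%d\n", v);		/* prints 3 4 5 */
	return 0;
}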
Example #4
/* ----------------------------------------------------------------
 *		MultiExecHash
 *
 *		build hash table for hashjoin, doing partitioning if more
 *		than one batch is required.
 * ----------------------------------------------------------------
 */
Node *
MultiExecHash(HashState *node)
{
	PlanState  *outerNode;
	List	   *hashkeys;
	HashJoinTable hashtable;
	TupleTableSlot *slot;
	ExprContext *econtext;
	uint32		hashvalue = 0;

	/* must provide our own instrumentation support */
	if (node->ps.instrument)
		InstrStartNode(node->ps.instrument);

	/*
	 * get state info from node
	 */
	outerNode = outerPlanState(node);
	hashtable = node->hashtable;

	/*
	 * set expression context
	 */
	hashkeys = node->hashkeys;
	econtext = node->ps.ps_ExprContext;

#ifdef FAULT_INJECTOR
	FaultInjector_InjectFaultIfSet(
			MultiExecHashLargeVmem,
			DDLNotSpecified,
			"",		/* databaseName */
			"");	/* tableName */
#endif

	/*
	 * get all inner tuples and insert into the hash table (or temp files)
	 */
	for (;;)
	{
		slot = ExecProcNode(outerNode);
		if (TupIsNull(slot))
			break;

		Gpmon_M_Incr(GpmonPktFromHashState(node), GPMON_QEXEC_M_ROWSIN);
		CheckSendPlanStateGpmonPkt(&node->ps);
		/* We have to compute the hash value */
		econtext->ecxt_innertuple = slot;
		bool hashkeys_null = false;

		if (ExecHashGetHashValue(node, hashtable, econtext, hashkeys, false,
								 node->hs_keepnull, &hashvalue, &hashkeys_null))
		{
			ExecHashTableInsert(node, hashtable, slot, hashvalue);
		}

		if (hashkeys_null)
		{
			node->hs_hashkeys_null = true;
			if (node->hs_quit_if_hashkeys_null)
			{
				ExecSquelchNode(outerNode);
				return NULL;
			}
		}
	}

	/* Now we have set up all the initial batches & primary overflow batches. */
	hashtable->nbatch_outstart = hashtable->nbatch;

	/* must provide our own instrumentation support */
	if (node->ps.instrument)
		InstrStopNode(node->ps.instrument, hashtable->totalTuples);

	/*
	 * We do not return the hash table directly because it's not a subtype of
	 * Node, and so would violate the MultiExecProcNode API.  Instead, our
	 * parent Hashjoin node is expected to know how to fish it out of our node
	 * state.  Ugly but not really worth cleaning up, since Hashjoin knows
	 * quite a bit more about Hash besides that.
	 */
	return NULL;
}
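
The build loop is the classic "drain the inner side into a hash table" pattern. A self-contained toy version with a chained hash table follows; toy_hash, toy_insert, and inner_next are illustrative stand-ins, and unlike ExecHashTableInsert this sketch does no batching, spilling, or NULL-hashkey handling:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NBUCKETS 8

typedef struct Entry
{
	int			key;
	struct Entry *next;
} Entry;

typedef struct ToyHashTable
{
	Entry	   *buckets[NBUCKETS];
	long		totalTuples;
} ToyHashTable;

static unsigned
toy_hash(int key)				/* stand-in for ExecHashGetHashValue */
{
	return ((unsigned) key * 2654435761u) % NBUCKETS;
}

static void
toy_insert(ToyHashTable *ht, int key)	/* stand-in for ExecHashTableInsert */
{
	Entry	   *e = malloc(sizeof(Entry));
	unsigned	b = toy_hash(key);

	e->key = key;
	e->next = ht->buckets[b];	/* chain onto the bucket head */
	ht->buckets[b] = e;
	ht->totalTuples++;
}

static int
inner_next(void)				/* stand-in for ExecProcNode(outerNode) */
{
	static int	row = 0;

	return row < 6 ? row++ : -1;	/* -1 == end of stream */
}

int
main(void)
{
	ToyHashTable ht;
	int			key;

	memset(&ht, 0, sizeof(ht));
	while ((key = inner_next()) != -1)	/* MultiExecHash's build loop */
		toy_insert(&ht, key);
	printf("built hash table with %ld tuples\n", ht.totalTuples);
	return 0;
}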
Example #5
File: nodeSort.c Project: huor/gpdb
/* ----------------------------------------------------------------
 *		ExecSort
 *
 *		Sorts tuples from the outer subtree of the node using tuplesort,
 *		which saves the results in a temporary file or memory. After the
 *		initial call, returns a tuple from the file with each call.
 *
 *		Conditions:
 *		  -- none.
 *
 *		Initial States:
 *		  -- the outer child is prepared to return the first tuple.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecSort(SortState *node)
{
	EState	   *estate;
	ScanDirection dir;
	Tuplesortstate *tuplesortstate = NULL;
	Tuplesortstate_mk *tuplesortstate_mk = NULL;
	TupleTableSlot *slot = NULL;
	Sort 		*plannode = NULL;
	PlanState  *outerNode = NULL;
	TupleDesc	tupDesc = NULL;
	workfile_set *work_set = NULL;

	/*
	 * get state info from node
	 */
	SO1_printf("ExecSort: %s\n",
			   "entering routine");

	estate = node->ss.ps.state;
	dir = estate->es_direction;

	if(gp_enable_mk_sort)
	{
		tuplesortstate_mk = node->tuplesortstate->sortstore_mk;
	}
	else
	{
		tuplesortstate = node->tuplesortstate->sortstore;
	}

	/*
	 * In a Window node, we might need to call ExecSort again even after
	 * the last tuple from the Sort has been retrieved. Since we might
	 * have eagerly freed the tuplesort state, it could be NULL; simply
	 * return NULL in this case.
	 */
	if (node->sort_Done &&
		((gp_enable_mk_sort && tuplesortstate_mk == NULL) ||
		 (!gp_enable_mk_sort && tuplesortstate == NULL)))
	{
		return NULL;
	}

	plannode = (Sort *) node->ss.ps.plan;


	/*
	 * If called for the first time, initialize tuplesort_state
	 */

	if (!node->sort_Done)
	{
		SO1_printf("ExecSort: %s\n",
				   "sorting subplan");

		if (gp_workfile_caching)
		{
			/* Look for cached workfile set. Mark here if found */
			work_set = workfile_mgr_find_set(&node->ss.ps);
			if (work_set != NULL)
			{
				elog(gp_workfile_caching_loglevel, "Sort found matching cached workfile set");
				node->cached_workfiles_found = true;
			}
		}

		/*
		 * Want to scan subplan in the forward direction while creating the
		 * sorted data.
		 */
		estate->es_direction = ForwardScanDirection;

		/*
		 * Initialize tuplesort module.
		 */
		SO1_printf("ExecSort: %s\n",
				   "calling tuplesort_begin");

		outerNode = outerPlanState(node);
		tupDesc = ExecGetResultType(outerNode);

		if(plannode->share_type == SHARE_SORT_XSLICE)
		{
			char rwfile_prefix[100];
			if(plannode->driver_slice != currentSliceId)
			{
				elog(LOG, "Sort exec on CrossSlice, current slice %d", currentSliceId);
				return NULL;
			}

			shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), plannode->share_id);
			elog(LOG, "Sort node create shareinput rwfile %s", rwfile_prefix);

			if(gp_enable_mk_sort)
				tuplesortstate_mk = tuplesort_begin_heap_file_readerwriter_mk(
					& node->ss,
					rwfile_prefix, true,
					tupDesc,
					plannode->numCols,
					plannode->sortOperators,
					plannode->sortColIdx,
					PlanStateOperatorMemKB((PlanState *) node),
					true
					); 
			else
				tuplesortstate = tuplesort_begin_heap_file_readerwriter(
					rwfile_prefix, true,
					tupDesc,
					plannode->numCols,
					plannode->sortOperators,
					plannode->sortColIdx,
					PlanStateOperatorMemKB((PlanState *) node),
					true
					); 
		}
		else
		{
			if(gp_enable_mk_sort)
				tuplesortstate_mk = tuplesort_begin_heap_mk(& node->ss,
						tupDesc,
						plannode->numCols,
						plannode->sortOperators,
						plannode->sortColIdx,
						PlanStateOperatorMemKB((PlanState *) node),
						node->randomAccess);
			else
				tuplesortstate = tuplesort_begin_heap(tupDesc,
						plannode->numCols,
						plannode->sortOperators,
						plannode->sortColIdx,
						PlanStateOperatorMemKB((PlanState *) node),
						node->randomAccess);
		}

		if(gp_enable_mk_sort)
		{
			node->tuplesortstate->sortstore_mk = tuplesortstate_mk;
		}
		else
		{
			node->tuplesortstate->sortstore = tuplesortstate;
		}

		/* CDB */
		{
			ExprContext *econtext = node->ss.ps.ps_ExprContext;
			bool 		isNull;
			int64 		limit = 0;
			int64 		offset = 0;
			int 		unique = 0;
			int 		sort_flags = gp_sort_flags; /* get the guc */
			int         maxdistinct = gp_sort_max_distinct; /* get the guc */

			if (node->limitCount)
			{
				limit =
						DatumGetInt64(
								ExecEvalExprSwitchContext(node->limitCount,
														  econtext,
														  &isNull,
														  NULL));
				/* Interpret NULL limit as no limit */
				if (isNull)
					limit = 0;
				else if (limit < 0)
					limit = 0;

			}
			if (node->limitOffset)
			{
				offset =
						DatumGetInt64(
								ExecEvalExprSwitchContext(node->limitOffset,
														  econtext,
														  &isNull,
														  NULL));
				/* Interpret NULL offset as no offset */
				if (isNull)
					offset = 0;
				else if (offset < 0)
					offset = 0;

			}

			if (node->noduplicates)
				unique = 1;
			
			if(gp_enable_mk_sort)
				cdb_tuplesort_init_mk(tuplesortstate_mk, offset, limit, unique, sort_flags, maxdistinct);
			else
				cdb_tuplesort_init(tuplesortstate, offset, limit, unique, sort_flags, maxdistinct);
		}

		/* If EXPLAIN ANALYZE, share our Instrumentation object with sort. */
		if(gp_enable_mk_sort)
		{
			if (node->ss.ps.instrument)
				tuplesort_set_instrument_mk(tuplesortstate_mk,
						node->ss.ps.instrument,
						node->ss.ps.cdbexplainbuf);

			tuplesort_set_gpmon_mk(tuplesortstate_mk, &node->ss.ps.gpmon_pkt, 
					&node->ss.ps.gpmon_plan_tick);
		}
		else
		{
			if (node->ss.ps.instrument)
				tuplesort_set_instrument(tuplesortstate,
						node->ss.ps.instrument,
						node->ss.ps.cdbexplainbuf);

			tuplesort_set_gpmon(tuplesortstate, &node->ss.ps.gpmon_pkt, 
					&node->ss.ps.gpmon_plan_tick);
		}


	}

	/*
	 * Before reading any tuples from below, check if we can re-use
	 * existing spill files.
	 * Only mk_sort supports spill file caching.
	 */
	if (!node->sort_Done && gp_enable_mk_sort && gp_workfile_caching)
	{
		Assert(tuplesortstate_mk != NULL);

		if (node->cached_workfiles_found && !node->cached_workfiles_loaded)
		{
			Assert(work_set != NULL);
			elog(gp_workfile_caching_loglevel, "nodeSort: loading cached workfile metadata");

			tuplesort_set_spillfile_set_mk(tuplesortstate_mk, work_set);
			tuplesort_read_spill_metadata_mk(tuplesortstate_mk);
			node->cached_workfiles_loaded = true;

			if (node->ss.ps.instrument)
			{
				node->ss.ps.instrument->workfileReused = true;
			}

			/* Loaded sorted data from cached workfile, therefore
			 * no need to sort anymore!
			 */
			node->sort_Done = true;

			elog(gp_workfile_caching_loglevel, "Sort reusing cached workfiles, initiating Squelch walker");
			ExecSquelchNode(outerNode);
		}
	}

	/*
	 * If first time through and no cached workfiles can be used,
	 * read all tuples from outer plan and pass them to
	 * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
	 */
	if (!node->sort_Done)
	{

		Assert(outerNode != NULL);

		/*
		 * Scan the subplan and feed all the tuples to tuplesort.
		 */

		for (;;)
		{
			slot = ExecProcNode(outerNode);

			if (TupIsNull(slot))
			{
				break;
			}

			CheckSendPlanStateGpmonPkt(&node->ss.ps);
			if(gp_enable_mk_sort)
				tuplesort_puttupleslot_mk(tuplesortstate_mk, slot);
			else
				tuplesort_puttupleslot(tuplesortstate, slot);
		}

#ifdef FAULT_INJECTOR
		FaultInjector_InjectFaultIfSet(
				ExecSortBeforeSorting,
				DDLNotSpecified,
				"" /* databaseName */,
				"" /* tableName */
				);
#endif

		/*
		 * Complete the sort.
		 */
		if(gp_enable_mk_sort)
		{
			tuplesort_performsort_mk(tuplesortstate_mk);
		}
		else
		{
			tuplesort_performsort(tuplesortstate);
		}

		CheckSendPlanStateGpmonPkt(&node->ss.ps);
		/*
		 * restore to user specified direction
		 */
		estate->es_direction = dir;

		/*
		 * finally set the sorted flag to true
		 */
		node->sort_Done = true;
		SO1_printf("ExecSort: %s\n", "sorting done");

		/* For share input, we do not need to return any tuples. */
		if(plannode->share_type != SHARE_NOTSHARED) 
		{
			Assert(plannode->share_type == SHARE_SORT || plannode->share_type == SHARE_SORT_XSLICE);

			if(plannode->share_type == SHARE_SORT_XSLICE)
			{
				if(plannode->driver_slice == currentSliceId)
				{
					if(gp_enable_mk_sort)
						tuplesort_flush_mk(tuplesortstate_mk);
					else
						tuplesort_flush(tuplesortstate);

					node->share_lk_ctxt = shareinput_writer_notifyready(plannode->share_id, plannode->nsharer_xslice,
							estate->es_plannedstmt->planGen);
				}
			}

			return NULL;
		}

	} /* if (!node->sort_Done) */

	if (plannode->share_type != SHARE_NOTSHARED)
		return NULL;

	SO1_printf("ExecSort: %s\n",
			   "retrieving tuple from tuplesort");

	/*
	 * Get the first or next tuple from tuplesort. Returns NULL if no more
	 * tuples.
	 */
	slot = node->ss.ps.ps_ResultTupleSlot;
	if(gp_enable_mk_sort)
		(void) tuplesort_gettupleslot_mk(tuplesortstate_mk,
				ScanDirectionIsForward(dir),
				slot);
	else
		(void) tuplesort_gettupleslot(tuplesortstate,
				ScanDirectionIsForward(dir),
				slot);

	if (TupIsNull(slot) && !node->ss.ps.delayEagerFree)
	{
		ExecEagerFreeSort(node);
	}

	return slot;
}
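
Behind the gp_enable_mk_sort branching, the shape matches upstream ExecSort: on the first call, drain the child and sort everything; each later call returns the next sorted row. A minimal sketch of that shape using qsort in place of the tuplesort module (ToySort, subplan_next, and toy_sort_next are hypothetical names):

#include <stdio.h>
#include <stdlib.h>

#define NROWS 6

typedef struct ToySort
{
	int			rows[NROWS];
	int			nrows;
	int			pos;			/* read cursor into sorted rows */
	int			sort_Done;
} ToySort;

static int
subplan_next(void)				/* stand-in for ExecProcNode(outerNode) */
{
	static const int input[NROWS] = {42, 7, 19, 3, 88, 1};
	static int	call = 0;

	return call < NROWS ? input[call++] : -1;	/* -1 == end of stream */
}

static int
cmp_int(const void *a, const void *b)
{
	return *(const int *) a - *(const int *) b;
}

static int
toy_sort_next(ToySort *node)
{
	if (!node->sort_Done)
	{
		int			v;

		while ((v = subplan_next()) != -1)	/* feed all tuples to the sort */
			node->rows[node->nrows++] = v;
		qsort(node->rows, node->nrows, sizeof(int), cmp_int);
		node->sort_Done = 1;	/* like tuplesort_performsort */
	}
	return node->pos < node->nrows ? node->rows[node->pos++] : -1;
}

int
main(void)
{
	ToySort		node = {{0}, 0, 0, 0};
	int			v;

	while ((v = toy_sort_next(&node)) != -1)
		printf("%d\n", v);		/* prints 1 3 7 19 42 88 */
	return 0;
}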
Example #6
static TupleTableSlot *
execMotionSender(MotionState * node)
{
	/* SENDER LOGIC */
	TupleTableSlot *outerTupleSlot;
	PlanState  *outerNode;
	Motion	   *motion = (Motion *) node->ps.plan;
	bool		done = false;


#ifdef MEASURE_MOTION_TIME
	struct timeval time1;
	struct timeval time2;

	gettimeofday(&time1, NULL);
#endif

	AssertState(motion->motionType == MOTIONTYPE_HASH || 
			(motion->motionType == MOTIONTYPE_EXPLICIT && motion->segidColIdx > 0) || 
			(motion->motionType == MOTIONTYPE_FIXED && motion->numOutputSegs <= 1));
	Assert(node->ps.state->interconnect_context);

	while (!done)
	{
		/* grab TupleTableSlot from our child. */
		outerNode = outerPlanState(node);
		outerTupleSlot = ExecProcNode(outerNode);

#ifdef MEASURE_MOTION_TIME
		gettimeofday(&time2, NULL);

		node->otherTime.tv_sec += time2.tv_sec - time1.tv_sec;
		node->otherTime.tv_usec += time2.tv_usec - time1.tv_usec;

		while (node->otherTime.tv_usec < 0)
		{
			node->otherTime.tv_usec += 1000000;
			node->otherTime.tv_sec--;
		}

		while (node->otherTime.tv_usec >= 1000000)
		{
			node->otherTime.tv_usec -= 1000000;
			node->otherTime.tv_sec++;
		}
#endif
		if (done || TupIsNull(outerTupleSlot))
		{
			doSendEndOfStream(motion, node);
			done = true;
		}
		else
		{
			doSendTuple(motion, node, outerTupleSlot);
			/* doSendTuple() may have set node->stopRequested as a side-effect */

			Gpmon_M_Incr_Rows_Out(GpmonPktFromMotionState(node)); 
			setMotionStatsForGpmon(node);
			CheckSendPlanStateGpmonPkt(&node->ps);

			if (node->stopRequested)
			{
				elog(gp_workfile_caching_loglevel, "Motion initiating Squelch walker");
				/* propagate stop notification to our children */
				ExecSquelchNode(outerNode);
				done = true;
			}
		}
#ifdef MEASURE_MOTION_TIME
		gettimeofday(&time1, NULL);

		node->motionTime.tv_sec += time1.tv_sec - time2.tv_sec;
		node->motionTime.tv_usec += time1.tv_usec - time2.tv_usec;

		while (node->motionTime.tv_usec < 0)
		{
			node->motionTime.tv_usec += 1000000;
			node->motionTime.tv_sec--;
		}

		while (node->motionTime.tv_usec >= 1000000)
		{
			node->motionTime.tv_usec -= 1000000;
			node->motionTime.tv_sec++;
		}
#endif
	}

	Assert(node->stopRequested || node->numTuplesFromChild == node->numTuplesToAMS);

	/* nothing else to send out, so we return NULL up the tree. */
	return NULL;
}
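
The MEASURE_MOTION_TIME blocks accumulate struct timeval deltas and then renormalize tv_usec into [0, 1000000). That arithmetic can be factored into one helper; a sketch (timeval_accum is a hypothetical name, not a GPDB function):

#include <stdio.h>
#include <sys/time.h>

/*
 * Add (t2 - t1) into *acc and renormalize tv_usec into [0, 1000000),
 * the same arithmetic the MEASURE_MOTION_TIME blocks inline above.
 */
static void
timeval_accum(struct timeval *acc,
			  const struct timeval *t1, const struct timeval *t2)
{
	acc->tv_sec += t2->tv_sec - t1->tv_sec;
	acc->tv_usec += t2->tv_usec - t1->tv_usec;

	while (acc->tv_usec < 0)
	{
		acc->tv_usec += 1000000;
		acc->tv_sec--;
	}
	while (acc->tv_usec >= 1000000)
	{
		acc->tv_usec -= 1000000;
		acc->tv_sec++;
	}
}

int
main(void)
{
	struct timeval acc = {0, 0};
	struct timeval t1 = {10, 900000};
	struct timeval t2 = {11, 200000};	/* delta = 0.3 s */

	timeval_accum(&acc, &t1, &t2);
	printf("%ld.%06ld\n", (long) acc.tv_sec, (long) acc.tv_usec);	/* 0.300000 */
	return 0;
}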