Exemplo n.º 1
0
/* ----------------------------------------------------------------
 *		ExecMaterial
 *
 *		As long as we are at the end of the data collected in the tuplestore,
 *		we collect one new row from the subplan on each call, and stash it
 *		aside in the tuplestore before returning it.  The tuplestore is
 *		only read if we are asked to scan backwards, rescan, or mark/restore.
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* result tuple from subplan */
ExecMaterial(MaterialState *node)
{
	EState	   *estate;
	ScanDirection dir;
	bool		forward;

	NTupleStore *ts;
	NTupleStoreAccessor *tsa;

	bool		eof_tuplestore;
	TupleTableSlot *slot;
	Material *ma;
	
	/*
	 * get state info from node
	 */
	estate = node->ss.ps.state;
	dir = estate->es_direction;
	forward = ScanDirectionIsForward(dir);

	ts = node->ts_state->matstore;
	tsa = (NTupleStoreAccessor *) node->ts_pos;

	ma = (Material *) node->ss.ps.plan;
	Assert(IsA(ma, Material));

	/*
	 * If first time through, and we need a tuplestore, initialize it.
	 */
	if (ts == NULL && (ma->share_type != SHARE_NOTSHARED || node->randomAccess))
	{
		/* 
		 * For cross slice material, we only run ExecMaterial on DriverSlice 
		 */
		if(ma->share_type == SHARE_MATERIAL_XSLICE)
		{
			char rwfile_prefix[100];

			if(ma->driver_slice != currentSliceId)
			{
				elog(LOG, "Material Exec on CrossSlice, current slice %d", currentSliceId);
				return NULL;
			}
			
			shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), ma->share_id); 
			elog(LOG, "Material node creates shareinput rwfile %s", rwfile_prefix);

			ts = ntuplestore_create_readerwriter(rwfile_prefix, PlanStateOperatorMemKB((PlanState *)node) * 1024, true);
			tsa = ntuplestore_create_accessor(ts, true);
		}
		else
		{
			/* Non-shared Materialize node */
			bool isWriter = true;
			workfile_set *work_set = NULL;

			if (gp_workfile_caching)
			{
				work_set = workfile_mgr_find_set( &node->ss.ps);

				if (NULL != work_set)
				{
					/* Reusing cached workfiles. Tell subplan we won't be needing any tuples */
					elog(gp_workfile_caching_loglevel, "Materialize reusing cached workfiles, initiating Squelch walker");

					isWriter = false;
					ExecSquelchNode(outerPlanState(node));
					node->eof_underlying = true;
					node->cached_workfiles_found = true;

					if (node->ss.ps.instrument)
					{
						node->ss.ps.instrument->workfileReused = true;
					}
				}
			}

			if (NULL == work_set)
			{
				/*
				 * No work_set found, this is because:
				 *  a. workfile caching is enabled but we didn't find any reusable set
				 *  b. workfile caching is disabled
				 * Creating new empty workset
				 */
				Assert(!node->cached_workfiles_found);

				/* Don't try to cache when running under a ShareInputScan node */
				bool can_reuse = (ma->share_type == SHARE_NOTSHARED);

				work_set = workfile_mgr_create_set(BUFFILE, can_reuse, &node->ss.ps, NULL_SNAPSHOT);
				isWriter = true;
			}

			Assert(NULL != work_set);
			AssertEquivalent(node->cached_workfiles_found, !isWriter);

			ts = ntuplestore_create_workset(work_set, node->cached_workfiles_found,
					PlanStateOperatorMemKB((PlanState *) node) * 1024);
			tsa = ntuplestore_create_accessor(ts, isWriter);
		}
		
		Assert(ts && tsa);
		node->ts_state->matstore = ts;
		node->ts_pos = (void *) tsa;

        /* CDB: Offer extra info for EXPLAIN ANALYZE. */
        if (node->ss.ps.instrument)
        {
            /* Let the tuplestore share our Instrumentation object. */
			ntuplestore_setinstrument(ts, node->ss.ps.instrument);

            /* Request a callback at end of query. */
            node->ss.ps.cdbexplainfun = ExecMaterialExplainEnd;
        }

		/*
		 * MPP: If requested, fetch all rows from subplan and put them
		 * in the tuplestore.  This decouples a middle slice's receiving
		 * and sending Motion operators to neutralize a deadlock hazard.
		 * MPP TODO: Remove when a better solution is implemented.
		 *
		 * ShareInput: if the material node
		 * is used to share input, we will need to fetch all rows and put
		 * them in tuple store
		 */
		while (((Material *) node->ss.ps.plan)->cdb_strict
				|| ma->share_type != SHARE_NOTSHARED)
		{
			/*
			 * When reusing cached workfiles, we already have all the tuples,
			 * and we don't need to read anything from subplan.
			 */
			if (node->cached_workfiles_found)
			{
				break;
			}
			TupleTableSlot *outerslot = ExecProcNode(outerPlanState(node));

			if (TupIsNull(outerslot))
			{
				node->eof_underlying = true;

				if (ntuplestore_created_reusable_workfiles(ts))
				{
					ntuplestore_flush(ts);
					ntuplestore_mark_workset_complete(ts);
				}

				ntuplestore_acc_seek_bof(tsa);

				break;
			}
			Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN); 

			ntuplestore_acc_put_tupleslot(tsa, outerslot);
		}
	
		CheckSendPlanStateGpmonPkt(&node->ss.ps);

		if(forward)
			ntuplestore_acc_seek_bof(tsa);
		else
			ntuplestore_acc_seek_eof(tsa);

		/* for share input, material do not need to return any tuple */
		if(ma->share_type != SHARE_NOTSHARED)
		{
			Assert(ma->share_type == SHARE_MATERIAL || ma->share_type == SHARE_MATERIAL_XSLICE);
			/* 
			 * if the material is shared across slice, notify consumers that
			 * it is ready.
			 */
			if(ma->share_type == SHARE_MATERIAL_XSLICE) 
			{
				if (ma->driver_slice == currentSliceId)
				{
					ntuplestore_flush(ts);

					node->share_lk_ctxt = shareinput_writer_notifyready(ma->share_id, ma->nsharer_xslice,
							estate->es_plannedstmt->planGen);
				}
			}
			return NULL;
		}
	}

	if(ma->share_type != SHARE_NOTSHARED)
		return NULL;

	/*
	 * If we can fetch another tuple from the tuplestore, return it.
	 */
	slot = node->ss.ps.ps_ResultTupleSlot;

	if(forward)
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, 1);
	else
		eof_tuplestore = (tsa == NULL) || !ntuplestore_acc_advance(tsa, -1);

	if(tsa!=NULL && ntuplestore_acc_tell(tsa, NULL))
	{
		ntuplestore_acc_current_tupleslot(tsa, slot);
          	if (!TupIsNull(slot))
                {
          		Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node)); 
                        CheckSendPlanStateGpmonPkt(&node->ss.ps);
                }
		return slot;
	}

	/*
	 * If necessary, try to fetch another row from the subplan.
	 *
	 * Note: the eof_underlying state variable exists to short-circuit further
	 * subplan calls.  It's not optional, unfortunately, because some plan
	 * node types are not robust about being called again when they've already
	 * returned NULL.
	 * If reusing cached workfiles, there is no need to execute subplan at all.
	 */
	if (eof_tuplestore && !node->eof_underlying)
	{
		PlanState  *outerNode;
		TupleTableSlot *outerslot;

		Assert(!node->cached_workfiles_found && "we shouldn't get here when using cached workfiles");

		/*
		 * We can only get here with forward==true, so no need to worry about
		 * which direction the subplan will go.
		 */
		outerNode = outerPlanState(node);
		outerslot = ExecProcNode(outerNode);
		if (TupIsNull(outerslot))
		{
			node->eof_underlying = true;
			if (ntuplestore_created_reusable_workfiles(ts))
			{
				ntuplestore_flush(ts);
				ntuplestore_mark_workset_complete(ts);
			}

			if (!node->ss.ps.delayEagerFree)
			{
				ExecEagerFreeMaterial(node);
			}

			return NULL;
		}

		Gpmon_M_Incr(GpmonPktFromMaterialState(node), GPMON_QEXEC_M_ROWSIN); 

		if (tsa)
			ntuplestore_acc_put_tupleslot(tsa, outerslot);

		/*
		 * And return a copy of the tuple.	(XXX couldn't we just return the
		 * outerslot?)
		 */
          	Gpmon_M_Incr_Rows_Out(GpmonPktFromMaterialState(node)); 
                CheckSendPlanStateGpmonPkt(&node->ss.ps);
		return ExecCopySlot(slot, outerslot); 
	}


	if (!node->ss.ps.delayEagerFree)
	{
		ExecEagerFreeMaterial(node);
	}

	/*
	 * Nothing left ...
	 */
	return NULL;
}
/*
 * init_tuplestore_state
 *    Initialize the tuplestore state for the Shared node if the state
 *    is not initialized.
 */
static void
init_tuplestore_state(ShareInputScanState *node)
{
	Assert(node->ts_state == NULL);
	
	EState *estate = node->ss.ps.state;
	ShareInputScan *sisc = (ShareInputScan *)node->ss.ps.plan;
	ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, sisc->share_id, false);
	PlanState *snState = NULL;

	ShareType share_type = sisc->share_type;

	if(snEntry)
	{
		snState = (PlanState *) snEntry->shareState;
		if(snState)
		{
			ExecProcNode(snState);
		}
		
		else
		{
			Assert(share_type == SHARE_MATERIAL_XSLICE || share_type == SHARE_SORT_XSLICE);
		}
	}

	if(share_type == SHARE_MATERIAL_XSLICE)
	{
		char rwfile_prefix[100];
		shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), sisc->share_id);
	
		node->ts_state = palloc0(sizeof(GenericTupStore));

		node->ts_state->matstore = ntuplestore_create_readerwriter(rwfile_prefix, 0, false);
		node->ts_pos = (void *) ntuplestore_create_accessor(node->ts_state->matstore, false);
		ntuplestore_acc_seek_bof((NTupleStoreAccessor *)node->ts_pos);
	}
	else if(share_type == SHARE_MATERIAL)
	{
		/* The materialstate->ts_state structure should have been initialized already, during init of material node */
		node->ts_state = ((MaterialState *)snState)->ts_state;
		Assert(NULL != node->ts_state->matstore);
		node->ts_pos = (void *) ntuplestore_create_accessor(node->ts_state->matstore, false);
		ntuplestore_acc_seek_bof((NTupleStoreAccessor *)node->ts_pos);
	}
	else if(share_type == SHARE_SORT_XSLICE)
	{
		char rwfile_prefix[100];
		shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), sisc->share_id);
		node->ts_state = palloc0(sizeof(GenericTupStore));

		if(gp_enable_mk_sort)
		{
			node->ts_state->sortstore_mk = tuplesort_begin_heap_file_readerwriter_mk(
				& node->ss,
				rwfile_prefix, false,
				NULL, 0, NULL, NULL, PlanStateOperatorMemKB((PlanState *) node), true);

			tuplesort_begin_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk **)(&node->ts_pos));
			tuplesort_rescan_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk *)node->ts_pos);
		}
		else
		{
			node->ts_state->sortstore = tuplesort_begin_heap_file_readerwriter(
				rwfile_prefix, false,
				NULL, 0, NULL, NULL, PlanStateOperatorMemKB((PlanState *) node), true);

			tuplesort_begin_pos(node->ts_state->sortstore, (TuplesortPos **)(&node->ts_pos));
			tuplesort_rescan_pos(node->ts_state->sortstore, (TuplesortPos *)node->ts_pos);
		}
	}
	else 
	{
		Assert(sisc->share_type == SHARE_SORT);
		Assert(snState != NULL);

		if(gp_enable_mk_sort)
		{
			node->ts_state = ((SortState *)snState)->tuplesortstate;
			Assert(NULL != node->ts_state->sortstore_mk);
			tuplesort_begin_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk **)(&node->ts_pos));
			tuplesort_rescan_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk *)node->ts_pos);
		}
		else
		{
			node->ts_state = ((SortState *)snState)->tuplesortstate;
			Assert(NULL != node->ts_state->sortstore);
			tuplesort_begin_pos(node->ts_state->sortstore, (TuplesortPos **)(&node->ts_pos));
			tuplesort_rescan_pos(node->ts_state->sortstore, (TuplesortPos *)node->ts_pos);
		}
	}

	Assert(NULL != node->ts_state);
	Assert(NULL != node->ts_state->matstore || NULL != node->ts_state->sortstore || NULL != node->ts_state->sortstore_mk);
}