Beispiel #1
0
/*
 * ExecEagerFreeSort
 *		Give back the tuplesort owned by a Sort node before ExecEnd runs.
 *
 * An intra-slice shared sort (SHARE_SORT) is left untouched for as long as
 * its ShareNodeEntry reports a positive reference count: ShareInputScan
 * nodes take a reference when they are initialized and drop it from their
 * own EagerFree or ExecEnd, and the eager-free walk itself has no knowledge
 * of those upper readers.
 */
void
ExecEagerFreeSort(SortState *node)
{
	Sort	   *plan = (Sort *) node->ss.ps.plan;
	EState	   *estate = node->ss.ps.state;

	/* A Sort plan is never expected to carry a Material share type. */
	Assert(SHARE_MATERIAL != plan->share_type && SHARE_MATERIAL_XSLICE != plan->share_type);

	if (SHARE_SORT == plan->share_type)
	{
		ShareNodeEntry *shareEntry = ExecGetShareNodeEntry(estate, plan->share_id, false);

		/* Readers may still show up: keep the sort result around. */
		if (shareEntry->refcount > 0)
			return;
	}

	/* Empty the scan slot, then let go of the pointer to the sort result. */
	ExecClearTuple(node->ss.ss_ScanTupleSlot);
	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);

	/* Neither flavour of sort store exists, so there is nothing to end. */
	if (NULL == node->tuplesortstate->sortstore &&
		NULL == node->tuplesortstate->sortstore_mk)
		return;

	/*
	 * A cross-slice producer that holds a share lock context waits for its
	 * consumers to finish before the store goes away.  (Historical note kept
	 * from the original author: Materialize performs this wait at End time
	 * instead, because eager free might be too early for it.)
	 */
	if (plan->share_type == SHARE_SORT_XSLICE && NULL != node->share_lk_ctxt)
		shareinput_writer_waitdone(node->share_lk_ctxt, plan->share_id, plan->nsharer_xslice);

	/* The gp_enable_mk_sort setting decides which store gets ended. */
	if (!gp_enable_mk_sort)
	{
		tuplesort_end(node->tuplesortstate->sortstore);
		node->tuplesortstate->sortstore = NULL;
	}
	else
	{
		tuplesort_end_mk(node->tuplesortstate->sortstore_mk);
		node->tuplesortstate->sortstore_mk = NULL;
	}

	ExecSortResetWorkfileState(node);
}
/*
 * ExecEagerFreeShareInputScan
 *		Eagerly drop what a ShareInputScan node holds on the shared store.
 *
 * For intra-slice sharing (SHARE_MATERIAL / SHARE_SORT) this also returns
 * the reference this node holds on the ShareNodeEntry.  The underlying
 * Material/Sort node looks at that count and will not free its contents
 * while a reader still needs them; once the count is given back the
 * underlying node is free to release.  Cross-slice share types are not
 * counted: such a scan has its own pointer to the buffer, and a reference
 * cannot be communicated across a Motion anyway.
 */
void
ExecEagerFreeShareInputScan(ShareInputScanState *node)
{
	ShareInputScan *sisc = (ShareInputScan *) node->ss.ps.plan;

	/*
	 * The store itself is not ended here; the node underneath the share is
	 * responsible for its resources.
	 *
	 * XXX open question carried over from the original code: should the
	 * tuplestore state and position be pfree'd as well?  nodeMaterial.c
	 * reportedly does not do so either.
	 */
	if (sisc->share_type == SHARE_MATERIAL || sisc->share_type == SHARE_MATERIAL_XSLICE)
	{
		if (node->ts_pos != NULL)
			ntuplestore_destroy_accessor((NTupleStoreAccessor *) node->ts_pos);

		if (node->ts_markpos != NULL)
			pfree(node->ts_markpos);

		/*
		 * A store that reports itself as the reader end of a reader/writer
		 * pair (the cross-slice case) is ours to destroy.
		 */
		if (NULL != node->ts_state &&
			NULL != node->ts_state->matstore &&
			ntuplestore_is_readerwriter_reader(node->ts_state->matstore))
		{
			ntuplestore_destroy(node->ts_state->matstore);
		}
	}

	/*
	 * Forget our copies of the pointers.  Other consumers may keep using the
	 * tuplestore; this node simply no longer refers to it.
	 */
	node->ts_markpos = NULL;
	node->ts_pos = NULL;
	node->ts_state = NULL;

	/*
	 * This function may run more than once for the same node, so the
	 * reference is handed back only on the first call.
	 */
	if (!node->freed)
	{
		switch (sisc->share_type)
		{
			case SHARE_MATERIAL:
			case SHARE_SORT:
				{
					/*
					 * Intra-slice only.  A single pass over the tree is
					 * enough because the ShareInputScan is expected to be
					 * visited before the Material/Sort node beneath it.
					 */
					EState	   *estate = node->ss.ps.state;
					ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, sisc->share_id, false);

					Assert(snEntry && snEntry->refcount > 0);
					snEntry->refcount--;
				}
				break;

			default:
				break;
		}
	}

	node->freed = true;
}
Beispiel #3
0
/*
 * ExecEagerFreeMaterial
 *		Release a Material node's tuplestore before ExecEnd, when that is safe.
 *
 * Nothing is released in three situations: an intra-slice share
 * (SHARE_MATERIAL) still has readers, no tuplestore has been created, or
 * this node is a cross-slice producer.
 */
void
ExecEagerFreeMaterial(MaterialState *node)
{
	Material   *ma = (Material *) node->ss.ps.plan;

	/*
	 * ShareInputScan nodes above us are invisible to the eager-free walk.
	 * They take a reference on the share entry when they are initialized and
	 * return it from their own EagerFree or ExecEnd; while any reference is
	 * outstanding the tuplestore has to stay.
	 */
	if (ma->share_type == SHARE_MATERIAL)
	{
		ShareNodeEntry *shareEntry = ExecGetShareNodeEntry(node->ss.ps.state, ma->share_id, false);

		if (shareEntry->refcount > 0)
			return;
	}

	/* No tuplestore, nothing to release. */
	if (NULL == node->ts_state->matstore)
		return;

	/*
	 * MPP-22682: a cross-slice producer must not tear its store down here.
	 * Doing so would wait for consumers that have not finished yet, which
	 * can deadlock; ExecEndMaterial is the safer place for it.
	 */
	if (ma->share_type == SHARE_MATERIAL_XSLICE && node->share_lk_ctxt)
		return;

	Assert(node->ts_pos);

	DestroyTupleStore(node);
}
Beispiel #4
0
/* ----------------------------------------------------------------
 *		ExecInitMaterial
 *
 *		Creates the run-time state (MaterialState) for a Material plan
 *		node and initializes its outer subtree.
 *
 *		node:	the Material plan node
 *		estate:	executor state the new node is attached to
 *		eflags:	EXEC_FLAG_* bits from the caller; a filtered copy is
 *				passed down to the child node
 *
 *		Returns the newly created MaterialState.
 * ----------------------------------------------------------------
 */
MaterialState *
ExecInitMaterial(Material *node, EState *estate, int eflags)
{
	MaterialState *matstate;
	Plan	   *outerPlan;

	/*
	 * create state structure
	 */
	matstate = makeNode(MaterialState);
	matstate->ss.ps.plan = (Plan *) node;
	matstate->ss.ps.state = estate;

	/*
	 * We must have random access to the subplan output to do backward scan or
	 * mark/restore.  We also prefer to materialize the subplan output if we
	 * might be called on to rewind and replay it many times. However, if none
	 * of these cases apply, we can skip storing the data.
	 *
	 * The plan's cdb_strict flag requests random access regardless of eflags.
	 */
	matstate->randomAccess = node->cdb_strict ||
							(eflags & (EXEC_FLAG_REWIND |
										EXEC_FLAG_BACKWARD |
										EXEC_FLAG_MARK)) != 0;

	/*
	 * Only the GenericTupStore container is allocated here; the accessor
	 * position and mark position start out NULL.
	 */
	matstate->eof_underlying = false;
	matstate->ts_state = palloc0(sizeof(GenericTupStore));
	matstate->ts_pos = NULL;
	matstate->ts_markpos = NULL;
	matstate->share_lk_ctxt = NULL;
	matstate->ts_destroyed = false;
	ExecMaterialResetWorkfileState(matstate);

	/*
	 * Miscellaneous initialization
	 *
	 * Materialization nodes don't need ExprContexts because they never call
	 * ExecQual or ExecProject.
	 */

	/*
	 * Not referenced inside this function; it matches the two slots set up
	 * just below (result slot and scan slot).
	 * NOTE(review): presumably consumed by slot-counting code elsewhere in
	 * the file -- confirm.
	 */
#define MATERIAL_NSLOTS 2

	/*
	 * tuple table initialization
	 *
	 * material nodes only return tuples from their materialized relation.
	 */
	ExecInitResultTupleSlot(estate, &matstate->ss.ps);
	matstate->ss.ss_ScanTupleSlot = ExecInitExtraTupleSlot(estate);

	/*
	 * If eflags contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or
	 * EXEC_FLAG_MARK, then this node is not eager free safe.
	 *
	 * This must be evaluated here, against the caller's eflags, because the
	 * flags are masked further down before being handed to the child.
	 */
	matstate->ss.ps.delayEagerFree =
		((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0);

	/*
	 * initialize child nodes
	 *
	 * We shield the child node from the need to support BACKWARD, or
	 * MARK/RESTORE.
	 */
	eflags &= ~(EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);

	/*
	 * If Materialize does not have any external parameters, then it
	 * can shield the child node from being rescanned as well, hence
	 * we can clear the EXEC_FLAG_REWIND as well. If there are parameters,
	 * don't clear the REWIND flag, as the child will be rewound.
	 *
	 * As coded, "no external parameters" means that either plan.allParam or
	 * plan.extParam is NULL.
	 */
	if (node->plan.allParam == NULL || node->plan.extParam == NULL)
	{
		eflags &= ~EXEC_FLAG_REWIND;
	}

	outerPlan = outerPlan(node);
	/*
	 * A very basic check to see if the optimizer requires the material to do a projection.
	 * Only the target list lengths of this node and of its child are compared.
	 * Ideally, this check would recursively compare all the target list expressions. However,
	 * such a check is tricky because of the varno mismatch (outer plan may have a varno that
	 * indexes into the range table, while the material may refer to the same relation as "outer" varno)
	 * [JIRA: MPP-25365]
	 */
	insist_log(list_length(node->plan.targetlist) == list_length(outerPlan->targetlist),
			"Material operator does not support projection");
	outerPlanState(matstate) = ExecInitNode(outerPlan, estate, eflags);

	/*
	 * If the child node of a Material is a Motion, then this Material node is
	 * not eager free safe.
	 */
	if (IsA(outerPlan((Plan *)node), Motion))
	{
		matstate->ss.ps.delayEagerFree = true;
	}

	/*
	 * initialize tuple type.  no need to initialize projection info because
	 * this node doesn't do projections.
	 */
	ExecAssignResultTypeFromTL(&matstate->ss.ps);
	ExecAssignScanTypeFromOuterPlan(&matstate->ss);
	matstate->ss.ps.ps_ProjInfo = NULL;

	/*
	 * If this Material is shared, record the plan node and this state in the
	 * ShareNodeEntry for its share_id (the entry is created on demand), so
	 * that code looking the entry up by share_id can find this node.
	 */
	if(node->share_type != SHARE_NOTSHARED) 
	{
		ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, node->share_id, true); 
		snEntry->sharePlan = (Node *) node;
		snEntry->shareState = (Node *) matstate;
	}

	initGpmonPktForMaterial((Plan *)node, &matstate->ss.ps.gpmon_pkt, estate);

	return matstate;
}
Beispiel #5
0
/* ----------------------------------------------------------------
 *		ExecInitSort
 *
 *		Creates the run-time state information for the sort node
 *		produced by the planner and initializes its outer subtree.
 *
 *		node:	the Sort plan node
 *		estate:	executor state the new node is attached to
 *		eflags:	EXEC_FLAG_* bits from the caller; a filtered copy is
 *				passed down to the child node
 *
 *		Returns the newly created SortState.
 * ----------------------------------------------------------------
 */
SortState *
ExecInitSort(Sort *node, EState *estate, int eflags)
{
	SortState  *sortstate;

	SO1_printf("ExecInitSort: %s\n",
			   "initializing sort node");

	/*
	 * create state structure
	 */
	sortstate = makeNode(SortState);
	sortstate->ss.ps.plan = (Plan *) node;
	sortstate->ss.ps.state = estate;

	/*
	 * We must have random access to the sort output to do backward scan or
	 * mark/restore.  We also prefer to materialize the sort output if we
	 * might be called on to rewind and replay it many times.
	 */
	sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
										 EXEC_FLAG_BACKWARD |
										 EXEC_FLAG_MARK)) != 0;

	/* If the sort is shared, we need random access */
	if(node->share_type != SHARE_NOTSHARED) 
		sortstate->randomAccess = true;

	/*
	 * Only the GenericTupStore container is allocated here; no sort has been
	 * performed yet (sort_Done is false).
	 */
	sortstate->sort_Done = false;
	sortstate->tuplesortstate = palloc0(sizeof(GenericTupStore));
	sortstate->share_lk_ctxt = NULL;
	ExecSortResetWorkfileState(sortstate);

	/* CDB */

	/*
	 * Unlike a plain sort node, this one does get an ExprContext: the LIMIT
	 * optimization needs one in which to evaluate the limit/offset
	 * parameters.
	 */
	ExecAssignExprContext(estate, &sortstate->ss.ps);

	/* CDB */ /* evaluate a limit as part of the sort */
	{
		/* pass node state to sort state */
		sortstate->limitOffset = ExecInitExpr((Expr *) node->limitOffset,
											  (PlanState *) sortstate);
		sortstate->limitCount = ExecInitExpr((Expr *) node->limitCount,
											 (PlanState *) sortstate);
		sortstate->noduplicates = node->noduplicates;
	}

	/*
	 * Miscellaneous initialization
	 *
	 * Sort nodes never call ExecQual or ExecProject; the ExprContext assigned
	 * above exists only for the limit/offset expressions.
	 */

	/*
	 * Not referenced inside this function; it matches the two slots set up
	 * just below (result slot and scan slot).
	 * NOTE(review): presumably consumed by slot-counting code elsewhere in
	 * the file -- confirm.
	 */
#define SORT_NSLOTS 2

	/*
	 * tuple table initialization
	 *
	 * sort nodes only return scan tuples from their sorted relation.
	 */
	ExecInitResultTupleSlot(estate, &sortstate->ss.ps);
	sortstate->ss.ss_ScanTupleSlot = ExecInitExtraTupleSlot(estate);

	/* 
	 * CDB: Offer extra info for EXPLAIN ANALYZE.
	 */
	if (estate->es_instrument)
	{
		/* Allocate string buffer. */
		sortstate->ss.ps.cdbexplainbuf = makeStringInfo();

		/* Request a callback at end of query. */
		sortstate->ss.ps.cdbexplainfun = ExecSortExplainEnd;
	}

	/*
	 * If eflags contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or
	 * EXEC_FLAG_MARK, then this node is not eager free safe.
	 *
	 * This must be evaluated here, against the caller's eflags, because the
	 * flags are masked further down before being handed to the child.
	 */
	sortstate->ss.ps.delayEagerFree =
		((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0);

	/*
	 * initialize child nodes
	 *
	 * We shield the child node from the need to support BACKWARD, or
	 * MARK/RESTORE.
	 */

	eflags &= ~(EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);

	/*
	 * If Sort does not have any external parameters, then it
	 * can shield the child node from being rescanned as well, hence
	 * we can clear the EXEC_FLAG_REWIND as well. If there are parameters,
	 * don't clear the REWIND flag, as the child will be rewound.
	 *
	 * As coded, "no external parameters" means that either plan.allParam or
	 * plan.extParam is NULL.
	 */

	if (node->plan.allParam == NULL || node->plan.extParam == NULL)
	{
		eflags &= ~EXEC_FLAG_REWIND;
	}

	outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * If the child node of a Sort is a Motion, then this Sort node is
	 * not eager free safe.
	 */
	if (IsA(outerPlan((Plan *)node), Motion))
	{
		sortstate->ss.ps.delayEagerFree = true;
	}

	/*
	 * initialize tuple type.  no need to initialize projection info because
	 * this node doesn't do projections.
	 */
	ExecAssignResultTypeFromTL(&sortstate->ss.ps);
	ExecAssignScanTypeFromOuterPlan(&sortstate->ss);
	sortstate->ss.ps.ps_ProjInfo = NULL;

	/*
	 * If this Sort is shared, record the plan node and this state in the
	 * ShareNodeEntry for its share_id (the entry is created on demand), so
	 * that code looking the entry up by share_id can find this node.
	 */
	if(node->share_type != SHARE_NOTSHARED)
	{
		ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, node->share_id, true);
		snEntry->sharePlan = (Node *)node;
		snEntry->shareState = (Node *)sortstate;
	}

	SO1_printf("ExecInitSort: %s\n",
			   "sort node initialized");

	initGpmonPktForSort((Plan *)node, &sortstate->ss.ps.gpmon_pkt, estate);

	return sortstate;
}
/*
 * init_tuplestore_state
 *    Initialize the tuplestore state for the Shared node if the state
 *    is not initialized.
 *
 * On entry node->ts_state must be NULL.  On return node->ts_state points at
 * a GenericTupStore holding a material store or one of the two sort stores,
 * and node->ts_pos holds a read position on it, rewound to the start.
 *
 * Intra-slice shares (SHARE_MATERIAL, SHARE_SORT) borrow the store owned by
 * the Material/Sort node registered in the ShareNodeEntry.  Cross-slice
 * shares (SHARE_MATERIAL_XSLICE, SHARE_SORT_XSLICE) allocate their own
 * GenericTupStore and open the store through the reader/writer API, using a
 * file-name prefix derived from the share id.
 */
static void
init_tuplestore_state(ShareInputScanState *node)
{
	Assert(node->ts_state == NULL);

	EState *estate = node->ss.ps.state;
	ShareInputScan *sisc = (ShareInputScan *)node->ss.ps.plan;
	ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, sisc->share_id, false);
	PlanState *snState = NULL;

	ShareType share_type = sisc->share_type;

	if(snEntry)
	{
		snState = (PlanState *) snEntry->shareState;
		if(snState)
		{
			/*
			 * Run the shared node once; its result is not used here.
			 * NOTE(review): presumably this makes the Material/Sort node
			 * build its store (the asserts below expect the store to exist
			 * afterwards) -- confirm.
			 */
			ExecProcNode(snState);
		}
		else
		{
			/* Only a cross-slice share may lack a local shared state. */
			Assert(share_type == SHARE_MATERIAL_XSLICE || share_type == SHARE_SORT_XSLICE);
		}
	}

	if(share_type == SHARE_MATERIAL_XSLICE)
	{
		char rwfile_prefix[100];
		shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), sisc->share_id);

		node->ts_state = palloc0(sizeof(GenericTupStore));

		node->ts_state->matstore = ntuplestore_create_readerwriter(rwfile_prefix, 0, false);
		node->ts_pos = (void *) ntuplestore_create_accessor(node->ts_state->matstore, false);
		ntuplestore_acc_seek_bof((NTupleStoreAccessor *)node->ts_pos);
	}
	else if(share_type == SHARE_MATERIAL)
	{
		/*
		 * An intra-slice share needs the local Material state.  The
		 * SHARE_SORT branch below asserts the same thing; without this check
		 * a missing state is only noticed through the dereference.
		 */
		Assert(snState != NULL);

		/* The materialstate->ts_state structure should have been initialized already, during init of material node */
		node->ts_state = ((MaterialState *)snState)->ts_state;
		Assert(NULL != node->ts_state->matstore);
		node->ts_pos = (void *) ntuplestore_create_accessor(node->ts_state->matstore, false);
		ntuplestore_acc_seek_bof((NTupleStoreAccessor *)node->ts_pos);
	}
	else if(share_type == SHARE_SORT_XSLICE)
	{
		char rwfile_prefix[100];
		shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix), sisc->share_id);
		node->ts_state = palloc0(sizeof(GenericTupStore));

		/* gp_enable_mk_sort selects which of the two sort stores is used. */
		if(gp_enable_mk_sort)
		{
			node->ts_state->sortstore_mk = tuplesort_begin_heap_file_readerwriter_mk(
				& node->ss,
				rwfile_prefix, false,
				NULL, 0, NULL, NULL, PlanStateOperatorMemKB((PlanState *) node), true);

			tuplesort_begin_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk **)(&node->ts_pos));
			tuplesort_rescan_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk *)node->ts_pos);
		}
		else
		{
			node->ts_state->sortstore = tuplesort_begin_heap_file_readerwriter(
				rwfile_prefix, false,
				NULL, 0, NULL, NULL, PlanStateOperatorMemKB((PlanState *) node), true);

			tuplesort_begin_pos(node->ts_state->sortstore, (TuplesortPos **)(&node->ts_pos));
			tuplesort_rescan_pos(node->ts_state->sortstore, (TuplesortPos *)node->ts_pos);
		}
	}
	else
	{
		/* The only remaining share type expected here is the intra-slice sort. */
		Assert(sisc->share_type == SHARE_SORT);
		Assert(snState != NULL);

		/* Borrow the Sort node's store; both sort flavours share this step. */
		node->ts_state = ((SortState *)snState)->tuplesortstate;

		if(gp_enable_mk_sort)
		{
			Assert(NULL != node->ts_state->sortstore_mk);
			tuplesort_begin_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk **)(&node->ts_pos));
			tuplesort_rescan_pos_mk(node->ts_state->sortstore_mk, (TuplesortPos_mk *)node->ts_pos);
		}
		else
		{
			Assert(NULL != node->ts_state->sortstore);
			tuplesort_begin_pos(node->ts_state->sortstore, (TuplesortPos **)(&node->ts_pos));
			tuplesort_rescan_pos(node->ts_state->sortstore, (TuplesortPos *)node->ts_pos);
		}
	}

	/* Whatever the path taken, a store must be attached by now. */
	Assert(NULL != node->ts_state);
	Assert(NULL != node->ts_state->matstore || NULL != node->ts_state->sortstore || NULL != node->ts_state->sortstore_mk);
}
/*  ------------------------------------------------------------------
 * 	ExecInitShareInputScan
 *
 * 	Builds the run-time state for a ShareInputScan plan node and
 * 	initializes the node below it.  No tuplestore is attached at this
 * 	point; ts_state, ts_pos and ts_markpos all start out NULL.
 *
 * 	For intra-slice sharing the reference count of the ShareNodeEntry
 * 	is incremented here.
 * ------------------------------------------------------------------
 */
ShareInputScanState *
ExecInitShareInputScan(ShareInputScan *node, EState *estate, int eflags)
{
	ShareInputScanState *scanstate;
	TupleDesc	scanDesc;
	bool		hasoid;

	Assert(innerPlan(node) == NULL);

	/* Allocate the state node and link it to its plan and executor state. */
	scanstate = makeNode(ShareInputScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;

	/* Nothing is attached to the shared store yet. */
	scanstate->ts_markpos = NULL;
	scanstate->ts_pos = NULL;
	scanstate->ts_state = NULL;

	scanstate->freed = false;
	scanstate->share_lk_ctxt = NULL;

	/*
	 * Initialize the child.  When the plan has no outer plan this does
	 * nothing, which is how the node underneath the share ends up being
	 * initialized only once.
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/* Target list is initialized; a qual is never expected on this node. */
	scanstate->ss.ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) scanstate);
	Assert(node->plan.qual == NULL);
	scanstate->ss.ps.qual = NULL;

	/* Expression context. */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	/* Tuple table: one result slot and one scan slot. */
	ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
	scanstate->ss.ss_ScanTupleSlot = ExecInitExtraTupleSlot(estate);

	/* Result type comes from the target list. */
	ExecAssignResultTypeFromTL(&scanstate->ss.ps);

	/*
	 * The scan type is built from the same target list; OIDs are included
	 * only when the context forces a decision and that decision is "yes".
	 */
	if (!ExecContextForcesOids(&scanstate->ss.ps, &hasoid))
		hasoid = false;

	scanDesc = ExecTypeFromTL(node->plan.targetlist, hasoid);
	ExecAssignScanType(&scanstate->ss, scanDesc);

	/* No projection is set up for this node. */
	scanstate->ss.ps.ps_ProjInfo = NULL;

	/*
	 * An intra-slice share takes a reference so that the underlying node is
	 * not freed before this scan is ready to be freed.  The entry is
	 * requested with the create flag set because it may not exist yet: the
	 * underlying node fills it in during its own initialization, which
	 * happens later.
	 */
	switch (node->share_type)
	{
		case SHARE_MATERIAL:
		case SHARE_SORT:
			{
				ShareNodeEntry *shareEntry = ExecGetShareNodeEntry(estate, node->share_id, true);

				shareEntry->refcount++;
			}
			break;

		default:
			break;
	}

	initGpmonPktForShareInputScan((Plan *)node, &scanstate->ss.ps.gpmon_pkt, estate);

	return scanstate;
}