Ejemplo n.º 1
0
/*
 * Compose and dispatch the MPPEXEC commands corresponding to a plan tree
 * within a complete parallel plan. (A plan tree will correspond either
 * to an initPlan or to the main plan.)
 *
 * Parameters:
 *   queryDesc       - the query to dispatch. Must be non-NULL with a
 *                     non-NULL estate (asserted). This function reads its
 *                     plannedstmt, params, ddesc, sourceText, operation
 *                     and extended_query fields.
 *   planRequiresTxn - passed to mppTxnOptions() to build the transaction
 *                     options that are serialized with the DTX context.
 *   cancelOnError   - forwarded unchanged to cdbdisp_dispatchX(); the
 *                     semantics described below are implemented there,
 *                     not in this function.
 *   ds              - dispatcher state. ds->primaryResults and
 *                     ds->dispatchThreads are reset to NULL on entry.
 *
 * If cancelOnError is true, then any dispatching error, a cancellation
 * request from the client, or an error from any of the associated QEs,
 * may cause the unfinished portion of the plan to be abandoned or canceled;
 * and in the event this occurs before all gangs have been dispatched, this
 * function does not return, but waits for all QEs to stop and exits to
 * the caller's error catcher via ereport(ERROR,...). Otherwise this
 * function returns normally and errors are not reported until later.
 *
 * If cancelOnError is false, the plan is to be dispatched as fully as
 * possible and the QEs allowed to proceed regardless of cancellation
 * requests, errors or connection failures from other QEs, etc.
 *
 * The CdbDispatchResults objects allocated for the plan are made available
 * through the dispatcher state argument (ds->primaryResults; presumably
 * filled in by cdbdisp_dispatchX() -- confirm). NOTE(review): earlier
 * wording said "returned in *pPrimaryResults", but this function has no
 * such parameter. The caller, after calling CdbCheckDispatchResult(), can
 * examine the CdbDispatchResults objects, can keep them as long as needed,
 * and ultimately must free them with cdbdisp_destroyDispatcherState() prior
 * to deallocation of the caller's memory context. Callers should use
 * PG_TRY/PG_CATCH to ensure proper cleanup.
 *
 * To wait for completion, check for errors, and clean up, it is
 * suggested that the caller use cdbdisp_finishCommand().
 *
 * Note that the slice tree dispatched is the one whose root index is
 * RootSliceIndex(queryDesc->estate), within the slice table found in that
 * EState (es_sliceTable).
 *
 * Note that the QueryDesc params must include PARAM_EXEC_REMOTE parameters
 * containing the values of any initplans required by the slice to be run.
 * (This is handled by calls to addRemoteExecParamsToParamList() from the
 * functions preprocess_initplans() and ExecutorRun().)
 *
 * Side effects visible in this function:
 *   - For INSERT/SELECT/UPDATE/DELETE, stmt->planTree is replaced by the
 *     result of exec_make_plan_constant().
 *   - The relation-size cache is cleared unless the statement is a
 *     single-row INSERT.
 *   - An ERROR is raised if the uncompressed serialized plan exceeds
 *     gp_max_plan_size (in KB; a value <= 0 disables the check).
 *   - sliceTbl->localSlice is saved on entry and restored on normal return.
 *
 * Each QE receives its assignment as a message of type 'M' in PostgresMain().
 * The message is deserialized and processed by exec_mpp_query() in postgres.c.
 */
void
cdbdisp_dispatchPlan(struct QueryDesc *queryDesc,
					 bool planRequiresTxn,
					 bool cancelOnError, struct CdbDispatcherState *ds)
{
	/* Serialized plan, query dispatch descriptor, and parameter buffers. */
	char *splan,
		 *sddesc,
		 *sparams;

	/* Byte lengths of the buffers above (plus the plan's uncompressed size). */
	int	splan_len,
		splan_len_uncompressed,
		sddesc_len,
		sparams_len;

	SliceTable *sliceTbl;
	int rootIdx;
	int oldLocalSlice;
	PlannedStmt *stmt;
	bool is_SRI;	/* true for a single-row INSERT; see below */

	DispatchCommandQueryParms queryParms;
	CdbComponentDatabaseInfo *qdinfo;

	/* Start from a clean dispatcher state. */
	ds->primaryResults = NULL;
	ds->dispatchThreads = NULL;

	Assert(Gp_role == GP_ROLE_DISPATCH);
	Assert(queryDesc != NULL && queryDesc->estate != NULL);

	/*
	 * Later we'll need to operate with the slice table provided via the
	 * EState structure in the argument QueryDesc.  Cache this information
	 * locally and assert our expectations about it.
	 */
	sliceTbl = queryDesc->estate->es_sliceTable;
	rootIdx = RootSliceIndex(queryDesc->estate);

	/*
	 * The root is either slice 0 or an index in the range
	 * (nMotions, nMotions + nInitPlans].
	 */
	Assert(sliceTbl != NULL);
	Assert(rootIdx == 0 ||
		   (rootIdx > sliceTbl->nMotions
			&& rootIdx <= sliceTbl->nMotions + sliceTbl->nInitPlans));

	/*
	 * Keep old value so we can restore it. We use this field as a parameter.
	 * It is put back at the very end of this function (normal return only).
	 */
	oldLocalSlice = sliceTbl->localSlice;

	/*
	 * This function is called only for planned statements.
	 */
	stmt = queryDesc->plannedstmt;
	Assert(stmt);

	/*
	 * Let's evaluate STABLE functions now, so we get consistent values on the QEs
	 *
	 * Also, if this is a single-row INSERT statement, let's evaluate
	 * nextval() and currval() now, so that we get the QD's values, and a
	 * consistent value for everyone
	 *
	 * "Single-row INSERT" (SRI) is detected below as an INSERT whose top
	 * plan node is a Result with no left subtree.
	 */
	is_SRI = false;

	if (queryDesc->operation == CMD_INSERT)
	{
		Assert(stmt->commandType == CMD_INSERT);

		/*
		 * We might look for constant input relation (instead of SRI), but I'm
		 * afraid that wouldn't scale.
		 */
		is_SRI = IsA(stmt->planTree, Result)
			&& stmt->planTree->lefttree == NULL;
	}

	/* The relation-size cache is left alone for single-row inserts. */
	if (!is_SRI)
		clear_relsize_cache();

	/*
	 * Pre-evaluate constants in the plan for DML/SELECT. The call runs with
	 * the current memory context switched to stmt->qdContext if set,
	 * otherwise to the context that owns the existing plan tree, and the
	 * previous context is restored afterwards.
	 */
	if (queryDesc->operation == CMD_INSERT ||
		queryDesc->operation == CMD_SELECT ||
		queryDesc->operation == CMD_UPDATE ||
		queryDesc->operation == CMD_DELETE)
	{

		MemoryContext oldContext;

		/* This initial value is overwritten by both branches below. */
		oldContext = CurrentMemoryContext;
		if (stmt->qdContext)
		{
			oldContext = MemoryContextSwitchTo(stmt->qdContext);
		}
		else
		/*
		 * memory context of plan tree should not change
		 */
		{
			MemoryContext mc = GetMemoryChunkContext(stmt->planTree);

			oldContext = MemoryContextSwitchTo(mc);
		}

		stmt->planTree = (Plan *) exec_make_plan_constant(stmt, is_SRI);

		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * Cursor queries and bind/execute path queries don't run on the
	 * writer-gang QEs; but they require snapshot-synchronization to
	 * get started.
	 *
	 * initPlans, and other work (see the function pre-evaluation
	 * above) may advance the snapshot "segmateSync" value, so we're
	 * best off setting the shared-snapshot-ready value here. This
	 * will dispatch to the writer gang and force it to set its
	 * snapshot; we'll then be able to serialize the same snapshot
	 * version (see qdSerializeDtxContextInfo() below).
	 */
	if (queryDesc->extended_query)
	{
		verify_shared_snapshot_ready();
	}

	/*
	 * Serialize the plan tree. Note that we're called for a single
	 * slice tree (corresponding to an initPlan or the main plan), so the
	 * parameters are fixed and we can include them in the prefix.
	 */
	splan = serializeNode((Node *) queryDesc->plannedstmt,
						  &splan_len, &splan_len_uncompressed);

	/*
	 * Enforce gp_max_plan_size against the uncompressed size, in whole KB
	 * (integer division). A limit <= 0 means "no limit".
	 */
	uint64 plan_size_in_kb = ((uint64) splan_len_uncompressed) / (uint64) 1024;
	if (0 < gp_max_plan_size && plan_size_in_kb > gp_max_plan_size)
	{
		ereport(ERROR,
				(errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
				 (errmsg("Query plan size limit exceeded, current size: "
				   UINT64_FORMAT "KB, max allowed size: %dKB",
				   plan_size_in_kb, gp_max_plan_size),
				  errhint("Size controlled by gp_max_plan_size"))));
	}

	Assert(splan != NULL && splan_len > 0 && splan_len_uncompressed > 0);

	/*
	 * Serialize the parameter list, if any, into a single buffer laid out
	 * as:
	 *
	 *   [ParamListInfoData header + ParamExternData array, copied verbatim]
	 *   then, for each non-null pass-by-reference parameter, in index order:
	 *   [int32 parameter index][value bytes]
	 *
	 * In the copied array, each such parameter's value slot is overwritten
	 * with the byte length of its value. All modifications below are made
	 * to the copy inside parambuf, not to queryDesc->params.
	 */
	if (queryDesc->params != NULL && queryDesc->params->numParams > 0)
	{
		ParamListInfoData *pli;
		ParamExternData *pxd;
		StringInfoData parambuf;
		Size length;
		int	plioff;		/* offset of the copied header within parambuf */
		int32 iparam;

		/*
		 * Allocate buffer for params
		 */
		initStringInfo(&parambuf);

		/*
		 * Copy ParamListInfoData header and ParamExternData array
		 */
		pli = queryDesc->params;
		/* Bytes from the start of the header to the end of the array. */
		length = (char *) &pli->params[pli->numParams] - (char *) pli;
		plioff = parambuf.len;
		Assert(plioff == MAXALIGN(plioff));
		appendBinaryStringInfo(&parambuf, pli, length);

		/*
		 * Copy pass-by-reference param values.
		 */
		for (iparam = 0; iparam < queryDesc->params->numParams; iparam++)
		{
			int16 typlen;
			bool typbyval;

			/*
			 * Recompute pli each time in case parambuf.data is repalloc'ed
			 */
			pli = (ParamListInfoData *) (parambuf.data + plioff);
			pxd = &pli->params[iparam];

			/* Entries without a type are left exactly as copied. */
			if (pxd->ptype == InvalidOid)
				continue;

			/*
			 * Does pxd->value contain the value itself, or a pointer?
			 * Pass-by-value parameters need no further work: the Datum was
			 * already copied with the array.
			 */
			get_typlenbyval(pxd->ptype, &typlen, &typbyval);
			if (!typbyval)
			{
				char *s = DatumGetPointer(pxd->value);

				/*
				 * A null parameter, or one with an invalid pointer, is
				 * normalized to isnull = true with a zero value and gets no
				 * trailing data.
				 */
				if (pxd->isnull || !PointerIsValid(s))
				{
					pxd->isnull = true;
					pxd->value = 0;
				}
				else
				{
					length = datumGetSize(pxd->value, typbyval, typlen);

					/*
					 * We *must* set this before we
					 * append. Appending may realloc, which will
					 * invalidate our pxd ptr. (obviously we could
					 * append first if we recalculate pxd from the new
					 * base address)
					 */
					pxd->value = Int32GetDatum(length);

					/* Trailing record: parameter index, then raw bytes. */
					appendBinaryStringInfo(&parambuf, &iparam, sizeof(iparam));
					appendBinaryStringInfo(&parambuf, s, length);
				}
			}
		}
		sparams = parambuf.data;
		sparams_len = parambuf.len;
	}
	else
	{
		sparams = NULL;
		sparams_len = 0;
	}

	/* Serialize the query dispatch descriptor; uncompressed size not needed. */
	sddesc = serializeNode((Node *) queryDesc->ddesc, &sddesc_len, NULL /*uncompressed_size */ );

	/*
	 * Assemble the dispatch parameters. This is plan-based dispatch, so no
	 * serialized query tree is sent.
	 */
	MemSet(&queryParms, 0, sizeof(queryParms));
	queryParms.strCommand = queryDesc->sourceText;
	queryParms.serializedQuerytree = NULL;
	queryParms.serializedQuerytreelen = 0;
	queryParms.serializedPlantree = splan;
	queryParms.serializedPlantreelen = splan_len;
	queryParms.serializedParams = sparams;
	queryParms.serializedParamslen = sparams_len;
	queryParms.serializedQueryDispatchDesc = sddesc;
	queryParms.serializedQueryDispatchDesclen = sddesc_len;
	queryParms.rootIdx = rootIdx;

	/*
	 * sequence server info: host is taken from the first entry-db component,
	 * port from seqServerCtl. The host length includes the terminating NUL.
	 */
	qdinfo = &(getComponentDatabases()->entry_db_info[0]);
	Assert(qdinfo != NULL && qdinfo->hostip != NULL);
	queryParms.seqServerHost = pstrdup(qdinfo->hostip);
	queryParms.seqServerHostlen = strlen(qdinfo->hostip) + 1;
	queryParms.seqServerPort = seqServerCtl->seqServerPort;

	/*
	 * Serialize a version of our snapshot.
	 */
	/*
	 * Generate our transaction isolation settings.  We generally want Plan
	 * based dispatch to be in a global transaction. The executor gets
	 * to decide if the special circumstances exist which allow us to
	 * dispatch without starting a global xact.
	 */
	queryParms.serializedDtxContextInfo =
		qdSerializeDtxContextInfo(&queryParms.serializedDtxContextInfolen,
								  true /* wantSnapshot */ ,
								  queryDesc->extended_query,
								  mppTxnOptions(planRequiresTxn),
								  "cdbdisp_dispatchPlan");

	/* Hand everything to the dispatcher proper. */
	cdbdisp_dispatchX(&queryParms, cancelOnError, sliceTbl, ds);

	/* Restore the value saved on entry. */
	sliceTbl->localSlice = oldLocalSlice;
}
Ejemplo n.º 2
0
/*
 * Function preprocess_initplans() is called from ExecutorRun running a
 * parallel plan on the QD.  The call happens prior to dispatch of the
 * main plan, and only if there are some initplans.
 *
 * Argument queryDesc is the one passed in to ExecutorRun.
 *
 * The function loops through the estate->es_param_exec_vals array, which
 * has plan->nParamExec elements.  Each element is a ParamExecData struct,
 * and the index of the element in the array is the paramid of the Param
 * node in the Plan that corresponds to the result of the subquery.
 *
 * The execPlan member points to a SubPlanState struct for the
 * subquery.  The value and isnull members hold the result
 * of executing the SubPlan.
 * I think that the order of the elements in this array guarantees
 * that for a subplan X within a subplan Y, X will come before Y in the array.
 * If a subplan returns multiple columns (like a MULTIEXPR_SUBLINK), each will be
 * a separate entry in the es_param_exec_vals array, but they will all have
 * the same value for execPlan.
 * In order to evaluate a subplan, we call ExecSetParamPlan.
 * This is a postgres function, but has been modified from its original form
 * to parallelize subplans. Inside ExecSetParamPlan, the
 * datum result(s) of the subplan are stuffed into the value field
 * of the ParamExecData struct(s).	It finds the proper one based on the
 * setParam list in the SubPlan node.
 * In order to handle SubPlans of SubPlans, we pass in the values of the
 * estate->es_param_exec_vals as ParamListInfo structs to the ExecSetParamPlan call.
 * These are then serialized into the mppexec all as parameters.  In this manner, the
 * result of a SubPlan of a SubPlan is available.
 */
void
preprocess_initplans(QueryDesc *queryDesc)
{
	ParamListInfo originalPli,
				augmentedPli;
	int			i;
//	Plan	   *plan = queryDesc->plantree;
	EState	   *estate = queryDesc->estate;
	int			originalRoot,
                originalSlice,
				rootIndex;

	if (queryDesc->plannedstmt->nCrossLevelParams == 0)
		return;

	originalPli = queryDesc->params;
	originalRoot = RootSliceIndex(queryDesc->estate);

    originalSlice = LocallyExecutingSliceIndex(queryDesc->estate);
	Assert(originalSlice == 0); /* Original slice being executed is slice 0 */

	/*
	 * Loop through the estate->es_param_exec_vals. This array has an element
	 * for each PARAM_EXEC (internal) param, and a pointer to the SubPlanState
	 * to execute to evaluate it. It seems that they are created in the proper
	 * order, i.e. if a subplan x has a sublan y, then y will come before x in
	 * the es_param_exec_vals array.
	 */
	for (i = 0; i < queryDesc->plannedstmt->nCrossLevelParams; i++)
	{
		ParamExecData *prm;
		SubPlanState *sps;

		prm = &estate->es_param_exec_vals[i];
		sps = (SubPlanState *) prm->execPlan;

		/*
		 * Append all the es_param_exec_vals datum values on to the external
		 * parameter list so they can be serialized in the mppexec call to the
		 * QEs.  Do this inside the loop since later initplans may depend on
		 * the results of earlier ones.
		 *
		 * TODO Some of the work of addRemoteExecParamsToParmList could be
		 *		factored out of the loop.
		 */
		augmentedPli = addRemoteExecParamsToParamList(queryDesc->plannedstmt,
													  originalPli,
													  estate->es_param_exec_vals);

		if (sps != NULL)
		{
            SubPlan    *subplan = (SubPlan *)sps->xprstate.expr;

            Assert(IsA(subplan, SubPlan) &&
                   subplan->qDispSliceId > 0);

			sps->planstate->plan->nParamExec = queryDesc->plannedstmt->nCrossLevelParams;
			sps->planstate->plan->nMotionNodes = queryDesc->plannedstmt->nMotionNodes;
			sps->planstate->plan->dispatch = DISPATCH_PARALLEL;

			/*
			 * Adjust for the slice to execute on the QD.
			 */
			rootIndex = subplan->qDispSliceId;
			queryDesc->estate->es_sliceTable->localSlice = rootIndex;

			/* set our global sliceid variable for elog. */
			currentSliceId = rootIndex;

			/*
			 * This runs the SubPlan and puts the answer back into prm->value.
			 */
			queryDesc->params = augmentedPli;

			/*
			 * Use ExprContext to set the param. If ExprContext is not initialized,
			 * create a new one here. (see MPP-3511)
			 */
			if (sps->planstate->ps_ExprContext == NULL)
				sps->planstate->ps_ExprContext = CreateExprContext(estate);
			
			/* MPP-12048: Set the right slice index before execution. */
			Assert( (subplan->qDispSliceId > queryDesc->plannedstmt->nMotionNodes)  &&
					(subplan->qDispSliceId <=
							(queryDesc->plannedstmt->nMotionNodes
							+ queryDesc->plannedstmt->nInitPlans) )   );

			Assert(LocallyExecutingSliceIndex(sps->planstate->state) == subplan->qDispSliceId);
		    //sps->planstate->state->es_cur_slice_idx = subplan->qDispSliceId;

			ExecSetParamPlan(sps, sps->planstate->ps_ExprContext, queryDesc);

			/*
			 * We dispatched, and have returned. We may have used the
			 * interconnect; so let's bump the interconnect-id.
			 */
			queryDesc->estate->es_sliceTable->ic_instance_id = ++gp_interconnect_id;
		}

		queryDesc->params = originalPli;
		queryDesc->estate->es_sliceTable->localSlice = originalSlice;
		currentSliceId = originalSlice;

		pfree(augmentedPli);
	}
}