/*
 * Compose and dispatch the MPPEXEC commands corresponding to a plan tree
 * within a complete parallel plan.  (A plan tree will correspond either
 * to an initPlan or to the main plan.)
 *
 * If cancelOnError is true, then any dispatching error, a cancellation
 * request from the client, or an error from any of the associated QEs,
 * may cause the unfinished portion of the plan to be abandoned or canceled;
 * and in the event this occurs before all gangs have been dispatched, this
 * function does not return, but waits for all QEs to stop and exits to
 * the caller's error catcher via ereport(ERROR,...).  Otherwise this
 * function returns normally and errors are not reported until later.
 *
 * If cancelOnError is false, the plan is to be dispatched as fully as
 * possible and the QEs allowed to proceed regardless of cancellation
 * requests, errors or connection failures from other QEs, etc.
 *
 * The CdbDispatchResults objects allocated for the plan are returned
 * in *pPrimaryResults.  The caller, after calling
 * CdbCheckDispatchResult(), can examine the CdbDispatchResults
 * objects, can keep them as long as needed, and ultimately must free
 * them with cdbdisp_destroyDispatcherState() prior to deallocation of
 * the caller's memory context.  Callers should use PG_TRY/PG_CATCH to
 * ensure proper cleanup.
 *
 * To wait for completion, check for errors, and clean up, it is
 * suggested that the caller use cdbdisp_finishCommand().
 *
 * Note that the slice tree dispatched is the one specified in the EState
 * of the argument QueryDesc as es_cur__slice.
 *
 * Note that the QueryDesc params must include PARAM_EXEC_REMOTE parameters
 * containing the values of any initplans required by the slice to be run.
 * (This is handled by calls to addRemoteExecParamsToParamList() from the
 * functions preprocess_initplans() and ExecutorRun().)
 *
 * Each QE receives its assignment as a message of type 'M' in PostgresMain().
 * The message is deserialized and processed by exec_mpp_query() in postgres.c.
 */
void
cdbdisp_dispatchPlan(struct QueryDesc *queryDesc,
					 bool planRequiresTxn,
					 bool cancelOnError,
					 struct CdbDispatcherState *ds)
{
	char	   *splan,
			   *sddesc,
			   *sparams;
	int			splan_len,
				splan_len_uncompressed,
				sddesc_len,
				sparams_len;
	SliceTable *sliceTbl;
	int			rootIdx;
	int			oldLocalSlice;
	PlannedStmt *stmt;
	bool		is_SRI;
	DispatchCommandQueryParms queryParms;
	CdbComponentDatabaseInfo *qdinfo;

	ds->primaryResults = NULL;
	ds->dispatchThreads = NULL;

	Assert(Gp_role == GP_ROLE_DISPATCH);
	Assert(queryDesc != NULL && queryDesc->estate != NULL);

	/*
	 * Later we'll need to operate with the slice table provided via the
	 * EState structure in the argument QueryDesc.  Cache this information
	 * locally and assert our expectations about it.
	 */
	sliceTbl = queryDesc->estate->es_sliceTable;
	rootIdx = RootSliceIndex(queryDesc->estate);

	Assert(sliceTbl != NULL);
	Assert(rootIdx == 0 ||
		   (rootIdx > sliceTbl->nMotions &&
			rootIdx <= sliceTbl->nMotions + sliceTbl->nInitPlans));

	/*
	 * Keep the old value so we can restore it.  We use this field as a
	 * parameter.
	 */
	oldLocalSlice = sliceTbl->localSlice;

	/*
	 * This function is called only for planned statements.
	 */
	stmt = queryDesc->plannedstmt;
	Assert(stmt);

	/*
	 * Let's evaluate STABLE functions now, so we get consistent values on
	 * the QEs.
	 *
	 * Also, if this is a single-row INSERT statement, let's evaluate
	 * nextval() and currval() now, so that we get the QD's values, and a
	 * consistent value for everyone.
	 */
	is_SRI = false;

	if (queryDesc->operation == CMD_INSERT)
	{
		Assert(stmt->commandType == CMD_INSERT);

		/*
		 * We might look for a constant input relation (instead of SRI), but
		 * I'm afraid that wouldn't scale.
		 */
		is_SRI = IsA(stmt->planTree, Result) &&
			stmt->planTree->lefttree == NULL;
	}

	if (!is_SRI)
		clear_relsize_cache();

	if (queryDesc->operation == CMD_INSERT ||
		queryDesc->operation == CMD_SELECT ||
		queryDesc->operation == CMD_UPDATE ||
		queryDesc->operation == CMD_DELETE)
	{
		MemoryContext oldContext;

		oldContext = CurrentMemoryContext;
		if (stmt->qdContext)
		{
			oldContext = MemoryContextSwitchTo(stmt->qdContext);
		}
		else
		{
			/*
			 * The memory context of the plan tree should not change.
			 */
			MemoryContext mc = GetMemoryChunkContext(stmt->planTree);

			oldContext = MemoryContextSwitchTo(mc);
		}

		stmt->planTree = (Plan *) exec_make_plan_constant(stmt, is_SRI);

		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * Cursor queries and bind/execute path queries don't run on the
	 * writer-gang QEs; but they require snapshot-synchronization to
	 * get started.
	 *
	 * initPlans, and other work (see the function pre-evaluation
	 * above) may advance the snapshot "segmateSync" value, so we're
	 * best off setting the shared-snapshot-ready value here.  This
	 * will dispatch to the writer gang and force it to set its
	 * snapshot; we'll then be able to serialize the same snapshot
	 * version (see qdSerializeDtxContextInfo() below).
	 */
	if (queryDesc->extended_query)
	{
		verify_shared_snapshot_ready();
	}

	/*
	 * Serialize the plan tree.  Note that we're called for a single
	 * slice tree (corresponding to an initPlan or the main plan), so the
	 * parameters are fixed and we can include them in the prefix.
	 */
	splan = serializeNode((Node *) queryDesc->plannedstmt,
						  &splan_len, &splan_len_uncompressed);

	uint64		plan_size_in_kb = ((uint64) splan_len_uncompressed) / (uint64) 1024;

	if (0 < gp_max_plan_size &&
		plan_size_in_kb > gp_max_plan_size)
	{
		ereport(ERROR,
				(errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
				 (errmsg("Query plan size limit exceeded, current size: "
						 UINT64_FORMAT "KB, max allowed size: %dKB",
						 plan_size_in_kb, gp_max_plan_size),
				  errhint("Size controlled by gp_max_plan_size"))));
	}

	Assert(splan != NULL && splan_len > 0 && splan_len_uncompressed > 0);

	if (queryDesc->params != NULL && queryDesc->params->numParams > 0)
	{
		ParamListInfoData *pli;
		ParamExternData *pxd;
		StringInfoData parambuf;
		Size		length;
		int			plioff;
		int32		iparam;

		/*
		 * Allocate a buffer for the params.
		 */
		initStringInfo(&parambuf);

		/*
		 * Copy the ParamListInfoData header and the ParamExternData array.
		 */
		pli = queryDesc->params;
		length = (char *) &pli->params[pli->numParams] - (char *) pli;
		plioff = parambuf.len;
		Assert(plioff == MAXALIGN(plioff));
		appendBinaryStringInfo(&parambuf, pli, length);

		/*
		 * Copy pass-by-reference param values.
		 */
		for (iparam = 0; iparam < queryDesc->params->numParams; iparam++)
		{
			int16		typlen;
			bool		typbyval;

			/*
			 * Recompute pli each time in case parambuf.data is repalloc'ed.
			 */
			pli = (ParamListInfoData *) (parambuf.data + plioff);
			pxd = &pli->params[iparam];

			if (pxd->ptype == InvalidOid)
				continue;

			/*
			 * Does pxd->value contain the value itself, or a pointer?
			 */
			get_typlenbyval(pxd->ptype, &typlen, &typbyval);
			if (!typbyval)
			{
				char	   *s = DatumGetPointer(pxd->value);

				if (pxd->isnull || !PointerIsValid(s))
				{
					pxd->isnull = true;
					pxd->value = 0;
				}
				else
				{
					length = datumGetSize(pxd->value, typbyval, typlen);

					/*
					 * We *must* set this before we append.  Appending may
					 * realloc, which would invalidate our pxd ptr.
					 * (Obviously we could append first if we recalculated
					 * pxd from the new base address.)
					 */
					pxd->value = Int32GetDatum(length);

					appendBinaryStringInfo(&parambuf, &iparam, sizeof(iparam));
					appendBinaryStringInfo(&parambuf, s, length);
				}
			}
		}
		sparams = parambuf.data;
		sparams_len = parambuf.len;
	}
	else
	{
		sparams = NULL;
		sparams_len = 0;
	}

	sddesc = serializeNode((Node *) queryDesc->ddesc, &sddesc_len,
						   NULL /* uncompressed_size */ );

	MemSet(&queryParms, 0, sizeof(queryParms));
	queryParms.strCommand = queryDesc->sourceText;
	queryParms.serializedQuerytree = NULL;
	queryParms.serializedQuerytreelen = 0;
	queryParms.serializedPlantree = splan;
	queryParms.serializedPlantreelen = splan_len;
	queryParms.serializedParams = sparams;
	queryParms.serializedParamslen = sparams_len;
	queryParms.serializedQueryDispatchDesc = sddesc;
	queryParms.serializedQueryDispatchDesclen = sddesc_len;
	queryParms.rootIdx = rootIdx;

	/*
	 * Sequence server info.
	 */
	qdinfo = &(getComponentDatabases()->entry_db_info[0]);
	Assert(qdinfo != NULL && qdinfo->hostip != NULL);
	queryParms.seqServerHost = pstrdup(qdinfo->hostip);
	queryParms.seqServerHostlen = strlen(qdinfo->hostip) + 1;
	queryParms.seqServerPort = seqServerCtl->seqServerPort;

	/*
	 * Serialize a version of our snapshot, and generate our transaction
	 * isolations.  We generally want plan-based dispatch to be in a global
	 * transaction.  The executor gets to decide if the special circumstances
	 * exist which allow us to dispatch without starting a global xact.
	 */
	queryParms.serializedDtxContextInfo =
		qdSerializeDtxContextInfo(&queryParms.serializedDtxContextInfolen,
								  true /* wantSnapshot */ ,
								  queryDesc->extended_query,
								  mppTxnOptions(planRequiresTxn),
								  "cdbdisp_dispatchPlan");

	cdbdisp_dispatchX(&queryParms, cancelOnError, sliceTbl, ds);

	sliceTbl->localSlice = oldLocalSlice;
}
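/*
 * Illustrative caller-side sketch, not code from this file: the comment at
 * the head of cdbdisp_dispatchPlan() prescribes wrapping dispatch in
 * PG_TRY/PG_CATCH and releasing the dispatcher state before the caller's
 * memory context goes away.  cdbdisp_destroyDispatcherState() and
 * cdbdisp_finishCommand() are referenced from that comment; their exact
 * signatures are assumed here, so the sketch is compiled out by default.
 */
#ifdef CDBDISP_USAGE_SKETCH
static void
dispatch_plan_sketch(struct QueryDesc *queryDesc, struct CdbDispatcherState *ds)
{
	PG_TRY();
	{
		/* Dispatch the slice tree rooted at es_cur__slice to the gangs. */
		cdbdisp_dispatchPlan(queryDesc,
							 true,	/* planRequiresTxn */
							 true,	/* cancelOnError */
							 ds);

		/*
		 * Wait for completion, surface any QE errors, and clean up; the
		 * header comment suggests cdbdisp_finishCommand() for this step.
		 */
	}
	PG_CATCH();
	{
		/* On error, make sure the dispatcher state is torn down (assumed signature). */
		cdbdisp_destroyDispatcherState(ds);
		PG_RE_THROW();
	}
	PG_END_TRY();
}
#endif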
/*
 * Function preprocess_initplans() is called from ExecutorRun running a
 * parallel plan on the QD.  The call happens prior to dispatch of the
 * main plan, and only if there are some initplans.
 *
 * Argument queryDesc is the one passed in to ExecutorRun.
 *
 * The function loops through the estate->es_param_exec_vals array, which
 * has plan->nParamExec elements.  Each element is a ParamExecData struct,
 * and the index of the element in the array is the paramid of the Param
 * node in the Plan that corresponds to the result of the subquery.
 *
 * The execPlan member points to a SubPlanState struct for the subquery.
 * The value and isnull members hold the result of executing the SubPlan.
 *
 * I think that the order of the elements in this array guarantees that for
 * a subplan X within a subplan Y, X will come before Y in the array.  If a
 * subplan returns multiple columns (like a MULTIEXPR_SUBLINK), each will be
 * a separate entry in the es_param_exec_vals array, but they will all have
 * the same value for execPlan.
 *
 * In order to evaluate a subplan, we call ExecSetParamPlan.  This is a
 * postgres function, but has been modified from its original form to
 * parallelize subplans.  Inside ExecSetParamPlan, the datum result(s) of
 * the subplan are stuffed into the value field of the ParamExecData
 * struct(s).  It finds the proper one based on the setParam list in the
 * SubPlan node.
 *
 * In order to handle SubPlans of SubPlans, we pass in the values of the
 * estate->es_param_exec_vals as ParamListInfo structs to the
 * ExecSetParamPlan call.  These are then serialized into the mppexec call
 * as parameters.  In this manner, the result of a SubPlan of a SubPlan is
 * available.
 */
void
preprocess_initplans(QueryDesc *queryDesc)
{
	ParamListInfo originalPli,
				augmentedPli;
	int			i;

	// Plan *plan = queryDesc->plantree;
	EState	   *estate = queryDesc->estate;
	int			originalRoot,
				originalSlice,
				rootIndex;

	if (queryDesc->plannedstmt->nCrossLevelParams == 0)
		return;

	originalPli = queryDesc->params;
	originalRoot = RootSliceIndex(queryDesc->estate);
	originalSlice = LocallyExecutingSliceIndex(queryDesc->estate);

	Assert(originalSlice == 0); /* Original slice being executed is slice 0 */

	/*
	 * Loop through the estate->es_param_exec_vals.  This array has an
	 * element for each PARAM_EXEC (internal) param, and a pointer to the
	 * SubPlanState to execute to evaluate it.  It seems that they are
	 * created in the proper order, i.e. if a subplan x has a subplan y,
	 * then y will come before x in the es_param_exec_vals array.
	 */
	for (i = 0; i < queryDesc->plannedstmt->nCrossLevelParams; i++)
	{
		ParamExecData *prm;
		SubPlanState *sps;

		prm = &estate->es_param_exec_vals[i];
		sps = (SubPlanState *) prm->execPlan;

		/*
		 * Append all the es_param_exec_vals datum values on to the external
		 * parameter list so they can be serialized in the mppexec call to
		 * the QEs.  Do this inside the loop since later initplans may depend
		 * on the results of earlier ones.
		 *
		 * TODO: Some of the work of addRemoteExecParamsToParamList could be
		 * factored out of the loop.
		 */
		augmentedPli = addRemoteExecParamsToParamList(queryDesc->plannedstmt,
													  originalPli,
													  estate->es_param_exec_vals);

		if (sps != NULL)
		{
			SubPlan    *subplan = (SubPlan *) sps->xprstate.expr;

			Assert(IsA(subplan, SubPlan) && subplan->qDispSliceId > 0);

			sps->planstate->plan->nParamExec = queryDesc->plannedstmt->nCrossLevelParams;
			sps->planstate->plan->nMotionNodes = queryDesc->plannedstmt->nMotionNodes;
			sps->planstate->plan->dispatch = DISPATCH_PARALLEL;

			/*
			 * Adjust for the slice to execute on the QD.
			 */
			rootIndex = subplan->qDispSliceId;
			queryDesc->estate->es_sliceTable->localSlice = rootIndex;

			/* Set our global sliceid variable for elog. */
			currentSliceId = rootIndex;

			/*
			 * This runs the SubPlan and puts the answer back into
			 * prm->value.
			 */
			queryDesc->params = augmentedPli;

			/*
			 * Use ExprContext to set the param.  If ExprContext is not
			 * initialized, create a new one here.  (See MPP-3511.)
			 */
			if (sps->planstate->ps_ExprContext == NULL)
				sps->planstate->ps_ExprContext = CreateExprContext(estate);

			/* MPP-12048: Set the right slice index before execution. */
			Assert((subplan->qDispSliceId > queryDesc->plannedstmt->nMotionNodes) &&
				   (subplan->qDispSliceId <= (queryDesc->plannedstmt->nMotionNodes +
											  queryDesc->plannedstmt->nInitPlans)));

			Assert(LocallyExecutingSliceIndex(sps->planstate->state) == subplan->qDispSliceId);

			//sps->planstate->state->es_cur_slice_idx = subplan->qDispSliceId;

			ExecSetParamPlan(sps, sps->planstate->ps_ExprContext, queryDesc);

			/*
			 * We dispatched, and have returned.  We may have used the
			 * interconnect, so let's bump the interconnect-id.
			 */
			queryDesc->estate->es_sliceTable->ic_instance_id = ++gp_interconnect_id;
		}

		queryDesc->params = originalPli;
		queryDesc->estate->es_sliceTable->localSlice = originalSlice;
		currentSliceId = originalSlice;

		pfree(augmentedPli);
	}
}
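/*
 * Illustrative sketch, not the actual ExecutorRun code: per the comments
 * above, the QD evaluates initplans first and then folds their results into
 * the external parameter list (as PARAM_EXEC_REMOTE params) before
 * dispatching the main plan.  The addRemoteExecParamsToParamList() call
 * mirrors the one made inside preprocess_initplans(); the true/true
 * arguments to cdbdisp_dispatchPlan() are placeholders, so the sketch is
 * compiled out by default.
 */
#ifdef CDBDISP_USAGE_SKETCH
static void
dispatch_main_plan_sketch(QueryDesc *queryDesc, struct CdbDispatcherState *ds)
{
	if (queryDesc->plannedstmt->nCrossLevelParams > 0)
	{
		/* Run each initplan on the QD; results land in es_param_exec_vals. */
		preprocess_initplans(queryDesc);

		/* Expose the initplan results to the QEs as extra external params. */
		queryDesc->params =
			addRemoteExecParamsToParamList(queryDesc->plannedstmt,
										   queryDesc->params,
										   queryDesc->estate->es_param_exec_vals);
	}

	/* Dispatch the main plan; the params ride along in the 'M' message. */
	cdbdisp_dispatchPlan(queryDesc, true, true, ds);
}
#endif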