void
ExecEagerFreeSort(SortState *node)
{
    Sort       *plan = (Sort *) node->ss.ps.plan;
    EState     *estate = node->ss.ps.state;

    /*
     * If we still have potential readers associated with this node, we
     * shouldn't free the tuplesort too early.  The eager-free message
     * doesn't know about upper ShareInputScan nodes, but those nodes bump
     * up the reference count during their initialization and decrement it
     * in either EagerFree or ExecEnd.
     */
    Assert(SHARE_MATERIAL != plan->share_type &&
           SHARE_MATERIAL_XSLICE != plan->share_type);

    if (SHARE_SORT == plan->share_type)
    {
        ShareNodeEntry *snEntry;

        snEntry = ExecGetShareNodeEntry(estate, plan->share_id, false);

        if (snEntry->refcount > 0)
            return;
    }

    /* clean out the tuple table */
    ExecClearTuple(node->ss.ss_ScanTupleSlot);

    /* must drop pointer to sort result tuple */
    ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);

    if (NULL != node->tuplesortstate->sortstore ||
        NULL != node->tuplesortstate->sortstore_mk)
    {
        Sort       *sort = (Sort *) node->ss.ps.plan;

        /*
         * If this is a producer for a ShareScan, wait for all consumers to
         * be done.
         *
         * XXX gcaragea: In Materialize, we moved this to End instead of
         * EagerFree, since EagerFree might be too early to do it.
         */
        if (sort->share_type == SHARE_SORT_XSLICE &&
            NULL != node->share_lk_ctxt)
        {
            shareinput_writer_waitdone(node->share_lk_ctxt, sort->share_id,
                                       sort->nsharer_xslice);
        }

        if (gp_enable_mk_sort)
        {
            tuplesort_end_mk(node->tuplesortstate->sortstore_mk);
            node->tuplesortstate->sortstore_mk = NULL;
        }
        else
        {
            tuplesort_end(node->tuplesortstate->sortstore);
            node->tuplesortstate->sortstore = NULL;
        }

        ExecSortResetWorkfileState(node);
    }
}
/*
 * During EagerFree, ShareInputScan decrements the reference count in
 * ShareNodeEntry when it is an intra-slice share node.  The reference
 * count tells the underlying Material/Sort node not to free too eagerly,
 * as this node still needs to read its tuples.  Once this node is freed,
 * the underlying node can free its content.
 *
 * We consider this reference count only in intra-slice cases, because
 * inter-slice share nodes have their own pointer to the buffer, and there
 * is no way to communicate this reference across Motions anyway.
 */
void
ExecEagerFreeShareInputScan(ShareInputScanState *node)
{
    /*
     * No need to call tuplestore end.  The underlying ShareInput will take
     * care of releasing the tuplestore resources.
     */

    /*
     * XXX Do we need to pfree the tuplestore_state and pos?
     * XXX nodeMaterial.c does not; need to find out why.
     */
    ShareInputScan *sisc = (ShareInputScan *) node->ss.ps.plan;

    if (sisc->share_type == SHARE_MATERIAL ||
        sisc->share_type == SHARE_MATERIAL_XSLICE)
    {
        if (node->ts_pos != NULL)
            ntuplestore_destroy_accessor((NTupleStoreAccessor *) node->ts_pos);

        if (node->ts_markpos != NULL)
            pfree(node->ts_markpos);

        if (NULL != node->ts_state && NULL != node->ts_state->matstore)
        {
            /*
             * If we are a cross-slice reader, we can safely destroy our
             * tuplestore.
             */
            if (ntuplestore_is_readerwriter_reader(node->ts_state->matstore))
                ntuplestore_destroy(node->ts_state->matstore);
        }
    }

    /*
     * Reset our copy of the pointer to the ts_state.  The tuplestore can
     * still be accessed by the other consumers, but we no longer have a
     * pointer to it.
     */
    node->ts_state = NULL;
    node->ts_pos = NULL;
    node->ts_markpos = NULL;

    /* This can be called more than once */
    if (!node->freed &&
        (sisc->share_type == SHARE_MATERIAL || sisc->share_type == SHARE_SORT))
    {
        /*
         * Decrement the reference count when it's intra-slice.  We don't
         * need a two-pass tree descent, because ShareInputScan always
         * appears before the underlying Material/Sort node.
         */
        EState     *estate = node->ss.ps.state;
        ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, sisc->share_id, false);

        Assert(snEntry && snEntry->refcount > 0);
        snEntry->refcount--;
    }

    node->freed = true;
}
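/*
 * ExecEagerFreeShareInputScan above must tolerate being called more than
 * once (EagerFree and then ExecEnd), which it handles with the node->freed
 * flag.  Below is a minimal, self-contained sketch of that
 * idempotent-cleanup pattern; DemoNode and demo_cleanup are hypothetical
 * names, not part of the executor.
 */
#include <stdbool.h>
#include <stdlib.h>

typedef struct DemoNode
{
    bool    freed;          /* has the one-shot cleanup already run? */
    int    *buffer;         /* resource released on first cleanup only */
} DemoNode;

static void
demo_cleanup(DemoNode *node, int *shared_refcount)
{
    /* Pointer resets are safe to repeat as-is. */
    free(node->buffer);     /* free(NULL) is a no-op on later calls */
    node->buffer = NULL;

    /* The refcount decrement must happen exactly once per consumer. */
    if (!node->freed && *shared_refcount > 0)
        (*shared_refcount)--;

    node->freed = true;
}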
void
ExecEagerFreeMaterial(MaterialState *node)
{
    Material   *ma = (Material *) node->ss.ps.plan;
    EState     *estate = node->ss.ps.state;

    /*
     * If we still have potential readers associated with this node, we
     * shouldn't free the tuplestore too early.  The eager-free message
     * doesn't know about upper ShareInputScan nodes, but those nodes bump
     * up the reference count during their initialization and decrement it
     * in either EagerFree or ExecEnd.
     */
    if (ma->share_type == SHARE_MATERIAL)
    {
        ShareNodeEntry *snEntry;

        snEntry = ExecGetShareNodeEntry(estate, ma->share_id, false);

        if (snEntry->refcount > 0)
            return;
    }

    /*
     * Release tuplestore resources.
     */
    if (NULL != node->ts_state->matstore)
    {
        if (ma->share_type == SHARE_MATERIAL_XSLICE && node->share_lk_ctxt)
        {
            /*
             * MPP-22682: If this is a cross-slice shared producer, don't
             * free the tuplestore here.  Freeing it now would wait for
             * consumers that haven't completed yet, which can cause
             * deadlocks.  Wait until ExecEndMaterial to free it, which is
             * safer.
             */
            return;
        }
        Assert(node->ts_pos);
        DestroyTupleStore(node);
    }
}
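/*
 * The two eager-free functions above share one guard: a producer
 * (Sort/Material) skips its own free while any intra-slice consumer
 * (ShareInputScan) still holds a reference, and each consumer decrements
 * that count exactly once.  The following is a minimal, self-contained
 * sketch of that protocol; DemoShareEntry and the demo_* functions are
 * hypothetical stand-ins, not the executor API.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct DemoShareEntry
{
    int     refcount;       /* consumers that still plan to read */
    bool    freed;          /* has the shared buffer been released? */
} DemoShareEntry;

/* Producer side: mirrors the early return in ExecEagerFreeSort/Material. */
static void
demo_producer_eager_free(DemoShareEntry *entry)
{
    if (entry->refcount > 0)
        return;             /* a consumer still needs the tuples */
    if (!entry->freed)
    {
        entry->freed = true;
        printf("shared buffer released\n");
    }
}

/* Consumer side: mirrors the decrement in ExecEagerFreeShareInputScan. */
static void
demo_consumer_eager_free(DemoShareEntry *entry)
{
    assert(entry->refcount > 0);
    entry->refcount--;
}

int
main(void)
{
    DemoShareEntry entry = { .refcount = 2, .freed = false };

    demo_producer_eager_free(&entry);   /* skipped: two readers remain */
    demo_consumer_eager_free(&entry);
    demo_consumer_eager_free(&entry);
    demo_producer_eager_free(&entry);   /* refcount == 0: buffer released */
    return 0;
}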
/* ----------------------------------------------------------------
 *      ExecInitMaterial
 * ----------------------------------------------------------------
 */
MaterialState *
ExecInitMaterial(Material *node, EState *estate, int eflags)
{
    MaterialState *matstate;
    Plan       *outerPlan;

    /*
     * create state structure
     */
    matstate = makeNode(MaterialState);
    matstate->ss.ps.plan = (Plan *) node;
    matstate->ss.ps.state = estate;

    /*
     * We must have random access to the subplan output to do backward scan
     * or mark/restore.  We also prefer to materialize the subplan output if
     * we might be called on to rewind and replay it many times.  However,
     * if none of these cases apply, we can skip storing the data.
     */
    matstate->randomAccess = node->cdb_strict ||
        (eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0;

    matstate->eof_underlying = false;
    matstate->ts_state = palloc0(sizeof(GenericTupStore));
    matstate->ts_pos = NULL;
    matstate->ts_markpos = NULL;
    matstate->share_lk_ctxt = NULL;
    matstate->ts_destroyed = false;
    ExecMaterialResetWorkfileState(matstate);

    /*
     * Miscellaneous initialization
     *
     * Materialization nodes don't need ExprContexts because they never call
     * ExecQual or ExecProject.
     */

#define MATERIAL_NSLOTS 2

    /*
     * tuple table initialization
     *
     * material nodes only return tuples from their materialized relation.
     */
    ExecInitResultTupleSlot(estate, &matstate->ss.ps);
    matstate->ss.ss_ScanTupleSlot = ExecInitExtraTupleSlot(estate);

    /*
     * If eflags contains EXEC_FLAG_REWIND, EXEC_FLAG_BACKWARD or
     * EXEC_FLAG_MARK, then this node is not eager-free safe.
     */
    matstate->ss.ps.delayEagerFree =
        ((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0);

    /*
     * initialize child nodes
     *
     * We shield the child node from the need to support BACKWARD, or
     * MARK/RESTORE.
     */
    eflags &= ~(EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);

    /*
     * If Materialize does not have any external parameters, then it can
     * shield the child node from being rescanned as well, hence we can
     * clear EXEC_FLAG_REWIND too.  If there are parameters, don't clear the
     * REWIND flag, as the child will be rewound.
     */
    if (node->plan.allParam == NULL || node->plan.extParam == NULL)
    {
        eflags &= ~EXEC_FLAG_REWIND;
    }

    outerPlan = outerPlan(node);

    /*
     * A very basic check to see if the optimizer requires the material to
     * do a projection.  Ideally, this check would recursively compare all
     * the target list expressions.  However, such a check is tricky because
     * of the varno mismatch (the outer plan may have a varno that indexes
     * into the range table, while the material may refer to the same
     * relation as "outer" varno).  [JIRA: MPP-25365]
     */
    insist_log(list_length(node->plan.targetlist) == list_length(outerPlan->targetlist),
               "Material operator does not support projection");

    outerPlanState(matstate) = ExecInitNode(outerPlan, estate, eflags);

    /*
     * If the child node of a Material is a Motion, then this Material node
     * is not eager-free safe.
     */
    if (IsA(outerPlan((Plan *) node), Motion))
    {
        matstate->ss.ps.delayEagerFree = true;
    }

    /*
     * initialize tuple type.  no need to initialize projection info because
     * this node doesn't do projections.
     */
    ExecAssignResultTypeFromTL(&matstate->ss.ps);
    ExecAssignScanTypeFromOuterPlan(&matstate->ss);
    matstate->ss.ps.ps_ProjInfo = NULL;

    /*
     * If share input, need to register with the range table entry.
     */
    if (node->share_type != SHARE_NOTSHARED)
    {
        ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, node->share_id, true);

        snEntry->sharePlan = (Node *) node;
        snEntry->shareState = (Node *) matstate;
    }

    initGpmonPktForMaterial((Plan *) node, &matstate->ss.ps.gpmon_pkt, estate);

    return matstate;
}
/* ----------------------------------------------------------------
 *      ExecInitSort
 *
 *      Creates the run-time state information for the sort node
 *      produced by the planner and initializes its outer subtree.
 * ----------------------------------------------------------------
 */
SortState *
ExecInitSort(Sort *node, EState *estate, int eflags)
{
    SortState  *sortstate;

    SO1_printf("ExecInitSort: %s\n", "initializing sort node");

    /*
     * create state structure
     */
    sortstate = makeNode(SortState);
    sortstate->ss.ps.plan = (Plan *) node;
    sortstate->ss.ps.state = estate;

    /*
     * We must have random access to the sort output to do backward scan or
     * mark/restore.  We also prefer to materialize the sort output if we
     * might be called on to rewind and replay it many times.
     */
    sortstate->randomAccess =
        (eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0;

    /* If the sort is shared, we need random access */
    if (node->share_type != SHARE_NOTSHARED)
        sortstate->randomAccess = true;

    sortstate->sort_Done = false;
    sortstate->tuplesortstate = palloc0(sizeof(GenericTupStore));
    sortstate->share_lk_ctxt = NULL;
    ExecSortResetWorkfileState(sortstate);

    /*
     * CDB: The LIMIT optimization requires an ExprContext in which to
     * evaluate the limit/offset parameters.
     */
    ExecAssignExprContext(estate, &sortstate->ss.ps);

    /* CDB: evaluate a limit as part of the sort */
    {
        /* pass node state to sort state */
        sortstate->limitOffset = ExecInitExpr((Expr *) node->limitOffset,
                                              (PlanState *) sortstate);
        sortstate->limitCount = ExecInitExpr((Expr *) node->limitCount,
                                             (PlanState *) sortstate);
        sortstate->noduplicates = node->noduplicates;
    }

    /*
     * Miscellaneous initialization
     *
     * Sort nodes don't initialize their ExprContexts because they never
     * call ExecQual or ExecProject.
     */

#define SORT_NSLOTS 2

    /*
     * tuple table initialization
     *
     * sort nodes only return scan tuples from their sorted relation.
     */
    ExecInitResultTupleSlot(estate, &sortstate->ss.ps);
    sortstate->ss.ss_ScanTupleSlot = ExecInitExtraTupleSlot(estate);

    /*
     * CDB: Offer extra info for EXPLAIN ANALYZE.
     */
    if (estate->es_instrument)
    {
        /* Allocate string buffer. */
        sortstate->ss.ps.cdbexplainbuf = makeStringInfo();

        /* Request a callback at end of query. */
        sortstate->ss.ps.cdbexplainfun = ExecSortExplainEnd;
    }

    /*
     * If eflags contains EXEC_FLAG_REWIND, EXEC_FLAG_BACKWARD or
     * EXEC_FLAG_MARK, then this node is not eager-free safe.
     */
    sortstate->ss.ps.delayEagerFree =
        ((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0);

    /*
     * initialize child nodes
     *
     * We shield the child node from the need to support BACKWARD, or
     * MARK/RESTORE.
     */
    eflags &= ~(EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);

    /*
     * If Sort does not have any external parameters, then it can shield the
     * child node from being rescanned as well, hence we can clear
     * EXEC_FLAG_REWIND too.  If there are parameters, don't clear the
     * REWIND flag, as the child will be rewound.
     */
    if (node->plan.allParam == NULL || node->plan.extParam == NULL)
    {
        eflags &= ~EXEC_FLAG_REWIND;
    }

    outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * If the child node of this Sort is a Motion, then this Sort node is
     * not eager-free safe.
     */
    if (IsA(outerPlan((Plan *) node), Motion))
    {
        sortstate->ss.ps.delayEagerFree = true;
    }

    /*
     * initialize tuple type.  no need to initialize projection info because
     * this node doesn't do projections.
     */
    ExecAssignResultTypeFromTL(&sortstate->ss.ps);
    ExecAssignScanTypeFromOuterPlan(&sortstate->ss);
    sortstate->ss.ps.ps_ProjInfo = NULL;

    if (node->share_type != SHARE_NOTSHARED)
    {
        ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, node->share_id, true);

        snEntry->sharePlan = (Node *) node;
        snEntry->shareState = (Node *) sortstate;
    }

    SO1_printf("ExecInitSort: %s\n", "sort node initialized");

    initGpmonPktForSort((Plan *) node, &sortstate->ss.ps.gpmon_pkt, estate);

    return sortstate;
}
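/*
 * ExecInitMaterial and ExecInitSort above share the same eflags handling:
 * the node marks itself eager-free unsafe when the caller demands
 * REWIND/BACKWARD/MARK, then strips the flags its own buffer can absorb
 * before initializing the child.  Below is a minimal sketch of that
 * decision; the demo_* names and DEMO_FLAG_* bits are hypothetical
 * stand-ins for the executor's EXEC_FLAG_* macros.
 */
#include <stdbool.h>

#define DEMO_FLAG_REWIND    0x01    /* stands in for EXEC_FLAG_REWIND */
#define DEMO_FLAG_BACKWARD  0x02    /* stands in for EXEC_FLAG_BACKWARD */
#define DEMO_FLAG_MARK      0x04    /* stands in for EXEC_FLAG_MARK */

typedef struct DemoInitResult
{
    bool    delayEagerFree; /* must this node stay alive for a replay? */
    int     childEflags;    /* flags forwarded to the child node */
} DemoInitResult;

static DemoInitResult
demo_shield_child(int eflags, bool hasExternalParams)
{
    DemoInitResult r;

    /* Any replay-style demand makes this node eager-free unsafe. */
    r.delayEagerFree =
        (eflags & (DEMO_FLAG_REWIND | DEMO_FLAG_BACKWARD | DEMO_FLAG_MARK)) != 0;

    /* The materialized buffer absorbs BACKWARD and MARK unconditionally. */
    r.childEflags = eflags & ~(DEMO_FLAG_BACKWARD | DEMO_FLAG_MARK);

    /* Without parameters, a rescan replays the buffer, so REWIND can go too. */
    if (!hasExternalParams)
        r.childEflags &= ~DEMO_FLAG_REWIND;

    return r;
}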
/*
 * init_tuplestore_state
 *    Initialize the tuplestore state for the Shared node if the state has
 *    not been initialized yet.
 */
static void
init_tuplestore_state(ShareInputScanState *node)
{
    Assert(node->ts_state == NULL);

    EState     *estate = node->ss.ps.state;
    ShareInputScan *sisc = (ShareInputScan *) node->ss.ps.plan;
    ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, sisc->share_id, false);
    PlanState  *snState = NULL;
    ShareType   share_type = sisc->share_type;

    if (snEntry)
    {
        snState = (PlanState *) snEntry->shareState;

        if (snState)
            ExecProcNode(snState);
        else
            Assert(share_type == SHARE_MATERIAL_XSLICE ||
                   share_type == SHARE_SORT_XSLICE);
    }

    if (share_type == SHARE_MATERIAL_XSLICE)
    {
        char        rwfile_prefix[100];

        shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix),
                                         sisc->share_id);

        node->ts_state = palloc0(sizeof(GenericTupStore));
        node->ts_state->matstore = ntuplestore_create_readerwriter(rwfile_prefix, 0, false);
        node->ts_pos = (void *) ntuplestore_create_accessor(node->ts_state->matstore, false);
        ntuplestore_acc_seek_bof((NTupleStoreAccessor *) node->ts_pos);
    }
    else if (share_type == SHARE_MATERIAL)
    {
        /*
         * The MaterialState's ts_state structure should have been
         * initialized already, during init of the Material node.
         */
        node->ts_state = ((MaterialState *) snState)->ts_state;
        Assert(NULL != node->ts_state->matstore);
        node->ts_pos = (void *) ntuplestore_create_accessor(node->ts_state->matstore, false);
        ntuplestore_acc_seek_bof((NTupleStoreAccessor *) node->ts_pos);
    }
    else if (share_type == SHARE_SORT_XSLICE)
    {
        char        rwfile_prefix[100];

        shareinput_create_bufname_prefix(rwfile_prefix, sizeof(rwfile_prefix),
                                         sisc->share_id);

        node->ts_state = palloc0(sizeof(GenericTupStore));

        if (gp_enable_mk_sort)
        {
            node->ts_state->sortstore_mk = tuplesort_begin_heap_file_readerwriter_mk(
                &node->ss,
                rwfile_prefix, false,
                NULL, 0,
                NULL, NULL,
                PlanStateOperatorMemKB((PlanState *) node), true);

            tuplesort_begin_pos_mk(node->ts_state->sortstore_mk,
                                   (TuplesortPos_mk **) (&node->ts_pos));
            tuplesort_rescan_pos_mk(node->ts_state->sortstore_mk,
                                    (TuplesortPos_mk *) node->ts_pos);
        }
        else
        {
            node->ts_state->sortstore = tuplesort_begin_heap_file_readerwriter(
                rwfile_prefix, false,
                NULL, 0,
                NULL, NULL,
                PlanStateOperatorMemKB((PlanState *) node), true);

            tuplesort_begin_pos(node->ts_state->sortstore,
                                (TuplesortPos **) (&node->ts_pos));
            tuplesort_rescan_pos(node->ts_state->sortstore,
                                 (TuplesortPos *) node->ts_pos);
        }
    }
    else
    {
        Assert(sisc->share_type == SHARE_SORT);
        Assert(snState != NULL);

        if (gp_enable_mk_sort)
        {
            node->ts_state = ((SortState *) snState)->tuplesortstate;
            Assert(NULL != node->ts_state->sortstore_mk);
            tuplesort_begin_pos_mk(node->ts_state->sortstore_mk,
                                   (TuplesortPos_mk **) (&node->ts_pos));
            tuplesort_rescan_pos_mk(node->ts_state->sortstore_mk,
                                    (TuplesortPos_mk *) node->ts_pos);
        }
        else
        {
            node->ts_state = ((SortState *) snState)->tuplesortstate;
            Assert(NULL != node->ts_state->sortstore);
            tuplesort_begin_pos(node->ts_state->sortstore,
                                (TuplesortPos **) (&node->ts_pos));
            tuplesort_rescan_pos(node->ts_state->sortstore,
                                 (TuplesortPos *) node->ts_pos);
        }
    }

    Assert(NULL != node->ts_state);
    Assert(NULL != node->ts_state->matstore ||
           NULL != node->ts_state->sortstore ||
           NULL != node->ts_state->sortstore_mk);
}
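/*
 * A simplified sketch of the branch structure in init_tuplestore_state:
 * an intra-slice consumer borrows the producer's in-process store and only
 * creates its own read position, while a cross-slice consumer re-opens the
 * shared buffer through a name derived from share_id.  DemoStore,
 * demo_open_share and the "SIRW_%d" name pattern are hypothetical
 * illustrations, not the executor's actual API.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct DemoStore
{
    char    name[100];      /* backing workfile name (cross-slice only) */
    bool    borrowed;       /* true if shared in-process with the producer */
    long    read_pos;       /* per-consumer cursor, like ts_pos above */
} DemoStore;

static DemoStore *
demo_open_share(const DemoStore *producer_store, int share_id, bool cross_slice)
{
    DemoStore  *reader = calloc(1, sizeof(DemoStore));

    if (!cross_slice)
    {
        /* Intra-slice: the producer ran in this process; reuse its store. */
        *reader = *producer_store;
        reader->borrowed = true;
    }
    else
    {
        /*
         * Cross-slice: the producer may run in another process, so the
         * consumer re-opens the buffer via a well-known name keyed on
         * share_id (analogous to shareinput_create_bufname_prefix).
         */
        snprintf(reader->name, sizeof(reader->name), "SIRW_%d", share_id);
        reader->borrowed = false;
    }

    reader->read_pos = 0;       /* seek to beginning of file, like seek_bof */
    return reader;
}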
/* ------------------------------------------------------------------
 *      ExecInitShareInputScan
 * ------------------------------------------------------------------
 */
ShareInputScanState *
ExecInitShareInputScan(ShareInputScan *node, EState *estate, int eflags)
{
    ShareInputScanState *sisstate;
    Plan       *outerPlan;
    TupleDesc   tupDesc;

    Assert(innerPlan(node) == NULL);

    /* create state data structure */
    sisstate = makeNode(ShareInputScanState);
    sisstate->ss.ps.plan = (Plan *) node;
    sisstate->ss.ps.state = estate;

    sisstate->ts_state = NULL;
    sisstate->ts_pos = NULL;
    sisstate->ts_markpos = NULL;

    sisstate->share_lk_ctxt = NULL;
    sisstate->freed = false;

    /*
     * Init child node.  If outerPlan is NULL, this is a no-op (so that the
     * ShareInput node will be init-ed only once).
     */
    outerPlan = outerPlan(node);
    outerPlanState(sisstate) = ExecInitNode(outerPlan, estate, eflags);

    sisstate->ss.ps.targetlist = (List *)
        ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) sisstate);

    Assert(node->plan.qual == NULL);
    sisstate->ss.ps.qual = NULL;

    /*
     * Miscellaneous initialization
     *
     * Create expression context
     */
    ExecAssignExprContext(estate, &sisstate->ss.ps);

    /* tuple table init */
    ExecInitResultTupleSlot(estate, &sisstate->ss.ps);
    sisstate->ss.ss_ScanTupleSlot = ExecInitExtraTupleSlot(estate);

    /*
     * init tuple type.
     */
    ExecAssignResultTypeFromTL(&sisstate->ss.ps);

    {
        bool        hasoid;

        if (!ExecContextForcesOids(&sisstate->ss.ps, &hasoid))
            hasoid = false;

        tupDesc = ExecTypeFromTL(node->plan.targetlist, hasoid);
    }

    ExecAssignScanType(&sisstate->ss, tupDesc);
    sisstate->ss.ps.ps_ProjInfo = NULL;

    /*
     * If this is an intra-slice share node, increment the reference count
     * to tell the underlying node not to be freed before this node is
     * ready to be freed.  The fCreate flag to ExecGetShareNodeEntry is
     * true because the entry doesn't exist yet at this point; it will be
     * filled in later, during the underlying node's initialization.
     */
    if (node->share_type == SHARE_MATERIAL || node->share_type == SHARE_SORT)
    {
        ShareNodeEntry *snEntry = ExecGetShareNodeEntry(estate, node->share_id, true);

        snEntry->refcount++;
    }

    initGpmonPktForShareInputScan((Plan *) node, &sisstate->ss.ps.gpmon_pkt, estate);

    return sisstate;
}
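/*
 * A toy sketch of the create-on-demand lookup that the fCreate flag to
 * ExecGetShareNodeEntry implies: a consumer that initializes first creates
 * a placeholder entry keyed by share_id and bumps the refcount; the
 * producer later finds the same entry and fills in its state pointer.
 * DemoEntry, demo_get_entry and the fixed-size array registry are
 * hypothetical; they only illustrate the protocol, not the executor's
 * actual data structures.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define DEMO_MAX_SHARES 16

typedef struct DemoEntry
{
    bool    used;           /* slot occupied? */
    int     refcount;       /* consumers registered so far */
    void   *producerState;  /* filled in by the producer, may lag behind */
} DemoEntry;

static DemoEntry demo_registry[DEMO_MAX_SHARES];

static DemoEntry *
demo_get_entry(int share_id, bool fCreate)
{
    assert(share_id >= 0 && share_id < DEMO_MAX_SHARES);

    if (!demo_registry[share_id].used)
    {
        if (!fCreate)
            return NULL;    /* caller expected the entry to exist */
        demo_registry[share_id].used = true;    /* placeholder entry */
    }
    return &demo_registry[share_id];
}

/* Consumer init: create the entry if needed, then register interest. */
static void
demo_consumer_init(int share_id)
{
    demo_get_entry(share_id, true)->refcount++;
}

/* Producer init: the entry may already exist; attach the shared state. */
static void
demo_producer_init(int share_id, void *state)
{
    demo_get_entry(share_id, true)->producerState = state;
}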