/* ---------------------------------------------------------------- * ExecInitBitmapHeapScan * * Initializes the scan's state information. * ---------------------------------------------------------------- */ BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) { BitmapHeapScanState *scanstate; Relation currentRelation; int io_concurrency; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * Assert caller didn't ask for an unsafe snapshot --- see comments at * head of file. */ Assert(IsMVCCSnapshot(estate->es_snapshot)); /* * create state structure */ scanstate = makeNode(BitmapHeapScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan; scanstate->tbm = NULL; scanstate->tbmiterator = NULL; scanstate->tbmres = NULL; scanstate->exact_pages = 0; scanstate->lossy_pages = 0; scanstate->prefetch_iterator = NULL; scanstate->prefetch_pages = 0; scanstate->prefetch_target = 0; /* may be updated below */ scanstate->prefetch_maximum = target_prefetch_pages; scanstate->pscan_len = 0; scanstate->initialized = false; scanstate->shared_tbmiterator = NULL; scanstate->pstate = NULL; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); /* * initialize child expressions */ scanstate->ss.ps.qual = ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate); scanstate->bitmapqualorig = ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * open the base relation and acquire appropriate lock on it. */ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); /* * Determine the maximum for prefetch_target. If the tablespace has a * specific IO concurrency set, use that to compute the corresponding * maximum value; otherwise, we already initialized to the value computed * by the GUC machinery. */ io_concurrency = get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace); if (io_concurrency != effective_io_concurrency) { double maximum; if (ComputeIoConcurrency(io_concurrency, &maximum)) scanstate->prefetch_maximum = rint(maximum); } scanstate->ss.ss_currentRelation = currentRelation; /* * Even though we aren't going to do a conventional seqscan, it is useful * to create a HeapScanDesc --- most of the fields in it are usable. */ scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation, estate->es_snapshot, 0, NULL); /* * get the scan type from the relation descriptor. */ ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); /* * initialize child nodes * * We do this last because the child nodes will open indexscans on our * relation's indexes, and we want to be sure we have acquired a lock on * the relation first. */ outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * all done. */ return scanstate; }
/* ---------------------------------------------------------------- * ExecInitFunctionScan * ---------------------------------------------------------------- */ FunctionScanState * ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags) { FunctionScanState *scanstate; RangeTblEntry *rte; Oid funcrettype; TypeFuncClass functypclass; TupleDesc tupdesc = NULL; /* * FunctionScan should not have any children. */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create new ScanState for node */ scanstate = makeNode(FunctionScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); #define FUNCTIONSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); /* * get info about function */ rte = rt_fetch(node->scan.scanrelid, estate->es_range_table); Assert(rte->rtekind == RTE_FUNCTION); /* * Now determine if the function returns a simple or composite type, and * build an appropriate tupdesc. */ functypclass = get_expr_result_type(rte->funcexpr, &funcrettype, &tupdesc); if (functypclass == TYPEFUNC_COMPOSITE) { /* Composite data type, e.g. a table's row type */ Assert(tupdesc); /* Must copy it out of typcache for safety */ tupdesc = CreateTupleDescCopy(tupdesc); } else if (functypclass == TYPEFUNC_SCALAR) { /* Base data type, i.e. scalar */ char *attname = strVal(linitial(rte->eref->colnames)); tupdesc = CreateTemplateTupleDesc(1, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, attname, funcrettype, -1, 0); } else if (functypclass == TYPEFUNC_RECORD) { tupdesc = BuildDescForRelation(rte->coldeflist); } else { /* crummy error message, but parser should have caught this */ elog(ERROR, "function in FROM has unsupported return type"); } /* * For RECORD results, make sure a typmod has been assigned. (The * function should do this for itself, but let's cover things in case it * doesn't.) */ BlessTupleDesc(tupdesc); scanstate->tupdesc = tupdesc; ExecAssignScanType(&scanstate->ss, tupdesc, false); /* * Other node-specific setup */ scanstate->tuplestorestate = NULL; scanstate->funcexpr = ExecInitExpr((Expr *) rte->funcexpr, (PlanState *) scanstate); scanstate->ss.ps.ps_TupFromTlist = false; /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); return scanstate; }
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls) { HashJoinTable hashtable; Plan *outerNode; int nbuckets; int nbatch; int num_skew_mcvs; int log2_nbuckets; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; /* * Get information about the size of the relation to be hashed (it's the * "outer" subtree of this node, but the inner relation of the hashjoin). * Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, OidIsValid(node->skewTable), &nbuckets, &nbatch, &num_skew_mcvs); /* nbuckets must be a power of 2 */ log2_nbuckets = my_log2(nbuckets); Assert(nbuckets == (1 << log2_nbuckets)); /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. Everything else should be kept inside the * subsidiary hashCxt or batchCxt. */ hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; hashtable->nbuckets_original = nbuckets; hashtable->nbuckets_optimal = nbuckets; hashtable->log2_nbuckets = log2_nbuckets; hashtable->log2_nbuckets_optimal = log2_nbuckets; hashtable->buckets = NULL; hashtable->keepNulls = keepNulls; hashtable->skewEnabled = false; hashtable->skewBucket = NULL; hashtable->skewBucketLen = 0; hashtable->nSkewBuckets = 0; hashtable->skewBucketNums = NULL; hashtable->nbatch = nbatch; hashtable->curbatch = 0; hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; hashtable->growEnabled = true; hashtable->totalTuples = 0; hashtable->skewTuples = 0; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->spaceUsed = 0; hashtable->spacePeak = 0; hashtable->spaceAllowed = work_mem * 1024L; hashtable->spaceUsedSkew = 0; hashtable->spaceAllowedSkew = hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100; hashtable->chunks = NULL; #ifdef HJDEBUG printf("Hashjoin %p: initial nbatch = %d, nbuckets = %d\n", hashtable, nbatch, nbuckets); #endif /* * Create temporary memory contexts in which to keep the hashtable working * storage. See notes in executor/hashjoin.h. */ hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext, "HashTableContext", ALLOCSET_DEFAULT_SIZES); hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt, "HashBatchContext", ALLOCSET_DEFAULT_SIZES); /* Allocate data that will live for the life of the hashjoin */ oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); /* * Get info about the hash functions to be used for each hash key. Also * remember whether the join operators are strict. */ nkeys = list_length(hashOperators); hashtable->outer_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); i = 0; foreach(ho, hashOperators) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; Oid right_hashfn; if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn)) elog(ERROR, "could not find hash function for hash operator %u", hashop); fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); i++; }
/* ---------------------------------------------------------------- * ExecInitExternalScan * ---------------------------------------------------------------- */ ExternalScanState * ExecInitExternalScan(ExternalScan *node, EState *estate, int eflags) { ResultRelSegFileInfo *segfileinfo = NULL; ExternalScanState *externalstate; Relation currentRelation; FileScanDesc currentScanDesc; Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create state structure */ externalstate = makeNode(ExternalScanState); externalstate->ss.ps.plan = (Plan *) node; externalstate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &externalstate->ss.ps); /* * initialize child expressions */ externalstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) externalstate); externalstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) externalstate); /* Check if targetlist or qual contains a var node referencing the ctid column */ externalstate->cdb_want_ctid = contain_ctid_var_reference(&node->scan); ItemPointerSetInvalid(&externalstate->cdb_fake_ctid); #define EXTSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &externalstate->ss.ps); ExecInitScanTupleSlot(estate, &externalstate->ss); /* * get the relation object id from the relid'th entry in the range table * and open that relation. */ currentRelation = ExecOpenScanExternalRelation(estate, node->scan.scanrelid); if (Gp_role == GP_ROLE_EXECUTE && node->err_aosegfileinfos) { segfileinfo = (ResultRelSegFileInfo *)list_nth(node->err_aosegfileinfos, GetQEIndex()); } else { segfileinfo = NULL; } currentScanDesc = external_beginscan(currentRelation, node->scan.scanrelid, node->scancounter, node->uriList, node->fmtOpts, node->fmtType, node->isMasterOnly, node->rejLimit, node->rejLimitInRows, node->fmterrtbl, segfileinfo, node->encoding, node->scan.plan.qual); externalstate->ss.ss_currentRelation = currentRelation; externalstate->ess_ScanDesc = currentScanDesc; ExecAssignScanType(&externalstate->ss, RelationGetDescr(currentRelation)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&externalstate->ss.ps); ExecAssignScanProjectionInfo(&externalstate->ss); /* * If eflag contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, * then this node is not eager free safe. */ externalstate->ss.ps.delayEagerFree = ((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0); initGpmonPktForExternalScan((Plan *)node, &externalstate->ss.ps.gpmon_pkt, estate); return externalstate; }
/* Return number of TupleTableSlots used by nodeDML.*/ int ExecCountSlotsDML(DML *node) { return ExecCountSlotsNode(outerPlan(node)) + DML_NSLOTS; }
/* ---------------------------------------------------------------- * ExecInitSetOp * * This initializes the setop node state structures and * the node's subplan. * ---------------------------------------------------------------- */ SetOpState * ExecInitSetOp(SetOp *node, EState *estate, int eflags) { SetOpState *setopstate; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ setopstate = makeNode(SetOpState); setopstate->ps.plan = (Plan *) node; setopstate->ps.state = estate; setopstate->eqfunctions = NULL; setopstate->hashfunctions = NULL; setopstate->setop_done = false; setopstate->numOutput = 0; setopstate->pergroup = NULL; setopstate->grp_firstTuple = NULL; setopstate->hashtable = NULL; setopstate->tableContext = NULL; /* * Miscellaneous initialization * * SetOp nodes have no ExprContext initialization because they never call * ExecQual or ExecProject. But they do need a per-tuple memory context * anyway for calling execTuplesMatch. */ setopstate->tempContext = AllocSetContextCreate(CurrentMemoryContext, "SetOp", ALLOCSET_DEFAULT_SIZES); /* * If hashing, we also need a longer-lived context to store the hash * table. The table can't just be kept in the per-query context because * we want to be able to throw it away in ExecReScanSetOp. */ if (node->strategy == SETOP_HASHED) setopstate->tableContext = AllocSetContextCreate(CurrentMemoryContext, "SetOp hash table", ALLOCSET_DEFAULT_SIZES); /* * Tuple table initialization */ ExecInitResultTupleSlot(estate, &setopstate->ps); /* * initialize child nodes * * If we are hashing then the child plan does not need to handle REWIND * efficiently; see ExecReScanSetOp. */ if (node->strategy == SETOP_HASHED) eflags &= ~EXEC_FLAG_REWIND; outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * setop nodes do no projections, so initialize projection info for this * node appropriately */ ExecAssignResultTypeFromTL(&setopstate->ps); setopstate->ps.ps_ProjInfo = NULL; /* * Precompute fmgr lookup data for inner loop. We need both equality and * hashing functions to do it by hashing, but only equality if not * hashing. */ if (node->strategy == SETOP_HASHED) execTuplesHashPrepare(node->numCols, node->dupOperators, &setopstate->eqfunctions, &setopstate->hashfunctions); else setopstate->eqfunctions = execTuplesMatchPrepare(node->numCols, node->dupOperators); if (node->strategy == SETOP_HASHED) { build_hash_table(setopstate); setopstate->table_filled = false; } else { setopstate->pergroup = (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData)); } return setopstate; }
/* ---------------------------------------------------------------- * ExecInitCteScan * ---------------------------------------------------------------- */ CteScanState * ExecInitCteScan(CteScan *node, EState *estate, int eflags) { CteScanState *scanstate; ParamExecData *prmdata; /* check for unsupported flags */ Assert(!(eflags & EXEC_FLAG_MARK)); /* * For the moment we have to force the tuplestore to allow REWIND, because * we might be asked to rescan the CTE even though upper levels didn't * tell us to be prepared to do it efficiently. Annoying, since this * prevents truncation of the tuplestore. XXX FIXME */ eflags |= EXEC_FLAG_REWIND; /* * CteScan should not have any children. */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create new CteScanState for node */ scanstate = makeNode(CteScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; scanstate->eflags = eflags; scanstate->cte_table = NULL; scanstate->eof_cte = false; /* * Find the already-initialized plan for the CTE query. */ scanstate->cteplanstate = (PlanState *) list_nth(estate->es_subplanstates, node->ctePlanId - 1); /* * The Param slot associated with the CTE query is used to hold a pointer * to the CteState of the first CteScan node that initializes for this * CTE. This node will be the one that holds the shared state for all the * CTEs, particularly the shared tuplestore. */ prmdata = &(estate->es_param_exec_vals[node->cteParam]); Assert(prmdata->execPlan == NULL); Assert(!prmdata->isnull); scanstate->leader = (CteScanState *) DatumGetPointer(prmdata->value); if (scanstate->leader == NULL) { /* I am the leader */ prmdata->value = PointerGetDatum(scanstate); scanstate->leader = scanstate; scanstate->cte_table = tuplestore_begin_heap(true, false, work_mem); tuplestore_set_eflags(scanstate->cte_table, scanstate->eflags); scanstate->readptr = 0; } else { /* Not the leader */ Assert(IsA(scanstate->leader, CteScanState)); scanstate->readptr = tuplestore_alloc_read_pointer(scanstate->leader->cte_table, scanstate->eflags); } /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * The scan tuple type (ie, the rowtype we expect to find in the work * table) is the same as the result rowtype of the CTE query. */ ExecAssignScanType(&scanstate->ss, ExecGetResultType(scanstate->cteplanstate)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); scanstate->ss.ps.ps_TupFromTlist = false; return scanstate; }
/* ---------------------------------------------------------------- * ExecInitSubqueryScan * ---------------------------------------------------------------- */ SubqueryScanState * ExecInitSubqueryScan(SubqueryScan *node, EState *estate, int eflags) { SubqueryScanState *subquerystate; /* check for unsupported flags */ Assert(!(eflags & EXEC_FLAG_MARK)); /* * SubqueryScan should not have any "normal" children. Also, if planner * left anything in subrtable, it's fishy. */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); Assert(node->subrtable == NIL); /* * Since subquery nodes create its own executor state, * and pass it down to its child nodes, we always * initialize the subquery node. However, some * fields are not initialized if not necessary, see * below. */ /* * create state structure */ subquerystate = makeNode(SubqueryScanState); subquerystate->ss.ps.plan = (Plan *) node; subquerystate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &subquerystate->ss.ps); /* * initialize child expressions */ subquerystate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) subquerystate); subquerystate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) subquerystate); /* Check if targetlist or qual contains a var node referencing the ctid column */ subquerystate->cdb_want_ctid = contain_ctid_var_reference(&node->scan); ItemPointerSetInvalid(&subquerystate->cdb_fake_ctid); #define SUBQUERYSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &subquerystate->ss.ps); ExecInitScanTupleSlot(estate, &subquerystate->ss); /* * initialize subquery */ subquerystate->subplan = ExecInitNode(node->subplan, estate, eflags); /* return borrowed share node list */ estate->es_sharenode = estate->es_sharenode; /*subquerystate->ss.ps.ps_TupFromTlist = false;*/ /* * Initialize scan tuple type (needed by ExecAssignScanProjectionInfo) */ ExecAssignScanType(&subquerystate->ss, ExecGetResultType(subquerystate->subplan)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&subquerystate->ss.ps); ExecAssignScanProjectionInfo(&subquerystate->ss); initGpmonPktForSubqueryScan((Plan *)node, &subquerystate->ss.ps.gpmon_pkt, estate); return subquerystate; }
/* ---------------------------------------------------------------- * ExecInitForeignScan * ---------------------------------------------------------------- */ ForeignScanState * ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags) { ForeignScanState *scanstate; Relation currentRelation = NULL; Index scanrelid = node->scan.scanrelid; Index tlistvarno; FdwRoutine *fdwroutine; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ scanstate = makeNode(ForeignScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); scanstate->ss.ps.ps_TupFromTlist = false; /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); scanstate->fdw_recheck_quals = (List *) ExecInitExpr((Expr *) node->fdw_recheck_quals, (PlanState *) scanstate); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * open the base relation, if any, and acquire an appropriate lock on it; * also acquire function pointers from the FDW's handler */ if (scanrelid > 0) { currentRelation = ExecOpenScanRelation(estate, scanrelid, eflags); scanstate->ss.ss_currentRelation = currentRelation; fdwroutine = GetFdwRoutineForRelation(currentRelation, true); } else { /* We can't use the relcache, so get fdwroutine the hard way */ fdwroutine = GetFdwRoutineByServerId(node->fs_server); } /* * Determine the scan tuple type. If the FDW provided a targetlist * describing the scan tuples, use that; else use base relation's rowtype. */ if (node->fdw_scan_tlist != NIL || currentRelation == NULL) { TupleDesc scan_tupdesc; scan_tupdesc = ExecTypeFromTL(node->fdw_scan_tlist, false); ExecAssignScanType(&scanstate->ss, scan_tupdesc); /* Node's targetlist will contain Vars with varno = INDEX_VAR */ tlistvarno = INDEX_VAR; } else { ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation)); /* Node's targetlist will contain Vars with varno = scanrelid */ tlistvarno = scanrelid; } /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfoWithVarno(&scanstate->ss, tlistvarno); /* * Initialize FDW-related state. */ scanstate->fdwroutine = fdwroutine; scanstate->fdw_state = NULL; /* Initialize any outer plan. */ if (outerPlan(node)) outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * Tell the FDW to initialize the scan. */ fdwroutine->BeginForeignScan(scanstate, eflags); return scanstate; }
/* ---------------------------------------------------------------- * ExecInitRecursiveUnionScan * ---------------------------------------------------------------- */ RecursiveUnionState * ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags) { RecursiveUnionState *rustate; ParamExecData *prmdata; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ rustate = makeNode(RecursiveUnionState); rustate->ps.plan = (Plan *) node; rustate->ps.state = estate; rustate->eqfunctions = NULL; rustate->hashfunctions = NULL; rustate->hashtable = NULL; rustate->tempContext = NULL; rustate->tableContext = NULL; /* initialize processing state */ rustate->recursing = false; rustate->intermediate_empty = true; rustate->working_table = tuplestore_begin_heap(false, false, work_mem); rustate->intermediate_table = tuplestore_begin_heap(false, false, work_mem); /* * If hashing, we need a per-tuple memory context for comparisons, and a * longer-lived context to store the hash table. The table can't just be * kept in the per-query context because we want to be able to throw it * away when rescanning. */ if (node->numCols > 0) { rustate->tempContext = AllocSetContextCreate(CurrentMemoryContext, "RecursiveUnion", ALLOCSET_DEFAULT_SIZES); rustate->tableContext = AllocSetContextCreate(CurrentMemoryContext, "RecursiveUnion hash table", ALLOCSET_DEFAULT_SIZES); } /* * Make the state structure available to descendant WorkTableScan nodes * via the Param slot reserved for it. */ prmdata = &(estate->es_param_exec_vals[node->wtParam]); Assert(prmdata->execPlan == NULL); prmdata->value = PointerGetDatum(rustate); prmdata->isnull = false; /* * Miscellaneous initialization * * RecursiveUnion plans don't have expression contexts because they never * call ExecQual or ExecProject. */ Assert(node->plan.qual == NIL); /* * RecursiveUnion nodes still have Result slots, which hold pointers to * tuples, so we have to initialize them. */ ExecInitResultTupleSlot(estate, &rustate->ps); /* * Initialize result tuple type and projection info. (Note: we have to * set up the result type before initializing child nodes, because * nodeWorktablescan.c expects it to be valid.) */ ExecAssignResultTypeFromTL(&rustate->ps); rustate->ps.ps_ProjInfo = NULL; /* * initialize child nodes */ outerPlanState(rustate) = ExecInitNode(outerPlan(node), estate, eflags); innerPlanState(rustate) = ExecInitNode(innerPlan(node), estate, eflags); /* * If hashing, precompute fmgr lookup data for inner loop, and create the * hash table. */ if (node->numCols > 0) { execTuplesHashPrepare(node->numCols, node->dupOperators, &rustate->eqfunctions, &rustate->hashfunctions); build_hash_table(rustate); } return rustate; }
/* ---------------------------------------------------------------- * ExecInitSubqueryScan * ---------------------------------------------------------------- */ SubqueryScanState * ExecInitSubqueryScan(SubqueryScan *node, EState *estate, int eflags) { SubqueryScanState *subquerystate; /* check for unsupported flags */ Assert(!(eflags & EXEC_FLAG_MARK)); /* SubqueryScan should not have any "normal" children */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create state structure */ subquerystate = makeNode(SubqueryScanState); subquerystate->ss.ps.plan = (Plan *) node; subquerystate->ss.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &subquerystate->ss.ps); /* * initialize child expressions */ subquerystate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) subquerystate); subquerystate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) subquerystate); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &subquerystate->ss.ps); ExecInitScanTupleSlot(estate, &subquerystate->ss); /* * initialize subquery */ subquerystate->subplan = ExecInitNode(node->subplan, estate, eflags); subquerystate->ss.ps.ps_TupFromTlist = false; /* * Initialize scan tuple type (needed by ExecAssignScanProjectionInfo) */ ExecAssignScanType(&subquerystate->ss, ExecGetResultType(subquerystate->subplan)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&subquerystate->ss.ps); ExecAssignScanProjectionInfo(&subquerystate->ss); return subquerystate; }
/* ---------------------------------------------------------------- * ExecInitResult * * Creates the run-time state information for the result node * produced by the planner and initializes outer relations * (child nodes). * ---------------------------------------------------------------- */ ResultState * ExecInitResult(Result *node, EState *estate, int eflags) { ResultState *resstate; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_MARK | EXEC_FLAG_BACKWARD)) || outerPlan(node) != NULL); /* * create state structure */ resstate = makeNode(ResultState); resstate->ps.plan = (Plan *) node; resstate->ps.state = estate; resstate->rs_done = false; resstate->rs_checkqual = (node->resconstantqual == NULL) ? false : true; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &resstate->ps); resstate->ps.ps_TupFromTlist = false; /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &resstate->ps); /* * initialize child expressions */ resstate->ps.targetlist = (List *) ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) resstate); resstate->ps.qual = (List *) ExecInitExpr((Expr *) node->plan.qual, (PlanState *) resstate); resstate->resconstantqual = ExecInitExpr((Expr *) node->resconstantqual, (PlanState *) resstate); /* * initialize child nodes */ outerPlanState(resstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * we don't use inner plan */ Assert(innerPlan(node) == NULL); /* * initialize tuple type and projection info */ ExecAssignResultTypeFromTL(&resstate->ps); ExecAssignProjectionInfo(&resstate->ps, NULL); return resstate; }
/* ---------------------------------------------------------------- * ExecHash * * build hash table for hashjoin, all do partitioning if more * than one batches are required. * ---------------------------------------------------------------- */ TupleTableSlot * ExecHash(Hash *node) { EState *estate; HashState *hashstate; Plan *outerNode; Var *hashkey; HashJoinTable hashtable; TupleTableSlot *slot; ExprContext *econtext; int nbatch; File *batches; RelativeAddr *batchPos; int *batchSizes; int i; RelativeAddr *innerbatchNames; /* ---------------- * get state info from node * ---------------- */ hashstate = node->hashstate; estate = node->plan.state; outerNode = outerPlan(node); hashtable = node->hashtable; if (hashtable == NULL) elog(WARN, "ExecHash: hash table is NULL."); nbatch = hashtable->nbatch; if (nbatch > 0) { /* if needs hash partition */ innerbatchNames = (RelativeAddr *) ABSADDR(hashtable->innerbatchNames); /* -------------- * allocate space for the file descriptors of batch files * then open the batch files in the current processes. * -------------- */ batches = (File*)palloc(nbatch * sizeof(File)); for (i=0; i<nbatch; i++) { batches[i] = FileNameOpenFile(ABSADDR(innerbatchNames[i]), O_CREAT | O_RDWR, 0600); } hashstate->hashBatches = batches; batchPos = (RelativeAddr*) ABSADDR(hashtable->innerbatchPos); batchSizes = (int*) ABSADDR(hashtable->innerbatchSizes); } /* ---------------- * set expression context * ---------------- */ hashkey = node->hashkey; econtext = hashstate->cstate.cs_ExprContext; /* ---------------- * get tuple and insert into the hash table * ---------------- */ for (;;) { slot = ExecProcNode(outerNode, (Plan*)node); if (TupIsNull(slot)) break; econtext->ecxt_innertuple = slot; ExecHashTableInsert(hashtable, econtext, hashkey, hashstate->hashBatches); ExecClearTuple(slot); } /* * end of build phase, flush all the last pages of the batches. */ for (i=0; i<nbatch; i++) { if (FileSeek(batches[i], 0L, SEEK_END) < 0) perror("FileSeek"); if (FileWrite(batches[i],ABSADDR(hashtable->batch)+i*BLCKSZ,BLCKSZ) < 0) perror("FileWrite"); NDirectFileWrite++; } /* --------------------- * Return the slot so that we have the tuple descriptor * when we need to save/restore them. -Jeff 11 July 1991 * --------------------- */ return slot; }
HashJoinTable ExecHashTableCreate(Hash *node) { Plan *outerNode; int nbatch; int ntuples; int tupsize; IpcMemoryId shmid; HashJoinTable hashtable; HashBucket bucket; int nbuckets; int totalbuckets; int bucketsize; int i; RelativeAddr *outerbatchNames; RelativeAddr *outerbatchPos; RelativeAddr *innerbatchNames; RelativeAddr *innerbatchPos; int *innerbatchSizes; RelativeAddr tempname; nbatch = -1; HashTBSize = NBuffers/2; while (nbatch < 0) { /* * determine number of batches for the hashjoin */ HashTBSize *= 2; nbatch = ExecHashPartition(node); } /* ---------------- * get information about the size of the relation * ---------------- */ outerNode = outerPlan(node); ntuples = outerNode->plan_size; if (ntuples <= 0) ntuples = 1000; /* XXX just a hack */ tupsize = outerNode->plan_width + sizeof(HeapTupleData); /* * totalbuckets is the total number of hash buckets needed for * the entire relation */ totalbuckets = ceil((double)ntuples/NTUP_PER_BUCKET); bucketsize = LONGALIGN (NTUP_PER_BUCKET * tupsize + sizeof(*bucket)); /* * nbuckets is the number of hash buckets for the first pass * of hybrid hashjoin */ nbuckets = (HashTBSize - nbatch) * BLCKSZ / (bucketsize * FUDGE_FAC); if (totalbuckets < nbuckets) totalbuckets = nbuckets; if (nbatch == 0) nbuckets = totalbuckets; #ifdef HJDEBUG printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n", nbatch, totalbuckets, nbuckets); #endif /* ---------------- * in non-parallel machines, we don't need to put the hash table * in the shared memory. We just palloc it. * ---------------- */ hashtable = (HashJoinTable)palloc((HashTBSize+1)*BLCKSZ); shmid = 0; if (hashtable == NULL) { elog(WARN, "not enough memory for hashjoin."); } /* ---------------- * initialize the hash table header * ---------------- */ hashtable->nbuckets = nbuckets; hashtable->totalbuckets = totalbuckets; hashtable->bucketsize = bucketsize; hashtable->shmid = shmid; hashtable->top = sizeof(HashTableData); hashtable->bottom = HashTBSize * BLCKSZ; /* * hashtable->readbuf has to be long aligned!!! */ hashtable->readbuf = hashtable->bottom; hashtable->nbatch = nbatch; hashtable->curbatch = 0; hashtable->pcount = hashtable->nprocess = 0; if (nbatch > 0) { /* --------------- * allocate and initialize the outer batches * --------------- */ outerbatchNames = (RelativeAddr*)ABSADDR( hashTableAlloc(nbatch * sizeof(RelativeAddr), hashtable)); outerbatchPos = (RelativeAddr*)ABSADDR( hashTableAlloc(nbatch * sizeof(RelativeAddr), hashtable)); for (i=0; i<nbatch; i++) { tempname = hashTableAlloc(12, hashtable); mk_hj_temp(ABSADDR(tempname)); outerbatchNames[i] = tempname; outerbatchPos[i] = -1; } hashtable->outerbatchNames = RELADDR(outerbatchNames); hashtable->outerbatchPos = RELADDR(outerbatchPos); /* --------------- * allocate and initialize the inner batches * --------------- */ innerbatchNames = (RelativeAddr*)ABSADDR( hashTableAlloc(nbatch * sizeof(RelativeAddr), hashtable)); innerbatchPos = (RelativeAddr*)ABSADDR( hashTableAlloc(nbatch * sizeof(RelativeAddr), hashtable)); innerbatchSizes = (int*)ABSADDR( hashTableAlloc(nbatch * sizeof(int), hashtable)); for (i=0; i<nbatch; i++) { tempname = hashTableAlloc(12, hashtable); mk_hj_temp(ABSADDR(tempname)); innerbatchNames[i] = tempname; innerbatchPos[i] = -1; innerbatchSizes[i] = 0; } hashtable->innerbatchNames = RELADDR(innerbatchNames); hashtable->innerbatchPos = RELADDR(innerbatchPos); hashtable->innerbatchSizes = RELADDR(innerbatchSizes); } else { hashtable->outerbatchNames = (RelativeAddr)NULL; hashtable->outerbatchPos = (RelativeAddr)NULL; hashtable->innerbatchNames = (RelativeAddr)NULL; hashtable->innerbatchPos = (RelativeAddr)NULL; hashtable->innerbatchSizes = (RelativeAddr)NULL; } hashtable->batch = (RelativeAddr)LONGALIGN(hashtable->top + bucketsize * nbuckets); hashtable->overflownext=hashtable->batch + nbatch * BLCKSZ; /* ---------------- * initialize each hash bucket * ---------------- */ bucket = (HashBucket)ABSADDR(hashtable->top); for (i=0; i<nbuckets; i++) { bucket->top = RELADDR((char*)bucket + sizeof(*bucket)); bucket->bottom = bucket->top; bucket->firstotuple = bucket->lastotuple = -1; bucket = (HashBucket)LONGALIGN(((char*)bucket + bucketsize)); } return(hashtable); }
int ExecCountSlotsGroup(Group *node) { return ExecCountSlotsNode(outerPlan(node)) + GROUP_NSLOTS; }
/* ---------------------------------------------------------------- * ExecInitBitmapHeapScan * * Initializes the scan's state information. * ---------------------------------------------------------------- */ BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) { BitmapHeapScanState *scanstate; Relation currentRelation; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * Assert caller didn't ask for an unsafe snapshot --- see comments at * head of file. */ Assert(IsMVCCSnapshot(estate->es_snapshot)); /* * create state structure */ scanstate = makeNode(BitmapHeapScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; scanstate->tbm = NULL; scanstate->tbmres = NULL; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); scanstate->ss.ps.ps_TupFromTlist = false; /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); scanstate->bitmapqualorig = (List *) ExecInitExpr((Expr *) node->bitmapqualorig, (PlanState *) scanstate); #define BITMAPHEAPSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * open the base relation and acquire appropriate lock on it. */ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid); scanstate->ss.ss_currentRelation = currentRelation; /* * Even though we aren't going to do a conventional seqscan, it is useful * to create a HeapScanDesc --- this checks the relation size and sets up * statistical infrastructure for us. */ scanstate->ss.ss_currentScanDesc = heap_beginscan(currentRelation, estate->es_snapshot, 0, NULL); /* * One problem is that heap_beginscan counts a "sequential scan" start, * when we actually aren't doing any such thing. Reverse out the added * scan count. (Eventually we may want to count bitmap scans separately.) */ pgstat_discount_heap_scan(&scanstate->ss.ss_currentScanDesc->rs_pgstat_info); /* * get the scan type from the relation descriptor. */ ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); /* * initialize child nodes * * We do this last because the child nodes will open indexscans on our * relation's indexes, and we want to be sure we have acquired a lock on * the relation first. */ outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * all done. */ return scanstate; }
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators) { HashJoinTable hashtable; Plan *outerNode; int nbuckets; int nbatch; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; /* * Get information about the size of the relation to be hashed (it's the * "outer" subtree of this node, but the inner relation of the hashjoin). * Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, &nbuckets, &nbatch); #ifdef HJDEBUG printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets); #endif /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. */ hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; hashtable->buckets = NULL; hashtable->nbatch = nbatch; hashtable->curbatch = 0; hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; hashtable->growEnabled = true; hashtable->totalTuples = 0; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->spaceUsed = 0; hashtable->spaceAllowed = work_mem * 1024L; /* * Get info about the hash functions to be used for each hash key. */ nkeys = list_length(hashOperators); hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); i = 0; foreach(ho, hashOperators) { Oid hashfn; hashfn = get_op_hash_function(lfirst_oid(ho)); if (!OidIsValid(hashfn)) elog(ERROR, "could not find hash function for hash operator %u", lfirst_oid(ho)); fmgr_info(hashfn, &hashtable->hashfunctions[i]); i++; }
int ExecCountSlotsBitmapHeapScan(BitmapHeapScan *node) { return ExecCountSlotsNode(outerPlan((Plan *) node)) + ExecCountSlotsNode(innerPlan((Plan *) node)) + BITMAPHEAPSCAN_NSLOTS; }
int ExecCountSlotsResult(Result *node) { return ExecCountSlotsNode(outerPlan(node)) + RESULT_NSLOTS; }
/* ----------------- * ExecInitGroup * * Creates the run-time information for the group node produced by the * planner and initializes its outer subtree * ----------------- */ GroupState * ExecInitGroup(Group *node, EState *estate) { GroupState *grpstate; /* * create state structure */ grpstate = makeNode(GroupState); grpstate->ss.ps.plan = (Plan *) node; grpstate->ss.ps.state = estate; grpstate->grp_firstTuple = NULL; grpstate->grp_done = FALSE; /* * create expression context */ ExecAssignExprContext(estate, &grpstate->ss.ps); #define GROUP_NSLOTS 2 /* * tuple table initialization */ ExecInitScanTupleSlot(estate, &grpstate->ss); ExecInitResultTupleSlot(estate, &grpstate->ss.ps); /* * initialize child expressions */ grpstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) grpstate); grpstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->plan.qual, (PlanState *) grpstate); /* * initialize child nodes */ outerPlanState(grpstate) = ExecInitNode(outerPlan(node), estate); /* * initialize tuple type. */ ExecAssignScanTypeFromOuterPlan(&grpstate->ss); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&grpstate->ss.ps); ExecAssignProjectionInfo(&grpstate->ss.ps); /* * Precompute fmgr lookup data for inner loop */ grpstate->eqfunctions = execTuplesMatchPrepare(ExecGetScanType(&grpstate->ss), node->numCols, node->grpColIdx); return grpstate; }
/* ---------------------------------------------------------------- * ExecInitSeqScan * ---------------------------------------------------------------- */ SeqScanState * ExecInitSeqScan(SeqScan *node, EState *estate, int eflags) { SeqScanState *scanstate; /* * Once upon a time it was possible to have an outerPlan of a SeqScan, but * not any more. */ Assert(outerPlan(node) == NULL); Assert(innerPlan(node) == NULL); /* * create state structure */ scanstate = makeNode(SeqScanState); scanstate->ps.plan = (Plan *) node; scanstate->ps.state = estate; scanstate->ss->scan_state = SCAN_INIT; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ps); /* * initialize child expressions */ scanstate->ps.targetlist = (List *) ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) scanstate); scanstate->ps.qual = (List *) ExecInitExpr((Expr *) node->plan.qual, (PlanState *) scanstate); #define SEQSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ps); ExecInitScanTupleSlot(estate, scanstate); /* * initialize scan relation */ InitScanRelation(scanstate, estate); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ps); ExecAssignScanProjectionInfo(scanstate); initGpmonPktForSeqScan((Plan *)node, &scanstate->ps.gpmon_pkt, estate); /* * If eflag contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, * then this node is not eager free safe. */ scanstate->ps.delayEagerFree = ((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0); return scanstate; }
/* ---------------------------------------------------------------- * ExecInitBitmapHeapScan * * Initializes the scan's state information. * ---------------------------------------------------------------- */ BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) { BitmapHeapScanState *scanstate; Relation currentRelation; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * Assert caller didn't ask for an unsafe snapshot --- see comments at * head of file. */ Assert(IsMVCCSnapshot(estate->es_snapshot)); /* * create state structure */ scanstate = makeNode(BitmapHeapScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; scanstate->tbm = NULL; scanstate->tbmiterator = NULL; scanstate->tbmres = NULL; scanstate->exact_pages = 0; scanstate->lossy_pages = 0; scanstate->prefetch_iterator = NULL; scanstate->prefetch_pages = 0; scanstate->prefetch_target = 0; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); scanstate->ss.ps.ps_TupFromTlist = false; /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); scanstate->bitmapqualorig = (List *) ExecInitExpr((Expr *) node->bitmapqualorig, (PlanState *) scanstate); /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); /* * open the base relation and acquire appropriate lock on it. */ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); scanstate->ss.ss_currentRelation = currentRelation; /* * Even though we aren't going to do a conventional seqscan, it is useful * to create a HeapScanDesc --- most of the fields in it are usable. */ scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation, estate->es_snapshot, 0, NULL); /* * get the scan type from the relation descriptor. */ ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation)); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); /* * initialize child nodes * * We do this last because the child nodes will open indexscans on our * relation's indexes, and we want to be sure we have acquired a lock on * the relation first. */ outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * all done. */ return scanstate; }
/* ---------------------------------------------------------------- * ExecInitNestLoop * ---------------------------------------------------------------- */ NestLoopState * ExecInitNestLoop(NestLoop *node, EState *estate) { NestLoopState *nlstate; NL1_printf("ExecInitNestLoop: %s\n", "initializing node"); /* * create state structure */ nlstate = makeNode(NestLoopState); nlstate->js.ps.plan = (Plan *) node; nlstate->js.ps.state = estate; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &nlstate->js.ps); /* * initialize child expressions */ nlstate->js.ps.targetlist = (List *) ExecInitExpr((Expr *) node->join.plan.targetlist, (PlanState *) nlstate); nlstate->js.ps.qual = (List *) ExecInitExpr((Expr *) node->join.plan.qual, (PlanState *) nlstate); nlstate->js.jointype = node->join.jointype; nlstate->js.joinqual = (List *) ExecInitExpr((Expr *) node->join.joinqual, (PlanState *) nlstate); /* * initialize child nodes */ outerPlanState(nlstate) = ExecInitNode(outerPlan(node), estate); innerPlanState(nlstate) = ExecInitNode(innerPlan(node), estate); #define NESTLOOP_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &nlstate->js.ps); switch (node->join.jointype) { case JOIN_INNER: case JOIN_IN: break; case JOIN_LEFT: nlstate->nl_NullInnerTupleSlot = ExecInitNullTupleSlot(estate, ExecGetResultType(innerPlanState(nlstate))); break; default: elog(ERROR, "unrecognized join type: %d", (int) node->join.jointype); } /* * initialize tuple type and projection info */ ExecAssignResultTypeFromTL(&nlstate->js.ps); ExecAssignProjectionInfo(&nlstate->js.ps); /* * finally, wipe the current outer tuple clean. */ nlstate->js.ps.ps_OuterTupleSlot = NULL; nlstate->js.ps.ps_TupFromTlist = false; nlstate->nl_NeedNewOuter = true; nlstate->nl_MatchedOuter = false; NL1_printf("ExecInitNestLoop: %s\n", "node initialized"); return nlstate; }
/* Returns the number of slots needed for this operator */ int ExecCountSlotsBitmapTableScan(BitmapTableScan *node) { return ExecCountSlotsNode(outerPlan((Plan *) node)) + ExecCountSlotsNode(innerPlan((Plan *) node)) + BITMAPTABLESCAN_NSLOTS; }
/* ---------------------------------------------------------------- * ExecInitGather * ---------------------------------------------------------------- */ GatherState * ExecInitGather(Gather *node, EState *estate, int eflags) { GatherState *gatherstate; Plan *outerNode; bool hasoid; TupleDesc tupDesc; /* Gather node doesn't have innerPlan node. */ Assert(innerPlan(node) == NULL); /* * create state structure */ gatherstate = makeNode(GatherState); gatherstate->ps.plan = (Plan *) node; gatherstate->ps.state = estate; gatherstate->need_to_scan_locally = !node->single_copy; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &gatherstate->ps); /* * initialize child expressions */ gatherstate->ps.targetlist = (List *) ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) gatherstate); gatherstate->ps.qual = (List *) ExecInitExpr((Expr *) node->plan.qual, (PlanState *) gatherstate); /* * tuple table initialization */ gatherstate->funnel_slot = ExecInitExtraTupleSlot(estate); ExecInitResultTupleSlot(estate, &gatherstate->ps); /* * now initialize outer plan */ outerNode = outerPlan(node); outerPlanState(gatherstate) = ExecInitNode(outerNode, estate, eflags); gatherstate->ps.ps_TupFromTlist = false; /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&gatherstate->ps); ExecAssignProjectionInfo(&gatherstate->ps, NULL); /* * Initialize funnel slot to same tuple descriptor as outer plan. */ if (!ExecContextForcesOids(&gatherstate->ps, &hasoid)) hasoid = false; tupDesc = ExecTypeFromTL(outerNode->targetlist, hasoid); ExecSetSlotDescriptor(gatherstate->funnel_slot, tupDesc); return gatherstate; }
int ExecCountSlotsBitmapIndexScan(BitmapIndexScan *node) { return ExecCountSlotsNode(outerPlan((Plan *) node)) + ExecCountSlotsNode(innerPlan((Plan *) node)) + BITMAPINDEXSCAN_NSLOTS; }
/* ---------------------------------------------------------------- * ExecHashTableCreate * * create an empty hashtable data structure for hashjoin. * ---------------------------------------------------------------- */ HashJoinTable ExecHashTableCreate(HashState *hashState, HashJoinState *hjstate, List *hashOperators, uint64 operatorMemKB) { HashJoinTable hashtable; Plan *outerNode; int nbuckets; int nbatch; int nkeys; int i; ListCell *ho; MemoryContext oldcxt; START_MEMORY_ACCOUNT(hashState->ps.memoryAccount); { Hash *node = (Hash *) hashState->ps.plan; /* * Get information about the size of the relation to be hashed (it's the * "outer" subtree of this node, but the inner relation of the hashjoin). * Compute the appropriate size of the hash table. */ outerNode = outerPlan(node); /* * Initialize the hash table control block. * * The hashtable control block is just palloc'd from the executor's * per-query memory context. */ hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData)); hashtable->buckets = NULL; hashtable->bloom = NULL; hashtable->curbatch = 0; hashtable->growEnabled = true; hashtable->totalTuples = 0; hashtable->batches = NULL; hashtable->work_set = NULL; hashtable->state_file = NULL; hashtable->spaceAllowed = operatorMemKB * 1024L; hashtable->stats = NULL; hashtable->eagerlyReleased = false; hashtable->hjstate = hjstate; /* * Create temporary memory contexts in which to keep the hashtable working * storage. See notes in executor/hashjoin.h. */ hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext, "HashTableContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt, "HashBatchContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* CDB */ /* track temp buf file allocations in separate context */ hashtable->bfCxt = AllocSetContextCreate(CurrentMemoryContext, "hbbfcxt", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, &hashtable->nbuckets, &hashtable->nbatch, operatorMemKB); nbuckets = hashtable->nbuckets; nbatch = hashtable->nbatch; hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; #ifdef HJDEBUG elog(LOG, "HJ: nbatch = %d, nbuckets = %d\n", nbatch, nbuckets); #endif /* * Get info about the hash functions to be used for each hash key. * Also remember whether the join operators are strict. */ nkeys = list_length(hashOperators); hashtable->outer_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); i = 0; foreach(ho, hashOperators) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; Oid right_hashfn; if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn)) elog(ERROR, "could not find hash function for hash operator %u", hashop); fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); i++; } /* * Allocate data that will live for the life of the hashjoin */ oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); #ifdef HJDEBUG { /* Memory needed to allocate hashtable->batches, which consists of nbatch pointers */ int md_batch_size = (nbatch * sizeof(hashtable->batches[0])) / (1024 * 1024); /* Memory needed to allocate hashtable->batches entries, which consist of nbatch HashJoinBatchData structures */ int md_batch_data_size = (nbatch * sizeof(HashJoinBatchData)) / (1024 * 1024); /* Memory needed to allocate hashtable->buckets, which consists of nbuckets HashJoinTuple structures*/ int md_buckets_size = (nbuckets * sizeof(HashJoinTuple)) / (1024 * 1024); /* Memory needed to allocate hashtable->bloom, which consists of nbuckets int64 values */ int md_bloom_size = (nbuckets * sizeof(uint64)) / (1024 * 1024); /* Total memory needed for the hashtable metadata */ int md_tot = md_batch_size + md_batch_data_size + md_buckets_size + md_bloom_size; elog(LOG, "About to allocate HashTable. HT_MEMORY=%dMB Memory needed for metadata: MDBATCH_ARR=%dMB, MDBATCH_DATA=%dMB, MDBUCKETS_ARR=%dMB, MDBLOOM_ARR=%dMB, TOTAL=%dMB", (int) (hashtable->spaceAllowed / (1024 * 1024)), md_batch_size, md_batch_data_size, md_buckets_size, md_bloom_size, md_tot); elog(LOG, "sizeof(hashtable->batches[0])=%d, sizeof(HashJoinBatchData)=%d, sizeof(HashJoinTuple)=%d, sizeof(uint64)=%d", (int) sizeof(hashtable->batches[0]), (int) sizeof(HashJoinBatchData), (int) sizeof(HashJoinTuple), (int) sizeof(uint64)); } #endif /* array of BatchData ptrs */ hashtable->batches = (HashJoinBatchData **)palloc(nbatch * sizeof(hashtable->batches[0])); /* one BatchData entry per initial batch */ for (i = 0; i < nbatch; i++) hashtable->batches[i] = (HashJoinBatchData *)palloc0(sizeof(HashJoinBatchData)); /* * Prepare context for the first-scan space allocations; allocate the * hashbucket array therein, and set each bucket "empty". */ MemoryContextSwitchTo(hashtable->batchCxt); hashtable->buckets = (HashJoinTuple *) palloc0(nbuckets * sizeof(HashJoinTuple)); if(gp_hashjoin_bloomfilter!=0) hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64)); MemoryContextSwitchTo(oldcxt); }
/* ----------------- * ExecInitGroup * * Creates the run-time information for the group node produced by the * planner and initializes its outer subtree * ----------------- */ GroupState * ExecInitGroup(Group *node, EState *estate, int eflags) { GroupState *grpstate; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ grpstate = makeNode(GroupState); grpstate->ss.ps.plan = (Plan *) node; grpstate->ss.ps.state = estate; grpstate->grp_done = FALSE; /* * create expression context */ ExecAssignExprContext(estate, &grpstate->ss.ps); #define GROUP_NSLOTS 2 /* * tuple table initialization */ ExecInitScanTupleSlot(estate, &grpstate->ss); ExecInitResultTupleSlot(estate, &grpstate->ss.ps); /* * initialize child expressions */ grpstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) grpstate); grpstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->plan.qual, (PlanState *) grpstate); /* * initialize child nodes */ outerPlanState(grpstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * initialize tuple type. */ ExecAssignScanTypeFromOuterPlan(&grpstate->ss); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&grpstate->ss.ps); ExecAssignProjectionInfo(&grpstate->ss.ps, NULL); /* * Precompute fmgr lookup data for inner loop */ grpstate->eqfunctions = execTuplesMatchPrepare(node->numCols, node->grpOperators); return grpstate; }
/* ---------------------------------------------------------------- * ExecInitSort * * Creates the run-time state information for the sort node * produced by the planner and initializes its outer subtree. * ---------------------------------------------------------------- */ SortState * ExecInitSort(Sort *node, EState *estate, int eflags) { SortState *sortstate; SO1_printf("ExecInitSort: %s\n", "initializing sort node"); /* * create state structure */ sortstate = makeNode(SortState); sortstate->ss.ps.plan = (Plan *) node; sortstate->ss.ps.state = estate; /* * We must have random access to the sort output to do backward scan or * mark/restore. We also prefer to materialize the sort output if we * might be called on to rewind and replay it many times. */ sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0; sortstate->bounded = false; sortstate->sort_Done = false; sortstate->tuplesortstate = NULL; /* * Miscellaneous initialization * * Sort nodes don't initialize their ExprContexts because they never call * ExecQual or ExecProject. */ /* * tuple table initialization * * sort nodes only return scan tuples from their sorted relation. */ ExecInitResultTupleSlot(estate, &sortstate->ss.ps); ExecInitScanTupleSlot(estate, &sortstate->ss); /* * initialize child nodes * * We shield the child node from the need to support REWIND, BACKWARD, or * MARK/RESTORE. */ eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK); outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags); /* * initialize tuple type. no need to initialize projection info because * this node doesn't do projections. */ ExecAssignResultTypeFromTL(&sortstate->ss.ps); ExecAssignScanTypeFromOuterPlan(&sortstate->ss); sortstate->ss.ps.ps_ProjInfo = NULL; SO1_printf("ExecInitSort: %s\n", "sort node initialized"); return sortstate; }
/* ---------------------------------------------------------------- * ExecInitBitmapHeapScan * * Initializes the scan's state information. * ---------------------------------------------------------------- */ BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) { BitmapHeapScanState *scanstate; RangeTblEntry *rtentry; Index relid; Oid reloid; Relation currentRelation; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ scanstate = makeNode(BitmapHeapScanState); scanstate->ss.ps.plan = (Plan *) node; scanstate->ss.ps.state = estate; scanstate->tbm = NULL; scanstate->tbmres = NULL; /* * Miscellaneous initialization * * create expression context for node */ ExecAssignExprContext(estate, &scanstate->ss.ps); /* * initialize child expressions */ scanstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, (PlanState *) scanstate); scanstate->ss.ps.qual = (List *) ExecInitExpr((Expr *) node->scan.plan.qual, (PlanState *) scanstate); scanstate->bitmapqualorig = (List *) ExecInitExpr((Expr *) node->bitmapqualorig, (PlanState *) scanstate); #define BITMAPHEAPSCAN_NSLOTS 2 /* * tuple table initialization */ ExecInitResultTupleSlot(estate, &scanstate->ss.ps); ExecInitScanTupleSlot(estate, &scanstate->ss); CXT1_printf("ExecInitBitmapHeapScan: context is %d\n", CurrentMemoryContext); /* * open the base relation and acquire AccessShareLock on it. */ relid = node->scan.scanrelid; rtentry = rt_fetch(relid, estate->es_range_table); reloid = rtentry->relid; currentRelation = heap_open(reloid, AccessShareLock); scanstate->ss.ss_currentRelation = currentRelation; /* * Even though we aren't going to do a conventional seqscan, it is useful * to create a HeapScanDesc --- this checks the relation size and sets up * statistical infrastructure for us. */ scanstate->ss.ss_currentScanDesc = heap_beginscan(currentRelation, estate->es_snapshot, 0, NULL); /* * One problem is that heap_beginscan counts a "sequential scan" start, * when we actually aren't doing any such thing. Reverse out the added * scan count. (Eventually we may want to count bitmap scans separately.) */ pgstat_discount_heap_scan(&scanstate->ss.ss_currentScanDesc->rs_pgstat_info); /* * get the scan type from the relation descriptor. */ ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation), false); /* * Initialize result tuple type and projection info. */ ExecAssignResultTypeFromTL(&scanstate->ss.ps); ExecAssignScanProjectionInfo(&scanstate->ss); /* * initialize child nodes * * We do this last because the child nodes will open indexscans on our * relation's indexes, and we want to be sure we have acquired a lock * on the relation first. */ outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); if (IsA(outerPlan(node), BitmapIndexScan)) node->inmem = (((BitmapIndexScan*)outerPlan(node))->indexam != BITMAP_AM_OID); else if (IsA(outerPlan(node), BitmapAnd)) node->inmem = ((BitmapAnd*)outerPlan(node))->inmem; else if (IsA(outerPlan(node), BitmapOr)) node->inmem = ((BitmapOr*)outerPlan(node))->inmem; else node->inmem = true; /* * all done. */ return scanstate; }