//--------------------------------------------------------------------------- // @function: // CExpressionHandle::CopyCostCtxtProps // // @doc: // Cache plan properties of cost context and its children on the handle // //--------------------------------------------------------------------------- void CExpressionHandle::CopyCostCtxtProps() { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL == m_pdrgpdp); GPOS_ASSERT(NULL == m_pdp); // add-ref context properties CDrvdProp *pdp = m_pcc->Pdpplan(); pdp->AddRef(); m_pdp = pdp; // add-ref child group expressions' properties const ULONG ulArity = UlArity(); m_pdrgpdp = GPOS_NEW(m_pmp) DrgPdp(m_pmp, ulArity); for (ULONG ul = 0; ul < ulArity; ul++) { CGroup *pgroupChild = (*m_pgexpr)[ul]; if (!pgroupChild->FScalar()) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; GPOS_ASSERT(NULL != pocChild); CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); pdp = pccChild->Pdpplan(); pdp->AddRef(); m_pdrgpdp->Append(pdp); } } }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::Pop // // @doc: // Get child operator from handle // //--------------------------------------------------------------------------- COperator * CExpressionHandle::Pop ( ULONG ulChildIndex ) const { GPOS_ASSERT(ulChildIndex < UlArity()); if (NULL != m_pexpr) { GPOS_ASSERT(NULL == m_pgexpr); return (*m_pexpr)[ulChildIndex]->Pop(); } if (NULL != m_pcc) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ulChildIndex]; GPOS_ASSERT(NULL != pocChild); CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); return pccChild->Pgexpr()->Pop(); } return NULL; }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::CostCompute // // @doc: // Costing scheme. // //--------------------------------------------------------------------------- CCost CGroupExpression::CostCompute ( IMemoryPool *pmp, CCostContext *pcc ) const { GPOS_ASSERT(NULL != pcc); // prepare cost array DrgPoc *pdrgpoc = pcc->Pdrgpoc(); DrgPcost *pdrgpcostChildren = GPOS_NEW(pmp) DrgPcost(pmp); const ULONG ulLen = pdrgpoc->UlLength(); for (ULONG ul = 0; ul < ulLen; ul++) { COptimizationContext *pocChild = (*pdrgpoc)[ul]; pdrgpcostChildren->Append(GPOS_NEW(pmp) CCost(pocChild->PccBest()->Cost())); } CCost cost = pcc->CostCompute(pmp, pdrgpcostChildren); pdrgpcostChildren->Release(); return cost; }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DerivePlanProps // // @doc: // Derive the properties of the plan carried by attached cost context // //--------------------------------------------------------------------------- void CExpressionHandle::DerivePlanProps ( CDrvdPropCtxtPlan *pdpctxtplan ) { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL != m_pgexpr); GPOS_ASSERT(NULL == m_pdrgpdp); GPOS_ASSERT(NULL == m_pdp); GPOS_CHECK_ABORT; // check if properties have been already derived if (NULL != m_pcc->Pdpplan()) { CopyCostCtxtProps(); return; } GPOS_ASSERT(NULL != pdpctxtplan); // extract children's properties m_pdrgpdp = GPOS_NEW(m_pmp) DrgPdp(m_pmp); const ULONG ulArity = m_pcc->Pdrgpoc()->UlLength(); for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; CDrvdPropPlan *pdpplan = pocChild->PccBest()->Pdpplan(); GPOS_ASSERT(NULL != pdpplan); pdpplan->AddRef(); m_pdrgpdp->Append(pdpplan); // add child props to derivation context CDrvdPropCtxt::AddDerivedProps(pdpplan, pdpctxtplan); } COperator *pop = m_pgexpr->Pop(); if (COperator::EopPhysicalCTEConsumer == pop->Eopid()) { // copy producer plan properties to passed derived plan properties context ULONG ulCTEId = CPhysicalCTEConsumer::PopConvert(pop)->UlCTEId(); CDrvdPropPlan *pdpplan = m_pcc->Poc()->Prpp()->Pcter()->Pdpplan(ulCTEId); if (NULL != pdpplan) { pdpctxtplan->CopyCTEProducerProps(pdpplan, ulCTEId); } } // set the number of expected partition selectors in the context pdpctxtplan->SetExpectedPartitionSelectors(pop, m_pcc); // create/derive local properties m_pdp = Pop()->PdpCreate(m_pmp); m_pdp->Derive(m_pmp, *this, pdpctxtplan); }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::PccInsertBest // // @doc: // Insert given context in hash table only if a better context // does not already exist, // return the context that is kept in hash table // //--------------------------------------------------------------------------- CCostContext * CGroupExpression::PccInsertBest ( CCostContext *pcc ) { GPOS_ASSERT(NULL != pcc); COptimizationContext *poc = pcc->Poc(); const ULONG ulOptReq = pcc->UlOptReq(); // remove existing cost context, if any CCostContext *pccExisting = PccRemove(poc, ulOptReq); CCostContext *pccKept = NULL; // compare existing context with given context if (NULL == pccExisting || pcc->FBetterThan(pccExisting)) { // insert new context pccKept = PccInsert(pcc); GPOS_ASSERT(pccKept == pcc); if (NULL != pccExisting) { if (pccExisting == poc->PccBest()) { // change best cost context of the corresponding optimization context poc->SetBest(pcc); } pccExisting->Release(); } } else { // re-insert existing context pcc->Release(); pccKept = PccInsert(pccExisting); GPOS_ASSERT(pccKept == pccExisting); } return pccKept; }
//--------------------------------------------------------------------------- // @function: // CPhysicalMotion::FValidContext // // @doc: // Check if optimization context is valid // //--------------------------------------------------------------------------- BOOL CPhysicalMotion::FValidContext ( IMemoryPool *pmp, COptimizationContext *poc, DrgPoc *pdrgpocChild ) const { GPOS_ASSERT(NULL != pdrgpocChild); GPOS_ASSERT(1 == pdrgpocChild->UlLength()); COptimizationContext *pocChild = (*pdrgpocChild)[0]; CCostContext *pccBest = pocChild->PccBest(); GPOS_ASSERT(NULL != pccBest); CDrvdPropPlan *pdpplanChild = pccBest->Pdpplan(); if (pdpplanChild->Ppim()->FContainsUnresolved()) { return false; } CExpressionHandle exprhdl(pmp); exprhdl.Attach(pccBest); exprhdl.DeriveProps(NULL /*CDrvdPropCtxt*/); if (exprhdl.FHasOuterRefs()) { // disallow plans with outer references below motion operator return false; } CEnfdDistribution *ped = poc->Prpp()->Ped(); if (ped->FCompatible(this->Pds()) && ped->FCompatible(pdpplanChild->Pds())) { // required distribution is compatible with the distribution delivered by Motion and its child plan, // in this case, Motion is redundant since child plan delivers the required distribution return false; } return true; }
//--------------------------------------------------------------------------- // @function: // CCostContext::FNeedsNewStats // // @doc: // Check if we need to derive new stats for this context, // by default a cost context inherits stats from the owner group, // the only current exception is when part of the plan below cost // context is affected by partition elimination done by partition // selection in some other part of the plan // //--------------------------------------------------------------------------- BOOL CCostContext::FNeedsNewStats() const { COperator *pop = m_pgexpr->Pop(); if (pop->FScalar()) { // return false if scalar operator return false; } CEnfdPartitionPropagation *pepp = Poc()->Prpp()->Pepp(); if (GPOS_FTRACE(EopttraceDeriveStatsForDPE) && CUtils::FPhysicalScan(pop) && CPhysicalScan::PopConvert(pop)->FDynamicScan() && !pepp->PpfmDerived()->FEmpty()) { // context is attached to a dynamic scan that went through // partition elimination in another part of the plan return true; } // we need to derive stats if any child has modified stats BOOL fDeriveStats = false; const ULONG ulArity = Pdrgpoc()->UlLength(); for (ULONG ul = 0; !fDeriveStats && ul < ulArity; ul++) { COptimizationContext *pocChild = (*Pdrgpoc())[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); fDeriveStats = pccChild->FOwnsStats(); } return fDeriveStats; }
//--------------------------------------------------------------------------- // @function: // CCostContext::ComputeCost // // @doc: // Compute cost of current context, // // the function extracts cardinality and row width of owner operator // and child operators, and then adjusts row estimate obtained from // statistics based on data distribution obtained from plan properties, // // statistics row estimate is computed on logical expressions by // estimating the size of the whole relation regardless data // distribution, on the other hand, optimizer's cost model computes // the cost of a plan instance on some segment, // // when a plan produces tuples distributed to multiple segments, we // need to divide statistics row estimate by the number segments to // provide a per-segment row estimate for cost computation, // // Note that this scaling of row estimate cannot happen during // statistics derivation since plans are not created yet at this point // // this function also extracts number of rebinds of owner operator child // operators, if statistics are computed using predicates with external // parameters (outer references), number of rebinds is the total number // of external parameters' values // //--------------------------------------------------------------------------- CCost CCostContext::CostCompute ( IMemoryPool *pmp, DrgPcost *pdrgpcostChildren ) { // derive context stats DeriveStats(); ULONG ulArity = 0; if (NULL != m_pdrgpoc) { ulArity = Pdrgpoc()->UlLength(); } m_pstats->AddRef(); ICostModel::SCostingInfo ci(pmp, ulArity, GPOS_NEW(pmp) ICostModel::CCostingStats(m_pstats)); ICostModel *pcm = COptCtxt::PoctxtFromTLS()->Pcm(); CExpressionHandle exprhdl(pmp); exprhdl.Attach(this); // extract local costing info DOUBLE dRows = m_pstats->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRows = DRowsPerHost().DVal(); } ci.SetRows(dRows); DOUBLE dWidth = 
m_pstats->DWidth(pmp, m_poc->Prpp()->PcrsRequired()).DVal(); ci.SetWidth(dWidth); DOUBLE dRebinds = m_pstats->DRebinds().DVal(); ci.SetRebinds(dRebinds); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebinds) && "invalid number of rebinds when there are no outer references"); // extract children costing info for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pdrgpoc)[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); IStatistics *pstatsChild = pccChild->Pstats(); DOUBLE dRowsChild = pstatsChild->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == pccChild->Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRowsChild = pccChild->DRowsPerHost().DVal(); } ci.SetChildRows(ul, dRowsChild); DOUBLE dWidthChild = pstatsChild->DWidth(pmp, pocChild->Prpp()->PcrsRequired()).DVal(); ci.SetChildWidth(ul, dWidthChild); DOUBLE dRebindsChild = pstatsChild->DRebinds().DVal(); ci.SetChildRebinds(ul, dRebindsChild); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(ul), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebindsChild) && "invalid number of rebinds when there are no outer references"); DOUBLE dCostChild = (*pdrgpcostChildren)[ul]->DVal(); ci.SetChildCost(ul, dCostChild); } // compute cost using the underlying cost model return pcm->Cost(exprhdl, &ci); }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DeriveCostContextStats // // @doc: // Stats derivation based on required plan properties // //--------------------------------------------------------------------------- void CExpressionHandle::DeriveCostContextStats() { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL == m_pcc->Pstats()); // copy group properties and stats CopyGroupProps(); CopyStats(); if (NULL != m_pstats && !m_pcc->FNeedsNewStats()) { // there is no need to derive stats, // stats are copied from owner group return; } CEnfdPartitionPropagation *pepp = m_pcc->Poc()->Prpp()->Pepp(); COperator *pop = Pop(); if (CUtils::FPhysicalScan(pop) && CPhysicalScan::PopConvert(pop)->FDynamicScan() && !pepp->PpfmDerived()->FEmpty()) { // derive stats on dynamic table scan using stats of part selector CPhysicalScan *popScan = CPhysicalScan::PopConvert(m_pgexpr->Pop()); IStatistics *pstatsDS = popScan->PstatsDerive(m_pmp, *this, m_pcc->Poc()->Prpp(), m_pcc->Poc()->Pdrgpstat()); CRefCount::SafeRelease(m_pstats); m_pstats = pstatsDS; return; } // release current stats since we will derive new stats CRefCount::SafeRelease(m_pstats); m_pstats = NULL; // load stats from child cost context -- these may be different from child groups stats CRefCount::SafeRelease(m_pdrgpstat); m_pdrgpstat = NULL; m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp); const ULONG ulArity = m_pcc->Pdrgpoc()->UlLength(); for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); GPOS_ASSERT(NULL != pccChild->Pstats()); pccChild->Pstats()->AddRef(); m_pdrgpstat->Append(pccChild->Pstats()); } if (CPhysical::PopConvert(m_pgexpr->Pop())->FPassThruStats()) { GPOS_ASSERT(1 == m_pdrgpstat->UlLength()); // copy stats from first child (*m_pdrgpstat)[0]->AddRef(); m_pstats = (*m_pdrgpstat)[0]; return; } // derive stats using the best 
logical expression with the same children as attached physical operator CGroupExpression *pgexprForStats = m_pcc->PgexprForStats(); GPOS_ASSERT(NULL != pgexprForStats); CExpressionHandle exprhdl(m_pmp); exprhdl.Attach(pgexprForStats); exprhdl.DeriveProps(NULL /*pdpctxt*/); m_pdrgpstat->AddRef(); exprhdl.m_pdrgpstat = m_pdrgpstat; exprhdl.ComputeReqdProps(m_pcc->Poc()->Prprel(), 0 /*ulOptReq*/); GPOS_ASSERT(NULL == exprhdl.m_pstats); IStatistics *pstats = m_pgexpr->Pgroup()->PstatsCompute(m_pcc->Poc(), exprhdl, pgexprForStats); // copy stats to main handle GPOS_ASSERT(NULL == m_pstats); GPOS_ASSERT(NULL != pstats); pstats->AddRef(); m_pstats = pstats; GPOS_ASSERT(m_pstats != NULL); }
//--------------------------------------------------------------------------- // @function: // COptimizationContext::PrppCTEProducer // // @doc: // Compute required properties to CTE producer based on plan properties // of CTE consumer // //--------------------------------------------------------------------------- CReqdPropPlan * COptimizationContext::PrppCTEProducer ( IMemoryPool *mp, COptimizationContext *poc, ULONG ulSearchStages ) { GPOS_ASSERT(NULL != poc); GPOS_ASSERT(NULL != poc->PccBest()); CCostContext *pccBest = poc->PccBest(); CGroupExpression *pgexpr = pccBest->Pgexpr(); BOOL fOptimizeCTESequence = ( COperator::EopPhysicalSequence == pgexpr->Pop()->Eopid() && (*pgexpr)[0]->FHasCTEProducer() ); if (!fOptimizeCTESequence) { // best group expression is not a CTE sequence return NULL; } COptimizationContext *pocProducer = (*pgexpr)[0]->PocLookupBest(mp, ulSearchStages, (*pccBest->Pdrgpoc())[0]->Prpp()); if (NULL == pocProducer) { return NULL; } CCostContext *pccProducer = pocProducer->PccBest(); if (NULL == pccProducer) { return NULL; } COptimizationContext *pocConsumer = (*pgexpr)[1]->PocLookupBest(mp, ulSearchStages, (*pccBest->Pdrgpoc())[1]->Prpp()); if (NULL == pocConsumer) { return NULL; } CCostContext *pccConsumer = pocConsumer->PccBest(); if (NULL == pccConsumer) { return NULL; } CColRefSet *pcrsInnerOutput = CDrvdPropRelational::GetRelationalProperties((*pgexpr)[1]->Pdp())->PcrsOutput(); CPhysicalCTEProducer *popProducer = CPhysicalCTEProducer::PopConvert(pccProducer->Pgexpr()->Pop()); UlongToColRefMap *colref_mapping = COptCtxt::PoctxtFromTLS()->Pcteinfo()->PhmulcrConsumerToProducer(mp, popProducer->UlCTEId(), pcrsInnerOutput, popProducer->Pdrgpcr()); CReqdPropPlan *prppProducer = CReqdPropPlan::PrppRemap(mp, pocProducer->Prpp(), pccConsumer->Pdpplan(), colref_mapping); colref_mapping->Release(); if (prppProducer->Equals(pocProducer->Prpp())) { prppProducer->Release(); return NULL; } return prppProducer; }
//---------------------------------------------------------------------------
//	@function:
//		CPhysicalSpool::FValidContext
//
//	@doc:
//		Check if optimization context is valid
//
//---------------------------------------------------------------------------
BOOL
CPhysicalSpool::FValidContext
	(
	IMemoryPool *,
	COptimizationContext *poc,
	COptimizationContextArray *pdrgpocChild
	)
	const
{
	GPOS_ASSERT(NULL != pdrgpocChild);
	GPOS_ASSERT(1 == pdrgpocChild->Size());

	COptimizationContext *pocOnlyChild = (*pdrgpocChild)[0];
	CCostContext *pccChildBest = pocOnlyChild->PccBest();
	GPOS_ASSERT(NULL != pccChildBest);

	// A partition selection performed outside of a physical spool is of no
	// use on rescan: the spool blocks the rescan of the whole subtree below
	// it, including any dynamic scan. With a dynamic scan under the spool
	// and its partition selector outside of it, the spool may materialize
	// the wrong results.
	// The plan below is invalid for that reason; the partition selector
	// cannot influence the inner side of the nested loop join as intended
	// because it is "blocked" by the spool:
	//
	// +--CPhysicalMotionGather(master)
	//    +--CPhysicalInnerNLJoin
	//       |--CPhysicalPartitionSelector
	//       |  +--CPhysicalMotionBroadcast
	//       |     +--CPhysicalTableScan "foo" ("foo")
	//       |--CPhysicalSpool
	//       |  +--CPhysicalLeftOuterHashJoin
	//       |     |--CPhysicalDynamicTableScan "pt" ("pt")
	//       |     |--CPhysicalMotionHashDistribute
	//       |     |  +--CPhysicalTableScan "bar" ("bar")
	//       |     +--CScalarCmp (=)
	//       |        |--CScalarIdent "d" (19)
	//       |        +--CScalarIdent "dk" (9)
	//       +--CScalarCmp (<)
	//          |--CScalarIdent "a" (0)
	//          +--CScalarIdent "partkey" (10)
	CDrvdPropPlan *pdpplanChild = pccChildBest->Pdpplan();
	if (pdpplanChild->Ppim()->FContainsUnresolved())
	{
		return false;
	}

	// When rewindability with motion hazard handling is requested and the
	// child plan also has a motion hazard, only an eager spool is accepted.
	// A streaming spool here would need another blocking spool on top of
	// it, and spooling twice is expensive, so that context is invalidated.
	CEnfdRewindability *perRequired = poc->Prpp()->Per();
	if (!perRequired->PrsRequired()->HasMotionHazard())
	{
		return true;
	}

	if (!pdpplanChild->Prs()->HasMotionHazard())
	{
		return true;
	}

	return FEager();
}