//--------------------------------------------------------------------------- // @function: // CExpressionHandle::CopyCostCtxtProps // // @doc: // Cache plan properties of cost context and its children on the handle // //--------------------------------------------------------------------------- void CExpressionHandle::CopyCostCtxtProps() { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL == m_pdrgpdp); GPOS_ASSERT(NULL == m_pdp); // add-ref context properties CDrvdProp *pdp = m_pcc->Pdpplan(); pdp->AddRef(); m_pdp = pdp; // add-ref child group expressions' properties const ULONG ulArity = UlArity(); m_pdrgpdp = GPOS_NEW(m_pmp) DrgPdp(m_pmp, ulArity); for (ULONG ul = 0; ul < ulArity; ul++) { CGroup *pgroupChild = (*m_pgexpr)[ul]; if (!pgroupChild->FScalar()) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; GPOS_ASSERT(NULL != pocChild); CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); pdp = pccChild->Pdpplan(); pdp->AddRef(); m_pdrgpdp->Append(pdp); } } }
//--------------------------------------------------------------------------- // @function: // CPhysicalMotion::FValidContext // // @doc: // Check if optimization context is valid // //--------------------------------------------------------------------------- BOOL CPhysicalMotion::FValidContext ( IMemoryPool *pmp, COptimizationContext *poc, DrgPoc *pdrgpocChild ) const { GPOS_ASSERT(NULL != pdrgpocChild); GPOS_ASSERT(1 == pdrgpocChild->UlLength()); COptimizationContext *pocChild = (*pdrgpocChild)[0]; CCostContext *pccBest = pocChild->PccBest(); GPOS_ASSERT(NULL != pccBest); CDrvdPropPlan *pdpplanChild = pccBest->Pdpplan(); if (pdpplanChild->Ppim()->FContainsUnresolved()) { return false; } CExpressionHandle exprhdl(pmp); exprhdl.Attach(pccBest); exprhdl.DeriveProps(NULL /*CDrvdPropCtxt*/); if (exprhdl.FHasOuterRefs()) { // disallow plans with outer references below motion operator return false; } CEnfdDistribution *ped = poc->Prpp()->Ped(); if (ped->FCompatible(this->Pds()) && ped->FCompatible(pdpplanChild->Pds())) { // required distribution is compatible with the distribution delivered by Motion and its child plan, // in this case, Motion is redundant since child plan delivers the required distribution return false; } return true; }
//--------------------------------------------------------------------------- // @function: // CCostContext::ComputeCost // // @doc: // Compute cost of current context, // // the function extracts cardinality and row width of owner operator // and child operators, and then adjusts row estimate obtained from // statistics based on data distribution obtained from plan properties, // // statistics row estimate is computed on logical expressions by // estimating the size of the whole relation regardless data // distribution, on the other hand, optimizer's cost model computes // the cost of a plan instance on some segment, // // when a plan produces tuples distributed to multiple segments, we // need to divide statistics row estimate by the number segments to // provide a per-segment row estimate for cost computation, // // Note that this scaling of row estimate cannot happen during // statistics derivation since plans are not created yet at this point // // this function also extracts number of rebinds of owner operator child // operators, if statistics are computed using predicates with external // parameters (outer references), number of rebinds is the total number // of external parameters' values // //--------------------------------------------------------------------------- CCost CCostContext::CostCompute ( IMemoryPool *pmp, DrgPcost *pdrgpcostChildren ) { // derive context stats DeriveStats(); ULONG ulArity = 0; if (NULL != m_pdrgpoc) { ulArity = Pdrgpoc()->UlLength(); } m_pstats->AddRef(); ICostModel::SCostingInfo ci(pmp, ulArity, GPOS_NEW(pmp) ICostModel::CCostingStats(m_pstats)); ICostModel *pcm = COptCtxt::PoctxtFromTLS()->Pcm(); CExpressionHandle exprhdl(pmp); exprhdl.Attach(this); // extract local costing info DOUBLE dRows = m_pstats->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRows = DRowsPerHost().DVal(); } ci.SetRows(dRows); DOUBLE dWidth = 
m_pstats->DWidth(pmp, m_poc->Prpp()->PcrsRequired()).DVal(); ci.SetWidth(dWidth); DOUBLE dRebinds = m_pstats->DRebinds().DVal(); ci.SetRebinds(dRebinds); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebinds) && "invalid number of rebinds when there are no outer references"); // extract children costing info for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pdrgpoc)[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); IStatistics *pstatsChild = pccChild->Pstats(); DOUBLE dRowsChild = pstatsChild->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == pccChild->Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRowsChild = pccChild->DRowsPerHost().DVal(); } ci.SetChildRows(ul, dRowsChild); DOUBLE dWidthChild = pstatsChild->DWidth(pmp, pocChild->Prpp()->PcrsRequired()).DVal(); ci.SetChildWidth(ul, dWidthChild); DOUBLE dRebindsChild = pstatsChild->DRebinds().DVal(); ci.SetChildRebinds(ul, dRebindsChild); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(ul), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebindsChild) && "invalid number of rebinds when there are no outer references"); DOUBLE dCostChild = (*pdrgpcostChildren)[ul]->DVal(); ci.SetChildCost(ul, dCostChild); } // compute cost using the underlying cost model return pcm->Cost(exprhdl, &ci); }
//--------------------------------------------------------------------------- // @function: // COptimizationContext::PrppCTEProducer // // @doc: // Compute required properties to CTE producer based on plan properties // of CTE consumer // //--------------------------------------------------------------------------- CReqdPropPlan * COptimizationContext::PrppCTEProducer ( IMemoryPool *mp, COptimizationContext *poc, ULONG ulSearchStages ) { GPOS_ASSERT(NULL != poc); GPOS_ASSERT(NULL != poc->PccBest()); CCostContext *pccBest = poc->PccBest(); CGroupExpression *pgexpr = pccBest->Pgexpr(); BOOL fOptimizeCTESequence = ( COperator::EopPhysicalSequence == pgexpr->Pop()->Eopid() && (*pgexpr)[0]->FHasCTEProducer() ); if (!fOptimizeCTESequence) { // best group expression is not a CTE sequence return NULL; } COptimizationContext *pocProducer = (*pgexpr)[0]->PocLookupBest(mp, ulSearchStages, (*pccBest->Pdrgpoc())[0]->Prpp()); if (NULL == pocProducer) { return NULL; } CCostContext *pccProducer = pocProducer->PccBest(); if (NULL == pccProducer) { return NULL; } COptimizationContext *pocConsumer = (*pgexpr)[1]->PocLookupBest(mp, ulSearchStages, (*pccBest->Pdrgpoc())[1]->Prpp()); if (NULL == pocConsumer) { return NULL; } CCostContext *pccConsumer = pocConsumer->PccBest(); if (NULL == pccConsumer) { return NULL; } CColRefSet *pcrsInnerOutput = CDrvdPropRelational::GetRelationalProperties((*pgexpr)[1]->Pdp())->PcrsOutput(); CPhysicalCTEProducer *popProducer = CPhysicalCTEProducer::PopConvert(pccProducer->Pgexpr()->Pop()); UlongToColRefMap *colref_mapping = COptCtxt::PoctxtFromTLS()->Pcteinfo()->PhmulcrConsumerToProducer(mp, popProducer->UlCTEId(), pcrsInnerOutput, popProducer->Pdrgpcr()); CReqdPropPlan *prppProducer = CReqdPropPlan::PrppRemap(mp, pocProducer->Prpp(), pccConsumer->Pdpplan(), colref_mapping); colref_mapping->Release(); if (prppProducer->Equals(pocProducer->Prpp())) { prppProducer->Release(); return NULL; } return prppProducer; }
//---------------------------------------------------------------------------
//	@function:
//		CPhysicalSpool::FValidContext
//
//	@doc:
//		Check if optimization context is valid for this spool;
//		the context is rejected when the best child plan reports unresolved
//		entries through Ppim(); when both the required rewindability and the
//		child's delivered rewindability carry a motion hazard, validity is
//		decided by FEager()
//
//---------------------------------------------------------------------------
BOOL
CPhysicalSpool::FValidContext
	(
	IMemoryPool *,
	COptimizationContext *poc,
	COptimizationContextArray *pdrgpocChild
	)
	const
{
	GPOS_ASSERT(NULL != pdrgpocChild);
	GPOS_ASSERT(1 == pdrgpocChild->Size());

	// spool has a single child; inspect the best plan found for it
	COptimizationContext *pocOnlyChild = (*pdrgpocChild)[0];
	CCostContext *pccChildBest = pocOnlyChild->PccBest();
	GPOS_ASSERT(NULL != pccChildBest);

	// Partition selections that happen outside of a physical spool do not do
	// any good on rescan: a physical spool blocks the rescan from the entire
	// subtree (in particular, any dynamic scan) underneath it. That means when
	// we have a dynamic scan under a spool, and a corresponding partition
	// selector outside the spool, we run the risk of materializing the wrong
	// results.
	//
	// For example, the following plan is invalid because the partition selector
	// won't be able to influence the inner side of the nested loop join as
	// intended ("blocked" by the spool):
	//
	// +--CPhysicalMotionGather(master)
	//    +--CPhysicalInnerNLJoin
	//       |--CPhysicalPartitionSelector
	//       |  +--CPhysicalMotionBroadcast
	//       |     +--CPhysicalTableScan "foo" ("foo")
	//       |--CPhysicalSpool
	//       |  +--CPhysicalLeftOuterHashJoin
	//       |     |--CPhysicalDynamicTableScan "pt" ("pt")
	//       |     |--CPhysicalMotionHashDistribute
	//       |     |  +--CPhysicalTableScan "bar" ("bar")
	//       |     +--CScalarCmp (=)
	//       |        |--CScalarIdent "d" (19)
	//       |        +--CScalarIdent "dk" (9)
	//       +--CScalarCmp (<)
	//          |--CScalarIdent "a" (0)
	//          +--CScalarIdent "partkey" (10)
	CDrvdPropPlan *pdpplan = pccChildBest->Pdpplan();
	if (pdpplan->Ppim()->FContainsUnresolved())
	{
		return false;
	}

	// Discard any context that is requesting rewindability with motion hazard
	// handling while the physical spool is streaming with a motion underneath
	// it. We do not want to add a blocking spool over a spool, as spooling
	// twice will be expensive, hence such a context is invalidated.
	CEnfdRewindability *perRequired = poc->Prpp()->Per();
	const BOOL fHazardRequiredAndDelivered =
		perRequired->PrsRequired()->HasMotionHazard() &&
		pdpplan->Prs()->HasMotionHazard();
	if (!fHazardRequiredAndDelivered)
	{
		return true;
	}

	return FEager();
}