//--------------------------------------------------------------------------- // @function: // CExpressionHandle::Pop // // @doc: // Get child operator from handle // //--------------------------------------------------------------------------- COperator * CExpressionHandle::Pop ( ULONG ulChildIndex ) const { GPOS_ASSERT(ulChildIndex < UlArity()); if (NULL != m_pexpr) { GPOS_ASSERT(NULL == m_pgexpr); return (*m_pexpr)[ulChildIndex]->Pop(); } if (NULL != m_pcc) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ulChildIndex]; GPOS_ASSERT(NULL != pocChild); CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); return pccChild->Pgexpr()->Pop(); } return NULL; }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::CopyCostCtxtProps // // @doc: // Cache plan properties of cost context and its children on the handle // //--------------------------------------------------------------------------- void CExpressionHandle::CopyCostCtxtProps() { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL == m_pdrgpdp); GPOS_ASSERT(NULL == m_pdp); // add-ref context properties CDrvdProp *pdp = m_pcc->Pdpplan(); pdp->AddRef(); m_pdp = pdp; // add-ref child group expressions' properties const ULONG ulArity = UlArity(); m_pdrgpdp = GPOS_NEW(m_pmp) DrgPdp(m_pmp, ulArity); for (ULONG ul = 0; ul < ulArity; ul++) { CGroup *pgroupChild = (*m_pgexpr)[ul]; if (!pgroupChild->FScalar()) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; GPOS_ASSERT(NULL != pocChild); CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); pdp = pccChild->Pdpplan(); pdp->AddRef(); m_pdrgpdp->Append(pdp); } } }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::FCostContextExists // // @doc: // Check if cost context already exists in group expression hash table // //--------------------------------------------------------------------------- BOOL CGroupExpression::FCostContextExists ( COptimizationContext *poc, DrgPoc *pdrgpoc ) { GPOS_ASSERT(NULL != poc); // lookup context based on required properties CCostContext *pccFound = NULL; { ShtAcc shta(Sht(), poc); pccFound = shta.PtLookup(); } while (NULL != pccFound) { if (COptimizationContext::FEqualContextIds(pdrgpoc, pccFound->Pdrgpoc())) { // a cost context, matching required properties and child contexts, was already created return true; } { ShtAcc shta(Sht(), poc); pccFound = shta.PtNext(pccFound); } } return false; }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::CleanupContexts // // @doc: // Destroy stored cost contexts in hash table // //--------------------------------------------------------------------------- void CGroupExpression::CleanupContexts() { // need to suspend cancellation while cleaning up { CAutoSuspendAbort asa; ShtIter shtit(m_sht); CCostContext *pcc = NULL; while (NULL != pcc || shtit.FAdvance()) { if (NULL != pcc) { pcc->Release(); } // iter's accessor scope { ShtAccIter shtitacc(shtit); if (NULL != (pcc = shtitacc.Pt())) { shtitacc.Remove(pcc); } } } } #ifdef GPOS_DEBUG CWorker::PwrkrSelf()->ResetTimeSlice(); #endif // GPOS_DEBUG }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::OsPrintCostContexts // // @doc: // Print group expression cost contexts // //--------------------------------------------------------------------------- IOstream & CGroupExpression::OsPrintCostContexts ( IOstream &os, const CHAR *szPrefix ) { if (Pop()->FPhysical() && GPOS_FTRACE(EopttracePrintOptCtxt)) { // print cost contexts os << szPrefix << szPrefix << "Cost Ctxts:" << std::endl; CCostContext *pcc = NULL; ShtIter shtit(this->Sht()); while (shtit.FAdvance()) { { ShtAccIter shtitacc(shtit); pcc = shtitacc.Pt(); } if (NULL != pcc) { os << szPrefix << szPrefix << szPrefix; (void) pcc->OsPrint(os); } } } return os; }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::PccInsertBest // // @doc: // Insert given context in hash table only if a better context // does not already exist, // return the context that is kept in hash table // //--------------------------------------------------------------------------- CCostContext * CGroupExpression::PccInsertBest ( CCostContext *pcc ) { GPOS_ASSERT(NULL != pcc); COptimizationContext *poc = pcc->Poc(); const ULONG ulOptReq = pcc->UlOptReq(); // remove existing cost context, if any CCostContext *pccExisting = PccRemove(poc, ulOptReq); CCostContext *pccKept = NULL; // compare existing context with given context if (NULL == pccExisting || pcc->FBetterThan(pccExisting)) { // insert new context pccKept = PccInsert(pcc); GPOS_ASSERT(pccKept == pcc); if (NULL != pccExisting) { if (pccExisting == poc->PccBest()) { // change best cost context of the corresponding optimization context poc->SetBest(pcc); } pccExisting->Release(); } } else { // re-insert existing context pcc->Release(); pccKept = PccInsert(pccExisting); GPOS_ASSERT(pccKept == pccExisting); } return pccKept; }
//--------------------------------------------------------------------------- // @function: // CPhysicalMotion::FValidContext // // @doc: // Check if optimization context is valid // //--------------------------------------------------------------------------- BOOL CPhysicalMotion::FValidContext ( IMemoryPool *pmp, COptimizationContext *poc, DrgPoc *pdrgpocChild ) const { GPOS_ASSERT(NULL != pdrgpocChild); GPOS_ASSERT(1 == pdrgpocChild->UlLength()); COptimizationContext *pocChild = (*pdrgpocChild)[0]; CCostContext *pccBest = pocChild->PccBest(); GPOS_ASSERT(NULL != pccBest); CDrvdPropPlan *pdpplanChild = pccBest->Pdpplan(); if (pdpplanChild->Ppim()->FContainsUnresolved()) { return false; } CExpressionHandle exprhdl(pmp); exprhdl.Attach(pccBest); exprhdl.DeriveProps(NULL /*CDrvdPropCtxt*/); if (exprhdl.FHasOuterRefs()) { // disallow plans with outer references below motion operator return false; } CEnfdDistribution *ped = poc->Prpp()->Ped(); if (ped->FCompatible(this->Pds()) && ped->FCompatible(pdpplanChild->Pds())) { // required distribution is compatible with the distribution delivered by Motion and its child plan, // in this case, Motion is redundant since child plan delivers the required distribution return false; } return true; }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::PccLookupAll // // @doc: // Lookup all valid cost contexts matching given optimization context // //--------------------------------------------------------------------------- DrgPcc * CGroupExpression::PdrgpccLookupAll ( IMemoryPool *pmp, COptimizationContext *poc ) { GPOS_ASSERT(NULL != poc); DrgPcc *pdrgpcc = GPOS_NEW(pmp) DrgPcc(pmp); CCostContext *pccFound = NULL; BOOL fValid = false; { ShtAcc shta(Sht(), poc); pccFound = shta.PtLookup(); fValid = (NULL != pccFound && pccFound->Cost() != GPOPT_INVALID_COST && !pccFound->FPruned()); } while (NULL != pccFound) { if (fValid) { pccFound->AddRef(); pdrgpcc->Append(pccFound); } { ShtAcc shta(Sht(), poc); pccFound = shta.PtNext(pccFound); fValid = (NULL != pccFound && pccFound->Cost() != GPOPT_INVALID_COST && !pccFound->FPruned()); } } return pdrgpcc; }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::PccLookup // // @doc: // Lookup cost context in hash table; // //--------------------------------------------------------------------------- CCostContext * CGroupExpression::PccLookup ( COptimizationContext *poc, ULONG ulOptReq ) { GPOS_ASSERT(NULL != poc); ShtAcc shta(Sht(), poc); CCostContext *pccFound = shta.PtLookup(); while (NULL != pccFound) { if (ulOptReq == pccFound->UlOptReq()) { return pccFound; } pccFound = shta.PtNext(pccFound); } return NULL; }
//--------------------------------------------------------------------------- // @function: // CCostContext::FNeedsNewStats // // @doc: // Check if we need to derive new stats for this context, // by default a cost context inherits stats from the owner group, // the only current exception is when part of the plan below cost // context is affected by partition elimination done by partition // selection in some other part of the plan // //--------------------------------------------------------------------------- BOOL CCostContext::FNeedsNewStats() const { COperator *pop = m_pgexpr->Pop(); if (pop->FScalar()) { // return false if scalar operator return false; } CEnfdPartitionPropagation *pepp = Poc()->Prpp()->Pepp(); if (GPOS_FTRACE(EopttraceDeriveStatsForDPE) && CUtils::FPhysicalScan(pop) && CPhysicalScan::PopConvert(pop)->FDynamicScan() && !pepp->PpfmDerived()->FEmpty()) { // context is attached to a dynamic scan that went through // partition elimination in another part of the plan return true; } // we need to derive stats if any child has modified stats BOOL fDeriveStats = false; const ULONG ulArity = Pdrgpoc()->UlLength(); for (ULONG ul = 0; !fDeriveStats && ul < ulArity; ul++) { COptimizationContext *pocChild = (*Pdrgpoc())[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); fDeriveStats = pccChild->FOwnsStats(); } return fDeriveStats; }
//--------------------------------------------------------------------------- // @function: // CCostContext::ComputeCost // // @doc: // Compute cost of current context, // // the function extracts cardinality and row width of owner operator // and child operators, and then adjusts row estimate obtained from // statistics based on data distribution obtained from plan properties, // // statistics row estimate is computed on logical expressions by // estimating the size of the whole relation regardless data // distribution, on the other hand, optimizer's cost model computes // the cost of a plan instance on some segment, // // when a plan produces tuples distributed to multiple segments, we // need to divide statistics row estimate by the number segments to // provide a per-segment row estimate for cost computation, // // Note that this scaling of row estimate cannot happen during // statistics derivation since plans are not created yet at this point // // this function also extracts number of rebinds of owner operator child // operators, if statistics are computed using predicates with external // parameters (outer references), number of rebinds is the total number // of external parameters' values // //--------------------------------------------------------------------------- CCost CCostContext::CostCompute ( IMemoryPool *pmp, DrgPcost *pdrgpcostChildren ) { // derive context stats DeriveStats(); ULONG ulArity = 0; if (NULL != m_pdrgpoc) { ulArity = Pdrgpoc()->UlLength(); } m_pstats->AddRef(); ICostModel::SCostingInfo ci(pmp, ulArity, GPOS_NEW(pmp) ICostModel::CCostingStats(m_pstats)); ICostModel *pcm = COptCtxt::PoctxtFromTLS()->Pcm(); CExpressionHandle exprhdl(pmp); exprhdl.Attach(this); // extract local costing info DOUBLE dRows = m_pstats->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRows = DRowsPerHost().DVal(); } ci.SetRows(dRows); DOUBLE dWidth = 
m_pstats->DWidth(pmp, m_poc->Prpp()->PcrsRequired()).DVal(); ci.SetWidth(dWidth); DOUBLE dRebinds = m_pstats->DRebinds().DVal(); ci.SetRebinds(dRebinds); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebinds) && "invalid number of rebinds when there are no outer references"); // extract children costing info for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pdrgpoc)[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); IStatistics *pstatsChild = pccChild->Pstats(); DOUBLE dRowsChild = pstatsChild->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == pccChild->Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRowsChild = pccChild->DRowsPerHost().DVal(); } ci.SetChildRows(ul, dRowsChild); DOUBLE dWidthChild = pstatsChild->DWidth(pmp, pocChild->Prpp()->PcrsRequired()).DVal(); ci.SetChildWidth(ul, dWidthChild); DOUBLE dRebindsChild = pstatsChild->DRebinds().DVal(); ci.SetChildRebinds(ul, dRebindsChild); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(ul), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebindsChild) && "invalid number of rebinds when there are no outer references"); DOUBLE dCostChild = (*pdrgpcostChildren)[ul]->DVal(); ci.SetChildCost(ul, dCostChild); } // compute cost using the underlying cost model return pcm->Cost(exprhdl, &ci); }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DeriveCostContextStats // // @doc: // Stats derivation based on required plan properties // //--------------------------------------------------------------------------- void CExpressionHandle::DeriveCostContextStats() { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL == m_pcc->Pstats()); // copy group properties and stats CopyGroupProps(); CopyStats(); if (NULL != m_pstats && !m_pcc->FNeedsNewStats()) { // there is no need to derive stats, // stats are copied from owner group return; } CEnfdPartitionPropagation *pepp = m_pcc->Poc()->Prpp()->Pepp(); COperator *pop = Pop(); if (CUtils::FPhysicalScan(pop) && CPhysicalScan::PopConvert(pop)->FDynamicScan() && !pepp->PpfmDerived()->FEmpty()) { // derive stats on dynamic table scan using stats of part selector CPhysicalScan *popScan = CPhysicalScan::PopConvert(m_pgexpr->Pop()); IStatistics *pstatsDS = popScan->PstatsDerive(m_pmp, *this, m_pcc->Poc()->Prpp(), m_pcc->Poc()->Pdrgpstat()); CRefCount::SafeRelease(m_pstats); m_pstats = pstatsDS; return; } // release current stats since we will derive new stats CRefCount::SafeRelease(m_pstats); m_pstats = NULL; // load stats from child cost context -- these may be different from child groups stats CRefCount::SafeRelease(m_pdrgpstat); m_pdrgpstat = NULL; m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp); const ULONG ulArity = m_pcc->Pdrgpoc()->UlLength(); for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); GPOS_ASSERT(NULL != pccChild->Pstats()); pccChild->Pstats()->AddRef(); m_pdrgpstat->Append(pccChild->Pstats()); } if (CPhysical::PopConvert(m_pgexpr->Pop())->FPassThruStats()) { GPOS_ASSERT(1 == m_pdrgpstat->UlLength()); // copy stats from first child (*m_pdrgpstat)[0]->AddRef(); m_pstats = (*m_pdrgpstat)[0]; return; } // derive stats using the best 
logical expression with the same children as attached physical operator CGroupExpression *pgexprForStats = m_pcc->PgexprForStats(); GPOS_ASSERT(NULL != pgexprForStats); CExpressionHandle exprhdl(m_pmp); exprhdl.Attach(pgexprForStats); exprhdl.DeriveProps(NULL /*pdpctxt*/); m_pdrgpstat->AddRef(); exprhdl.m_pdrgpstat = m_pdrgpstat; exprhdl.ComputeReqdProps(m_pcc->Poc()->Prprel(), 0 /*ulOptReq*/); GPOS_ASSERT(NULL == exprhdl.m_pstats); IStatistics *pstats = m_pgexpr->Pgroup()->PstatsCompute(m_pcc->Poc(), exprhdl, pgexprForStats); // copy stats to main handle GPOS_ASSERT(NULL == m_pstats); GPOS_ASSERT(NULL != pstats); pstats->AddRef(); m_pstats = pstats; GPOS_ASSERT(m_pstats != NULL); }
//--------------------------------------------------------------------------- // @function: // COptimizationContext::PrppCTEProducer // // @doc: // Compute required properties to CTE producer based on plan properties // of CTE consumer // //--------------------------------------------------------------------------- CReqdPropPlan * COptimizationContext::PrppCTEProducer ( IMemoryPool *mp, COptimizationContext *poc, ULONG ulSearchStages ) { GPOS_ASSERT(NULL != poc); GPOS_ASSERT(NULL != poc->PccBest()); CCostContext *pccBest = poc->PccBest(); CGroupExpression *pgexpr = pccBest->Pgexpr(); BOOL fOptimizeCTESequence = ( COperator::EopPhysicalSequence == pgexpr->Pop()->Eopid() && (*pgexpr)[0]->FHasCTEProducer() ); if (!fOptimizeCTESequence) { // best group expression is not a CTE sequence return NULL; } COptimizationContext *pocProducer = (*pgexpr)[0]->PocLookupBest(mp, ulSearchStages, (*pccBest->Pdrgpoc())[0]->Prpp()); if (NULL == pocProducer) { return NULL; } CCostContext *pccProducer = pocProducer->PccBest(); if (NULL == pccProducer) { return NULL; } COptimizationContext *pocConsumer = (*pgexpr)[1]->PocLookupBest(mp, ulSearchStages, (*pccBest->Pdrgpoc())[1]->Prpp()); if (NULL == pocConsumer) { return NULL; } CCostContext *pccConsumer = pocConsumer->PccBest(); if (NULL == pccConsumer) { return NULL; } CColRefSet *pcrsInnerOutput = CDrvdPropRelational::GetRelationalProperties((*pgexpr)[1]->Pdp())->PcrsOutput(); CPhysicalCTEProducer *popProducer = CPhysicalCTEProducer::PopConvert(pccProducer->Pgexpr()->Pop()); UlongToColRefMap *colref_mapping = COptCtxt::PoctxtFromTLS()->Pcteinfo()->PhmulcrConsumerToProducer(mp, popProducer->UlCTEId(), pcrsInnerOutput, popProducer->Pdrgpcr()); CReqdPropPlan *prppProducer = CReqdPropPlan::PrppRemap(mp, pocProducer->Prpp(), pccConsumer->Pdpplan(), colref_mapping); colref_mapping->Release(); if (prppProducer->Equals(pocProducer->Prpp())) { prppProducer->Release(); return NULL; } return prppProducer; }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::PccComputeCost // // @doc: // Compute and store expression's cost under a given context; // the function returns the cost context containing the computed cost // //--------------------------------------------------------------------------- CCostContext * CGroupExpression::PccComputeCost ( IMemoryPool *pmp, COptimizationContext *poc, ULONG ulOptReq, DrgPoc *pdrgpoc, // array of child contexts BOOL fPruned, // is created cost context pruned based on cost bound CCost costLowerBound // lower bound on the cost of plan carried by cost context ) { GPOS_ASSERT(NULL != poc); GPOS_ASSERT_IMP(!fPruned, NULL != pdrgpoc); if (!fPruned && !FValid(pmp, poc, pdrgpoc)) { return NULL; } // check if the same cost context is already created for current group expression if (FCostContextExists(poc, pdrgpoc)) { return NULL; } poc->AddRef(); this->AddRef(); CCostContext *pcc = GPOS_NEW(pmp) CCostContext(pmp, poc, ulOptReq, this); BOOL fValid = true; // computing cost pcc->SetState(CCostContext::estCosting); if (!fPruned) { if (NULL != pdrgpoc) { pdrgpoc->AddRef(); } pcc->SetChildContexts(pdrgpoc); fValid = pcc->FValid(pmp); if (fValid) { CCost cost = CostCompute(pmp, pcc); pcc->SetCost(cost); } GPOS_ASSERT_IMP(COptCtxt::FAllEnforcersEnabled(), fValid && "Cost context carries an invalid plan"); } else { pcc->SetPruned(); pcc->SetCost(costLowerBound); } pcc->SetState(CCostContext::estCosted); if (fValid) { return PccInsertBest(pcc); } pcc->Release(); // invalid cost context return NULL; }
//---------------------------------------------------------------------------
//	@function:
//		CPhysicalSpool::FValidContext
//
//	@doc:
//		Check if the optimization context is valid for a Spool operator;
//		the context is rejected when the child plan contains unresolved
//		entries in its partition index map; when both the required
//		rewindability and the child's rewindability have a motion hazard,
//		the context is valid only if FEager() holds
//
//---------------------------------------------------------------------------
BOOL
CPhysicalSpool::FValidContext
	(
	IMemoryPool *,
	COptimizationContext *poc,
	COptimizationContextArray *pdrgpocChild
	)
	const
{
	GPOS_ASSERT(NULL != pdrgpocChild);
	GPOS_ASSERT(1 == pdrgpocChild->Size());

	// best plan found for the single child
	COptimizationContext *pocOnlyChild = (*pdrgpocChild)[0];
	CCostContext *pccChildBest = pocOnlyChild->PccBest();
	GPOS_ASSERT(NULL != pccChildBest);

	// Partition selection that happens outside of a physical spool does no
	// good on rescan: a physical spool blocks the rescan from the entire
	// subtree (in particular, any dynamic scan) underneath it. So, with a
	// dynamic scan under a spool and the corresponding partition selector
	// outside the spool, we run the risk of materializing the wrong results.
	// For example, the following plan is invalid because the partition selector
	// won't be able to influence the inner side of the nested loop join as
	// intended ("blocked" by the spool):
	// +--CPhysicalMotionGather(master)
	//    +--CPhysicalInnerNLJoin
	//       |--CPhysicalPartitionSelector
	//       |  +--CPhysicalMotionBroadcast
	//       |     +--CPhysicalTableScan "foo" ("foo")
	//       |--CPhysicalSpool
	//       |  +--CPhysicalLeftOuterHashJoin
	//       |     |--CPhysicalDynamicTableScan "pt" ("pt")
	//       |     |--CPhysicalMotionHashDistribute
	//       |     |  +--CPhysicalTableScan "bar" ("bar")
	//       |     +--CScalarCmp (=)
	//       |        |--CScalarIdent "d" (19)
	//       |        +--CScalarIdent "dk" (9)
	//       +--CScalarCmp (<)
	//          |--CScalarIdent "a" (0)
	//          +--CScalarIdent "partkey" (10)
	CDrvdPropPlan *pdpplanOfChild = pccChildBest->Pdpplan();
	if (pdpplanOfChild->Ppim()->FContainsUnresolved())
	{
		return false;
	}

	// Discard any context that requests rewindability with motion hazard
	// handling while the physical spool is streaming with a motion underneath
	// it. We do not want to add a blocking spool over a spool, as spooling
	// twice would be expensive, hence such a context is invalidated.
	CEnfdRewindability *perRequired = poc->Prpp()->Per();
	const BOOL fHazardOnBothSides =
		perRequired->PrsRequired()->HasMotionHazard() &&
		pdpplanOfChild->Prs()->HasMotionHazard();
	if (fHazardOnBothSides)
	{
		return FEager();
	}

	return true;
}