//--------------------------------------------------------------------------- // @function: // CCostContext::ComputeCost // // @doc: // Compute cost of current context, // // the function extracts cardinality and row width of owner operator // and child operators, and then adjusts row estimate obtained from // statistics based on data distribution obtained from plan properties, // // statistics row estimate is computed on logical expressions by // estimating the size of the whole relation regardless data // distribution, on the other hand, optimizer's cost model computes // the cost of a plan instance on some segment, // // when a plan produces tuples distributed to multiple segments, we // need to divide statistics row estimate by the number segments to // provide a per-segment row estimate for cost computation, // // Note that this scaling of row estimate cannot happen during // statistics derivation since plans are not created yet at this point // // this function also extracts number of rebinds of owner operator child // operators, if statistics are computed using predicates with external // parameters (outer references), number of rebinds is the total number // of external parameters' values // //--------------------------------------------------------------------------- CCost CCostContext::CostCompute ( IMemoryPool *pmp, DrgPcost *pdrgpcostChildren ) { // derive context stats DeriveStats(); ULONG ulArity = 0; if (NULL != m_pdrgpoc) { ulArity = Pdrgpoc()->UlLength(); } m_pstats->AddRef(); ICostModel::SCostingInfo ci(pmp, ulArity, GPOS_NEW(pmp) ICostModel::CCostingStats(m_pstats)); ICostModel *pcm = COptCtxt::PoctxtFromTLS()->Pcm(); CExpressionHandle exprhdl(pmp); exprhdl.Attach(this); // extract local costing info DOUBLE dRows = m_pstats->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRows = DRowsPerHost().DVal(); } ci.SetRows(dRows); DOUBLE dWidth = m_pstats->DWidth(pmp, m_poc->Prpp()->PcrsRequired()).DVal(); ci.SetWidth(dWidth); DOUBLE dRebinds = m_pstats->DRebinds().DVal(); ci.SetRebinds(dRebinds); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebinds) && "invalid number of rebinds when there are no outer references"); // extract children costing info for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pdrgpoc)[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); IStatistics *pstatsChild = pccChild->Pstats(); DOUBLE dRowsChild = pstatsChild->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == pccChild->Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRowsChild = pccChild->DRowsPerHost().DVal(); } ci.SetChildRows(ul, dRowsChild); DOUBLE dWidthChild = pstatsChild->DWidth(pmp, pocChild->Prpp()->PcrsRequired()).DVal(); ci.SetChildWidth(ul, dWidthChild); DOUBLE dRebindsChild = pstatsChild->DRebinds().DVal(); ci.SetChildRebinds(ul, dRebindsChild); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(ul), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebindsChild) && "invalid number of rebinds when there are no outer references"); DOUBLE dCostChild = (*pdrgpcostChildren)[ul]->DVal(); ci.SetChildCost(ul, dCostChild); } // compute cost using the underlying cost model return pcm->Cost(exprhdl, &ci); }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DeriveCostContextStats // // @doc: // Stats derivation based on required plan properties // //--------------------------------------------------------------------------- void CExpressionHandle::DeriveCostContextStats() { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL == m_pcc->Pstats()); // copy group properties and stats CopyGroupProps(); CopyStats(); if (NULL != m_pstats && !m_pcc->FNeedsNewStats()) { // there is no need to derive stats, // stats are copied from owner group return; } CEnfdPartitionPropagation *pepp = m_pcc->Poc()->Prpp()->Pepp(); COperator *pop = Pop(); if (CUtils::FPhysicalScan(pop) && CPhysicalScan::PopConvert(pop)->FDynamicScan() && !pepp->PpfmDerived()->FEmpty()) { // derive stats on dynamic table scan using stats of part selector CPhysicalScan *popScan = CPhysicalScan::PopConvert(m_pgexpr->Pop()); IStatistics *pstatsDS = popScan->PstatsDerive(m_pmp, *this, m_pcc->Poc()->Prpp(), m_pcc->Poc()->Pdrgpstat()); CRefCount::SafeRelease(m_pstats); m_pstats = pstatsDS; return; } // release current stats since we will derive new stats CRefCount::SafeRelease(m_pstats); m_pstats = NULL; // load stats from child cost context -- these may be different from child groups stats CRefCount::SafeRelease(m_pdrgpstat); m_pdrgpstat = NULL; m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp); const ULONG ulArity = m_pcc->Pdrgpoc()->UlLength(); for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); GPOS_ASSERT(NULL != pccChild->Pstats()); pccChild->Pstats()->AddRef(); m_pdrgpstat->Append(pccChild->Pstats()); } if (CPhysical::PopConvert(m_pgexpr->Pop())->FPassThruStats()) { GPOS_ASSERT(1 == m_pdrgpstat->UlLength()); // copy stats from first child (*m_pdrgpstat)[0]->AddRef(); m_pstats = (*m_pdrgpstat)[0]; return; } // derive stats using the best logical expression with the same children as attached physical operator CGroupExpression *pgexprForStats = m_pcc->PgexprForStats(); GPOS_ASSERT(NULL != pgexprForStats); CExpressionHandle exprhdl(m_pmp); exprhdl.Attach(pgexprForStats); exprhdl.DeriveProps(NULL /*pdpctxt*/); m_pdrgpstat->AddRef(); exprhdl.m_pdrgpstat = m_pdrgpstat; exprhdl.ComputeReqdProps(m_pcc->Poc()->Prprel(), 0 /*ulOptReq*/); GPOS_ASSERT(NULL == exprhdl.m_pstats); IStatistics *pstats = m_pgexpr->Pgroup()->PstatsCompute(m_pcc->Poc(), exprhdl, pgexprForStats); // copy stats to main handle GPOS_ASSERT(NULL == m_pstats); GPOS_ASSERT(NULL != pstats); pstats->AddRef(); m_pstats = pstats; GPOS_ASSERT(m_pstats != NULL); }