//--------------------------------------------------------------------------- // @function: // CLogicalUnion::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalUnion::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); // union is transformed into a group by over an union all // we follow the same route to compute statistics IStatistics *pstatsUnionAll = CLogicalUnionAll::PstatsDeriveUnionAll(pmp, exprhdl); // computed columns DrgPul *pdrgpulComputedCols = GPOS_NEW(pmp) DrgPul(pmp); IStatistics *pstats = CLogicalGbAgg::PstatsDerive ( pmp, pstatsUnionAll, m_pdrgpcrOutput, // we group by the output columns pdrgpulComputedCols, // no computed columns for set ops NULL // no keys, use all grouping cols ); // clean up pdrgpulComputedCols->Release(); pstatsUnionAll->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CLogicalInnerIndexApply::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalInnerIndexApply::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat* // pdrgpstatCtxt ) const { GPOS_ASSERT(EspNone < Esp(exprhdl)); IStatistics *pstatsOuter = exprhdl.Pstats(0); IStatistics *pstatsInner = exprhdl.Pstats(1); CExpression *pexprScalar = exprhdl.PexprScalarChild(2 /*ulChildIndex*/); // join stats of the children DrgPstat *pdrgpstat = GPOS_NEW(pmp) DrgPstat(pmp); pstatsOuter->AddRef(); pdrgpstat->Append(pstatsOuter); pstatsInner->AddRef(); pdrgpstat->Append(pstatsInner); IStatistics *pstats = CStatisticsUtils::PstatsJoinArray(pmp, false /*fOuterJoin*/, pdrgpstat, pexprScalar); pdrgpstat->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CGroupExpression::PstatsRecursiveDerive // // @doc: // Derive stats recursively on group expression // //--------------------------------------------------------------------------- IStatistics * CGroupExpression::PstatsRecursiveDerive ( IMemoryPool *, // pmpLocal IMemoryPool *pmpGlobal, CReqdPropRelational *prprel, DrgPstat *pdrgpstatCtxt, BOOL fComputeRootStats ) { GPOS_ASSERT(!Pgroup()->FScalar()); GPOS_ASSERT(!Pgroup()->FImplemented()); GPOS_ASSERT(NULL != pdrgpstatCtxt); GPOS_CHECK_ABORT; // trigger recursive property derivation CExpressionHandle exprhdl(pmpGlobal); exprhdl.Attach(this); exprhdl.DeriveProps(NULL /*pdpctxt*/); // compute required relational properties on child groups exprhdl.ComputeReqdProps(prprel, 0 /*ulOptReq*/); // trigger recursive stat derivation exprhdl.DeriveStats(pdrgpstatCtxt, fComputeRootStats); IStatistics *pstats = exprhdl.Pstats(); if (NULL != pstats) { pstats->AddRef(); } return pstats; }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DeriveRootStats // // @doc: // Stat derivation at root operator where handle is attached // //--------------------------------------------------------------------------- void CExpressionHandle::DeriveRootStats ( DrgPstat *pdrgpstatCtxt ) { GPOS_ASSERT(NULL == m_pstats); CLogical *popLogical = CLogical::PopConvert(Pop()); IStatistics *pstatsRoot = NULL; if (FAttachedToLeafPattern()) { // for leaf patterns extracted from memo, trigger state derivation on origin group GPOS_ASSERT(NULL != m_pexpr); GPOS_ASSERT(NULL != m_pexpr->Pgexpr()); pstatsRoot = m_pexpr->Pgexpr()->Pgroup()->PstatsRecursiveDerive(m_pmp, m_pmp, CReqdPropRelational::Prprel(m_prp), pdrgpstatCtxt); pstatsRoot->AddRef(); } else { // otherwise, derive stats using root operator pstatsRoot = popLogical->PstatsDerive(m_pmp, *this, pdrgpstatCtxt); } GPOS_ASSERT(NULL != pstatsRoot); m_pstats = pstatsRoot; }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::CopyStats // // @doc: // Copy stats from attached expression/group expression to local stats // members // //--------------------------------------------------------------------------- void CExpressionHandle::CopyStats() { if (!FStatsDerived()) { // stats of attached expression (or its children) have not been derived yet return; } IStatistics *pstats = NULL; if (NULL != m_pexpr) { pstats = const_cast<IStatistics *>(m_pexpr->Pstats()); } else { GPOS_ASSERT(NULL != m_pgexpr); pstats = m_pgexpr->Pgroup()->Pstats(); } GPOS_ASSERT(NULL != pstats); // attach stats pstats->AddRef(); GPOS_ASSERT(NULL == m_pstats); m_pstats = pstats; // attach child stats GPOS_ASSERT(NULL == m_pdrgpstat); m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp); const ULONG ulArity = UlArity(); for (ULONG ul = 0; ul < ulArity; ul++) { IStatistics *pstatsChild = NULL; if (NULL != m_pexpr) { pstatsChild = const_cast<IStatistics *>((*m_pexpr)[ul]->Pstats()); } else { pstatsChild = (*m_pgexpr)[ul]->Pstats(); } if (NULL != pstatsChild) { pstatsChild->AddRef(); } else { GPOS_ASSERT(FScalarChild(ul)); // create dummy stats for missing scalar children pstatsChild = CStatistics::PstatsEmpty(m_pmp); } m_pdrgpstat->Append(pstatsChild); } }
// Return the value reported by getNumberOfData() on the statistics object of
// the given spatial index.
//
// Returns 0 when `rtree` is NULL or when the index does not hand back a
// statistics object. The statistics object obtained from getStatistics() is
// deleted here before returning.
size_t rtree_get_utilization(ISpatialIndex *rtree)
{
	if (rtree == NULL)
	{
		return 0;
	}

	// Start from NULL so that an implementation that leaves the out-parameter
	// untouched is detected, instead of dereferencing and deleting an
	// indeterminate pointer.
	IStatistics *statistic = NULL;
	rtree->getStatistics(&statistic);
	if (statistic == NULL)
	{
		return 0;
	}

	size_t data_num = statistic->getNumberOfData();
	delete statistic;

	return data_num;
}
//--------------------------------------------------------------------------- // @function: // CLogicalDifferenceAll::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalDifferenceAll::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); // difference all is transformed into a LASJ, // we follow the same route to compute statistics DrgPcrs *pdrgpcrsOutput = GPOS_NEW(pmp) DrgPcrs(pmp); const ULONG ulSize = m_pdrgpdrgpcrInput->UlLength(); for (ULONG ul = 0; ul < ulSize; ul++) { CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, (*m_pdrgpdrgpcrInput)[ul]); pdrgpcrsOutput->Append(pcrs); } IStatistics *pstatsOuter = exprhdl.Pstats(0); IStatistics *pstatsInner = exprhdl.Pstats(1); // construct the scalar condition for the LASJ CExpression *pexprScCond = CUtils::PexprConjINDFCond(pmp, m_pdrgpdrgpcrInput); // compute the statistics for LASJ CColRefSet *pcrsOuterRefs = exprhdl.Pdprel()->PcrsOuter(); DrgPstatsjoin *pdrgpstatsjoin = CStatsPredUtils::Pdrgpstatsjoin ( pmp, exprhdl, pexprScCond, pdrgpcrsOutput, pcrsOuterRefs ); IStatistics *pstatsLASJ = pstatsOuter->PstatsLASJoin ( pmp, pstatsInner, pdrgpstatsjoin, true /* fIgnoreLasjHistComputation*/ ); // clean up pexprScCond->Release(); pdrgpstatsjoin->Release(); pdrgpcrsOutput->Release(); return pstatsLASJ; }
//--------------------------------------------------------------------------- // @function: // CLogical::PstatsPassThruOuter // // @doc: // Helper for common case of passing through derived stats // //--------------------------------------------------------------------------- IStatistics * CLogical::PstatsPassThruOuter ( CExpressionHandle &exprhdl ) { GPOS_CHECK_ABORT; IStatistics *pstats = exprhdl.Pstats(0); pstats->AddRef(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CLogicalGet::PstatsDerive // // @doc: // Load up statistics from metadata // //--------------------------------------------------------------------------- IStatistics * CLogicalGet::PstatsDerive ( IMemoryPool *mp, CExpressionHandle &exprhdl, IStatisticsArray * // not used ) const { // requesting stats on distribution columns to estimate data skew IStatistics *pstatsTable = PstatsBaseTable(mp, exprhdl, m_ptabdesc, m_pcrsDist); CColRefSet *pcrs = GPOS_NEW(mp) CColRefSet(mp, m_pdrgpcrOutput); CUpperBoundNDVs *upper_bound_NDVs = GPOS_NEW(mp) CUpperBoundNDVs(pcrs, pstatsTable->Rows()); CStatistics::CastStats(pstatsTable)->AddCardUpperBound(upper_bound_NDVs); return pstatsTable; }
//--------------------------------------------------------------------------- // @function: // CLogicalGet::PstatsDerive // // @doc: // Load up statistics from metadata // //--------------------------------------------------------------------------- IStatistics * CLogicalGet::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { // requesting stats on distribution columns to estimate data skew IStatistics *pstatsTable = PstatsBaseTable(pmp, exprhdl, m_ptabdesc, m_pcrsDist); CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, m_pdrgpcrOutput); CUpperBoundNDVs *pubndv = GPOS_NEW(pmp) CUpperBoundNDVs(pcrs, pstatsTable->DRows()); CStatistics::PstatsConvert(pstatsTable)->AddCardUpperBound(pubndv); return pstatsTable; }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::PdrgpstatOuterRefs // // @doc: // Given an array of stats objects and a child index, return an array // of stats objects starting from the first stats object referenced by // child // //--------------------------------------------------------------------------- DrgPstat * CExpressionHandle::PdrgpstatOuterRefs ( DrgPstat *pdrgpstat, ULONG ulChildIndex ) const { GPOS_ASSERT(NULL != pdrgpstat); GPOS_ASSERT(ulChildIndex < UlArity()); if (FScalarChild(ulChildIndex) || !FHasOuterRefs(ulChildIndex)) { // if child is scalar or has no outer references, return empty array return GPOS_NEW(m_pmp) DrgPstat(m_pmp); } DrgPstat *pdrgpstatResult = GPOS_NEW(m_pmp) DrgPstat(m_pmp); CColRefSet *pcrsOuter = Pdprel(ulChildIndex)->PcrsOuter(); GPOS_ASSERT(0 < pcrsOuter->CElements()); const ULONG ulSize = pdrgpstat->UlLength(); ULONG ulStartIndex = ULONG_MAX; for (ULONG ul = 0; ul < ulSize; ul++) { IStatistics *pstats = (*pdrgpstat)[ul]; CColRefSet *pcrsStats = pstats->Pcrs(m_pmp); BOOL fStatsColsUsed = !pcrsOuter->FDisjoint(pcrsStats); pcrsStats->Release(); if (fStatsColsUsed) { ulStartIndex = ul; break; } } if (ULONG_MAX != ulStartIndex) { // copy stats starting from index of outer-most stats object referenced by child CUtils::AddRefAppend<IStatistics, CleanupStats>(pdrgpstatResult, pdrgpstat, ulStartIndex); } return pdrgpstatResult; }
//--------------------------------------------------------------------------- // @function: // CLogicalAssert::PstatsDerive // // @doc: // Derive statistics based on filter predicates // //--------------------------------------------------------------------------- IStatistics * CLogicalAssert::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { CMaxCard maxcard = CLogicalAssert::PopConvert(exprhdl.Pop())->Maxcard(pmp, exprhdl); if (1 == maxcard.Ull()) { // a max card of one requires re-scaling stats IStatistics *pstats = exprhdl.Pstats(0); return pstats->PstatsScale(pmp, CDouble(1.0 / pstats->DRows())); } return PstatsPassThruOuter(exprhdl); }
//--------------------------------------------------------------------------- // @function: // CLogical::PstatsBaseTable // // @doc: // Helper for deriving statistics on a base table // //--------------------------------------------------------------------------- IStatistics * CLogical::PstatsBaseTable ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CTableDescriptor *ptabdesc, CColRefSet *pcrsStatExtra // additional columns required for stats, not required by parent ) { // extract colids and attribute for which detailed stats are necessary CReqdPropRelational *prprel = CReqdPropRelational::Prprel(exprhdl.Prp()); CColRefSet *pcrsStat = GPOS_NEW(pmp) CColRefSet(pmp); pcrsStat->Include(prprel->PcrsStat()); if (NULL != pcrsStatExtra) { pcrsStat->Include(pcrsStatExtra); } DrgPul *pdrgpulHistColIds = GPOS_NEW(pmp) DrgPul(pmp); DrgPul *pdrgpulHistPos = GPOS_NEW(pmp) DrgPul(pmp); CUtils::ExtractColIdsAttno(pmp, ptabdesc, pcrsStat, pdrgpulHistColIds, pdrgpulHistPos); // extract colids and attribute for which widths are necessary CDrvdPropRelational *pdprel = exprhdl.Pdprel(); CColRefSet *pcrsWidth = pdprel->PcrsOutput(); DrgPul *pdrgpulWidthColIds = GPOS_NEW(pmp) DrgPul(pmp); DrgPul *pdrgpulWidthPos = GPOS_NEW(pmp) DrgPul(pmp); CUtils::ExtractColIdsAttno(pmp, ptabdesc, pcrsWidth, pdrgpulWidthColIds, pdrgpulWidthPos); CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda(); IStatistics *pstats = pmda->Pstats(pmp, ptabdesc->Pmdid(), pdrgpulHistPos, pdrgpulHistColIds, pdrgpulWidthPos, pdrgpulWidthColIds); if (!GPOS_FTRACE(EopttraceDonotCollectMissingStatsCols) && !pstats->FEmpty()) { CStatisticsUtils::RecordMissingStatisticsColumns(pmp, ptabdesc, pcrsStat, pstats); } pcrsStat->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CPhysicalPartitionSelector::PpfmDerive // // @doc: // Derive partition filter map // //--------------------------------------------------------------------------- CPartFilterMap * CPhysicalPartitionSelector::PpfmDerive ( IMemoryPool *mp, CExpressionHandle &exprhdl ) const { if (!FHasFilter()) { return PpfmPassThruOuter(exprhdl); } CPartFilterMap *ppfm = PpfmDeriveCombineRelational(mp, exprhdl); IStatistics *stats = exprhdl.Pstats(); GPOS_ASSERT(NULL != stats); m_pexprCombinedPredicate->AddRef(); stats->AddRef(); ppfm->AddPartFilter(mp, m_scan_id, m_pexprCombinedPredicate, stats); return ppfm; }
//--------------------------------------------------------------------------- // @function: // CLogicalSelect::PstatsDerive // // @doc: // Derive statistics based on filter predicates // //--------------------------------------------------------------------------- IStatistics * CLogicalSelect::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat *pdrgpstatCtxt ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); IStatistics *pstatsChild = exprhdl.Pstats(0); if (exprhdl.Pdpscalar(1 /*ulChildIndex*/)->FHasSubquery()) { // in case of subquery in select predicate, we return child stats pstatsChild->AddRef(); return pstatsChild; } // remove implied predicates from selection condition to avoid cardinality under-estimation CExpression *pexprScalar = exprhdl.PexprScalarChild(1 /*ulChildIndex*/); CExpression *pexprPredicate = CPredicateUtils::PexprRemoveImpliedConjuncts(pmp, pexprScalar, exprhdl); // split selection predicate into local predicate and predicate involving outer references CExpression *pexprLocal = NULL; CExpression *pexprOuterRefs = NULL; // get outer references from expression handle CColRefSet *pcrsOuter = exprhdl.Pdprel()->PcrsOuter(); CPredicateUtils::SeparateOuterRefs(pmp, pexprPredicate, pcrsOuter, &pexprLocal, &pexprOuterRefs); pexprPredicate->Release(); IStatistics *pstats = CStatisticsUtils::PstatsFilter(pmp, exprhdl, pstatsChild, pexprLocal, pexprOuterRefs, pdrgpstatCtxt); pexprLocal->Release(); pexprOuterRefs->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CLogicalLimit::PstatsDerive // // @doc: // Derive statistics based on limit // //--------------------------------------------------------------------------- IStatistics * CLogicalLimit::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); IStatistics *pstatsChild = exprhdl.Pstats(0); CMaxCard maxcard = this->Maxcard(pmp, exprhdl); CDouble dRowsMax = CDouble(maxcard.Ull()); if (pstatsChild->DRows() <= dRowsMax) { pstatsChild->AddRef(); return pstatsChild; } return pstatsChild->PstatsLimit(pmp, dRowsMax); }
//--------------------------------------------------------------------------- // @function: // CCostContext::ComputeCost // // @doc: // Compute cost of current context, // // the function extracts cardinality and row width of owner operator // and child operators, and then adjusts row estimate obtained from // statistics based on data distribution obtained from plan properties, // // statistics row estimate is computed on logical expressions by // estimating the size of the whole relation regardless data // distribution, on the other hand, optimizer's cost model computes // the cost of a plan instance on some segment, // // when a plan produces tuples distributed to multiple segments, we // need to divide statistics row estimate by the number segments to // provide a per-segment row estimate for cost computation, // // Note that this scaling of row estimate cannot happen during // statistics derivation since plans are not created yet at this point // // this function also extracts number of rebinds of owner operator child // operators, if statistics are computed using predicates with external // parameters (outer references), number of rebinds is the total number // of external parameters' values // //--------------------------------------------------------------------------- CCost CCostContext::CostCompute ( IMemoryPool *pmp, DrgPcost *pdrgpcostChildren ) { // derive context stats DeriveStats(); ULONG ulArity = 0; if (NULL != m_pdrgpoc) { ulArity = Pdrgpoc()->UlLength(); } m_pstats->AddRef(); ICostModel::SCostingInfo ci(pmp, ulArity, GPOS_NEW(pmp) ICostModel::CCostingStats(m_pstats)); ICostModel *pcm = COptCtxt::PoctxtFromTLS()->Pcm(); CExpressionHandle exprhdl(pmp); exprhdl.Attach(this); // extract local costing info DOUBLE dRows = m_pstats->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRows = DRowsPerHost().DVal(); } ci.SetRows(dRows); DOUBLE dWidth = 
m_pstats->DWidth(pmp, m_poc->Prpp()->PcrsRequired()).DVal(); ci.SetWidth(dWidth); DOUBLE dRebinds = m_pstats->DRebinds().DVal(); ci.SetRebinds(dRebinds); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebinds) && "invalid number of rebinds when there are no outer references"); // extract children costing info for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pdrgpoc)[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); IStatistics *pstatsChild = pccChild->Pstats(); DOUBLE dRowsChild = pstatsChild->DRows().DVal(); if (CDistributionSpec::EdptPartitioned == pccChild->Pdpplan()->Pds()->Edpt()) { // scale statistics row estimate by number of segments dRowsChild = pccChild->DRowsPerHost().DVal(); } ci.SetChildRows(ul, dRowsChild); DOUBLE dWidthChild = pstatsChild->DWidth(pmp, pocChild->Prpp()->PcrsRequired()).DVal(); ci.SetChildWidth(ul, dWidthChild); DOUBLE dRebindsChild = pstatsChild->DRebinds().DVal(); ci.SetChildRebinds(ul, dRebindsChild); GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(ul), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebindsChild) && "invalid number of rebinds when there are no outer references"); DOUBLE dCostChild = (*pdrgpcostChildren)[ul]->DVal(); ci.SetChildCost(ul, dCostChild); } // compute cost using the underlying cost model return pcm->Cost(exprhdl, &ci); }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DeriveCostContextStats // // @doc: // Stats derivation based on required plan properties // //--------------------------------------------------------------------------- void CExpressionHandle::DeriveCostContextStats() { GPOS_ASSERT(NULL != m_pcc); GPOS_ASSERT(NULL == m_pcc->Pstats()); // copy group properties and stats CopyGroupProps(); CopyStats(); if (NULL != m_pstats && !m_pcc->FNeedsNewStats()) { // there is no need to derive stats, // stats are copied from owner group return; } CEnfdPartitionPropagation *pepp = m_pcc->Poc()->Prpp()->Pepp(); COperator *pop = Pop(); if (CUtils::FPhysicalScan(pop) && CPhysicalScan::PopConvert(pop)->FDynamicScan() && !pepp->PpfmDerived()->FEmpty()) { // derive stats on dynamic table scan using stats of part selector CPhysicalScan *popScan = CPhysicalScan::PopConvert(m_pgexpr->Pop()); IStatistics *pstatsDS = popScan->PstatsDerive(m_pmp, *this, m_pcc->Poc()->Prpp(), m_pcc->Poc()->Pdrgpstat()); CRefCount::SafeRelease(m_pstats); m_pstats = pstatsDS; return; } // release current stats since we will derive new stats CRefCount::SafeRelease(m_pstats); m_pstats = NULL; // load stats from child cost context -- these may be different from child groups stats CRefCount::SafeRelease(m_pdrgpstat); m_pdrgpstat = NULL; m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp); const ULONG ulArity = m_pcc->Pdrgpoc()->UlLength(); for (ULONG ul = 0; ul < ulArity; ul++) { COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul]; CCostContext *pccChild = pocChild->PccBest(); GPOS_ASSERT(NULL != pccChild); GPOS_ASSERT(NULL != pccChild->Pstats()); pccChild->Pstats()->AddRef(); m_pdrgpstat->Append(pccChild->Pstats()); } if (CPhysical::PopConvert(m_pgexpr->Pop())->FPassThruStats()) { GPOS_ASSERT(1 == m_pdrgpstat->UlLength()); // copy stats from first child (*m_pdrgpstat)[0]->AddRef(); m_pstats = (*m_pdrgpstat)[0]; return; } // derive stats using the best 
logical expression with the same children as attached physical operator CGroupExpression *pgexprForStats = m_pcc->PgexprForStats(); GPOS_ASSERT(NULL != pgexprForStats); CExpressionHandle exprhdl(m_pmp); exprhdl.Attach(pgexprForStats); exprhdl.DeriveProps(NULL /*pdpctxt*/); m_pdrgpstat->AddRef(); exprhdl.m_pdrgpstat = m_pdrgpstat; exprhdl.ComputeReqdProps(m_pcc->Poc()->Prprel(), 0 /*ulOptReq*/); GPOS_ASSERT(NULL == exprhdl.m_pstats); IStatistics *pstats = m_pgexpr->Pgroup()->PstatsCompute(m_pcc->Poc(), exprhdl, pgexprForStats); // copy stats to main handle GPOS_ASSERT(NULL == m_pstats); GPOS_ASSERT(NULL != pstats); pstats->AddRef(); m_pstats = pstats; GPOS_ASSERT(m_pstats != NULL); }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DeriveStats // // @doc: // Recursive stat derivation // //--------------------------------------------------------------------------- void CExpressionHandle::DeriveStats ( DrgPstat *pdrgpstatCtxt, BOOL fComputeRootStats ) { GPOS_ASSERT(NULL != pdrgpstatCtxt); GPOS_ASSERT(NULL == m_pdrgpstat); GPOS_ASSERT(NULL == m_pstats); GPOS_ASSERT(NULL != m_pdrgprp); // copy input context DrgPstat *pdrgpstatCurrentCtxt = GPOS_NEW(m_pmp) DrgPstat(m_pmp); CUtils::AddRefAppend<IStatistics, CleanupStats>(pdrgpstatCurrentCtxt, pdrgpstatCtxt); // create array of children stats m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp); ULONG ulMaxChildRisk = 1; const ULONG ulArity = UlArity(); for (ULONG ul = 0; ul < ulArity; ul++) { // create a new context for outer references used by current child DrgPstat *pdrgpstatChildCtxt = PdrgpstatOuterRefs(pdrgpstatCurrentCtxt, ul); IStatistics *pstats = NULL; if (NULL != Pexpr()) { // derive stats recursively on child expression pstats = (*Pexpr())[ul]->PstatsDerive(Prprel(ul), pdrgpstatChildCtxt); } else { // derive stats recursively on child group pstats = (*Pgexpr())[ul]->PstatsRecursiveDerive(m_pmp, m_pmp, Prprel(ul), pdrgpstatChildCtxt); } GPOS_ASSERT(NULL != pstats); // add child stat to current context pstats->AddRef(); pdrgpstatCurrentCtxt->Append(pstats); pdrgpstatChildCtxt->Release(); // add child stat to children stat array pstats->AddRef(); m_pdrgpstat->Append(pstats); if (pstats->UlStatsEstimationRisk() > ulMaxChildRisk) { ulMaxChildRisk = pstats->UlStatsEstimationRisk(); } } if (fComputeRootStats) { // call stat derivation on operator to compute local stats GPOS_ASSERT(NULL == m_pstats); DeriveRootStats(pdrgpstatCtxt); GPOS_ASSERT(NULL != m_pstats); CLogical *popLogical = CLogical::PopConvert(Pop()); ULONG ulRisk = ulMaxChildRisk; if (CStatisticsUtils::FIncreasesRisk(popLogical)) { ++ulRisk; } 
m_pstats->SetStatsEstimationRisk(ulRisk); } // clean up current stat context pdrgpstatCurrentCtxt->Release(); }
//--------------------------------------------------------------------------- // @function: // CLogicalDynamicGetBase::PstatsDeriveFilter // // @doc: // Derive stats from base table using filters on partition and/or index columns // //--------------------------------------------------------------------------- IStatistics * CLogicalDynamicGetBase::PstatsDeriveFilter ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CExpression *pexprFilter ) const { CExpression *pexprFilterNew = NULL; CConstraint *pcnstr = m_ppartcnstr->PcnstrCombined(); if (m_fPartial && NULL != pcnstr && !pcnstr->FUnbounded()) { if (NULL == pexprFilter) { pexprFilterNew = pcnstr->PexprScalar(pmp); pexprFilterNew->AddRef(); } else { pexprFilterNew = CPredicateUtils::PexprConjunction(pmp, pexprFilter, pcnstr->PexprScalar(pmp)); } } else if (NULL != pexprFilter) { pexprFilterNew = pexprFilter; pexprFilterNew->AddRef(); } CColRefSet *pcrsStat = GPOS_NEW(pmp) CColRefSet(pmp); CDrvdPropScalar *pdpscalar = NULL; if (NULL != pexprFilterNew) { pdpscalar = CDrvdPropScalar::Pdpscalar(pexprFilterNew->PdpDerive()); pcrsStat->Include(pdpscalar->PcrsUsed()); } // requesting statistics on distribution columns to estimate data skew if (NULL != m_pcrsDist) { pcrsStat->Include(m_pcrsDist); } IStatistics *pstatsFullTable = PstatsBaseTable(pmp, exprhdl, m_ptabdesc, pcrsStat); pcrsStat->Release(); if (NULL == pexprFilterNew || pdpscalar->FHasSubquery()) { return pstatsFullTable; } CStatsPred *pstatspred = CStatsPredUtils::PstatspredExtract ( pmp, pexprFilterNew, NULL /*pcrsOuterRefs*/ ); pexprFilterNew->Release(); IStatistics *pstatsResult = pstatsFullTable->PstatsFilter ( pmp, pstatspred, true /* fCapNdvs */ ); pstatspred->Release(); pstatsFullTable->Release(); return pstatsResult; }
//--------------------------------------------------------------------------- // @function: // CLogicalDifference::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalDifference::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); // difference is transformed into an aggregate over a LASJ, // we follow the same route to compute statistics DrgPcrs *pdrgpcrsOutput = GPOS_NEW(pmp) DrgPcrs(pmp); const ULONG ulSize = m_pdrgpdrgpcrInput->UlLength(); for (ULONG ul = 0; ul < ulSize; ul++) { CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, (*m_pdrgpdrgpcrInput)[ul]); pdrgpcrsOutput->Append(pcrs); } IStatistics *pstatsOuter = exprhdl.Pstats(0); IStatistics *pstatsInner = exprhdl.Pstats(1); // construct the scalar condition for the LASJ CExpression *pexprScCond = CUtils::PexprConjINDFCond(pmp, m_pdrgpdrgpcrInput); // compute the statistics for LASJ CColRefSet *pcrsOuterRefs = exprhdl.Pdprel()->PcrsOuter(); DrgPstatsjoin *pdrgpstatsjoin = CStatsPredUtils::Pdrgpstatsjoin ( pmp, exprhdl, pexprScCond, pdrgpcrsOutput, pcrsOuterRefs ); IStatistics *pstatsLASJ = pstatsOuter->PstatsLASJoin ( pmp, pstatsInner, pdrgpstatsjoin, true /* fIgnoreLasjHistComputation */ ); // clean up pexprScCond->Release(); pdrgpstatsjoin->Release(); // computed columns DrgPul *pdrgpulComputedCols = GPOS_NEW(pmp) DrgPul(pmp); IStatistics *pstats = CLogicalGbAgg::PstatsDerive ( pmp, pstatsLASJ, (*m_pdrgpdrgpcrInput)[0], // we group by the columns of the first child pdrgpulComputedCols, // no computed columns for set ops NULL // no keys, use all grouping cols ); // clean up pdrgpulComputedCols->Release(); pstatsLASJ->Release(); pdrgpcrsOutput->Release(); return pstats; }