Ejemplo n.º 1
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalUnion::PstatsDerive
//
//	@doc:
//		Derive statistics
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalUnion::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	GPOS_ASSERT(Esp(exprhdl) > EspNone);

	// union is transformed into a group by over an union all
	// we follow the same route to compute statistics
	IStatistics *pstatsUnionAll = CLogicalUnionAll::PstatsDeriveUnionAll(pmp, exprhdl);

	// computed columns
	DrgPul *pdrgpulComputedCols = GPOS_NEW(pmp) DrgPul(pmp);

	IStatistics *pstats = CLogicalGbAgg::PstatsDerive
											(
											pmp,
											pstatsUnionAll,
											m_pdrgpcrOutput, // we group by the output columns
											pdrgpulComputedCols, // no computed columns for set ops
											NULL // no keys, use all grouping cols
											);

	// clean up
	pdrgpulComputedCols->Release();
	pstatsUnionAll->Release();

	return pstats;
}
Ejemplo n.º 2
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalInnerIndexApply::PstatsDerive
//
//	@doc:
//		Derive statistics
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalInnerIndexApply::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat* // pdrgpstatCtxt
	)
	const
{
	GPOS_ASSERT(EspNone < Esp(exprhdl));

	IStatistics *pstatsOuter = exprhdl.Pstats(0);
	IStatistics *pstatsInner = exprhdl.Pstats(1);
	CExpression *pexprScalar = exprhdl.PexprScalarChild(2 /*ulChildIndex*/);

	// join stats of the children
	DrgPstat *pdrgpstat = GPOS_NEW(pmp) DrgPstat(pmp);
	pstatsOuter->AddRef();
	pdrgpstat->Append(pstatsOuter);
	pstatsInner->AddRef();
	pdrgpstat->Append(pstatsInner);
	IStatistics *pstats = CStatisticsUtils::PstatsJoinArray(pmp, false /*fOuterJoin*/, pdrgpstat, pexprScalar);
	pdrgpstat->Release();

	return pstats;
}
Ejemplo n.º 3
0
//---------------------------------------------------------------------------
//	@function:
//		CGroupExpression::PstatsRecursiveDerive
//
//	@doc:
//		Derive stats recursively on group expression
//
//---------------------------------------------------------------------------
IStatistics *
CGroupExpression::PstatsRecursiveDerive
	(
	IMemoryPool *, // pmpLocal
	IMemoryPool *pmpGlobal,
	CReqdPropRelational *prprel,
	DrgPstat *pdrgpstatCtxt,
	BOOL fComputeRootStats
	)
{
	GPOS_ASSERT(!Pgroup()->FScalar());
	GPOS_ASSERT(!Pgroup()->FImplemented());
	GPOS_ASSERT(NULL != pdrgpstatCtxt);
	GPOS_CHECK_ABORT;

	// trigger recursive property derivation
	CExpressionHandle exprhdl(pmpGlobal);
	exprhdl.Attach(this);
	exprhdl.DeriveProps(NULL /*pdpctxt*/);

	// compute required relational properties on child groups
	exprhdl.ComputeReqdProps(prprel, 0 /*ulOptReq*/);

	// trigger recursive stat derivation
	exprhdl.DeriveStats(pdrgpstatCtxt, fComputeRootStats);
	IStatistics *pstats = exprhdl.Pstats();
	if (NULL != pstats)
	{
		pstats->AddRef();
	}

	return pstats;
}
Ejemplo n.º 4
0
//---------------------------------------------------------------------------
//	@function:
//		CExpressionHandle::DeriveRootStats
//
//	@doc:
//		Stat derivation at root operator where handle is attached
//
//---------------------------------------------------------------------------
void
CExpressionHandle::DeriveRootStats
	(
	DrgPstat *pdrgpstatCtxt
	)
{
	GPOS_ASSERT(NULL == m_pstats);

	CLogical *popLogical = CLogical::PopConvert(Pop());
	IStatistics *pstatsRoot = NULL;
	if (FAttachedToLeafPattern())
	{
		// for leaf patterns extracted from memo, trigger state derivation on origin group
		GPOS_ASSERT(NULL != m_pexpr);
		GPOS_ASSERT(NULL != m_pexpr->Pgexpr());

		pstatsRoot = m_pexpr->Pgexpr()->Pgroup()->PstatsRecursiveDerive(m_pmp, m_pmp, CReqdPropRelational::Prprel(m_prp), pdrgpstatCtxt);
		pstatsRoot->AddRef();
	}
	else
	{
		// otherwise, derive stats using root operator
		pstatsRoot = popLogical->PstatsDerive(m_pmp, *this, pdrgpstatCtxt);
	}
	GPOS_ASSERT(NULL != pstatsRoot);

	m_pstats = pstatsRoot;
}
Ejemplo n.º 5
0
//---------------------------------------------------------------------------
//	@function:
//		CExpressionHandle::CopyStats
//
//	@doc:
//		Copy stats from attached expression/group expression to local stats
//		members
//
//---------------------------------------------------------------------------
void
CExpressionHandle::CopyStats()
{
	if (!FStatsDerived())
	{
		// stats of attached expression (or its children) have not been derived yet
		return;
	}

	IStatistics *pstats = NULL;
	if (NULL != m_pexpr)
	{
		pstats = const_cast<IStatistics *>(m_pexpr->Pstats());
	}
	else
	{
		GPOS_ASSERT(NULL != m_pgexpr);
		pstats = m_pgexpr->Pgroup()->Pstats();
	}
	GPOS_ASSERT(NULL != pstats);

	// attach stats
	pstats->AddRef();
	GPOS_ASSERT(NULL == m_pstats);
	m_pstats = pstats;

	// attach child stats
	GPOS_ASSERT(NULL == m_pdrgpstat);
	m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp);
	const ULONG ulArity = UlArity();
	for (ULONG ul = 0; ul < ulArity; ul++)
	{
		IStatistics *pstatsChild = NULL;
		if (NULL != m_pexpr)
		{
			pstatsChild = const_cast<IStatistics *>((*m_pexpr)[ul]->Pstats());
		}
		else
		{
			pstatsChild = (*m_pgexpr)[ul]->Pstats();
		}

		if (NULL != pstatsChild)
		{
			pstatsChild->AddRef();
		}
		else
		{
			GPOS_ASSERT(FScalarChild(ul));

			// create dummy stats for missing scalar children
			pstatsChild = CStatistics::PstatsEmpty(m_pmp);
		}

		m_pdrgpstat->Append(pstatsChild);
	}
}
Ejemplo n.º 6
0
size_t rtree_get_utilization(ISpatialIndex *rtree) {
    if(rtree == NULL) {
        return 0;
    }

    IStatistics *statistic; 
    rtree->getStatistics(&statistic);
    size_t nodes_num = statistic->getNumberOfData();
    delete statistic;

    return nodes_num;
}
Ejemplo n.º 7
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalDifferenceAll::PstatsDerive
//
//	@doc:
//		Derive statistics
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalDifferenceAll::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	GPOS_ASSERT(Esp(exprhdl) > EspNone);

	// difference all is transformed into a LASJ,
	// we follow the same route to compute statistics
	DrgPcrs *pdrgpcrsOutput = GPOS_NEW(pmp) DrgPcrs(pmp);
	const ULONG ulSize = m_pdrgpdrgpcrInput->UlLength();
	for (ULONG ul = 0; ul < ulSize; ul++)
	{
		CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, (*m_pdrgpdrgpcrInput)[ul]);
		pdrgpcrsOutput->Append(pcrs);
	}

	IStatistics *pstatsOuter = exprhdl.Pstats(0);
	IStatistics *pstatsInner = exprhdl.Pstats(1);

	// construct the scalar condition for the LASJ
	CExpression *pexprScCond = CUtils::PexprConjINDFCond(pmp, m_pdrgpdrgpcrInput);

	// compute the statistics for LASJ
	CColRefSet *pcrsOuterRefs = exprhdl.Pdprel()->PcrsOuter();
	DrgPstatsjoin *pdrgpstatsjoin = CStatsPredUtils::Pdrgpstatsjoin
														(
														pmp, 
														exprhdl, 
														pexprScCond, 
														pdrgpcrsOutput, 
														pcrsOuterRefs
														);
	IStatistics *pstatsLASJ = pstatsOuter->PstatsLASJoin
											(
											pmp,
											pstatsInner,
											pdrgpstatsjoin,
											true /* fIgnoreLasjHistComputation*/
											);

	// clean up
	pexprScCond->Release();
	pdrgpstatsjoin->Release();
	pdrgpcrsOutput->Release();

	return pstatsLASJ;
}
Ejemplo n.º 8
0
//---------------------------------------------------------------------------
//	@function:
//		CLogical::PstatsPassThruOuter
//
//	@doc:
//		Helper for common case of passing through derived stats
//
//---------------------------------------------------------------------------
IStatistics *
CLogical::PstatsPassThruOuter
	(
	CExpressionHandle &exprhdl
	)
{
	GPOS_CHECK_ABORT;

	IStatistics *pstats = exprhdl.Pstats(0);
	pstats->AddRef();

	return pstats;
}
Ejemplo n.º 9
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalGet::PstatsDerive
//
//	@doc:
//		Load up statistics from metadata
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalGet::PstatsDerive
	(
	IMemoryPool *mp,
	CExpressionHandle &exprhdl,
	IStatisticsArray * // not used
	)
	const
{
	// requesting stats on distribution columns to estimate data skew
	IStatistics *pstatsTable = PstatsBaseTable(mp, exprhdl, m_ptabdesc, m_pcrsDist);
	
	CColRefSet *pcrs = GPOS_NEW(mp) CColRefSet(mp, m_pdrgpcrOutput);
	CUpperBoundNDVs *upper_bound_NDVs = GPOS_NEW(mp) CUpperBoundNDVs(pcrs, pstatsTable->Rows());
	CStatistics::CastStats(pstatsTable)->AddCardUpperBound(upper_bound_NDVs);

	return pstatsTable;
}
Ejemplo n.º 10
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalGet::PstatsDerive
//
//	@doc:
//		Load up statistics from metadata
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalGet::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	// requesting stats on distribution columns to estimate data skew
	IStatistics *pstatsTable = PstatsBaseTable(pmp, exprhdl, m_ptabdesc, m_pcrsDist);
	
	CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, m_pdrgpcrOutput);
	CUpperBoundNDVs *pubndv = GPOS_NEW(pmp) CUpperBoundNDVs(pcrs, pstatsTable->DRows());
	CStatistics::PstatsConvert(pstatsTable)->AddCardUpperBound(pubndv);

	return pstatsTable;
}
Ejemplo n.º 11
0
//---------------------------------------------------------------------------
//	@function:
//		CExpressionHandle::PdrgpstatOuterRefs
//
//	@doc:
//		Given an array of stats objects and a child index, return an array
//		of stats objects starting from the first stats object referenced by
//		child
//
//---------------------------------------------------------------------------
DrgPstat *
CExpressionHandle::PdrgpstatOuterRefs
	(
	DrgPstat *pdrgpstat,
	ULONG ulChildIndex
	)
	const
{
	GPOS_ASSERT(NULL != pdrgpstat);
	GPOS_ASSERT(ulChildIndex < UlArity());

	if (FScalarChild(ulChildIndex) || !FHasOuterRefs(ulChildIndex))
	{
		// if child is scalar or has no outer references, return empty array
		return  GPOS_NEW(m_pmp) DrgPstat(m_pmp);
	}

	DrgPstat *pdrgpstatResult = GPOS_NEW(m_pmp) DrgPstat(m_pmp);
	CColRefSet *pcrsOuter = Pdprel(ulChildIndex)->PcrsOuter();
	GPOS_ASSERT(0 < pcrsOuter->CElements());

	const ULONG ulSize = pdrgpstat->UlLength();
	ULONG ulStartIndex = ULONG_MAX;
	for (ULONG ul = 0; ul < ulSize; ul++)
	{
		IStatistics *pstats = (*pdrgpstat)[ul];
		CColRefSet *pcrsStats = pstats->Pcrs(m_pmp);
		BOOL fStatsColsUsed = !pcrsOuter->FDisjoint(pcrsStats);
		pcrsStats->Release();
		if (fStatsColsUsed)
		{
			ulStartIndex = ul;
			break;
		}
	}

	if (ULONG_MAX != ulStartIndex)
	{
		// copy stats starting from index of outer-most stats object referenced by child
		CUtils::AddRefAppend<IStatistics, CleanupStats>(pdrgpstatResult, pdrgpstat, ulStartIndex);
	}

	return pdrgpstatResult;
}
Ejemplo n.º 12
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalAssert::PstatsDerive
//
//	@doc:
//		Derive statistics based on filter predicates
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalAssert::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	CMaxCard maxcard = CLogicalAssert::PopConvert(exprhdl.Pop())->Maxcard(pmp, exprhdl);
	if (1 == maxcard.Ull())
	{
		// a max card of one requires re-scaling stats
		IStatistics *pstats = exprhdl.Pstats(0);
		return  pstats->PstatsScale(pmp, CDouble(1.0 / pstats->DRows()));
	}

	return PstatsPassThruOuter(exprhdl);
}
Ejemplo n.º 13
0
//---------------------------------------------------------------------------
//	@function:
//		CLogical::PstatsBaseTable
//
//	@doc:
//		Helper for deriving statistics on a base table
//
//---------------------------------------------------------------------------
IStatistics *
CLogical::PstatsBaseTable
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	CTableDescriptor *ptabdesc,
	CColRefSet *pcrsStatExtra   // additional columns required for stats, not required by parent
	)
{
	// extract colids and attribute for which detailed stats are necessary
	CReqdPropRelational *prprel = CReqdPropRelational::Prprel(exprhdl.Prp());
	CColRefSet *pcrsStat = GPOS_NEW(pmp) CColRefSet(pmp);
	pcrsStat->Include(prprel->PcrsStat());
	if (NULL != pcrsStatExtra)
	{
		pcrsStat->Include(pcrsStatExtra);
	}

	DrgPul *pdrgpulHistColIds = GPOS_NEW(pmp) DrgPul(pmp);
	DrgPul *pdrgpulHistPos = GPOS_NEW(pmp) DrgPul(pmp);
	CUtils::ExtractColIdsAttno(pmp, ptabdesc, pcrsStat, pdrgpulHistColIds, pdrgpulHistPos);

	// extract colids and attribute for which widths are necessary
	CDrvdPropRelational *pdprel = exprhdl.Pdprel();
	CColRefSet *pcrsWidth = pdprel->PcrsOutput();
	DrgPul *pdrgpulWidthColIds = GPOS_NEW(pmp) DrgPul(pmp);
	DrgPul *pdrgpulWidthPos = GPOS_NEW(pmp) DrgPul(pmp);
	CUtils::ExtractColIdsAttno(pmp, ptabdesc, pcrsWidth, pdrgpulWidthColIds, pdrgpulWidthPos);

	CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda();
	IStatistics *pstats = pmda->Pstats(pmp, ptabdesc->Pmdid(), pdrgpulHistPos, pdrgpulHistColIds, pdrgpulWidthPos, pdrgpulWidthColIds);

	if (!GPOS_FTRACE(EopttraceDonotCollectMissingStatsCols) && !pstats->FEmpty())
	{
		CStatisticsUtils::RecordMissingStatisticsColumns(pmp, ptabdesc, pcrsStat, pstats);
	}

	pcrsStat->Release();

	return pstats;
}
Ejemplo n.º 14
0
//---------------------------------------------------------------------------
//	@function:
//		CPhysicalPartitionSelector::PpfmDerive
//
//	@doc:
//		Derive partition filter map
//
//---------------------------------------------------------------------------
CPartFilterMap *
CPhysicalPartitionSelector::PpfmDerive
	(
	IMemoryPool *mp,
	CExpressionHandle &exprhdl
	)
	const
{
	if (!FHasFilter())
	{
		return PpfmPassThruOuter(exprhdl);
	}

	CPartFilterMap *ppfm = PpfmDeriveCombineRelational(mp, exprhdl);
	IStatistics *stats = exprhdl.Pstats();
	GPOS_ASSERT(NULL != stats);
	m_pexprCombinedPredicate->AddRef();
	stats->AddRef();
	ppfm->AddPartFilter(mp, m_scan_id, m_pexprCombinedPredicate, stats);
	return ppfm;
}
Ejemplo n.º 15
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalSelect::PstatsDerive
//
//	@doc:
//		Derive statistics based on filter predicates
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalSelect::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat *pdrgpstatCtxt
	)
	const
{
	GPOS_ASSERT(Esp(exprhdl) > EspNone);
	IStatistics *pstatsChild = exprhdl.Pstats(0);

	if (exprhdl.Pdpscalar(1 /*ulChildIndex*/)->FHasSubquery())
	{
		// in case of subquery in select predicate, we return child stats
		pstatsChild->AddRef();
		return pstatsChild;
	}

	// remove implied predicates from selection condition to avoid cardinality under-estimation
	CExpression *pexprScalar = exprhdl.PexprScalarChild(1 /*ulChildIndex*/);
	CExpression *pexprPredicate = CPredicateUtils::PexprRemoveImpliedConjuncts(pmp, pexprScalar, exprhdl);


	// split selection predicate into local predicate and predicate involving outer references
	CExpression *pexprLocal = NULL;
	CExpression *pexprOuterRefs = NULL;

	// get outer references from expression handle
	CColRefSet *pcrsOuter = exprhdl.Pdprel()->PcrsOuter();

	CPredicateUtils::SeparateOuterRefs(pmp, pexprPredicate, pcrsOuter, &pexprLocal, &pexprOuterRefs);
	pexprPredicate->Release();

	IStatistics *pstats = CStatisticsUtils::PstatsFilter(pmp, exprhdl, pstatsChild, pexprLocal, pexprOuterRefs, pdrgpstatCtxt);
	pexprLocal->Release();
	pexprOuterRefs->Release();

	return pstats;
}
Ejemplo n.º 16
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalLimit::PstatsDerive
//
//	@doc:
//		Derive statistics based on limit
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalLimit::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	GPOS_ASSERT(Esp(exprhdl) > EspNone);
	IStatistics *pstatsChild = exprhdl.Pstats(0);
	CMaxCard maxcard = this->Maxcard(pmp, exprhdl);
	CDouble dRowsMax = CDouble(maxcard.Ull());

	if (pstatsChild->DRows() <= dRowsMax)
	{
		pstatsChild->AddRef();
		return pstatsChild;
	}

	return pstatsChild->PstatsLimit(pmp, dRowsMax);
}
Ejemplo n.º 17
0
//---------------------------------------------------------------------------
//	@function:
//		CCostContext::ComputeCost
//
//	@doc:
//		Compute cost of current context,
//
//		the function extracts cardinality and row width of owner operator
//		and child operators, and then adjusts row estimate obtained from
//		statistics based on data distribution obtained from plan properties,
//
//		statistics row estimate is computed on logical expressions by
//		estimating the size of the whole relation regardless data
//		distribution, on the other hand, optimizer's cost model computes
//		the cost of a plan instance on some segment,
//
//		when a plan produces tuples distributed to multiple segments, we
//		need to divide statistics row estimate by the number segments to
//		provide a per-segment row estimate for cost computation,
//
//		Note that this scaling of row estimate cannot happen during
//		statistics derivation since plans are not created yet at this point
//
// 		this function also extracts number of rebinds of owner operator child
//		operators, if statistics are computed using predicates with external
//		parameters (outer references), number of rebinds is the total number
//		of external parameters' values
//
//---------------------------------------------------------------------------
CCost
CCostContext::CostCompute
	(
	IMemoryPool *pmp,
	DrgPcost *pdrgpcostChildren
	)
{
	// derive context stats
	DeriveStats();

	ULONG ulArity = 0;
	if (NULL != m_pdrgpoc)
	{
		ulArity = Pdrgpoc()->UlLength();
	}

	m_pstats->AddRef();
	ICostModel::SCostingInfo ci(pmp, ulArity, GPOS_NEW(pmp) ICostModel::CCostingStats(m_pstats));

	ICostModel *pcm = COptCtxt::PoctxtFromTLS()->Pcm();

	CExpressionHandle exprhdl(pmp);
	exprhdl.Attach(this);

	// extract local costing info
	DOUBLE dRows = m_pstats->DRows().DVal();
	if (CDistributionSpec::EdptPartitioned == Pdpplan()->Pds()->Edpt())
	{
		// scale statistics row estimate by number of segments
		dRows = DRowsPerHost().DVal();
	}
	ci.SetRows(dRows);

	DOUBLE dWidth = m_pstats->DWidth(pmp, m_poc->Prpp()->PcrsRequired()).DVal();
	ci.SetWidth(dWidth);

	DOUBLE dRebinds = m_pstats->DRebinds().DVal();
	ci.SetRebinds(dRebinds);
	GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebinds) && "invalid number of rebinds when there are no outer references");

	// extract children costing info
	for (ULONG ul = 0; ul < ulArity; ul++)
	{
		COptimizationContext *pocChild = (*m_pdrgpoc)[ul];
		CCostContext *pccChild = pocChild->PccBest();
		GPOS_ASSERT(NULL != pccChild);

		IStatistics *pstatsChild = pccChild->Pstats();
		DOUBLE dRowsChild = pstatsChild->DRows().DVal();
		if (CDistributionSpec::EdptPartitioned == pccChild->Pdpplan()->Pds()->Edpt())
		{
			// scale statistics row estimate by number of segments
			dRowsChild = pccChild->DRowsPerHost().DVal();
		}
		ci.SetChildRows(ul, dRowsChild);

		DOUBLE dWidthChild = pstatsChild->DWidth(pmp, pocChild->Prpp()->PcrsRequired()).DVal();
		ci.SetChildWidth(ul, dWidthChild);

		DOUBLE dRebindsChild = pstatsChild->DRebinds().DVal();
		ci.SetChildRebinds(ul, dRebindsChild);
		GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(ul), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebindsChild) && "invalid number of rebinds when there are no outer references");

		DOUBLE dCostChild =  (*pdrgpcostChildren)[ul]->DVal();
		ci.SetChildCost(ul, dCostChild);
	}

	// compute cost using the underlying cost model
	return pcm->Cost(exprhdl, &ci);
}
Ejemplo n.º 18
0
//---------------------------------------------------------------------------
//	@function:
//		CExpressionHandle::DeriveCostContextStats
//
//	@doc:
//		Stats derivation based on required plan properties
//
//---------------------------------------------------------------------------
void
CExpressionHandle::DeriveCostContextStats()
{
	GPOS_ASSERT(NULL != m_pcc);
	GPOS_ASSERT(NULL == m_pcc->Pstats());

	// copy group properties and stats
	CopyGroupProps();
	CopyStats();

	if (NULL != m_pstats && !m_pcc->FNeedsNewStats())
	{
		// there is no need to derive stats,
		// stats are copied from owner group

		return;
	}

	CEnfdPartitionPropagation *pepp = m_pcc->Poc()->Prpp()->Pepp();
	COperator *pop = Pop();
	if (CUtils::FPhysicalScan(pop) &&
		CPhysicalScan::PopConvert(pop)->FDynamicScan() &&
		!pepp->PpfmDerived()->FEmpty())
	{
		// derive stats on dynamic table scan using stats of part selector
		CPhysicalScan *popScan = CPhysicalScan::PopConvert(m_pgexpr->Pop());
		IStatistics *pstatsDS = popScan->PstatsDerive(m_pmp, *this, m_pcc->Poc()->Prpp(), m_pcc->Poc()->Pdrgpstat());

		CRefCount::SafeRelease(m_pstats);
		m_pstats = pstatsDS;

		return;
	}

	// release current stats since we will derive new stats
	CRefCount::SafeRelease(m_pstats);
	m_pstats = NULL;

	// load stats from child cost context -- these may be different from child groups stats
	CRefCount::SafeRelease(m_pdrgpstat);
	m_pdrgpstat = NULL;

	m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp);
	const ULONG ulArity = m_pcc->Pdrgpoc()->UlLength();
	for (ULONG ul = 0; ul < ulArity; ul++)
	{
		COptimizationContext *pocChild = (*m_pcc->Pdrgpoc())[ul];
		CCostContext *pccChild = pocChild->PccBest();
		GPOS_ASSERT(NULL != pccChild);
		GPOS_ASSERT(NULL != pccChild->Pstats());

		pccChild->Pstats()->AddRef();
		m_pdrgpstat->Append(pccChild->Pstats());
	}

	if (CPhysical::PopConvert(m_pgexpr->Pop())->FPassThruStats())
	{
		GPOS_ASSERT(1 == m_pdrgpstat->UlLength());

		// copy stats from first child
		(*m_pdrgpstat)[0]->AddRef();
		m_pstats = (*m_pdrgpstat)[0];

		return;
	}

	// derive stats using the best logical expression with the same children as attached physical operator
	CGroupExpression *pgexprForStats = m_pcc->PgexprForStats();
	GPOS_ASSERT(NULL != pgexprForStats);

	CExpressionHandle exprhdl(m_pmp);
	exprhdl.Attach(pgexprForStats);
	exprhdl.DeriveProps(NULL /*pdpctxt*/);
	m_pdrgpstat->AddRef();
	exprhdl.m_pdrgpstat = m_pdrgpstat;
	exprhdl.ComputeReqdProps(m_pcc->Poc()->Prprel(), 0 /*ulOptReq*/);

	GPOS_ASSERT(NULL == exprhdl.m_pstats);
	IStatistics *pstats = m_pgexpr->Pgroup()->PstatsCompute(m_pcc->Poc(), exprhdl, pgexprForStats);

	// copy stats to main handle
	GPOS_ASSERT(NULL == m_pstats);
	GPOS_ASSERT(NULL != pstats);

	pstats->AddRef();
	m_pstats = pstats;

	GPOS_ASSERT(m_pstats != NULL);
}
Ejemplo n.º 19
0
//---------------------------------------------------------------------------
//	@function:
//		CExpressionHandle::DeriveStats
//
//	@doc:
//		Recursive stat derivation
//
//---------------------------------------------------------------------------
void
CExpressionHandle::DeriveStats
	(
	DrgPstat *pdrgpstatCtxt,
	BOOL fComputeRootStats
	)
{
	GPOS_ASSERT(NULL != pdrgpstatCtxt);
	GPOS_ASSERT(NULL == m_pdrgpstat);
	GPOS_ASSERT(NULL == m_pstats);
	GPOS_ASSERT(NULL != m_pdrgprp);

	// copy input context
	DrgPstat *pdrgpstatCurrentCtxt = GPOS_NEW(m_pmp) DrgPstat(m_pmp);
	CUtils::AddRefAppend<IStatistics, CleanupStats>(pdrgpstatCurrentCtxt, pdrgpstatCtxt);

	// create array of children stats
	m_pdrgpstat = GPOS_NEW(m_pmp) DrgPstat(m_pmp);
	ULONG ulMaxChildRisk = 1;
	const ULONG ulArity = UlArity();
	for (ULONG ul = 0; ul < ulArity; ul++)
	{
		// create a new context for outer references used by current child
		DrgPstat *pdrgpstatChildCtxt = PdrgpstatOuterRefs(pdrgpstatCurrentCtxt, ul);

		IStatistics *pstats = NULL;
		if (NULL != Pexpr())
		{
			// derive stats recursively on child expression
			pstats = (*Pexpr())[ul]->PstatsDerive(Prprel(ul), pdrgpstatChildCtxt);
		}
		else
		{
			// derive stats recursively on child group
			pstats = (*Pgexpr())[ul]->PstatsRecursiveDerive(m_pmp, m_pmp, Prprel(ul), pdrgpstatChildCtxt);
		}
		GPOS_ASSERT(NULL != pstats);

		// add child stat to current context
		pstats->AddRef();
		pdrgpstatCurrentCtxt->Append(pstats);
		pdrgpstatChildCtxt->Release();

		// add child stat to children stat array
		pstats->AddRef();
		m_pdrgpstat->Append(pstats);
		if (pstats->UlStatsEstimationRisk() > ulMaxChildRisk)
		{
			ulMaxChildRisk = pstats->UlStatsEstimationRisk();
		}
	}

	if (fComputeRootStats)
	{
		// call stat derivation on operator to compute local stats
		GPOS_ASSERT(NULL == m_pstats);

		DeriveRootStats(pdrgpstatCtxt);
		GPOS_ASSERT(NULL != m_pstats);

		CLogical *popLogical = CLogical::PopConvert(Pop());
		ULONG ulRisk = ulMaxChildRisk;
		if (CStatisticsUtils::FIncreasesRisk(popLogical))
		{
			++ulRisk;
		}
		m_pstats->SetStatsEstimationRisk(ulRisk);
	}

	// clean up current stat context
	pdrgpstatCurrentCtxt->Release();
}
Ejemplo n.º 20
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalDynamicGetBase::PstatsDeriveFilter
//
//	@doc:
//		Derive stats from base table using filters on partition and/or index columns
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalDynamicGetBase::PstatsDeriveFilter
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	CExpression *pexprFilter
	)
	const
{
	CExpression *pexprFilterNew = NULL;
	CConstraint *pcnstr = m_ppartcnstr->PcnstrCombined();
	if (m_fPartial && NULL != pcnstr && !pcnstr->FUnbounded())
	{
		if (NULL == pexprFilter)
		{
			pexprFilterNew = pcnstr->PexprScalar(pmp);
			pexprFilterNew->AddRef();
		}
		else
		{
			pexprFilterNew = CPredicateUtils::PexprConjunction(pmp, pexprFilter, pcnstr->PexprScalar(pmp));
		}
	}
	else if (NULL != pexprFilter)
	{
		pexprFilterNew = pexprFilter;
		pexprFilterNew->AddRef();
	}

	CColRefSet *pcrsStat = GPOS_NEW(pmp) CColRefSet(pmp);
	CDrvdPropScalar *pdpscalar = NULL;

	if (NULL != pexprFilterNew)
	{
		pdpscalar = CDrvdPropScalar::Pdpscalar(pexprFilterNew->PdpDerive());
		pcrsStat->Include(pdpscalar->PcrsUsed());
	}

	// requesting statistics on distribution columns to estimate data skew
	if (NULL != m_pcrsDist)
	{
		pcrsStat->Include(m_pcrsDist);
	}


	IStatistics *pstatsFullTable = PstatsBaseTable(pmp, exprhdl, m_ptabdesc, pcrsStat);
	
	pcrsStat->Release();
	
	if (NULL == pexprFilterNew || pdpscalar->FHasSubquery())
	{
		return pstatsFullTable;
	}

	CStatsPred *pstatspred =  CStatsPredUtils::PstatspredExtract
												(
												pmp, 
												pexprFilterNew, 
												NULL /*pcrsOuterRefs*/
												);
	pexprFilterNew->Release();

	IStatistics *pstatsResult = pstatsFullTable->PstatsFilter
													(
													pmp, 
													pstatspred, 
													true /* fCapNdvs */ 
													);
	pstatspred->Release();
	pstatsFullTable->Release();

	return pstatsResult;
}
Ejemplo n.º 21
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalDifference::PstatsDerive
//
//	@doc:
//		Derive statistics
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalDifference::PstatsDerive
(
    IMemoryPool *pmp,
    CExpressionHandle &exprhdl,
    DrgPstat * // not used
)
const
{
    GPOS_ASSERT(Esp(exprhdl) > EspNone);

    // difference is transformed into an aggregate over a LASJ,
    // we follow the same route to compute statistics
    DrgPcrs *pdrgpcrsOutput = GPOS_NEW(pmp) DrgPcrs(pmp);
    const ULONG ulSize = m_pdrgpdrgpcrInput->UlLength();
    for (ULONG ul = 0; ul < ulSize; ul++)
    {
        CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, (*m_pdrgpdrgpcrInput)[ul]);
        pdrgpcrsOutput->Append(pcrs);
    }

    IStatistics *pstatsOuter = exprhdl.Pstats(0);
    IStatistics *pstatsInner = exprhdl.Pstats(1);

    // construct the scalar condition for the LASJ
    CExpression *pexprScCond = CUtils::PexprConjINDFCond(pmp, m_pdrgpdrgpcrInput);

    // compute the statistics for LASJ
    CColRefSet *pcrsOuterRefs = exprhdl.Pdprel()->PcrsOuter();
    DrgPstatsjoin *pdrgpstatsjoin = CStatsPredUtils::Pdrgpstatsjoin
                                    (
                                        pmp,
                                        exprhdl,
                                        pexprScCond,
                                        pdrgpcrsOutput,
                                        pcrsOuterRefs
                                    );
    IStatistics *pstatsLASJ = pstatsOuter->PstatsLASJoin
                              (
                                  pmp,
                                  pstatsInner,
                                  pdrgpstatsjoin,
                                  true /* fIgnoreLasjHistComputation */
                              );

    // clean up
    pexprScCond->Release();
    pdrgpstatsjoin->Release();

    // computed columns
    DrgPul *pdrgpulComputedCols = GPOS_NEW(pmp) DrgPul(pmp);
    IStatistics *pstats = CLogicalGbAgg::PstatsDerive
                          (
                              pmp,
                              pstatsLASJ,
                              (*m_pdrgpdrgpcrInput)[0], // we group by the columns of the first child
                              pdrgpulComputedCols, // no computed columns for set ops
                              NULL // no keys, use all grouping cols
                          );

    // clean up
    pdrgpulComputedCols->Release();
    pstatsLASJ->Release();
    pdrgpcrsOutput->Release();

    return pstats;
}