示例#1
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalGet::PstatsDerive
//
//	@doc:
//		Load up statistics from metadata
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalGet::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	// requesting stats on distribution columns to estimate data skew
	IStatistics *pstatsTable = PstatsBaseTable(pmp, exprhdl, m_ptabdesc, m_pcrsDist);
	
	CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, m_pdrgpcrOutput);
	CUpperBoundNDVs *pubndv = GPOS_NEW(pmp) CUpperBoundNDVs(pcrs, pstatsTable->DRows());
	CStatistics::PstatsConvert(pstatsTable)->AddCardUpperBound(pubndv);

	return pstatsTable;
}
示例#2
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalAssert::PstatsDerive
//
//	@doc:
//		Derive statistics based on filter predicates
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalAssert::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	CMaxCard maxcard = CLogicalAssert::PopConvert(exprhdl.Pop())->Maxcard(pmp, exprhdl);
	if (1 == maxcard.Ull())
	{
		// a max card of one requires re-scaling stats
		IStatistics *pstats = exprhdl.Pstats(0);
		return  pstats->PstatsScale(pmp, CDouble(1.0 / pstats->DRows()));
	}

	return PstatsPassThruOuter(exprhdl);
}
示例#3
0
//---------------------------------------------------------------------------
//	@function:
//		CLogicalLimit::PstatsDerive
//
//	@doc:
//		Derive statistics based on limit
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalLimit::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	GPOS_ASSERT(Esp(exprhdl) > EspNone);
	IStatistics *pstatsChild = exprhdl.Pstats(0);
	CMaxCard maxcard = this->Maxcard(pmp, exprhdl);
	CDouble dRowsMax = CDouble(maxcard.Ull());

	if (pstatsChild->DRows() <= dRowsMax)
	{
		pstatsChild->AddRef();
		return pstatsChild;
	}

	return pstatsChild->PstatsLimit(pmp, dRowsMax);
}
示例#4
0
文件: CCostContext.cpp 项目: d/gporca
//---------------------------------------------------------------------------
//	@function:
//		CCostContext::ComputeCost
//
//	@doc:
//		Compute cost of current context,
//
//		the function extracts cardinality and row width of owner operator
//		and child operators, and then adjusts row estimate obtained from
//		statistics based on data distribution obtained from plan properties,
//
//		statistics row estimate is computed on logical expressions by
//		estimating the size of the whole relation regardless data
//		distribution, on the other hand, optimizer's cost model computes
//		the cost of a plan instance on some segment,
//
//		when a plan produces tuples distributed to multiple segments, we
//		need to divide statistics row estimate by the number segments to
//		provide a per-segment row estimate for cost computation,
//
//		Note that this scaling of row estimate cannot happen during
//		statistics derivation since plans are not created yet at this point
//
// 		this function also extracts number of rebinds of owner operator child
//		operators, if statistics are computed using predicates with external
//		parameters (outer references), number of rebinds is the total number
//		of external parameters' values
//
//---------------------------------------------------------------------------
CCost
CCostContext::CostCompute
	(
	IMemoryPool *pmp,
	DrgPcost *pdrgpcostChildren
	)
{
	// derive context stats
	DeriveStats();

	ULONG ulArity = 0;
	if (NULL != m_pdrgpoc)
	{
		ulArity = Pdrgpoc()->UlLength();
	}

	m_pstats->AddRef();
	ICostModel::SCostingInfo ci(pmp, ulArity, GPOS_NEW(pmp) ICostModel::CCostingStats(m_pstats));

	ICostModel *pcm = COptCtxt::PoctxtFromTLS()->Pcm();

	CExpressionHandle exprhdl(pmp);
	exprhdl.Attach(this);

	// extract local costing info
	DOUBLE dRows = m_pstats->DRows().DVal();
	if (CDistributionSpec::EdptPartitioned == Pdpplan()->Pds()->Edpt())
	{
		// scale statistics row estimate by number of segments
		dRows = DRowsPerHost().DVal();
	}
	ci.SetRows(dRows);

	DOUBLE dWidth = m_pstats->DWidth(pmp, m_poc->Prpp()->PcrsRequired()).DVal();
	ci.SetWidth(dWidth);

	DOUBLE dRebinds = m_pstats->DRebinds().DVal();
	ci.SetRebinds(dRebinds);
	GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebinds) && "invalid number of rebinds when there are no outer references");

	// extract children costing info
	for (ULONG ul = 0; ul < ulArity; ul++)
	{
		COptimizationContext *pocChild = (*m_pdrgpoc)[ul];
		CCostContext *pccChild = pocChild->PccBest();
		GPOS_ASSERT(NULL != pccChild);

		IStatistics *pstatsChild = pccChild->Pstats();
		DOUBLE dRowsChild = pstatsChild->DRows().DVal();
		if (CDistributionSpec::EdptPartitioned == pccChild->Pdpplan()->Pds()->Edpt())
		{
			// scale statistics row estimate by number of segments
			dRowsChild = pccChild->DRowsPerHost().DVal();
		}
		ci.SetChildRows(ul, dRowsChild);

		DOUBLE dWidthChild = pstatsChild->DWidth(pmp, pocChild->Prpp()->PcrsRequired()).DVal();
		ci.SetChildWidth(ul, dWidthChild);

		DOUBLE dRebindsChild = pstatsChild->DRebinds().DVal();
		ci.SetChildRebinds(ul, dRebindsChild);
		GPOS_ASSERT_IMP(!exprhdl.FHasOuterRefs(ul), GPOPT_DEFAULT_REBINDS == (ULONG) (dRebindsChild) && "invalid number of rebinds when there are no outer references");

		DOUBLE dCostChild =  (*pdrgpcostChildren)[ul]->DVal();
		ci.SetChildCost(ul, dCostChild);
	}

	// compute cost using the underlying cost model
	return pcm->Cost(exprhdl, &ci);
}