//---------------------------------------------------------------------------
//	@function:
//		CLogicalConstTableGet::PstatsDerive
//
//	@doc:
//		Derive statistics
//
//---------------------------------------------------------------------------
IStatistics *
CLogicalConstTableGet::PstatsDerive
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	DrgPstat * // not used
	)
	const
{
	GPOS_ASSERT(Esp(exprhdl) > EspNone);
	CReqdPropRelational *prprel = CReqdPropRelational::Prprel(exprhdl.Prp());
	CColRefSet *pcrs = prprel->PcrsStat();
	DrgPul *pdrgpulColIds = GPOS_NEW(pmp) DrgPul(pmp);
	pcrs->ExtractColIds(pmp, pdrgpulColIds);
	DrgPul *pdrgpulColWidth = CUtils::Pdrgpul(pmp, m_pdrgpcrOutput);

	IStatistics *pstats = CStatistics::PstatsDummy
										(
										pmp,
										pdrgpulColIds,
										pdrgpulColWidth,
										m_pdrgpdrgpdatum->UlLength()
										);

	// clean up
	pdrgpulColIds->Release();
	pdrgpulColWidth->Release();

	return pstats;
}
Exemple #2
0
//---------------------------------------------------------------------------
//	@function:
//		CLogical::PstatsDeriveDummy
//
//	@doc:
//		Derive dummy statistics
//
//---------------------------------------------------------------------------
IStatistics *
CLogical::PstatsDeriveDummy
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	CDouble dRows
	)
	const
{
	GPOS_CHECK_ABORT;

	// return a dummy stats object that has a histogram for every
	// required-stats column
	GPOS_ASSERT(Esp(exprhdl) > EspNone);
	CReqdPropRelational *prprel = CReqdPropRelational::Prprel(exprhdl.Prp());
	CColRefSet *pcrs = prprel->PcrsStat();
	DrgPul *pdrgpulColIds = GPOS_NEW(pmp) DrgPul(pmp);
	pcrs->ExtractColIds(pmp, pdrgpulColIds);

	IStatistics *pstats = CStatistics::PstatsDummy(pmp, pdrgpulColIds, dRows);

	// clean up
	pdrgpulColIds->Release();

	return pstats;
}
Exemple #3
0
//---------------------------------------------------------------------------
//	@function:
//		CCostContext::DRowsPerHost
//
//	@doc:
//		Return the number of rows per host
//
//---------------------------------------------------------------------------
CDouble
CCostContext::DRowsPerHost() const
{
	DOUBLE dRows = Pstats()->DRows().DVal();
	COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS();
	const ULONG ulHosts = poptctxt->Pcm()->UlHosts();

	CDistributionSpec *pds =  Pdpplan()->Pds();
	if (CDistributionSpec::EdtHashed == pds->Edt())
	{
		CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(pds);
		DrgPexpr *pdrgpexpr = pdshashed->Pdrgpexpr();
		CColRefSet *pcrsUsed = CUtils::PcrsExtractColumns(m_pmp, pdrgpexpr);

		const CColRefSet *pcrsReqdStats = this->Poc()->Prprel()->PcrsStat();
		if (!pcrsReqdStats->FSubset(pcrsUsed))
		{
			// statistics not available for distribution columns, therefore
			// assume uniform distribution across hosts
			// clean up
			pcrsUsed->Release();

			return CDouble(dRows / ulHosts);
		}

		DrgPul *pdrgpul = GPOS_NEW(m_pmp) DrgPul(m_pmp);
		pcrsUsed->ExtractColIds(m_pmp, pdrgpul);
		pcrsUsed->Release();

		CStatisticsConfig *pstatsconf = poptctxt->Poconf()->Pstatsconf();
		CDouble dNDVs = CStatisticsUtils::DGroups(m_pmp, Pstats(), pstatsconf, pdrgpul, NULL /*pbsKeys*/);
		pdrgpul->Release();

		if (dNDVs < ulHosts)
		{
			// estimated number of distinct values of distribution columns is smaller than number of hosts.
			// We assume data is distributed across a subset of hosts in this case. This results in a larger
			// number of rows per host compared to the uniform case, allowing us to capture data skew in
			// cost computation
			return CDouble(dRows / dNDVs.DVal());
		}
	}

	return CDouble(dRows / ulHosts);
}