示例#1
0
文件: CCostContext.cpp 项目: d/gporca
//---------------------------------------------------------------------------
//	@function:
//		CCostContext::DRowsPerHost
//
//	@doc:
//		Return the number of rows per host
//
//---------------------------------------------------------------------------
CDouble
CCostContext::DRowsPerHost() const
{
	DOUBLE dRows = Pstats()->DRows().DVal();
	COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS();
	const ULONG ulHosts = poptctxt->Pcm()->UlHosts();

	CDistributionSpec *pds =  Pdpplan()->Pds();
	if (CDistributionSpec::EdtHashed == pds->Edt())
	{
		CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(pds);
		DrgPexpr *pdrgpexpr = pdshashed->Pdrgpexpr();
		CColRefSet *pcrsUsed = CUtils::PcrsExtractColumns(m_pmp, pdrgpexpr);

		const CColRefSet *pcrsReqdStats = this->Poc()->Prprel()->PcrsStat();
		if (!pcrsReqdStats->FSubset(pcrsUsed))
		{
			// statistics not available for distribution columns, therefore
			// assume uniform distribution across hosts
			// clean up
			pcrsUsed->Release();

			return CDouble(dRows / ulHosts);
		}

		DrgPul *pdrgpul = GPOS_NEW(m_pmp) DrgPul(m_pmp);
		pcrsUsed->ExtractColIds(m_pmp, pdrgpul);
		pcrsUsed->Release();

		CStatisticsConfig *pstatsconf = poptctxt->Poconf()->Pstatsconf();
		CDouble dNDVs = CStatisticsUtils::DGroups(m_pmp, Pstats(), pstatsconf, pdrgpul, NULL /*pbsKeys*/);
		pdrgpul->Release();

		if (dNDVs < ulHosts)
		{
			// estimated number of distinct values of distribution columns is smaller than number of hosts.
			// We assume data is distributed across a subset of hosts in this case. This results in a larger
			// number of rows per host compared to the uniform case, allowing us to capture data skew in
			// cost computation
			return CDouble(dRows / dNDVs.DVal());
		}
	}

	return CDouble(dRows / ulHosts);
}
示例#2
0
//---------------------------------------------------------------------------
//	@function:
//		CQueryContext::PqcGenerate
//
//	@doc:
// 		Generate the query context for the given expression and array of
//		output column ref ids
//
//---------------------------------------------------------------------------
CQueryContext *
CQueryContext::PqcGenerate
(
    IMemoryPool *pmp,
    CExpression * pexpr,
    DrgPul *pdrgpulQueryOutputColRefId,
    DrgPmdname *pdrgpmdname,
    BOOL fDeriveStats
)
{
    GPOS_ASSERT(NULL != pexpr && NULL != pdrgpulQueryOutputColRefId);

    CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp);
    DrgPcr *pdrgpcr = GPOS_NEW(pmp) DrgPcr(pmp);

    COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS();
    CColumnFactory *pcf = poptctxt->Pcf();
    GPOS_ASSERT(NULL != pcf);

    const ULONG ulLen = pdrgpulQueryOutputColRefId->UlLength();
    for (ULONG ul = 0; ul < ulLen; ul++)
    {
        ULONG *pul = (*pdrgpulQueryOutputColRefId)[ul];
        GPOS_ASSERT(NULL != pul);

        CColRef *pcr = pcf->PcrLookup(*pul);
        GPOS_ASSERT(NULL != pcr);

        pcrs->Include(pcr);
        pdrgpcr->Append(pcr);
    }

    COrderSpec *pos = NULL;
    CExpression *pexprResult = pexpr;
    COperator *popTop = PopTop(pexpr);
    if (COperator::EopLogicalLimit == popTop->Eopid())
    {
        // top level operator is a limit, copy order spec to query context
        pos = CLogicalLimit::PopConvert(popTop)->Pos();
        pos->AddRef();
    }
    else
    {
        // no order required
        pos = GPOS_NEW(pmp) COrderSpec(pmp);
    }

    CDistributionSpec *pds = NULL;

    BOOL fDML = CUtils::FLogicalDML(pexpr->Pop());
    poptctxt->MarkDMLQuery(fDML);

    if (fDML)
    {
        pds = GPOS_NEW(pmp) CDistributionSpecAny(COperator::EopSentinel);
    }
    else
    {
        pds = GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster);
    }

    CRewindabilitySpec *prs = GPOS_NEW(pmp) CRewindabilitySpec(CRewindabilitySpec::ErtNone /*ert*/);

    CEnfdOrder *peo = GPOS_NEW(pmp) CEnfdOrder(pos, CEnfdOrder::EomSatisfy);

    // we require satisfy matching on distribution since final query results must be sent to master
    CEnfdDistribution *ped = GPOS_NEW(pmp) CEnfdDistribution(pds, CEnfdDistribution::EdmSatisfy);

    CEnfdRewindability *per = GPOS_NEW(pmp) CEnfdRewindability(prs, CEnfdRewindability::ErmSatisfy);

    CCTEReq *pcter = poptctxt->Pcteinfo()->PcterProducers(pmp);

    CReqdPropPlan *prpp = GPOS_NEW(pmp) CReqdPropPlan(pcrs, peo, ped, per, pcter);

    pdrgpmdname->AddRef();
    return GPOS_NEW(pmp) CQueryContext(pmp, pexprResult, prpp, pdrgpcr, pdrgpmdname, fDeriveStats);
}
示例#3
0
//---------------------------------------------------------------------------
//	@function:
//		CQueryContext::PqcGenerate
//
//	@doc:
// 		Generate the query context for the given expression and array of
//		output column ref ids
//
//---------------------------------------------------------------------------
CQueryContext *
CQueryContext::PqcGenerate
	(
	IMemoryPool *mp,
	CExpression * pexpr,
	ULongPtrArray *pdrgpulQueryOutputColRefId,
	CMDNameArray *pdrgpmdname,
	BOOL fDeriveStats
	)
{
	GPOS_ASSERT(NULL != pexpr && NULL != pdrgpulQueryOutputColRefId);

	CColRefSet *pcrs = GPOS_NEW(mp) CColRefSet(mp);
	CColRefArray *colref_array = GPOS_NEW(mp) CColRefArray(mp);

	COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS();
	CColumnFactory *col_factory = poptctxt->Pcf();
	GPOS_ASSERT(NULL != col_factory);

	// Collect required column references (colref_array)
	const ULONG length = pdrgpulQueryOutputColRefId->Size();
	for (ULONG ul = 0; ul < length; ul++)
	{
		ULONG *pul = (*pdrgpulQueryOutputColRefId)[ul];
		GPOS_ASSERT(NULL != pul);

		CColRef *colref = col_factory->LookupColRef(*pul);
		GPOS_ASSERT(NULL != colref);

		pcrs->Include(colref);
		colref_array->Append(colref);
	}

	// Collect required properties (prpp) at the top level:

	// By default no sort order requirement is added, unless the root operator in
	// the input logical expression is a LIMIT. This is because Orca always
	// attaches top level Sort to a LIMIT node.
	COrderSpec *pos = NULL;
	CExpression *pexprResult = pexpr;
	COperator *popTop = PopTop(pexpr);
	if (COperator::EopLogicalLimit == popTop->Eopid())
	{
		// top level operator is a limit, copy order spec to query context
		pos = CLogicalLimit::PopConvert(popTop)->Pos();
		pos->AddRef();
	}
	else
	{
		// no order required
		pos = GPOS_NEW(mp) COrderSpec(mp);
	}

	CDistributionSpec *pds = NULL;
	
	BOOL fDML = CUtils::FLogicalDML(pexpr->Pop());
	poptctxt->MarkDMLQuery(fDML);

	// DML commands do not have distribution requirement. Otherwise the
	// distribution requirement is Singleton.
	if (fDML)
	{
		pds = GPOS_NEW(mp) CDistributionSpecAny(COperator::EopSentinel);
	}
	else
	{
		pds = GPOS_NEW(mp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster);
	}

	// By default, no rewindability requirement needs to be satisfied at the top level
	CRewindabilitySpec *prs = GPOS_NEW(mp) CRewindabilitySpec(CRewindabilitySpec::ErtNotRewindable, CRewindabilitySpec::EmhtNoMotion);

	// Ensure order, distribution and rewindability meet 'satisfy' matching at the top level
	CEnfdOrder *peo = GPOS_NEW(mp) CEnfdOrder(pos, CEnfdOrder::EomSatisfy);
	CEnfdDistribution *ped = GPOS_NEW(mp) CEnfdDistribution(pds, CEnfdDistribution::EdmSatisfy);
	CEnfdRewindability *per = GPOS_NEW(mp) CEnfdRewindability(prs, CEnfdRewindability::ErmSatisfy);

	// Required CTEs are obtained from the CTEInfo global information in the optimizer context
	CCTEReq *pcter = poptctxt->Pcteinfo()->PcterProducers(mp);

	// NB: Partition propagation requirements are not initialized here.  They are
	// constructed later based on derived relation properties (CPartInfo) by
	// CReqdPropPlan::InitReqdPartitionPropagation().

	CReqdPropPlan *prpp = GPOS_NEW(mp) CReqdPropPlan(pcrs, peo, ped, per, pcter);

	// Finally, create the CQueryContext
	pdrgpmdname->AddRef();
	return GPOS_NEW(mp) CQueryContext(mp, pexprResult, prpp, colref_array, pdrgpmdname, fDeriveStats);
}