//--------------------------------------------------------------------------- // @function: // CPhysical::GetSkew // // @doc: // Helper to compute skew estimate based on given stats and // distribution spec // //--------------------------------------------------------------------------- CDouble CPhysical::GetSkew ( IStatistics *stats, CDistributionSpec *pds ) { CDouble dSkew = 1.0; if (CDistributionSpec::EdtHashed == pds->Edt()) { CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(pds); const CExpressionArray *pdrgpexpr = pdshashed->Pdrgpexpr(); const ULONG size = pdrgpexpr->Size(); for (ULONG ul = 0; ul < size; ul++) { CExpression *pexpr = (*pdrgpexpr)[ul]; if (COperator::EopScalarIdent == pexpr->Pop()->Eopid()) { // consider only hashed distribution direct columns for now CScalarIdent *popScId = CScalarIdent::PopConvert(pexpr->Pop()); ULONG colid = popScId->Pcr()->Id(); CDouble dSkewCol = stats->GetSkew(colid); if (dSkewCol > dSkew) { dSkew = dSkewCol; } } } } return CDouble(dSkew); }
//--------------------------------------------------------------------------- // @function: // CStatsPredUtils::PstatspredLikeHandleCasting // // @doc: // Create a LIKE statistics filter //--------------------------------------------------------------------------- CStatsPred * CStatsPredUtils::PstatspredLike ( IMemoryPool *pmp, CExpression *pexprPred, CColRefSet *//pcrsOuterRefs, ) { GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(CPredicateUtils::FLikePredicate(pexprPred)); CExpression *pexprLeft = (*pexprPred)[0]; CExpression *pexprRight = (*pexprPred)[1]; // we support LIKE predicate of the following patterns // CAST(ScIdent) LIKE Const // CAST(ScIdent) LIKE CAST(Const) // ScIdent LIKE Const // ScIdent LIKE CAST(Const) // CAST(Const) LIKE ScIdent // CAST(Const) LIKE CAST(ScIdent) // const LIKE ScIdent // const LIKE CAST(ScIdent) CExpression *pexprScIdent = NULL; CExpression *pexprScConst = NULL; CPredicateUtils::ExtractLikePredComponents(pexprPred, &pexprScIdent, &pexprScConst); if (NULL == pexprScIdent || NULL == pexprScConst) { return GPOS_NEW(pmp) CStatsPredUnsupported(ULONG_MAX, CStatsPred::EstatscmptLike); } CScalarIdent *popScalarIdent = CScalarIdent::PopConvert(pexprScIdent->Pop()); ULONG ulColId = popScalarIdent->Pcr()->UlId(); CScalarConst *popScalarConst = CScalarConst::PopConvert(pexprScConst->Pop()); IDatum *pdatumLiteral = popScalarConst->Pdatum(); const CColRef *pcr = popScalarIdent->Pcr(); if (!IMDType::FStatsComparable(pcr->Pmdtype(), pdatumLiteral)) { // unsupported stats comparison between the column and datum return GPOS_NEW(pmp) CStatsPredUnsupported(pcr->UlId(), CStatsPred::EstatscmptLike); } CDouble dDefaultScaleFactor(1.0); if (pdatumLiteral->FSupportLikePredicate()) { dDefaultScaleFactor = pdatumLiteral->DLikePredicateScaleFactor(); } pexprLeft->AddRef(); pexprRight->AddRef(); return GPOS_NEW(pmp) CStatsPredLike(ulColId, pexprLeft, pexprRight, dDefaultScaleFactor); }
//--------------------------------------------------------------------------- // @function: // CConstraint::PcnstrFromScalarCmp // // @doc: // Create constraint from scalar comparison // //--------------------------------------------------------------------------- CConstraint * CConstraint::PcnstrFromScalarCmp ( IMemoryPool *pmp, CExpression *pexpr, DrgPcrs **ppdrgpcrs // output equivalence classes ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(CUtils::FScalarCmp(pexpr)); GPOS_ASSERT(NULL != ppdrgpcrs); GPOS_ASSERT(NULL == *ppdrgpcrs); CExpression *pexprLeft = (*pexpr)[0]; CExpression *pexprRight = (*pexpr)[1]; // check if the scalar comparison is over scalar idents if (COperator::EopScalarIdent == pexprLeft->Pop()->Eopid() && COperator::EopScalarIdent == pexprRight->Pop()->Eopid()) { CScalarIdent *popScIdLeft = CScalarIdent::PopConvert((*pexpr)[0]->Pop()); const CColRef *pcrLeft = popScIdLeft->Pcr(); CScalarIdent *popScIdRight = CScalarIdent::PopConvert((*pexpr)[1]->Pop()); const CColRef *pcrRight = popScIdRight->Pcr(); if (!CUtils::FConstrainableType(pcrLeft->Pmdtype()->Pmdid()) || !CUtils::FConstrainableType(pcrRight->Pmdtype()->Pmdid())) { return NULL; } *ppdrgpcrs = GPOS_NEW(pmp) DrgPcrs(pmp); if (CPredicateUtils::FEquality(pexpr)) { // col1 = col2 CColRefSet *pcrsNew = GPOS_NEW(pmp) CColRefSet(pmp); pcrsNew->Include(pcrLeft); pcrsNew->Include(pcrRight); (*ppdrgpcrs)->Append(pcrsNew); } // create NOT NULL constraints to both columns DrgPcnstr *pdrgpcnstr = GPOS_NEW(pmp) DrgPcnstr(pmp); pdrgpcnstr->Append(CConstraintInterval::PciUnbounded(pmp, pcrLeft, false /*fIncludesNull*/)); pdrgpcnstr->Append(CConstraintInterval::PciUnbounded(pmp, pcrRight, false /*fIncludesNull*/)); return CConstraint::PcnstrConjunction(pmp, pdrgpcnstr); } // TODO: , May 28, 2012; add support for other cases besides (col cmp col) return NULL; }
//--------------------------------------------------------------------------- // @function: // CConstraint::PcnstrFromScalarArrayCmp // // @doc: // Create constraint from scalar array comparison expression // //--------------------------------------------------------------------------- CConstraint * CConstraint::PcnstrFromScalarArrayCmp ( IMemoryPool *pmp, CExpression *pexpr, CColRef *pcr ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(CUtils::FScalarArrayCmp(pexpr)); CScalarArrayCmp *popScArrayCmp = CScalarArrayCmp::PopConvert(pexpr->Pop()); CScalarArrayCmp::EArrCmpType earrccmpt = popScArrayCmp->Earrcmpt(); if ((CScalarArrayCmp::EarrcmpAny == earrccmpt || CScalarArrayCmp::EarrcmpAll == earrccmpt) && CPredicateUtils::FCompareIdentToConstArray(pexpr)) { // column #ifdef GPOS_DEBUG CScalarIdent *popScId = CScalarIdent::PopConvert((*pexpr)[0]->Pop()); GPOS_ASSERT (pcr == (CColRef *) popScId->Pcr()); #endif // GPOS_DEBUG // get comparison type IMDType::ECmpType ecmpt = CUtils::Ecmpt(popScArrayCmp->PmdidOp()); CExpression *pexprArray = (*pexpr)[1]; DrgPcnstr *pdrgpcnstr = GPOS_NEW(pmp) DrgPcnstr(pmp); const ULONG ulArity = pexprArray->UlArity(); for (ULONG ul = 0; ul < ulArity; ul++) { GPOS_ASSERT(CUtils::FScalarConst((*pexprArray)[ul]) && "expecting a constant"); CScalarConst *popScConst = CScalarConst::PopConvert((*pexprArray)[ul]->Pop()); CConstraintInterval *pci = CConstraintInterval::PciIntervalFromColConstCmp(pmp, pcr, ecmpt, popScConst); pdrgpcnstr->Append(pci); } if (earrccmpt == CScalarArrayCmp::EarrcmpAny) { // predicate is of the form 'A IN (1,2,3)' // return a disjunction of ranges {[1,1], [2,2], [3,3]} return GPOS_NEW(pmp) CConstraintDisjunction(pmp, pdrgpcnstr); } // predicate is of the form 'A NOT IN (1,2,3)' // return a conjunctive negation on {[1,1], [2,2], [3,3]} return GPOS_NEW(pmp) CConstraintConjunction(pmp, pdrgpcnstr); } return NULL; }
//--------------------------------------------------------------------------- // @function: // CStatsPredUtils::PstatspredBoolean // // @doc: // Extract statistics filtering information from boolean predicate // in the form of scalar id or negated scalar id //--------------------------------------------------------------------------- CStatsPred * CStatsPredUtils::PstatspredBoolean ( IMemoryPool *pmp, CExpression *pexprPred, CColRefSet * //pcrsOuterRefs ) { GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(CPredicateUtils::FBooleanScalarIdent(pexprPred) || CPredicateUtils::FNegatedBooleanScalarIdent(pexprPred)); COperator *pop = pexprPred->Pop(); CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda(); IDatum *pdatum = NULL; ULONG ulColId = ULONG_MAX; if (CPredicateUtils::FBooleanScalarIdent(pexprPred)) { CScalarIdent *popScIdent = CScalarIdent::PopConvert(pop); pdatum = pmda->PtMDType<IMDTypeBool>()->PdatumBool(pmp, true /* fValue */, false /* fNull */); ulColId = popScIdent->Pcr()->UlId(); } else { CExpression *pexprChild = (*pexprPred)[0]; pdatum = pmda->PtMDType<IMDTypeBool>()->PdatumBool(pmp, false /* fValue */, false /* fNull */); ulColId = CScalarIdent::PopConvert(pexprChild->Pop())->Pcr()->UlId(); } if (!pdatum->FStatsComparable(pdatum)) { // stats calculations on such datums unsupported pdatum->Release(); return GPOS_NEW(pmp) CStatsPredUnsupported(ulColId, CStatsPred::EstatscmptEq); } GPOS_ASSERT(NULL != pdatum && ULONG_MAX != ulColId); return GPOS_NEW(pmp) CStatsPredPoint(ulColId, CStatsPred::EstatscmptEq, GPOS_NEW(pmp) CPoint(pdatum)); }
//--------------------------------------------------------------------------- // @function: // CStatsPredUtils::PstatspredNullTest // // @doc: // Extract statistics filtering information from a null test //--------------------------------------------------------------------------- CStatsPred * CStatsPredUtils::PstatspredNullTest ( IMemoryPool *pmp, CExpression *pexprPred, CColRefSet * //pcrsOuterRefs ) { GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(FScalarIdentIsNull(pexprPred) || FScalarIdentIsNotNull(pexprPred)); CExpression *pexprNullTest = pexprPred; CStatsPred::EStatsCmpType escmpt = CStatsPred::EstatscmptEq; // 'is null' if (FScalarIdentIsNotNull(pexprPred)) { pexprNullTest = (*pexprPred)[0]; escmpt = CStatsPred::EstatscmptNEq; // 'is not null' } CScalarIdent *popScalarIdent = CScalarIdent::PopConvert((*pexprNullTest)[0]->Pop()); const CColRef *pcr = popScalarIdent->Pcr(); IDatum *pdatum = CStatisticsUtils::PdatumNull(pcr); if (!pdatum->FStatsComparable(pdatum)) { // stats calculations on such datums unsupported pdatum->Release(); return GPOS_NEW(pmp) CStatsPredUnsupported(pcr->UlId(), escmpt); } CPoint *ppoint = GPOS_NEW(pmp) CPoint(pdatum); CStatsPredPoint *pstatspred = GPOS_NEW(pmp) CStatsPredPoint(pcr->UlId(), escmpt, ppoint); return pstatspred; }
//--------------------------------------------------------------------------- // @function: // CConstraint::PcnstrFromScalarArrayCmp // // @doc: // Create constraint from scalar array comparison expression // //--------------------------------------------------------------------------- CConstraint * CConstraint::PcnstrFromScalarArrayCmp ( IMemoryPool *pmp, CExpression *pexpr, CColRef *pcr ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(CUtils::FScalarArrayCmp(pexpr)); CScalarArrayCmp *popScArrayCmp = CScalarArrayCmp::PopConvert(pexpr->Pop()); CScalarArrayCmp::EArrCmpType earrccmpt = popScArrayCmp->Earrcmpt(); if ((CScalarArrayCmp::EarrcmpAny == earrccmpt || CScalarArrayCmp::EarrcmpAll == earrccmpt) && CPredicateUtils::FCompareIdentToConstArray(pexpr)) { // column #ifdef GPOS_DEBUG CScalarIdent *popScId = CScalarIdent::PopConvert((*pexpr)[0]->Pop()); GPOS_ASSERT (pcr == (CColRef *) popScId->Pcr()); #endif // GPOS_DEBUG // get comparison type IMDType::ECmpType ecmpt = CUtils::Ecmpt(popScArrayCmp->PmdidOp()); CExpression *pexprArray = CUtils::PexprScalarArrayChild(pexpr); const ULONG ulArity = CUtils::UlScalarArrayArity(pexprArray); // When array size exceeds the threshold, don't expand it into a DNF COptimizerConfig *poconf = COptCtxt::PoctxtFromTLS()->Poconf(); ULONG ulArrayExpansionThreshold = poconf->Phint()->UlArrayExpansionThreshold(); if (ulArity > ulArrayExpansionThreshold) { return NULL; } DrgPcnstr *pdrgpcnstr = GPOS_NEW(pmp) DrgPcnstr(pmp); for (ULONG ul = 0; ul < ulArity; ul++) { CScalarConst *popScConst = CUtils::PScalarArrayConstChildAt(pexprArray,ul); CConstraintInterval *pci = CConstraintInterval::PciIntervalFromColConstCmp(pmp, pcr, ecmpt, popScConst); pdrgpcnstr->Append(pci); } if (earrccmpt == CScalarArrayCmp::EarrcmpAny) { // predicate is of the form 'A IN (1,2,3)' // return a disjunction of ranges {[1,1], [2,2], [3,3]} return GPOS_NEW(pmp) CConstraintDisjunction(pmp, pdrgpcnstr); } // predicate is of the form 'A NOT IN (1,2,3)' // return a conjunctive negation on {[1,1], [2,2], [3,3]} return GPOS_NEW(pmp) CConstraintConjunction(pmp, pdrgpcnstr); } return NULL; }
//--------------------------------------------------------------------------- // @function: // CStatsPredUtils::ProcessArrayCmp // // @doc: // Extract statistics filtering information from scalar array comparison //--------------------------------------------------------------------------- void CStatsPredUtils::ProcessArrayCmp ( IMemoryPool *pmp, CExpression *pexprPred, DrgPstatspred *pdrgpstatspred ) { GPOS_ASSERT(NULL != pdrgpstatspred); GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(2 == pexprPred->UlArity()); CScalarArrayCmp *popScArrayCmp = CScalarArrayCmp::PopConvert(pexprPred->Pop()); CExpression *pexprLeft = (*pexprPred)[0]; CExpression *pexprRight = (*pexprPred)[1]; BOOL fCompareToConst = ((COperator::EopScalarIdent == pexprLeft->Pop()->Eopid()) && (COperator::EopScalarArray == pexprRight->Pop()->Eopid())); if (!fCompareToConst) { // unsupported predicate for stats calculations pdrgpstatspred->Append(GPOS_NEW(pmp) CStatsPredUnsupported(ULONG_MAX, CStatsPred::EstatscmptOther)); return; } BOOL fAny = (CScalarArrayCmp::EarrcmpAny == popScArrayCmp->Earrcmpt()); DrgPstatspred *pdrgpstatspredChild = pdrgpstatspred; if (fAny) { pdrgpstatspredChild = GPOS_NEW(pmp) DrgPstatspred(pmp); } const ULONG ulConstants = pexprRight->UlArity(); // comparison semantics for statistics purposes is looser than regular comparison. CStatsPred::EStatsCmpType escmpt = Estatscmptype(popScArrayCmp->PmdidOp()); CScalarIdent *popScalarIdent = CScalarIdent::PopConvert(pexprLeft->Pop()); const CColRef *pcr = popScalarIdent->Pcr(); if (!CHistogram::FSupportsFilter(escmpt)) { // unsupported predicate for stats calculations pdrgpstatspred->Append(GPOS_NEW(pmp) CStatsPredUnsupported(pcr->UlId(), escmpt)); return; } for (ULONG ul = 0; ul < ulConstants; ul++) { CExpression *pexprConst = (*pexprRight)[ul]; if (COperator::EopScalarConst == pexprConst->Pop()->Eopid()) { CScalarConst *popScalarConst = CScalarConst::PopConvert(pexprConst->Pop()); IDatum *pdatumLiteral = popScalarConst->Pdatum(); CStatsPred *pstatspredChild = NULL; if (!pdatumLiteral->FStatsComparable(pdatumLiteral)) { // stats calculations on such datums unsupported pstatspredChild = GPOS_NEW(pmp) CStatsPredUnsupported(pcr->UlId(), escmpt); } else { pstatspredChild = GPOS_NEW(pmp) CStatsPredPoint(pmp, pcr, escmpt, pdatumLiteral); } pdrgpstatspredChild->Append(pstatspredChild); } } if (fAny) { CStatsPredDisj *pstatspredOr = GPOS_NEW(pmp) CStatsPredDisj(pdrgpstatspredChild); pdrgpstatspred->Append(pstatspredOr); } }
//--------------------------------------------------------------------------- // @function: // CPhysicalSequenceProject::CreateOrderSpec // // @doc: // Create local order spec that we request relational child to satisfy // //--------------------------------------------------------------------------- void CPhysicalSequenceProject::CreateOrderSpec ( IMemoryPool *pmp ) { GPOS_ASSERT(NULL == m_pos); GPOS_ASSERT(NULL != m_pds); GPOS_ASSERT(NULL != m_pdrgpos); m_pos = GPOS_NEW(pmp) COrderSpec(pmp); // add partition by keys to order spec if (CDistributionSpec::EdtHashed == m_pds->Edt()) { CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(m_pds); const DrgPexpr *pdrgpexpr = pdshashed->Pdrgpexpr(); const ULONG ulSize = pdrgpexpr->UlLength(); for (ULONG ul = 0; ul < ulSize; ul++) { CExpression *pexpr = (*pdrgpexpr)[ul]; // we assume partition-by keys are always scalar idents CScalarIdent *popScId = CScalarIdent::PopConvert(pexpr->Pop()); const CColRef *pcr = popScId->Pcr(); gpmd::IMDId *pmdid = pcr->Pmdtype()->PmdidCmp(IMDType::EcmptL); pmdid->AddRef(); m_pos->Append(pmdid, pcr, COrderSpec::EntLast); } } if (0 == m_pdrgpos->UlLength()) { return; } COrderSpec *posFirst = (*m_pdrgpos)[0]; #ifdef GPOS_DEBUG const ULONG ulLength = m_pdrgpos->UlLength(); for (ULONG ul = 1; ul < ulLength; ul++) { COrderSpec *posCurrent = (*m_pdrgpos)[ul]; GPOS_ASSERT(posFirst->FSatisfies(posCurrent) && "first order spec must satisfy all other order specs"); } #endif // GPOS_DEBUG // we assume here that the first order spec in the children array satisfies all other // order specs in the array, this happens as part of the initial normalization // so we need to add columns only from the first order spec const ULONG ulSize = posFirst->UlSortColumns(); for (ULONG ul = 0; ul < ulSize; ul++) { const CColRef *pcr = posFirst->Pcr(ul); gpmd::IMDId *pmdid = posFirst->PmdidSortOp(ul); pmdid->AddRef(); COrderSpec::ENullTreatment ent = posFirst->Ent(ul); m_pos->Append(pmdid, pcr, ent); } }