//--------------------------------------------------------------------------- // @function: // CLogical::PpcDeriveConstraintRestrict // // @doc: // Derive constraint property only on the given columns // //--------------------------------------------------------------------------- CPropConstraint * CLogical::PpcDeriveConstraintRestrict ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CColRefSet *pcrsOutput ) { // constraint property from relational child CPropConstraint *ppc = exprhdl.Pdprel(0)->Ppc(); DrgPcrs *pdrgpcrs = ppc->PdrgpcrsEquivClasses(); // construct new array of equivalence classes DrgPcrs *pdrgpcrsNew = GPOS_NEW(pmp) DrgPcrs(pmp); const ULONG ulLen = pdrgpcrs->UlLength(); for (ULONG ul = 0; ul < ulLen; ul++) { CColRefSet *pcrsEquiv = GPOS_NEW(pmp) CColRefSet(pmp); pcrsEquiv->Include((*pdrgpcrs)[ul]); pcrsEquiv->Intersection(pcrsOutput); if (0 < pcrsEquiv->CElements()) { pdrgpcrsNew->Append(pcrsEquiv); } else { pcrsEquiv->Release(); } } CConstraint *pcnstrChild = ppc->Pcnstr(); if (NULL == pcnstrChild) { return GPOS_NEW(pmp) CPropConstraint(pmp, pdrgpcrsNew, NULL); } DrgPcnstr *pdrgpcnstr = GPOS_NEW(pmp) DrgPcnstr(pmp); // include only constraints on given columns CColRefSetIter crsi(*pcrsOutput); while (crsi.FAdvance()) { CColRef *pcr = crsi.Pcr(); CConstraint *pcnstrCol = pcnstrChild->Pcnstr(pmp, pcr); if (NULL == pcnstrCol) { continue; } if (pcnstrCol->FUnbounded()) { pcnstrCol->Release(); continue; } pdrgpcnstr->Append(pcnstrCol); } CConstraint *pcnstr = CConstraint::PcnstrConjunction(pmp, pdrgpcnstr); return GPOS_NEW(pmp) CPropConstraint(pmp, pdrgpcrsNew, pcnstr); }
//--------------------------------------------------------------------------- // @function: // CLogical::PcrsDeriveOuter // // @doc: // Derive outer references // //--------------------------------------------------------------------------- CColRefSet * CLogical::PcrsDeriveOuter ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CColRefSet *pcrsUsedAdditional ) { ULONG ulArity = exprhdl.UlArity(); CColRefSet *pcrsOuter = GPOS_NEW(pmp) CColRefSet(pmp); // collect output columns from relational children // and used columns from scalar children CColRefSet *pcrsOutput = GPOS_NEW(pmp) CColRefSet(pmp); CColRefSet *pcrsUsed = GPOS_NEW(pmp) CColRefSet(pmp); for (ULONG i = 0; i < ulArity; i++) { if (exprhdl.FScalarChild(i)) { CDrvdPropScalar *pdpscalar = exprhdl.Pdpscalar(i); pcrsUsed->Union(pdpscalar->PcrsUsed()); } else { CDrvdPropRelational *pdprel = exprhdl.Pdprel(i); pcrsOutput->Union(pdprel->PcrsOutput()); // add outer references from relational children pcrsOuter->Union(pdprel->PcrsOuter()); } } if (NULL != pcrsUsedAdditional) { pcrsUsed->Include(pcrsUsedAdditional); } // outer references are columns used by scalar child // but are not included in the output columns of relational children pcrsOuter->Union(pcrsUsed); pcrsOuter->Exclude(pcrsOutput); pcrsOutput->Release(); pcrsUsed->Release(); return pcrsOuter; }
//--------------------------------------------------------------------------- // @function: // CLogical::PpcDeriveConstraintFromTable // // @doc: // Derive constraint property from a table/index get // //--------------------------------------------------------------------------- CPropConstraint * CLogical::PpcDeriveConstraintFromTable ( IMemoryPool *pmp, const CTableDescriptor *ptabdesc, const DrgPcr *pdrgpcrOutput ) { DrgPcrs *pdrgpcrs = GPOS_NEW(pmp) DrgPcrs(pmp); DrgPcnstr *pdrgpcnstr = GPOS_NEW(pmp) DrgPcnstr(pmp); const DrgPcoldesc *pdrgpcoldesc = ptabdesc->Pdrgpcoldesc(); const ULONG ulCols = pdrgpcoldesc->UlLength(); DrgPcr *pdrgpcrNonSystem = GPOS_NEW(pmp) DrgPcr(pmp); for (ULONG ul = 0; ul < ulCols; ul++) { CColumnDescriptor *pcoldesc = (*pdrgpcoldesc)[ul]; CColRef *pcr = (*pdrgpcrOutput)[ul]; // we are only interested in non-system columns that are defined as // being NOT NULL if (pcoldesc->FSystemColumn()) { continue; } pdrgpcrNonSystem->Append(pcr); if (pcoldesc->FNullable()) { continue; } // add a "not null" constraint and an equivalence class CConstraint * pcnstr = CConstraintInterval::PciUnbounded(pmp, pcr, false /*fIncludesNull*/); if (pcnstr == NULL) { continue; } pdrgpcnstr->Append(pcnstr); CColRefSet *pcrsEquiv = GPOS_NEW(pmp) CColRefSet(pmp); pcrsEquiv->Include(pcr); pdrgpcrs->Append(pcrsEquiv); } CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda(); const IMDRelation *pmdrel = pmda->Pmdrel(ptabdesc->Pmdid()); const ULONG ulCheckConstraint = pmdrel->UlCheckConstraints(); for (ULONG ul = 0; ul < ulCheckConstraint; ul++) { IMDId *pmdidCheckConstraint = pmdrel->PmdidCheckConstraint(ul); const IMDCheckConstraint *pmdCheckConstraint = pmda->Pmdcheckconstraint(pmdidCheckConstraint); // extract the check constraint expression CExpression *pexprCheckConstraint = pmdCheckConstraint->Pexpr(pmp, pmda, pdrgpcrNonSystem); GPOS_ASSERT(NULL != pexprCheckConstraint); GPOS_ASSERT(CUtils::FPredicate(pexprCheckConstraint)); DrgPcrs *pdrgpcrsChild = NULL; 
CConstraint *pcnstr = CConstraint::PcnstrFromScalarExpr(pmp, pexprCheckConstraint, &pdrgpcrsChild); if (NULL != pcnstr) { pdrgpcnstr->Append(pcnstr); // merge with the equivalence classes we have so far DrgPcrs *pdrgpcrsMerged = CUtils::PdrgpcrsMergeEquivClasses(pmp, pdrgpcrs, pdrgpcrsChild); pdrgpcrs->Release(); pdrgpcrs = pdrgpcrsMerged; } CRefCount::SafeRelease(pdrgpcrsChild); pexprCheckConstraint->Release(); } pdrgpcrNonSystem->Release(); return GPOS_NEW(pmp) CPropConstraint(pmp, pdrgpcrs, CConstraint::PcnstrConjunction(pmp, pdrgpcnstr)); }
//---------------------------------------------------------------------------
//	@function:
//		CPhysicalHashJoin::PdshashedPassThru
//
//	@doc:
//		Turn an incoming hashed distribution request into a hashed request
//		for the outer child; the request is either passed through as is,
//		or restricted to the hashed expressions that only use outer child
//		columns;
//		return NULL if the trace flag is off or no such request exists
//
//---------------------------------------------------------------------------
CDistributionSpecHashed *
CPhysicalHashJoin::PdshashedPassThru
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	CDistributionSpecHashed *pdshashedInput,
	ULONG , // ulChildIndex
	DrgPdp *, // pdrgpdpCtxt
	ULONG
#ifdef GPOS_DEBUG
	ulOptReq
#endif // GPOS_DEBUG
	)
	const
{
	GPOS_ASSERT(ulOptReq == m_pdrgpdsRedistributeRequests->UlLength());
	GPOS_ASSERT(NULL != pdshashedInput);

	if (!GPOS_FTRACE(EopttraceEnableRedistributeBroadcastHashJoin))
	{
		// feature is switched off
		return NULL;
	}

	// compare the columns of the incoming hashed request against the
	// output columns of the outer child
	CColRefSet *pcrsOuterOutput = exprhdl.Pdprel(0 /*ulChildIndex*/)->PcrsOutput();
	DrgPexpr *pdrgpexprRequested = pdshashedInput->Pdrgpexpr();
	CColRefSet *pcrsRequested = CUtils::PcrsExtractColumns(pmp, pdrgpexprRequested);
	const BOOL fAllFromOuter = pcrsOuterOutput->FSubset(pcrsRequested);
	const BOOL fNoneFromOuter = pcrsOuterOutput->FDisjoint(pcrsRequested);
	pcrsRequested->Release();

	if (fAllFromOuter)
	{
		// request is expressed entirely over outer child columns
		pdshashedInput->AddRef();
		return pdshashedInput;
	}

	if (fNoneFromOuter)
	{
		// nothing in the request can be satisfied by the outer child
		return NULL;
	}

	// partial overlap: keep only the hashed expressions whose columns all
	// come from the outer child
	DrgPexpr *pdrgpexprOuterOnly = GPOS_NEW(pmp) DrgPexpr(pmp);
	const ULONG ulRequested = pdrgpexprRequested->UlLength();
	for (ULONG ulExpr = 0; ulExpr < ulRequested; ulExpr++)
	{
		CExpression *pexprHashed = (*pdrgpexprRequested)[ulExpr];
		CColRefSet *pcrsHashed = CDrvdPropScalar::Pdpscalar(pexprHashed->PdpDerive())->PcrsUsed();
		if (!pcrsOuterOutput->FSubset(pcrsHashed))
		{
			continue;
		}

		pexprHashed->AddRef();
		pdrgpexprOuterOnly->Append(pexprHashed);
	}
	GPOS_ASSERT(0 < pdrgpexprOuterOnly->UlLength());

	CDistributionSpecHashed *pdshashedOuter =
		GPOS_NEW(pmp) CDistributionSpecHashed(pdrgpexprOuterOnly, pdshashedInput->FNullsColocated());

	// since the other child of the join is replicated, hashed distribution
	// has to be enforced across segments here
	pdshashedOuter->MarkUnsatisfiableBySingleton();

	return pdshashedOuter;
}
//--------------------------------------------------------------------------- // @function: // CJoinOrderTest::EresUnittest_Expand // // @doc: // Simple expansion test // //--------------------------------------------------------------------------- GPOS_RESULT CJoinOrderTest::EresUnittest_Expand() { CAutoMemoryPool amp; IMemoryPool *pmp = amp.Pmp(); // setup a file-based provider CMDProviderMemory *pmdp = CTestUtils::m_pmdpf; pmdp->AddRef(); CMDAccessor mda(pmp, CMDCache::Pcache(), CTestUtils::m_sysidDefault, pmdp); // install opt context in TLS CAutoOptCtxt aoc ( pmp, &mda, NULL, /* pceeval */ CTestUtils::Pcm(pmp) ); // build test case CExpression *pexpr = CTestUtils::PexprLogicalNAryJoin(pmp); DrgPexpr *pdrgpexpr = GPOS_NEW(pmp) DrgPexpr(pmp); ULONG ulArity = pexpr->UlArity(); for (ULONG ul = 0; ul < ulArity - 1; ul++) { CExpression *pexprChild = (*pexpr)[ul]; pexprChild->AddRef(); pdrgpexpr->Append(pexprChild); } DrgPexpr *pdrgpexprConj = CPredicateUtils::PdrgpexprConjuncts(pmp, (*pexpr)[ulArity - 1]); // add predicates selectively to trigger special case of cross join DrgPexpr *pdrgpexprTest = GPOS_NEW(pmp) DrgPexpr(pmp); for (ULONG ul = 0; ul < pdrgpexprConj->UlLength() - 1; ul++) { CExpression *pexprConjunct = (*pdrgpexprConj)[ul]; pexprConjunct->AddRef(); pdrgpexprTest->Append(pexprConjunct); } pdrgpexprConj->Release(); // single-table predicate CColRefSet *pcrsOutput = CDrvdPropRelational::Pdprel((*pdrgpexpr)[ulArity - 2]->PdpDerive())->PcrsOutput(); CExpression *pexprSingleton = CUtils::PexprScalarEqCmp(pmp, pcrsOutput->PcrAny(), pcrsOutput->PcrAny()); pdrgpexprTest->Append(pexprSingleton); CJoinOrder jo(pmp, pdrgpexpr, pdrgpexprTest); CExpression *pexprResult = jo.PexprExpand(); { CAutoTrace at(pmp); at.Os() << std::endl << "INPUT:" << std::endl << *pexpr << std::endl; at.Os() << std::endl << "OUTPUT:" << std::endl << *pexprResult << std::endl; } CRefCount::SafeRelease(pexprResult); pexpr->Release(); return GPOS_OK; }
//--------------------------------------------------------------------------- // @function: // CPredicateUtilsTest::EresUnittest_Conjunctions // // @doc: // Test extraction and construction of conjuncts // //--------------------------------------------------------------------------- GPOS_RESULT CPredicateUtilsTest::EresUnittest_Conjunctions() { CAutoMemoryPool amp; IMemoryPool *mp = amp.Pmp(); // setup a file-based provider CMDProviderMemory *pmdp = CTestUtils::m_pmdpf; pmdp->AddRef(); CMDAccessor mda(mp, CMDCache::Pcache(), CTestUtils::m_sysidDefault, pmdp); // install opt context in TLS CAutoOptCtxt aoc ( mp, &mda, NULL, /* pceeval */ CTestUtils::GetCostModel(mp) ); // build conjunction CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp); const ULONG ulConjs = 3; for (ULONG ul = 0; ul < ulConjs; ul++) { pdrgpexpr->Append(CUtils::PexprScalarConstBool(mp, true /*fValue*/)); } CExpression *pexprConjunction = CUtils::PexprScalarBoolOp(mp, CScalarBoolOp::EboolopAnd, pdrgpexpr); // break into conjuncts CExpressionArray *pdrgpexprExtract = CPredicateUtils::PdrgpexprConjuncts(mp, pexprConjunction); GPOS_ASSERT(pdrgpexprExtract->Size() == ulConjs); // collapse into single conjunct CExpression *pexpr = CPredicateUtils::PexprConjunction(mp, pdrgpexprExtract); GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(CUtils::FScalarConstTrue(pexpr)); pexpr->Release(); // collapse empty input array to conjunct CExpression *pexprSingleton = CPredicateUtils::PexprConjunction(mp, NULL /*pdrgpexpr*/); GPOS_ASSERT(NULL != pexprSingleton); pexprSingleton->Release(); pexprConjunction->Release(); // conjunction on scalar comparisons CExpression *pexprGet = CTestUtils::PexprLogicalGet(mp); CColRefSet *pcrs = CDrvdPropRelational::GetRelationalProperties(pexprGet->PdpDerive())->PcrsOutput(); CColRef *pcr1 = pcrs->PcrAny(); CColRef *pcr2 = pcrs->PcrFirst(); CExpression *pexprCmp1 = CUtils::PexprScalarCmp(mp, pcr1, pcr2, IMDType::EcmptEq); CExpression *pexprCmp2 = CUtils::PexprScalarCmp(mp, 
pcr1, CUtils::PexprScalarConstInt4(mp, 1 /*val*/), IMDType::EcmptEq); CExpression *pexprConj = CPredicateUtils::PexprConjunction(mp, pexprCmp1, pexprCmp2); pdrgpexprExtract = CPredicateUtils::PdrgpexprConjuncts(mp, pexprConj); GPOS_ASSERT(2 == pdrgpexprExtract->Size()); pdrgpexprExtract->Release(); pexprCmp1->Release(); pexprCmp2->Release(); pexprConj->Release(); pexprGet->Release(); return GPOS_OK; }
//--------------------------------------------------------------------------- // @function: // CPhysicalComputeScalar::PdsRequired // // @doc: // Compute required distribution of the n-th child // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalComputeScalar::PdsRequired ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDistributionSpec *pdsRequired, ULONG ulChildIndex, DrgPdp *, // pdrgpdpCtxt ULONG ulOptReq ) const { GPOS_ASSERT(0 == ulChildIndex); GPOS_ASSERT(2 > ulOptReq); // check if master-only/replicated distribution needs to be requested CDistributionSpec *pds = PdsMasterOnlyOrReplicated(pmp, exprhdl, pdsRequired, ulChildIndex, ulOptReq); if (NULL != pds) { return pds; } CDrvdPropScalar *pdpscalar = exprhdl.Pdpscalar(1 /*ulChildIndex*/); // if a Project operator has a call to a set function, passing a Random distribution through this // Project may have the effect of not distributing the results of the set function to all nodes, // but only to the nodes on which first child of the Project is distributed. 
// to avoid that, we don't push the distribution requirement in this case and thus, for a random // distribution, the result of the set function is spread uniformly over all nodes if (pdpscalar->FHasNonScalarFunction()) { return GPOS_NEW(pmp) CDistributionSpecAny(); } // if required distribution uses any defined column, it has to be enforced on top of ComputeScalar, // in this case, we request Any distribution from the child CDistributionSpec::EDistributionType edtRequired = pdsRequired->Edt(); if (CDistributionSpec::EdtHashed == edtRequired) { CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(pdsRequired); CColRefSet *pcrs = pdshashed->PcrsUsed(m_pmp); BOOL fUsesDefinedCols = FUnaryUsesDefinedColumns(pcrs, exprhdl); pcrs->Release(); if (fUsesDefinedCols) { return GPOS_NEW(pmp) CDistributionSpecAny(); } } if (CDistributionSpec::EdtRouted == edtRequired) { CDistributionSpecRouted *pdsrouted = CDistributionSpecRouted::PdsConvert(pdsRequired); CColRefSet *pcrs = GPOS_NEW(m_pmp) CColRefSet(m_pmp); pcrs->Include(pdsrouted->Pcr()); BOOL fUsesDefinedCols = FUnaryUsesDefinedColumns(pcrs, exprhdl); pcrs->Release(); if (fUsesDefinedCols) { return GPOS_NEW(pmp) CDistributionSpecAny(); } } if (0 == ulOptReq) { // Req0: required distribution will be enforced on top of ComputeScalar return GPOS_NEW(pmp) CDistributionSpecAny(); } // Req1: required distribution will be enforced on top of ComputeScalar's child return PdsPassThru(pmp, exprhdl, pdsRequired, ulChildIndex); }
//--------------------------------------------------------------------------- // @function: // CLogicalDynamicGetBase::PstatsDeriveFilter // // @doc: // Derive stats from base table using filters on partition and/or index columns // //--------------------------------------------------------------------------- IStatistics * CLogicalDynamicGetBase::PstatsDeriveFilter ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CExpression *pexprFilter ) const { CExpression *pexprFilterNew = NULL; CConstraint *pcnstr = m_ppartcnstr->PcnstrCombined(); if (m_fPartial && NULL != pcnstr && !pcnstr->FUnbounded()) { if (NULL == pexprFilter) { pexprFilterNew = pcnstr->PexprScalar(pmp); pexprFilterNew->AddRef(); } else { pexprFilterNew = CPredicateUtils::PexprConjunction(pmp, pexprFilter, pcnstr->PexprScalar(pmp)); } } else if (NULL != pexprFilter) { pexprFilterNew = pexprFilter; pexprFilterNew->AddRef(); } CColRefSet *pcrsStat = GPOS_NEW(pmp) CColRefSet(pmp); CDrvdPropScalar *pdpscalar = NULL; if (NULL != pexprFilterNew) { pdpscalar = CDrvdPropScalar::Pdpscalar(pexprFilterNew->PdpDerive()); pcrsStat->Include(pdpscalar->PcrsUsed()); } // requesting statistics on distribution columns to estimate data skew if (NULL != m_pcrsDist) { pcrsStat->Include(m_pcrsDist); } IStatistics *pstatsFullTable = PstatsBaseTable(pmp, exprhdl, m_ptabdesc, pcrsStat); pcrsStat->Release(); if (NULL == pexprFilterNew || pdpscalar->FHasSubquery()) { return pstatsFullTable; } CStatsPred *pstatspred = CStatsPredUtils::PstatspredExtract ( pmp, pexprFilterNew, NULL /*pcrsOuterRefs*/ ); pexprFilterNew->Release(); IStatistics *pstatsResult = pstatsFullTable->PstatsFilter ( pmp, pstatspred, true /* fCapNdvs */ ); pstatspred->Release(); pstatsFullTable->Release(); return pstatsResult; }
//--------------------------------------------------------------------------- // @function: // CXformJoinAssociativity::CreatePredicates // // @doc: // Extract all conjuncts and divvy them up between upper and lower join // //--------------------------------------------------------------------------- void CXformJoinAssociativity::CreatePredicates ( IMemoryPool *pmp, CExpression *pexpr, DrgPexpr *pdrgpexprLower, DrgPexpr *pdrgpexprUpper ) const { GPOS_CHECK_ABORT; // bind operators CExpression *pexprLeft = (*pexpr)[0]; CExpression *pexprLeftLeft = (*pexprLeft)[0]; CExpression *pexprRight = (*pexpr)[1]; DrgPexpr *pdrgpexprJoins = GPOS_NEW(pmp) DrgPexpr(pmp); pexprLeft->AddRef(); pdrgpexprJoins->Append(pexprLeft); pexpr->AddRef(); pdrgpexprJoins->Append(pexpr); // columns for new lower join CColRefSet *pcrsLower = GPOS_NEW(pmp) CColRefSet(pmp); pcrsLower->Union(CDrvdPropRelational::Pdprel(pexprLeftLeft->PdpDerive())->PcrsOutput()); pcrsLower->Union(CDrvdPropRelational::Pdprel(pexprRight->PdpDerive())->PcrsOutput()); // convert current predicates into arrays of conjuncts DrgPexpr *pdrgpexprOrig = GPOS_NEW(pmp) DrgPexpr(pmp); for (ULONG ul = 0; ul < 2; ul++) { DrgPexpr *pdrgpexprPreds = CPredicateUtils::PdrgpexprConjuncts(pmp, (*(*pdrgpexprJoins)[ul])[2]); ULONG ulLen = pdrgpexprPreds->UlLength(); for (ULONG ulConj = 0; ulConj < ulLen; ulConj++) { CExpression *pexprConj = (*pdrgpexprPreds)[ulConj]; pexprConj->AddRef(); pdrgpexprOrig->Append(pexprConj); } pdrgpexprPreds->Release(); } // divvy up conjuncts for upper and lower join ULONG ulConj = pdrgpexprOrig->UlLength(); for (ULONG ul = 0; ul < ulConj; ul++) { CExpression *pexprPred = (*pdrgpexprOrig)[ul]; CColRefSet *pcrs = CDrvdPropScalar::Pdpscalar(pexprPred->PdpDerive())->PcrsUsed(); pexprPred->AddRef(); if (pcrsLower->FSubset(pcrs)) { pdrgpexprLower->Append(pexprPred); } else { pdrgpexprUpper->Append(pexprPred); } } // No predicates indicate a cross join. 
And for that, ORCA expects // predicate to be a scalar const "true". if (pdrgpexprLower->UlLength() == 0) { CExpression *pexprCrossLowerJoinPred = CUtils::PexprScalarConstBool(pmp, true, false); pdrgpexprLower->Append(pexprCrossLowerJoinPred); } // Same for upper predicates if (pdrgpexprUpper->UlLength() == 0) { CExpression *pexprCrossUpperJoinPred = CUtils::PexprScalarConstBool(pmp, true, false); pdrgpexprUpper->Append(pexprCrossUpperJoinPred); } // clean up pcrsLower->Release(); pdrgpexprOrig->Release(); pdrgpexprJoins->Release(); }
//--------------------------------------------------------------------------- // @function: // CNormalizer::PexprPullUpAndCombineProjects // // @doc: // Pulls up logical projects as far as possible, and combines consecutive // projects if possible // //--------------------------------------------------------------------------- CExpression * CNormalizer::PexprPullUpAndCombineProjects ( IMemoryPool *pmp, CExpression *pexpr, BOOL *pfSuccess // output to indicate whether anything was pulled up ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(NULL != pfSuccess); COperator *pop = pexpr->Pop(); const ULONG ulArity = pexpr->UlArity(); if (!pop->FLogical() || 0 == ulArity) { pexpr->AddRef(); return pexpr; } DrgPexpr *pdrgpexprChildren = GPOS_NEW(pmp) DrgPexpr(pmp); DrgPexpr *pdrgpexprPrElPullUp = GPOS_NEW(pmp) DrgPexpr(pmp); CExpressionHandle exprhdl(pmp); exprhdl.Attach(pexpr); CColRefSet *pcrsOutput = CDrvdPropRelational::Pdprel(pexpr->PdpDerive())->PcrsOutput(); // extract the columns used by the scalar expression and the operator itself (for grouping, sorting, etc.) 
CColRefSet *pcrsUsed = exprhdl.PcrsUsedColumns(pmp); for (ULONG ul = 0; ul < ulArity; ul++) { CExpression *pexprChild = PexprPullUpAndCombineProjects(pmp, (*pexpr)[ul], pfSuccess); if (pop->FLogical() && CLogical::PopConvert(pop)->FCanPullProjectionsUp(ul) && COperator::EopLogicalProject == pexprChild->Pop()->Eopid()) { // this child is a project - see if any project elements can be pulled up CExpression *pexprNewChild = PexprPullUpProjectElements ( pmp, pexprChild, pcrsUsed, pcrsOutput, &pdrgpexprPrElPullUp ); pexprChild->Release(); pexprChild = pexprNewChild; } pdrgpexprChildren->Append(pexprChild); } pcrsUsed->Release(); pop->AddRef(); if (0 < pdrgpexprPrElPullUp->UlLength() && COperator::EopLogicalProject == pop->Eopid()) { // some project elements have been pulled up and the original expression // was a project - combine its project list with the pulled up project elements GPOS_ASSERT(2 == pdrgpexprChildren->UlLength()); *pfSuccess = true; CExpression *pexprRelational = (*pdrgpexprChildren)[0]; CExpression *pexprPrLOld = (*pdrgpexprChildren)[1]; pexprRelational->AddRef(); CUtils::AddRefAppend(pdrgpexprPrElPullUp, pexprPrLOld->PdrgPexpr()); pdrgpexprChildren->Release(); CExpression *pexprPrjList = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CScalarProjectList(pmp), pdrgpexprPrElPullUp); GPOS_ASSERT(CDrvdPropRelational::Pdprel(pexprRelational->PdpDerive())->PcrsOutput()->FSubset(CDrvdPropScalar::Pdpscalar(pexprPrjList->PdpDerive())->PcrsUsed())); return GPOS_NEW(pmp) CExpression(pmp, pop, pexprRelational, pexprPrjList); } CExpression *pexprOutput = GPOS_NEW(pmp) CExpression(pmp, pop, pdrgpexprChildren); if (0 == pdrgpexprPrElPullUp->UlLength()) { // no project elements were pulled up pdrgpexprPrElPullUp->Release(); return pexprOutput; } // some project elements were pulled - add a project on top of output expression *pfSuccess = true; CExpression *pexprPrjList = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CScalarProjectList(pmp), pdrgpexprPrElPullUp); 
GPOS_ASSERT(CDrvdPropRelational::Pdprel(pexprOutput->PdpDerive())->PcrsOutput()->FSubset(CDrvdPropScalar::Pdpscalar(pexprPrjList->PdpDerive())->PcrsUsed())); return GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalProject(pmp), pexprOutput, pexprPrjList); }
//--------------------------------------------------------------------------- // @function: // CNormalizer::FLocalColsSubsetOfInputCols // // @doc: // Check if the columns used by the operator are a subset of its input columns // //--------------------------------------------------------------------------- BOOL CNormalizer::FLocalColsSubsetOfInputCols ( IMemoryPool *pmp, CExpression *pexpr ) { GPOS_ASSERT(NULL != pexpr); GPOS_CHECK_STACK_SIZE; CExpressionHandle exprhdl(pmp); if (NULL != pexpr->Pgexpr()) { exprhdl.Attach(pexpr->Pgexpr()); } else { exprhdl.Attach(pexpr); } exprhdl.DeriveProps(NULL /*pdpctxt*/); BOOL fValid = true; if (pexpr->Pop()->FLogical()) { if (0 == exprhdl.UlNonScalarChildren()) { return true; } CColRefSet *pcrsInput = GPOS_NEW(pmp) CColRefSet(pmp); const ULONG ulArity = exprhdl.UlArity(); for (ULONG ul = 0; ul < ulArity; ul++) { if (!exprhdl.FScalarChild(ul)) { CDrvdPropRelational *pdprelChild = exprhdl.Pdprel(ul); pcrsInput->Include(pdprelChild->PcrsOutput()); } } // check if the operator's locally used columns are a subset of the input columns CColRefSet *pcrsUsedOp = exprhdl.PcrsUsedColumns(pmp); pcrsUsedOp->Exclude(exprhdl.Pdprel()->PcrsOuter()); fValid = pcrsInput->FSubset(pcrsUsedOp); // release pcrsInput->Release(); pcrsUsedOp->Release(); } // check if its children are valid const ULONG ulExprArity = pexpr->UlArity(); for (ULONG ulChildIdx = 0; ulChildIdx < ulExprArity && fValid; ulChildIdx++) { CExpression *pexprChild = (*pexpr)[ulChildIdx]; fValid = FLocalColsSubsetOfInputCols(pmp, pexprChild); } return fValid; }
//--------------------------------------------------------------------------- // @function: // CColRefSetIterTest::EresUnittest_Basics // // @doc: // Testing ctors/dtor; and pcr decoding; // Other functionality already tested in vanilla CBitSetIter; // //--------------------------------------------------------------------------- GPOS_RESULT CColRefSetIterTest::EresUnittest_Basics() { CAutoMemoryPool amp; IMemoryPool *pmp = amp.Pmp(); // Setup an MD cache with a file-based provider CMDProviderMemory *pmdp = CTestUtils::m_pmdpf; pmdp->AddRef(); CMDAccessor mda(pmp, CMDCache::Pcache()); mda.RegisterProvider(CTestUtils::m_sysidDefault, pmdp); // install opt context in TLS CAutoOptCtxt aoc ( pmp, &mda, NULL /* pceeval */, CTestUtils::Pcm(pmp) ); // get column factory from optimizer context object CColumnFactory *pcf = COptCtxt::PoctxtFromTLS()->Pcf(); CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp); CWStringConst strName(GPOS_WSZ_LIT("Test Column")); CName name(&strName); // create a int4 datum const IMDTypeInt4 *pmdtypeint4 = mda.PtMDType<IMDTypeInt4>(); ULONG ulCols = 10; for(ULONG i = 0; i < ulCols; i++) { CColRef *pcr = pcf->PcrCreate(pmdtypeint4, name); pcrs->Include(pcr); GPOS_ASSERT(pcrs->FMember(pcr)); } GPOS_ASSERT(pcrs->CElements() == ulCols); ULONG ulCount = 0; CColRefSetIter crsi(*pcrs); while(crsi.FAdvance()) { GPOS_ASSERT((BOOL)crsi); CColRef *pcr = crsi.Pcr(); GPOS_ASSERT(pcr->Name().FEquals(name)); // to avoid unused variable warnings (void) pcr->UlId(); ulCount++; } GPOS_ASSERT(ulCols == ulCount); GPOS_ASSERT(!((BOOL)crsi)); pcrs->Release(); return GPOS_OK; }
// results: // decorrelated expression, ppexprDecorrelated // +--CLogicalSequenceProject // |--CLogicalGet "b" ("b"), Columns: [...] // +--CScalarProjectList origin: [Grp:8, GrpExpr:0] // +--CScalarProjectElement "avg" (18) origin: [Grp:7, GrpExpr:0] // +--CScalarWindowFunc (avg , Agg: true , Distinct: false , StarArgument: false , SimpleAgg: true) origin: [Grp:6, GrpExpr:0] // +--CScalarIdent "i" (9) origin: [Grp:3, GrpExpr:0] // array of quals // pdrgpexprCorrelations // +--CScalarCmp (=) origin: [Grp:4, GrpExpr:0] // |--CScalarIdent "i" (0) origin: [Grp:2, GrpExpr:0] // +--CScalarIdent "i" (9) origin: [Grp:3, GrpExpr:0] // clang-format on BOOL CDecorrelator::FProcessProject ( IMemoryPool *mp, CExpression *pexpr, BOOL fEqualityOnly, CExpression **ppexprDecorrelated, CExpressionArray *pdrgpexprCorrelations ) { COperator::EOperatorId op_id = pexpr->Pop()->Eopid(); GPOS_ASSERT(COperator::EopLogicalProject == op_id || COperator::EopLogicalSequenceProject == op_id); CExpression *pexprPrjList = (*pexpr)[1]; // fail if project elements have outer references CColRefSet *pcrsOutput = CDrvdPropRelational::GetRelationalProperties((*pexpr)[0]->PdpDerive())->PcrsOutput(); CColRefSet *pcrsUsed = CDrvdPropScalar::GetDrvdScalarProps(pexprPrjList->PdpDerive())->PcrsUsed(); if (!pcrsOutput->ContainsAll(pcrsUsed)) { return false; } if (COperator::EopLogicalSequenceProject == op_id) { (void) pexpr->PdpDerive(); CExpressionHandle exprhdl(mp); exprhdl.Attach(pexpr); exprhdl.DeriveProps(NULL /*pdpctxt*/); // fail decorrelation in the following two cases; // 1. if the LogicalSequenceProject node has local outer references in order by or partition by or window frame // of a window function // ex: select C.j from C where C.i in (select rank() over (order by C.i) from B where B.i=C.i); // 2. 
if the relational child of LogicalSequenceProject node does not have any aggregate window function // if the project list contains aggregrate on window function, then // we can decorrelate it as the aggregate is performed over a column or count(*). // The IN condition will be translated to a join instead of a correlated plan. // ex: select C.j from C where C.i in (select avg(i) over (partition by B.i) from B where B.i=C.i); // ===> (resulting join condition) b.i = c.i and c.i = avg(i) if (CLogicalSequenceProject::PopConvert(pexpr->Pop())->FHasLocalOuterRefs(exprhdl) || !CUtils::FHasAggWindowFunc(pexprPrjList)) { return false; } } // decorrelate relational child CExpression *pexprRelational = NULL; if (!FProcess(mp, (*pexpr)[0], fEqualityOnly, &pexprRelational, pdrgpexprCorrelations)) { GPOS_ASSERT(NULL == pexprRelational); return false; } // assemble new project COperator *pop = pexpr->Pop(); pop->AddRef(); pexprPrjList->AddRef(); *ppexprDecorrelated = GPOS_NEW(mp) CExpression(mp, pop, pexprRelational, pexprPrjList); return true; }
//--------------------------------------------------------------------------- // @function: // CXformPushDownLeftOuterJoin::Transform // // @doc: // Transform LOJ whose outer child is an NAry-join to be a child // of NAry-join // // Input: // LOJ (a=d) // |---NAry-Join (a=b) and (b=c) // | |--A // | |--B // | +--C // +--D // // Output: // NAry-Join (a=b) and (b=c) // |--B // |--C // +--LOJ (a=d) // |--A // +--D // //--------------------------------------------------------------------------- void CXformPushDownLeftOuterJoin::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(NULL != pxfres); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); CExpression *pexprNAryJoin = (*pexpr)[0]; CExpression *pexprLOJInnerChild = (*pexpr)[1]; CExpression *pexprLOJScalarChild = (*pexpr)[2]; CColRefSet *pcrsLOJUsed = CDrvdPropScalar::Pdpscalar(pexprLOJScalarChild->PdpDerive())->PcrsUsed(); DrgPexpr *pdrgpexprLOJChildren = GPOS_NEW(pmp) DrgPexpr(pmp); DrgPexpr *pdrgpexprNAryJoinChildren = GPOS_NEW(pmp) DrgPexpr(pmp); const ULONG ulArity = pexprNAryJoin->UlArity(); CExpression *pexprNAryJoinScalarChild = (*pexprNAryJoin)[ulArity - 1]; for (ULONG ul = 0 ; ul < ulArity - 1; ul++) { CExpression *pexprChild = (*pexprNAryJoin)[ul]; CColRefSet *pcrsOutput = CDrvdPropRelational::Pdprel(pexprChild->PdpDerive())->PcrsOutput(); pexprChild->AddRef(); if (!pcrsOutput->FDisjoint(pcrsLOJUsed)) { pdrgpexprLOJChildren->Append(pexprChild); } else { pdrgpexprNAryJoinChildren->Append(pexprChild); } } CExpression *pexprLOJOuterChild = (*pdrgpexprLOJChildren)[0]; if (1 < pdrgpexprLOJChildren->UlLength()) { // collect all relations needed by LOJ outer side into a cross product, // normalization at the end of this function takes care of pushing NAry // join predicates down pdrgpexprLOJChildren->Append(CPredicateUtils::PexprConjunction(pmp, NULL 
/*pdrgpexpr*/)); pexprLOJOuterChild = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalNAryJoin(pmp), pdrgpexprLOJChildren); // reconstruct LOJ children and add only the created child pdrgpexprLOJChildren = GPOS_NEW(pmp) DrgPexpr(pmp); pdrgpexprLOJChildren->Append(pexprLOJOuterChild); } // continue with rest of LOJ inner and scalar children pexprLOJInnerChild->AddRef(); pdrgpexprLOJChildren->Append(pexprLOJInnerChild); pexprLOJScalarChild->AddRef(); pdrgpexprLOJChildren->Append(pexprLOJScalarChild); // build new LOJ CExpression *pexprLOJNew = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalLeftOuterJoin(pmp), pdrgpexprLOJChildren); // add new NAry join children pdrgpexprNAryJoinChildren->Append(pexprLOJNew); pexprNAryJoinScalarChild->AddRef(); pdrgpexprNAryJoinChildren->Append(pexprNAryJoinScalarChild); if (3 > pdrgpexprNAryJoinChildren->UlLength()) { // xform must generate a valid NAry-join expression // for example, in the following case we end-up with the same input // expression, which should be avoided: // // Input: // // LOJ (a=c) and (b=c) // |--NAry-Join (a=b) // | |--A // | +--B // +--C // // Output: // // NAry-Join (true) // +--LOJ (a=c) and (b=c) // |--NAry-Join (a=b) // | |--A // | +--B // +--C pdrgpexprNAryJoinChildren->Release(); return; } // create new NAry join CExpression *pexprNAryJoinNew = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalNAryJoin(pmp), pdrgpexprNAryJoinChildren); // normalize resulting expression and add it to xform results CExpression *pexprResult = CNormalizer::PexprNormalize(pmp, pexprNAryJoinNew); pexprNAryJoinNew->Release(); pxfres->Add(pexprResult); }
//--------------------------------------------------------------------------- // @function: // CDecorrelator::FProcessJoin // // @doc: // Decorrelate a join expression; // //--------------------------------------------------------------------------- BOOL CDecorrelator::FProcessJoin ( IMemoryPool *pmp, CExpression *pexpr, BOOL fEqualityOnly, CExpression **ppexprDecorrelated, DrgPexpr *pdrgpexprCorrelations ) { GPOS_ASSERT(CUtils::FLogicalJoin(pexpr->Pop()) || CUtils::FApply(pexpr->Pop())); ULONG ulArity = pexpr->UlArity(); DrgPexpr *pdrgpexpr = GPOS_NEW(pmp) DrgPexpr(pmp, ulArity); CColRefSet *pcrsOutput = GPOS_NEW(pmp) CColRefSet(pmp); // decorrelate all relational children for (ULONG ul = 0; ul < ulArity - 1; ul++) { CExpression *pexprInput = NULL; if (FProcess(pmp, (*pexpr)[ul], fEqualityOnly, &pexprInput, pdrgpexprCorrelations)) { pdrgpexpr->Append(pexprInput); pcrsOutput->Union(CDrvdPropRelational::Pdprel(pexprInput->PdpDerive())->PcrsOutput()); } else { pdrgpexpr->Release(); pcrsOutput->Release(); return false; } } // check for valid semi join correlations if (!FPullableCorrelations(pmp, pexpr, pdrgpexpr, pdrgpexprCorrelations)) { pdrgpexpr->Release(); pcrsOutput->Release(); return false; } // decorrelate predicate and build new join operator CExpression *pexprPredicate = NULL; BOOL fSuccess = FProcessPredicate(pmp, pexpr, (*pexpr)[ulArity - 1], fEqualityOnly, pcrsOutput, &pexprPredicate, pdrgpexprCorrelations); pcrsOutput->Release(); if (fSuccess) { // in case entire predicate is being deferred, plug in a 'true' if (NULL == pexprPredicate) { pexprPredicate = CUtils::PexprScalarConstBool(pmp, true /*fVal*/); } pdrgpexpr->Append(pexprPredicate); COperator *pop = pexpr->Pop(); pop->AddRef(); *ppexprDecorrelated = GPOS_NEW(pmp) CExpression(pmp, pop, pdrgpexpr); } else { pdrgpexpr->Release(); CRefCount::SafeRelease(pexprPredicate); } return fSuccess; }
//--------------------------------------------------------------------------- // @function: // CPhysicalJoin::AddHashKeys // // @doc: // Helper for adding a pair of hash join keys to given arrays // //--------------------------------------------------------------------------- void CPhysicalJoin::AddHashKeys ( CExpression *pexprPred, // equality predicate in the form (ColRef1 = ColRef2) or // in the form (ColRef1 is not distinct from ColRef2) CExpression *pexprOuter, CExpression * #ifdef GPOS_DEBUG pexprInner #endif // GPOS_DEBUG , DrgPexpr *pdrgpexprOuter, // array of outer hash keys DrgPexpr *pdrgpexprInner // array of inner hash keys ) { GPOS_ASSERT(FHashJoinCompatible(pexprPred, pexprOuter, pexprInner)); // output of outer side CColRefSet *pcrsOuter = CDrvdPropRelational::Pdprel(pexprOuter->PdpDerive())->PcrsOutput(); #ifdef GPOS_DEBUG // output of inner side CColRefSet *pcrsInner = CDrvdPropRelational::Pdprel(pexprInner->PdpDerive())->PcrsOutput(); #endif // GPOS_DEBUG // extract outer and inner columns from predicate CExpression *pexprPredOuter = NULL; CExpression *pexprPredInner = NULL; ExtractHashJoinExpressions(pexprPred, &pexprPredOuter, &pexprPredInner); GPOS_ASSERT(NULL != pexprPredOuter); GPOS_ASSERT(NULL != pexprPredInner); CColRefSet *pcrsPredOuter = CDrvdPropScalar::Pdpscalar(pexprPredOuter->PdpDerive())->PcrsUsed(); #ifdef GPOS_DEBUG CColRefSet *pcrsPredInner = CDrvdPropScalar::Pdpscalar(pexprPredInner->PdpDerive())->PcrsUsed(); #endif // GPOS_DEBUG // determine outer and inner hash keys CExpression *pexprKeyOuter = NULL; CExpression *pexprKeyInner = NULL; if (pcrsOuter->FSubset(pcrsPredOuter)) { pexprKeyOuter = pexprPredOuter; GPOS_ASSERT(pcrsInner->FSubset(pcrsPredInner)); pexprKeyInner = pexprPredInner; } else { GPOS_ASSERT(pcrsOuter->FSubset(pcrsPredInner)); pexprKeyOuter = pexprPredInner; GPOS_ASSERT(pcrsInner->FSubset(pcrsPredOuter)); pexprKeyInner = pexprPredOuter; } pexprKeyOuter->AddRef(); pexprKeyInner->AddRef(); 
pdrgpexprOuter->Append(pexprKeyOuter); pdrgpexprInner->Append(pexprKeyInner); GPOS_ASSERT(pdrgpexprInner->UlLength() == pdrgpexprOuter->UlLength()); }
//--------------------------------------------------------------------------- // @function: // CPredicateUtilsTest::EresUnittest_PlainEqualities // // @doc: // Test the extraction of equality predicates between scalar identifiers // //--------------------------------------------------------------------------- GPOS_RESULT CPredicateUtilsTest::EresUnittest_PlainEqualities() { CAutoMemoryPool amp; IMemoryPool *mp = amp.Pmp(); // setup a file-based provider CMDProviderMemory *pmdp = CTestUtils::m_pmdpf; pmdp->AddRef(); CMDAccessor mda(mp, CMDCache::Pcache(), CTestUtils::m_sysidDefault, pmdp); // install opt context in TLS CAutoOptCtxt aoc ( mp, &mda, NULL, /* pceeval */ CTestUtils::GetCostModel(mp) ); CExpression *pexprLeft = CTestUtils::PexprLogicalGet(mp); CExpression *pexprRight = CTestUtils::PexprLogicalGet(mp); CExpressionArray *pdrgpexprOriginal = GPOS_NEW(mp) CExpressionArray(mp); CColRefSet *pcrsLeft = CDrvdPropRelational::GetRelationalProperties(pexprLeft->PdpDerive())->PcrsOutput(); CColRefSet *pcrsRight = CDrvdPropRelational::GetRelationalProperties(pexprRight->PdpDerive())->PcrsOutput(); CColRef *pcrLeft = pcrsLeft->PcrAny(); CColRef *pcrRight = pcrsRight->PcrAny(); // generate an equality predicate between two column reference CExpression *pexprScIdentEquality = CUtils::PexprScalarEqCmp(mp, pcrLeft, pcrRight); pexprScIdentEquality->AddRef(); pdrgpexprOriginal->Append(pexprScIdentEquality); // generate a non-equality predicate between two column reference CExpression *pexprScIdentInequality = CUtils::PexprScalarCmp(mp, pcrLeft, pcrRight, CWStringConst(GPOS_WSZ_LIT("<")), GPOS_NEW(mp) CMDIdGPDB(GPDB_INT4_LT_OP)); pexprScIdentInequality->AddRef(); pdrgpexprOriginal->Append(pexprScIdentInequality); // generate an equality predicate between a column reference and a constant value CExpression *pexprScalarConstInt4 = CUtils::PexprScalarConstInt4(mp, 10 /*fValue*/); CExpression *pexprScIdentConstEquality = CUtils::PexprScalarEqCmp(mp, pexprScalarConstInt4, 
pcrRight); pdrgpexprOriginal->Append(pexprScIdentConstEquality); GPOS_ASSERT(3 == pdrgpexprOriginal->Size()); CExpressionArray *pdrgpexprResult = CPredicateUtils::PdrgpexprPlainEqualities(mp, pdrgpexprOriginal); GPOS_ASSERT(1 == pdrgpexprResult->Size()); // clean up pdrgpexprOriginal->Release(); pdrgpexprResult->Release(); pexprLeft->Release(); pexprRight->Release(); pexprScIdentEquality->Release(); pexprScIdentInequality->Release(); return GPOS_OK; }
//--------------------------------------------------------------------------- // @function: // CConstraint::PcnstrFromScalarExpr // // @doc: // Create constraint from scalar expression and pass back any discovered // equivalence classes // //--------------------------------------------------------------------------- CConstraint * CConstraint::PcnstrFromScalarExpr ( IMemoryPool *pmp, CExpression *pexpr, DrgPcrs **ppdrgpcrs // output equivalence classes ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(pexpr->Pop()->FScalar()); GPOS_ASSERT(NULL != ppdrgpcrs); GPOS_ASSERT(NULL == *ppdrgpcrs); (void) pexpr->PdpDerive(); CDrvdPropScalar *pdpScalar = CDrvdPropScalar::Pdpscalar(pexpr->Pdp(CDrvdProp::EptScalar)); CColRefSet *pcrs = pdpScalar->PcrsUsed(); ULONG ulCols = pcrs->CElements(); if (0 == ulCols) { // TODO: - May 29, 2012: in case of an expr with no columns (e.g. 1 < 2), // possibly evaluate the expression, and return a "TRUE" or "FALSE" constraint return NULL; } if (1 == ulCols) { CColRef *pcr = pcrs->PcrFirst(); if (!CUtils::FConstrainableType(pcr->Pmdtype()->Pmdid())) { return NULL; } CConstraint *pcnstr = NULL; *ppdrgpcrs = GPOS_NEW(pmp) DrgPcrs(pmp); if (CUtils::FScalarArrayCmp(pexpr)) { pcnstr = PcnstrFromScalarArrayCmp(pmp, pexpr, pcr); } else { pcnstr = CConstraintInterval::PciIntervalFromScalarExpr(pmp, pexpr, pcr); } if (NULL != pcnstr) { AddColumnToEquivClasses(pmp, pcr, ppdrgpcrs); } return pcnstr; } switch (pexpr->Pop()->Eopid()) { case COperator::EopScalarBoolOp: return PcnstrFromScalarBoolOp(pmp, pexpr, ppdrgpcrs); case COperator::EopScalarCmp: return PcnstrFromScalarCmp(pmp, pexpr, ppdrgpcrs); default: return NULL; } }
//--------------------------------------------------------------------------- // @function: // CPartitionPropagationSpec::SplitPartPredicates // // @doc: // Split the partition elimination predicates over the various levels // as well as the residual predicate and add them to the appropriate // hashmaps. These are to be used when creating the partition selector // //--------------------------------------------------------------------------- void CPartitionPropagationSpec::SplitPartPredicates ( IMemoryPool *pmp, CExpression *pexprScalar, DrgDrgPcr *pdrgpdrgpcrKeys, HMUlExpr *phmulexprEqFilter, // output HMUlExpr *phmulexprFilter, // output CExpression **ppexprResidual // output ) { GPOS_ASSERT(NULL != pexprScalar); GPOS_ASSERT(NULL != pdrgpdrgpcrKeys); GPOS_ASSERT(NULL != phmulexprEqFilter); GPOS_ASSERT(NULL != phmulexprFilter); GPOS_ASSERT(NULL != ppexprResidual); GPOS_ASSERT(NULL == *ppexprResidual); DrgPexpr *pdrgpexprConjuncts = CPredicateUtils::PdrgpexprConjuncts(pmp, pexprScalar); CBitSet *pbsUsed = GPOS_NEW(pmp) CBitSet(pmp); CColRefSet *pcrsKeys = PcrsKeys(pmp, pdrgpdrgpcrKeys); const ULONG ulLevels = pdrgpdrgpcrKeys->UlLength(); for (ULONG ul = 0; ul < ulLevels; ul++) { CColRef *pcr = CUtils::PcrExtractPartKey(pdrgpdrgpcrKeys, ul); // find conjuncts for this key and mark their positions DrgPexpr *pdrgpexprKey = PdrgpexprPredicatesOnKey(pmp, pdrgpexprConjuncts, pcr, pcrsKeys, &pbsUsed); const ULONG ulLen = pdrgpexprKey->UlLength(); if (0 == ulLen) { // no predicates on this key pdrgpexprKey->Release(); continue; } if (1 < ulLen || (!CPredicateUtils::FEquality((*pdrgpexprKey)[0]))) { // more than one predicate on this key or one non-equality predicate #ifdef GPOS_DEBUG BOOL fResult = #endif // GPOS_DEBUG phmulexprFilter->FInsert(GPOS_NEW(pmp) ULONG(ul), CPredicateUtils::PexprConjunction(pmp, pdrgpexprKey)); GPOS_ASSERT(fResult); continue; } // one equality predicate (key = expr); take out the expression // and add it to the equality filters map CExpression 
*pexprPartKey = NULL; CExpression *pexprOther = NULL; IMDType::ECmpType ecmpt = IMDType::EcmptOther; CPredicateUtils::ExtractComponents((*pdrgpexprKey)[0], pcr, &pexprPartKey, &pexprOther, &ecmpt); GPOS_ASSERT(NULL != pexprOther); pexprOther->AddRef(); #ifdef GPOS_DEBUG BOOL fResult = #endif // GPOS_DEBUG phmulexprEqFilter->FInsert(GPOS_NEW(pmp) ULONG(ul), pexprOther); GPOS_ASSERT(fResult); pdrgpexprKey->Release(); } (*ppexprResidual) = PexprResidualFilter(pmp, pdrgpexprConjuncts, pbsUsed); pcrsKeys->Release(); pdrgpexprConjuncts->Release(); pbsUsed->Release(); }
//--------------------------------------------------------------------------- // @function: // CConstraint::PdrgpcnstrDeduplicate // // @doc: // Simplify an array of constraints to be used as children for a conjunction // or disjunction. If there are two or more elements that reference only one // particular column, these constraints are combined into one // //--------------------------------------------------------------------------- DrgPcnstr * CConstraint::PdrgpcnstrDeduplicate ( IMemoryPool *pmp, DrgPcnstr *pdrgpcnstr, EConstraintType ect ) const { DrgPcnstr *pdrgpcnstrNew = GPOS_NEW(pmp) DrgPcnstr(pmp); CColRefSet *pcrsDeduped = GPOS_NEW(pmp) CColRefSet(pmp); const ULONG ulLen = pdrgpcnstr->UlLength(); for (ULONG ul = 0; ul < ulLen; ul++) { CConstraint *pcnstrChild = (*pdrgpcnstr)[ul]; CColRefSet *pcrs = pcnstrChild->PcrsUsed(); // we only simplify constraints that reference a single column, otherwise // we add constraint as is if (1 < pcrs->CElements()) { pcnstrChild->AddRef(); pdrgpcnstrNew->Append(pcnstrChild); continue; } CColRef *pcr = pcrs->PcrFirst(); if (pcrsDeduped->FMember(pcr)) { // current constraint has already been combined with a previous one continue; } // get all constraints from the input array that reference this column DrgPcnstr *pdrgpcnstrCol = PdrgpcnstrOnColumn(pmp, pdrgpcnstr, pcr, true /*fExclusive*/); if (1 == pdrgpcnstrCol->UlLength()) { // if there is only one such constraint, then no simplification // for this column pdrgpcnstrCol->Release(); pcnstrChild->AddRef(); pdrgpcnstrNew->Append(pcnstrChild); continue; } CExpression *pexpr = NULL; if (EctConjunction == ect) { pexpr = PexprScalarConjDisj(pmp, pdrgpcnstrCol, true /*fConj*/); } else { GPOS_ASSERT(EctDisjunction == ect); pexpr = PexprScalarConjDisj(pmp, pdrgpcnstrCol, false /*fConj*/); } pdrgpcnstrCol->Release(); GPOS_ASSERT(NULL != pexpr); CConstraint *pcnstrNew = CConstraintInterval::PciIntervalFromScalarExpr(pmp, pexpr, pcr); GPOS_ASSERT(NULL != pcnstrNew); pexpr->Release(); 
pdrgpcnstrNew->Append(pcnstrNew); pcrsDeduped->Include(pcr); } pcrsDeduped->Release(); pdrgpcnstr->Release(); return pdrgpcnstrNew; }
//--------------------------------------------------------------------------- // @function: // CQueryContext::PqcGenerate // // @doc: // Generate the query context for the given expression and array of // output column ref ids // //--------------------------------------------------------------------------- CQueryContext * CQueryContext::PqcGenerate ( IMemoryPool *mp, CExpression * pexpr, ULongPtrArray *pdrgpulQueryOutputColRefId, CMDNameArray *pdrgpmdname, BOOL fDeriveStats ) { GPOS_ASSERT(NULL != pexpr && NULL != pdrgpulQueryOutputColRefId); CColRefSet *pcrs = GPOS_NEW(mp) CColRefSet(mp); CColRefArray *colref_array = GPOS_NEW(mp) CColRefArray(mp); COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS(); CColumnFactory *col_factory = poptctxt->Pcf(); GPOS_ASSERT(NULL != col_factory); // Collect required column references (colref_array) const ULONG length = pdrgpulQueryOutputColRefId->Size(); for (ULONG ul = 0; ul < length; ul++) { ULONG *pul = (*pdrgpulQueryOutputColRefId)[ul]; GPOS_ASSERT(NULL != pul); CColRef *colref = col_factory->LookupColRef(*pul); GPOS_ASSERT(NULL != colref); pcrs->Include(colref); colref_array->Append(colref); } // Collect required properties (prpp) at the top level: // By default no sort order requirement is added, unless the root operator in // the input logical expression is a LIMIT. This is because Orca always // attaches top level Sort to a LIMIT node. COrderSpec *pos = NULL; CExpression *pexprResult = pexpr; COperator *popTop = PopTop(pexpr); if (COperator::EopLogicalLimit == popTop->Eopid()) { // top level operator is a limit, copy order spec to query context pos = CLogicalLimit::PopConvert(popTop)->Pos(); pos->AddRef(); } else { // no order required pos = GPOS_NEW(mp) COrderSpec(mp); } CDistributionSpec *pds = NULL; BOOL fDML = CUtils::FLogicalDML(pexpr->Pop()); poptctxt->MarkDMLQuery(fDML); // DML commands do not have distribution requirement. Otherwise the // distribution requirement is Singleton. 
if (fDML) { pds = GPOS_NEW(mp) CDistributionSpecAny(COperator::EopSentinel); } else { pds = GPOS_NEW(mp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } // By default, no rewindability requirement needs to be satisfied at the top level CRewindabilitySpec *prs = GPOS_NEW(mp) CRewindabilitySpec(CRewindabilitySpec::ErtNotRewindable, CRewindabilitySpec::EmhtNoMotion); // Ensure order, distribution and rewindability meet 'satisfy' matching at the top level CEnfdOrder *peo = GPOS_NEW(mp) CEnfdOrder(pos, CEnfdOrder::EomSatisfy); CEnfdDistribution *ped = GPOS_NEW(mp) CEnfdDistribution(pds, CEnfdDistribution::EdmSatisfy); CEnfdRewindability *per = GPOS_NEW(mp) CEnfdRewindability(prs, CEnfdRewindability::ErmSatisfy); // Required CTEs are obtained from the CTEInfo global information in the optimizer context CCTEReq *pcter = poptctxt->Pcteinfo()->PcterProducers(mp); // NB: Partition propagation requirements are not initialized here. They are // constructed later based on derived relation properties (CPartInfo) by // CReqdPropPlan::InitReqdPartitionPropagation(). CReqdPropPlan *prpp = GPOS_NEW(mp) CReqdPropPlan(pcrs, peo, ped, per, pcter); // Finally, create the CQueryContext pdrgpmdname->AddRef(); return GPOS_NEW(mp) CQueryContext(mp, pexprResult, prpp, colref_array, pdrgpmdname, fDeriveStats); }
//--------------------------------------------------------------------------- // @function: // CXformSimplifyGbAgg::Transform // // @doc: // Actual transformation to simplify a aggregate expression // //--------------------------------------------------------------------------- void CXformSimplifyGbAgg::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(NULL != pxfres); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); if (FDropGbAgg(pmp, pexpr,pxfres)) { // grouping columns could be dropped, GbAgg is transformed to a Select return; } // extract components CLogicalGbAgg *popAgg = CLogicalGbAgg::PopConvert(pexpr->Pop()); CExpression *pexprRelational = (*pexpr)[0]; CExpression *pexprProjectList = (*pexpr)[1]; DrgPcr *pdrgpcr = popAgg->Pdrgpcr(); CColRefSet *pcrsGrpCols = GPOS_NEW(pmp) CColRefSet(pmp); pcrsGrpCols->Include(pdrgpcr); CColRefSet *pcrsCovered = GPOS_NEW(pmp) CColRefSet(pmp); // set of grouping columns covered by FD's CColRefSet *pcrsMinimal = GPOS_NEW(pmp) CColRefSet(pmp); // a set of minimal grouping columns based on FD's DrgPfd *pdrgpfd = CDrvdPropRelational::Pdprel(pexpr->PdpDerive())->Pdrgpfd(); // collect grouping columns FD's const ULONG ulSize = pdrgpfd->UlSafeLength(); for (ULONG ul = 0; ul < ulSize; ul++) { CFunctionalDependency *pfd = (*pdrgpfd)[ul]; if (pfd->FIncluded(pcrsGrpCols)) { pcrsCovered->Include(pfd->PcrsDetermined()); pcrsCovered->Include(pfd->PcrsKey()); pcrsMinimal->Include(pfd->PcrsKey()); } } BOOL fCovered = pcrsCovered->FEqual(pcrsGrpCols); pcrsGrpCols->Release(); pcrsCovered->Release(); if (!fCovered) { // the union of RHS of collected FD's does not cover all grouping columns pcrsMinimal->Release(); return; } // create a new Agg with minimal grouping columns pdrgpcr->AddRef(); CLogicalGbAgg *popAggNew = GPOS_NEW(pmp) CLogicalGbAgg(pmp, pdrgpcr, pcrsMinimal->Pdrgpcr(pmp), 
popAgg->Egbaggtype()); pcrsMinimal->Release(); GPOS_ASSERT(!popAgg->FMatch(popAggNew) && "Simplified aggregate matches original aggregate"); pexprRelational->AddRef(); pexprProjectList->AddRef(); CExpression *pexprResult = GPOS_NEW(pmp) CExpression(pmp, popAggNew, pexprRelational, pexprProjectList); pxfres->Add(pexprResult); }