//--------------------------------------------------------------------------- // @function: // CPhysicalNLJoin::FOptimizeNLJoin // // @doc: // Check if NL join node should be optimized for the given context // //--------------------------------------------------------------------------- BOOL COptimizationContext::FOptimizeNLJoin ( IMemoryPool *mp, CGroupExpression *, // pgexprParent CGroupExpression *pgexprJoin, COptimizationContext *poc, ULONG // ulSearchStages ) { GPOS_ASSERT(NULL != pgexprJoin); GPOS_ASSERT(NULL != poc); GPOS_ASSERT(CUtils::FNLJoin(pgexprJoin->Pop())); COperator *pop = pgexprJoin->Pop(); if (!CUtils::FCorrelatedNLJoin(pop)) { return true; } // for correlated join, the requested columns must be covered by outer child // columns and columns to be generated from inner child CPhysicalNLJoin *popNLJoin = CPhysicalNLJoin::PopConvert(pop); CColRefSet *pcrs = GPOS_NEW(mp) CColRefSet(mp, popNLJoin->PdrgPcrInner()); CColRefSet *pcrsOuterChild = CDrvdPropRelational::GetRelationalProperties((*pgexprJoin)[0]->Pdp())->PcrsOutput(); pcrs->Include(pcrsOuterChild); BOOL fIncluded = pcrs->ContainsAll(poc->Prpp()->PcrsRequired()); pcrs->Release(); return fIncluded; }
//--------------------------------------------------------------------------- // @function: // CXformCollapseGbAgg::Transform // // @doc: // Actual transformation to collapse two cascaded group by operators; // if the top Gb grouping columns are subset of bottom Gb grouping // columns AND both Gb operators do not define agg functions, we can // remove the bottom group by operator // // //--------------------------------------------------------------------------- void CXformCollapseGbAgg::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(NULL != pxfres); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *mp = pxfctxt->Pmp(); // extract components CLogicalGbAgg *popTopGbAgg = CLogicalGbAgg::PopConvert(pexpr->Pop()); GPOS_ASSERT(0 < popTopGbAgg->Pdrgpcr()->Size()); GPOS_ASSERT(popTopGbAgg->FGlobal()); CExpression *pexprRelational = (*pexpr)[0]; CExpression *pexprTopProjectList = (*pexpr)[1]; CLogicalGbAgg *popBottomGbAgg = CLogicalGbAgg::PopConvert(pexprRelational->Pop()); CExpression *pexprChild = (*pexprRelational)[0]; CExpression *pexprBottomProjectList = (*pexprRelational)[1]; if (!popBottomGbAgg->FGlobal()) { // bottom GbAgg must be global to prevent xform from getting applied to splitted GbAggs return; } if (0 < pexprTopProjectList->Arity() || 0 < pexprBottomProjectList->Arity()) { // exit if any of the Gb operators has an aggregate function return; } #ifdef GPOS_DEBUG // for two cascaded GbAgg ops with no agg functions, top grouping // columns must be a subset of bottom grouping columns CColRefSet *pcrsTopGrpCols = GPOS_NEW(mp) CColRefSet(mp, popTopGbAgg->Pdrgpcr()); CColRefSet *pcrsBottomGrpCols = GPOS_NEW(mp) CColRefSet(mp, popBottomGbAgg->Pdrgpcr()); GPOS_ASSERT(pcrsBottomGrpCols->ContainsAll(pcrsTopGrpCols)); pcrsTopGrpCols->Release(); pcrsBottomGrpCols->Release(); #endif // GPOS_DEBUG pexprChild->AddRef(); CExpression *pexprSelect = CUtils::PexprLogicalSelect(mp, pexprChild, CPredicateUtils::PexprConjunction(mp, NULL /*pdrgpexpr*/)); popTopGbAgg->AddRef(); pexprTopProjectList->AddRef(); CExpression *pexprGbAggNew = GPOS_NEW(mp) CExpression(mp, popTopGbAgg, pexprSelect, pexprTopProjectList); pxfres->Add(pexprGbAggNew); }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::FProvidesReqdCols // // @doc: // Check if required columns are included in output columns // //--------------------------------------------------------------------------- BOOL CPhysicalUnionAll::FProvidesReqdCols ( CExpressionHandle & #ifdef GPOS_DEBUG exprhdl #endif // GPOS_DEBUG , CColRefSet *pcrsRequired, ULONG // ulOptReq ) const { GPOS_ASSERT(NULL != pcrsRequired); GPOS_ASSERT(PdrgpdrgpcrInput()->Size() == exprhdl.Arity()); CColRefSet *pcrs = GPOS_NEW(m_mp) CColRefSet(m_mp); // include output columns pcrs->Include(PdrgpcrOutput()); BOOL fProvidesCols = pcrs->ContainsAll(pcrsRequired); pcrs->Release(); return fProvidesCols; }
//--------------------------------------------------------------------------- // @function: // CPhysicalStreamAgg::PosCovering // // @doc: // Construct order spec on grouping column so that it covers required // order spec, the function returns NULL if no covering order spec // can be created // //--------------------------------------------------------------------------- COrderSpec * CPhysicalStreamAgg::PosCovering ( IMemoryPool *mp, COrderSpec *posRequired, CColRefArray *pdrgpcrGrp ) const { GPOS_ASSERT(NULL != posRequired); if (0 == posRequired->UlSortColumns()) { // required order must be non-empty return NULL; } // create a set of required sort columns CColRefSet *pcrsReqd = posRequired->PcrsUsed(mp); COrderSpec *pos = NULL; CColRefSet *pcrsGrpCols = GPOS_NEW(mp) CColRefSet(mp, pdrgpcrGrp); if (pcrsGrpCols->ContainsAll(pcrsReqd)) { // required order columns are included in grouping columns, we can // construct a covering order spec pos = GPOS_NEW(mp) COrderSpec(mp); // extract order expressions from required order const ULONG ulReqdSortCols = posRequired->UlSortColumns(); for (ULONG ul = 0; ul < ulReqdSortCols; ul++) { CColRef *colref = const_cast<CColRef *>(posRequired->Pcr(ul)); IMDId *mdid = posRequired->GetMdIdSortOp(ul); COrderSpec::ENullTreatment ent = posRequired->Ent(ul); mdid->AddRef(); pos->Append(mdid, colref, ent); } // augment order with remaining grouping columns const ULONG size = pdrgpcrGrp->Size(); for (ULONG ul = 0; ul < size; ul++) { CColRef *colref = (*pdrgpcrGrp)[ul]; if (!pcrsReqd->FMember(colref)) { IMDId *mdid = colref->RetrieveType()->GetMdidForCmpType(IMDType::EcmptL); mdid->AddRef(); pos->Append(mdid, colref, COrderSpec::EntLast); } } } pcrsGrpCols->Release(); pcrsReqd->Release(); return pos; }
//--------------------------------------------------------------------------- // @function: // CXformSubqJoin2Apply::CollectSubqueries // // @doc: // Collect subqueries that exclusively use columns from one join child // //--------------------------------------------------------------------------- void CXformSubqJoin2Apply::CollectSubqueries ( IMemoryPool *mp, CExpression *pexpr, CColRefSetArray *pdrgpcrs, CExpressionArrays *pdrgpdrgpexprSubqs // array-of-arrays indexed on join child index. // i^{th} entry is an array corresponding to subqueries collected for join child #i ) { GPOS_CHECK_STACK_SIZE; GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(NULL != pdrgpcrs); GPOS_ASSERT(NULL != pdrgpdrgpexprSubqs); COperator *pop = pexpr->Pop(); if (CUtils::FSubquery(pop)) { // extract outer references below subquery CColRefSet *outer_refs = GPOS_NEW(mp) CColRefSet(mp, *CDrvdPropRelational::GetRelationalProperties((*pexpr)[0]->PdpDerive())->PcrsOuter()); // add columns used by subquery outer_refs->Union(CDrvdPropScalar::GetDrvdScalarProps(pexpr->PdpDerive())->PcrsUsed()); ULONG child_index = gpos::ulong_max; const ULONG size = pdrgpcrs->Size(); for (ULONG ul = 0; ul < size; ul++) { CColRefSet *pcrsOutput = (*pdrgpcrs)[ul]; if (pcrsOutput->ContainsAll(outer_refs)) { // outer columns all come from the same join child, break here child_index = ul; break; } } if (gpos::ulong_max != child_index) { pexpr->AddRef(); (*pdrgpdrgpexprSubqs)[child_index]->Append(pexpr); } outer_refs->Release(); return; } // recursively process children const ULONG arity = pexpr->Arity(); for (ULONG ul = 0; ul < arity; ul++) { CExpression *pexprChild = (*pexpr)[ul]; CollectSubqueries(mp, pexprChild, pdrgpcrs, pdrgpdrgpexprSubqs); } }
//--------------------------------------------------------------------------- // @function: // CPhysical::FUnaryProvidesReqdCols // // @doc: // Helper for checking if output columns of a unary operator that defines // no new columns include the required columns // //--------------------------------------------------------------------------- BOOL CPhysical::FUnaryProvidesReqdCols ( CExpressionHandle &exprhdl, CColRefSet *pcrsRequired ) { GPOS_ASSERT(NULL != pcrsRequired); CColRefSet *pcrsOutput = exprhdl.GetRelationalProperties(0 /*child_index*/)->PcrsOutput(); return pcrsOutput->ContainsAll(pcrsRequired); }
//--------------------------------------------------------------------------- // @function: // CDecorrelator::FProcessAssert // // @doc: // Decorrelate assert operator // //--------------------------------------------------------------------------- BOOL CDecorrelator::FProcessAssert ( IMemoryPool *mp, CExpression *pexpr, BOOL fEqualityOnly, CExpression **ppexprDecorrelated, CExpressionArray *pdrgpexprCorrelations ) { GPOS_ASSERT(NULL != pexpr); COperator *pop = pexpr->Pop(); GPOS_ASSERT(COperator::EopLogicalAssert == pop->Eopid()); CExpression *pexprScalar = (*pexpr)[1]; // fail if assert expression has outer references CColRefSet *pcrsOutput = CDrvdPropRelational::GetRelationalProperties((*pexpr)[0]->PdpDerive())->PcrsOutput(); CColRefSet *pcrsUsed = CDrvdPropScalar::GetDrvdScalarProps(pexprScalar->PdpDerive())->PcrsUsed(); if (!pcrsOutput->ContainsAll(pcrsUsed)) { return false; } // decorrelate relational child CExpression *pexprRelational = NULL; if (!FProcess(mp, (*pexpr)[0], fEqualityOnly, &pexprRelational, pdrgpexprCorrelations)) { GPOS_ASSERT(NULL == pexprRelational); return false; } // assemble new project pop->AddRef(); pexprScalar->AddRef(); *ppexprDecorrelated = GPOS_NEW(mp) CExpression(mp, pop, pexprRelational, pexprScalar); return true; }
//--------------------------------------------------------------------------- // @function: // CPhysicalPartitionSelectorDML::FProvidesReqdCols // // @doc: // Check if required columns are included in output columns // //--------------------------------------------------------------------------- BOOL CPhysicalPartitionSelectorDML::FProvidesReqdCols ( CExpressionHandle &exprhdl, CColRefSet *pcrsRequired, ULONG // ulOptReq ) const { GPOS_ASSERT(NULL != pcrsRequired); GPOS_ASSERT(1 == exprhdl.Arity()); CColRefSet *pcrs = GPOS_NEW(m_mp) CColRefSet(m_mp); // include the defined oid column pcrs->Include(m_pcrOid); // include output columns of the relational child pcrs->Union(exprhdl.GetRelationalProperties(0 /*child_index*/)->PcrsOutput()); BOOL fProvidesCols = pcrs->ContainsAll(pcrsRequired); pcrs->Release(); return fProvidesCols; }
//--------------------------------------------------------------------------- // @function: // CXformSubqJoin2Apply::Transform // // @doc: // Helper of transformation function // //--------------------------------------------------------------------------- void CXformSubqJoin2Apply::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr, BOOL fEnforceCorrelatedApply ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *mp = pxfctxt->Pmp(); CExpression *pexprSelect = CXformUtils::PexprSeparateSubqueryPreds(mp, pexpr); // attempt pushing subqueries to join children, // this optimization may not always succeed since unnested subqueries below joins // could hide columns needed to evaluate join condition CExpression *pexprSubqsPushedDown = PexprSubqueryPushDown(mp, pexprSelect, fEnforceCorrelatedApply); // check if join columns in join condition are still accessible after subquery pushdown CExpression *pexprJoin = (*pexprSubqsPushedDown)[0]; CExpression *pexprJoinCondition = (*pexprJoin)[pexprJoin->Arity() - 1]; CColRefSet *pcrsUsed = CDrvdPropScalar::GetDrvdScalarProps(pexprJoinCondition->PdpDerive())->PcrsUsed(); CColRefSet *pcrsJoinOutput = CDrvdPropRelational::GetRelationalProperties(pexprJoin->PdpDerive())->PcrsOutput(); if (!pcrsJoinOutput->ContainsAll(pcrsUsed)) { // discard expression after subquery push down pexprSubqsPushedDown->Release(); pexprSelect->AddRef(); pexprSubqsPushedDown = pexprSelect; } pexprSelect->Release(); CExpression *pexprResult = NULL; BOOL fHasSubquery = CDrvdPropScalar::GetDrvdScalarProps((*pexprSubqsPushedDown)[1]->PdpDerive())->FHasSubquery(); if (fHasSubquery) { // unnest subqueries remaining in the top Select expression pexprResult = PexprSubqueryUnnest(mp, pexprSubqsPushedDown, fEnforceCorrelatedApply); pexprSubqsPushedDown->Release(); } else { pexprResult = pexprSubqsPushedDown; } if (NULL == pexprResult) { // unnesting failed, return here return; } // normalize resulting expression and add it to xform results container CExpression *pexprNormalized = CNormalizer::PexprNormalize(mp, pexprResult); pexprResult->Release(); pxfres->Add(pexprNormalized); }
// results: // decorrelated expression, ppexprDecorrelated // +--CLogicalSequenceProject // |--CLogicalGet "b" ("b"), Columns: [...] // +--CScalarProjectList origin: [Grp:8, GrpExpr:0] // +--CScalarProjectElement "avg" (18) origin: [Grp:7, GrpExpr:0] // +--CScalarWindowFunc (avg , Agg: true , Distinct: false , StarArgument: false , SimpleAgg: true) origin: [Grp:6, GrpExpr:0] // +--CScalarIdent "i" (9) origin: [Grp:3, GrpExpr:0] // array of quals // pdrgpexprCorrelations // +--CScalarCmp (=) origin: [Grp:4, GrpExpr:0] // |--CScalarIdent "i" (0) origin: [Grp:2, GrpExpr:0] // +--CScalarIdent "i" (9) origin: [Grp:3, GrpExpr:0] // clang-format on BOOL CDecorrelator::FProcessProject ( IMemoryPool *mp, CExpression *pexpr, BOOL fEqualityOnly, CExpression **ppexprDecorrelated, CExpressionArray *pdrgpexprCorrelations ) { COperator::EOperatorId op_id = pexpr->Pop()->Eopid(); GPOS_ASSERT(COperator::EopLogicalProject == op_id || COperator::EopLogicalSequenceProject == op_id); CExpression *pexprPrjList = (*pexpr)[1]; // fail if project elements have outer references CColRefSet *pcrsOutput = CDrvdPropRelational::GetRelationalProperties((*pexpr)[0]->PdpDerive())->PcrsOutput(); CColRefSet *pcrsUsed = CDrvdPropScalar::GetDrvdScalarProps(pexprPrjList->PdpDerive())->PcrsUsed(); if (!pcrsOutput->ContainsAll(pcrsUsed)) { return false; } if (COperator::EopLogicalSequenceProject == op_id) { (void) pexpr->PdpDerive(); CExpressionHandle exprhdl(mp); exprhdl.Attach(pexpr); exprhdl.DeriveProps(NULL /*pdpctxt*/); // fail decorrelation in the following two cases; // 1. if the LogicalSequenceProject node has local outer references in order by or partition by or window frame // of a window function // ex: select C.j from C where C.i in (select rank() over (order by C.i) from B where B.i=C.i); // 2. if the relational child of LogicalSequenceProject node does not have any aggregate window function // if the project list contains aggregrate on window function, then // we can decorrelate it as the aggregate is performed over a column or count(*). // The IN condition will be translated to a join instead of a correlated plan. // ex: select C.j from C where C.i in (select avg(i) over (partition by B.i) from B where B.i=C.i); // ===> (resulting join condition) b.i = c.i and c.i = avg(i) if (CLogicalSequenceProject::PopConvert(pexpr->Pop())->FHasLocalOuterRefs(exprhdl) || !CUtils::FHasAggWindowFunc(pexprPrjList)) { return false; } } // decorrelate relational child CExpression *pexprRelational = NULL; if (!FProcess(mp, (*pexpr)[0], fEqualityOnly, &pexprRelational, pdrgpexprCorrelations)) { GPOS_ASSERT(NULL == pexprRelational); return false; } // assemble new project COperator *pop = pexpr->Pop(); pop->AddRef(); pexprPrjList->AddRef(); *ppexprDecorrelated = GPOS_NEW(mp) CExpression(mp, pop, pexprRelational, pexprPrjList); return true; }