//--------------------------------------------------------------------------- // @function: // CPhysicalComputeScalar::EpetRewindability // // @doc: // Return the enforcing type for rewindability property based on this operator // //--------------------------------------------------------------------------- CEnfdProp::EPropEnforcingType CPhysicalComputeScalar::EpetRewindability ( CExpressionHandle &exprhdl, const CEnfdRewindability *per ) const { CColRefSet *pcrsUsed = exprhdl.Pdpscalar(1 /*ulChidIndex*/)->PcrsUsed(); CColRefSet *pcrsCorrelatedApply = exprhdl.Pdprel()->PcrsCorrelatedApply(); if (!pcrsUsed->FDisjoint(pcrsCorrelatedApply)) { // columns are used from inner children of correlated-apply expressions, // this means that a subplan occurs below the Project operator, // in this case, rewindability needs to be enforced on operator's output return CEnfdProp::EpetRequired; } CRewindabilitySpec *prs = CDrvdPropPlan::Pdpplan(exprhdl.Pdp())->Prs(); if (per->FCompatible(prs)) { // required distribution is already provided return CEnfdProp::EpetUnnecessary; } // rewindability is enforced on operator's output return CEnfdProp::EpetRequired; }
//--------------------------------------------------------------------------- // @function: // CDecorrelator::FDelayable // // @doc: // Check if predicate can be delayed // //--------------------------------------------------------------------------- BOOL CDecorrelator::FDelayable ( CExpression *pexprLogical, // logical parent of predicate tree CExpression *pexprScalar, BOOL fEqualityOnly ) { GPOS_CHECK_STACK_SIZE; GPOS_ASSERT(NULL != pexprLogical); GPOS_ASSERT(pexprLogical->Pop()->FLogical()); GPOS_ASSERT(NULL != pexprScalar); GPOS_ASSERT(pexprScalar->Pop()->FScalar()); BOOL fDelay = true; COperator::EOperatorId eopid = pexprLogical->Pop()->Eopid(); if (COperator::EopLogicalLeftSemiJoin == eopid || COperator::EopLogicalLeftAntiSemiJoin == eopid) { // for semi-joins, we disallow predicates referring to inner child to be pulled above the join CColRefSet *pcrsUsed = CDrvdPropScalar::Pdpscalar(pexprScalar->PdpDerive())->PcrsUsed(); CColRefSet *pcrsInner = CDrvdPropRelational::Pdprel((*pexprLogical)[1]->PdpDerive())->PcrsOutput(); if (!pcrsUsed->FDisjoint(pcrsInner)) { // predicate uses a column produced by semi-join inner child fDelay = false; } } if (fDelay && fEqualityOnly) { // check operator fDelay = FDelayableScalarOp(pexprScalar); } // check its children const ULONG ulArity = pexprScalar->UlArity(); for (ULONG ul = 0; ul < ulArity && fDelay; ul++) { fDelay = FDelayable(pexprLogical, (*pexprScalar)[ul], fEqualityOnly); } return fDelay; }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::PdrgpstatOuterRefs // // @doc: // Given an array of stats objects and a child index, return an array // of stats objects starting from the first stats object referenced by // child // //--------------------------------------------------------------------------- DrgPstat * CExpressionHandle::PdrgpstatOuterRefs ( DrgPstat *pdrgpstat, ULONG ulChildIndex ) const { GPOS_ASSERT(NULL != pdrgpstat); GPOS_ASSERT(ulChildIndex < UlArity()); if (FScalarChild(ulChildIndex) || !FHasOuterRefs(ulChildIndex)) { // if child is scalar or has no outer references, return empty array return GPOS_NEW(m_pmp) DrgPstat(m_pmp); } DrgPstat *pdrgpstatResult = GPOS_NEW(m_pmp) DrgPstat(m_pmp); CColRefSet *pcrsOuter = Pdprel(ulChildIndex)->PcrsOuter(); GPOS_ASSERT(0 < pcrsOuter->CElements()); const ULONG ulSize = pdrgpstat->UlLength(); ULONG ulStartIndex = ULONG_MAX; for (ULONG ul = 0; ul < ulSize; ul++) { IStatistics *pstats = (*pdrgpstat)[ul]; CColRefSet *pcrsStats = pstats->Pcrs(m_pmp); BOOL fStatsColsUsed = !pcrsOuter->FDisjoint(pcrsStats); pcrsStats->Release(); if (fStatsColsUsed) { ulStartIndex = ul; break; } } if (ULONG_MAX != ulStartIndex) { // copy stats starting from index of outer-most stats object referenced by child CUtils::AddRefAppend<IStatistics, CleanupStats>(pdrgpstatResult, pdrgpstat, ulStartIndex); } return pdrgpstatResult; }
//--------------------------------------------------------------------------- // @function: // CLogical::PcrsDeriveNotNullCombineLogical // // @doc: // Common case of combining not null columns from all logical // children // //--------------------------------------------------------------------------- CColRefSet * CLogical::PcrsDeriveNotNullCombineLogical ( IMemoryPool *pmp, CExpressionHandle &exprhdl ) { CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp); // union not nullable columns from the first N-1 children ULONG ulArity = exprhdl.UlArity(); for (ULONG ul = 0; ul < ulArity - 1; ul++) { CColRefSet *pcrsChild = exprhdl.Pdprel(ul)->PcrsNotNull(); GPOS_ASSERT(pcrs->FDisjoint(pcrsChild) && "Input columns are not disjoint"); pcrs->Union(pcrsChild); } return pcrs; }
//--------------------------------------------------------------------------- // @function: // CXformPushDownLeftOuterJoin::Transform // // @doc: // Transform LOJ whose outer child is an NAry-join to be a child // of NAry-join // // Input: // LOJ (a=d) // |---NAry-Join (a=b) and (b=c) // | |--A // | |--B // | +--C // +--D // // Output: // NAry-Join (a=b) and (b=c) // |--B // |--C // +--LOJ (a=d) // |--A // +--D // //--------------------------------------------------------------------------- void CXformPushDownLeftOuterJoin::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(NULL != pxfres); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); CExpression *pexprNAryJoin = (*pexpr)[0]; CExpression *pexprLOJInnerChild = (*pexpr)[1]; CExpression *pexprLOJScalarChild = (*pexpr)[2]; CColRefSet *pcrsLOJUsed = CDrvdPropScalar::Pdpscalar(pexprLOJScalarChild->PdpDerive())->PcrsUsed(); DrgPexpr *pdrgpexprLOJChildren = GPOS_NEW(pmp) DrgPexpr(pmp); DrgPexpr *pdrgpexprNAryJoinChildren = GPOS_NEW(pmp) DrgPexpr(pmp); const ULONG ulArity = pexprNAryJoin->UlArity(); CExpression *pexprNAryJoinScalarChild = (*pexprNAryJoin)[ulArity - 1]; for (ULONG ul = 0 ; ul < ulArity - 1; ul++) { CExpression *pexprChild = (*pexprNAryJoin)[ul]; CColRefSet *pcrsOutput = CDrvdPropRelational::Pdprel(pexprChild->PdpDerive())->PcrsOutput(); pexprChild->AddRef(); if (!pcrsOutput->FDisjoint(pcrsLOJUsed)) { pdrgpexprLOJChildren->Append(pexprChild); } else { pdrgpexprNAryJoinChildren->Append(pexprChild); } } CExpression *pexprLOJOuterChild = (*pdrgpexprLOJChildren)[0]; if (1 < pdrgpexprLOJChildren->UlLength()) { // collect all relations needed by LOJ outer side into a cross product, // normalization at the end of this function takes care of pushing NAry // join predicates down pdrgpexprLOJChildren->Append(CPredicateUtils::PexprConjunction(pmp, NULL 
/*pdrgpexpr*/)); pexprLOJOuterChild = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalNAryJoin(pmp), pdrgpexprLOJChildren); // reconstruct LOJ children and add only the created child pdrgpexprLOJChildren = GPOS_NEW(pmp) DrgPexpr(pmp); pdrgpexprLOJChildren->Append(pexprLOJOuterChild); } // continue with rest of LOJ inner and scalar children pexprLOJInnerChild->AddRef(); pdrgpexprLOJChildren->Append(pexprLOJInnerChild); pexprLOJScalarChild->AddRef(); pdrgpexprLOJChildren->Append(pexprLOJScalarChild); // build new LOJ CExpression *pexprLOJNew = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalLeftOuterJoin(pmp), pdrgpexprLOJChildren); // add new NAry join children pdrgpexprNAryJoinChildren->Append(pexprLOJNew); pexprNAryJoinScalarChild->AddRef(); pdrgpexprNAryJoinChildren->Append(pexprNAryJoinScalarChild); if (3 > pdrgpexprNAryJoinChildren->UlLength()) { // xform must generate a valid NAry-join expression // for example, in the following case we end-up with the same input // expression, which should be avoided: // // Input: // // LOJ (a=c) and (b=c) // |--NAry-Join (a=b) // | |--A // | +--B // +--C // // Output: // // NAry-Join (true) // +--LOJ (a=c) and (b=c) // |--NAry-Join (a=b) // | |--A // | +--B // +--C pdrgpexprNAryJoinChildren->Release(); return; } // create new NAry join CExpression *pexprNAryJoinNew = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalNAryJoin(pmp), pdrgpexprNAryJoinChildren); // normalize resulting expression and add it to xform results CExpression *pexprResult = CNormalizer::PexprNormalize(pmp, pexprNAryJoinNew); pexprNAryJoinNew->Release(); pxfres->Add(pexprResult); }
//---------------------------------------------------------------------------
//	@function:
//		CPhysicalHashJoin::PdshashedPassThru
//
//	@doc:
//		Create a child hashed distribution request based on input hashed
//		distribution,
//		return NULL if no such request can be created
//
//		The incoming request is passed through unchanged (with an added
//		reference) when it uses outer child columns only. When it merely
//		overlaps the outer child columns, a new request restricted to the
//		hashed expressions covered by the outer child output is built.
//		NULL is returned when the corresponding trace flag is off, when
//		the request does not overlap outer child columns, or when no
//		single hashed expression is covered by the outer child output.
//
//---------------------------------------------------------------------------
CDistributionSpecHashed *
CPhysicalHashJoin::PdshashedPassThru
	(
	IMemoryPool *pmp,
	CExpressionHandle &exprhdl,
	CDistributionSpecHashed *pdshashedInput,
	ULONG , // ulChildIndex
	DrgPdp *, // pdrgpdpCtxt
	ULONG
#ifdef GPOS_DEBUG
	ulOptReq
#endif // GPOS_DEBUG
	)
	const
{
	GPOS_ASSERT(ulOptReq == m_pdrgpdsRedistributeRequests->UlLength());
	GPOS_ASSERT(NULL != pdshashedInput);

	if (!GPOS_FTRACE(EopttraceEnableRedistributeBroadcastHashJoin))
	{
		// this option is disabled
		return NULL;
	}

	// since incoming request is hashed, we attempt here to propagate this request to outer child
	CColRefSet *pcrsOuterOutput = exprhdl.Pdprel(0 /*ulChildIndex*/)->PcrsOutput();
	DrgPexpr *pdrgpexprIncomingRequest = pdshashedInput->Pdrgpexpr();
	CColRefSet *pcrsAllUsed = CUtils::PcrsExtractColumns(pmp, pdrgpexprIncomingRequest);
	BOOL fSubset = pcrsOuterOutput->FSubset(pcrsAllUsed);
	BOOL fDisjoint = pcrsOuterOutput->FDisjoint(pcrsAllUsed);
	pcrsAllUsed->Release();

	if (fSubset)
	{
		// incoming request uses columns from outer child only, pass it through
		pdshashedInput->AddRef();
		return pdshashedInput;
	}

	if (fDisjoint)
	{
		// incoming request has nothing in common with outer child columns
		return NULL;
	}

	// incoming request intersects with columns from outer child,
	// we restrict the request to outer child columns only, then we pass it through
	DrgPexpr *pdrgpexprChildRequest = GPOS_NEW(pmp) DrgPexpr(pmp);
	const ULONG ulSize = pdrgpexprIncomingRequest->UlLength();
	for (ULONG ul = 0; ul < ulSize; ul++)
	{
		CExpression *pexpr = (*pdrgpexprIncomingRequest)[ul];
		CColRefSet *pcrsUsed = CDrvdPropScalar::Pdpscalar(pexpr->PdpDerive())->PcrsUsed();
		if (pcrsOuterOutput->FSubset(pcrsUsed))
		{
			// hashed expression uses columns from outer child only, add it to request
			pexpr->AddRef();
			pdrgpexprChildRequest->Append(pexpr);
		}
	}

	if (0 == pdrgpexprChildRequest->UlLength())
	{
		// every hashed expression that touches outer child columns also uses
		// other columns, so nothing can be requested from the outer child;
		// report that no request can be created instead of building a hashed
		// spec without expressions
		pdrgpexprChildRequest->Release();
		return NULL;
	}

	CDistributionSpecHashed *pdshashed = GPOS_NEW(pmp) CDistributionSpecHashed(pdrgpexprChildRequest, pdshashedInput->FNullsColocated());

	// since the other child of the join is replicated, we need to enforce hashed-distribution across segments here
	pdshashed->MarkUnsatisfiableBySingleton();

	return pdshashed;
}