//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::AssertValidChildDistributions // // @doc: // Helper to validate child distributions // //--------------------------------------------------------------------------- void CPhysicalUnionAll::AssertValidChildDistributions ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDistributionSpec::EDistributionType *pedt, // array of distribution types to check ULONG ulDistrs, // number of distribution types to check const CHAR *szAssertMsg ) { const ULONG ulArity = exprhdl.UlArity(); for (ULONG ulChild = 0; ulChild < ulArity; ulChild++) { CDistributionSpec *pdsChild = exprhdl.Pdpplan(ulChild)->Pds(); CDistributionSpec::EDistributionType edtChild = pdsChild->Edt(); BOOL fMatch = false; for (ULONG ulDistr = 0; !fMatch && ulDistr < ulDistrs; ulDistr++) { fMatch = (pedt[ulDistr] == edtChild); } if (!fMatch) { CAutoTrace at(pmp); at.Os() << szAssertMsg; } GPOS_ASSERT(fMatch); } }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdsDerive // // @doc: // Derive distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalUnionAll::PdsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl ) const { CDistributionSpecHashed *pdshashed = PdshashedDerive(pmp, exprhdl); if (NULL != pdshashed) { return pdshashed; } CDistributionSpec *pds = PdsDeriveFromChildren(pmp, exprhdl); if (NULL != pds) { // succeeded in deriving output distribution from child distributions pds->AddRef(); return pds; } // output has unknown distribution on all segments return GPOS_NEW(pmp) CDistributionSpecRandom(); }
//--------------------------------------------------------------------------- // @function: // CPhysicalHashJoin::PdsRequiredSingleton // // @doc: // Create (singleton, singleton) optimization request // // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalHashJoin::PdsRequiredSingleton ( IMemoryPool *pmp, CExpressionHandle &, // exprhdl CDistributionSpec *, // pdsInput ULONG ulChildIndex, DrgPdp *pdrgpdpCtxt ) const { if (FFirstChildToOptimize(ulChildIndex)) { // require first child to be singleton return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } // require a matching distribution from second child GPOS_ASSERT(NULL != pdrgpdpCtxt); CDistributionSpec *pdsFirst = CDrvdPropPlan::Pdpplan((*pdrgpdpCtxt)[0])->Pds(); GPOS_ASSERT(NULL != pdsFirst); if (CDistributionSpec::EdtUniversal == pdsFirst->Edt()) { // first child is universal, request second child to execute on the master to avoid duplicates return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } GPOS_ASSERT(CDistributionSpec::EdtSingleton == pdsFirst->Edt() || CDistributionSpec::EdtStrictSingleton == pdsFirst->Edt()); // require second child to have matching singleton distribution return CPhysical::PdssMatching(pmp, CDistributionSpecSingleton::PdssConvert(pdsFirst)); }
//--------------------------------------------------------------------------- // @function: // CLogicalSequenceProject::PopRemoveLocalOuterRefs // // @doc: // Filter out outer references from Order By/ Partition By // clauses, and return a new operator // //--------------------------------------------------------------------------- CLogicalSequenceProject * CLogicalSequenceProject::PopRemoveLocalOuterRefs ( IMemoryPool *mp, CExpressionHandle &exprhdl ) { GPOS_ASSERT(this == exprhdl.Pop()); CColRefSet *outer_refs = exprhdl.GetRelationalProperties()->PcrsOuter(); CDistributionSpec *pds = m_pds; if (CDistributionSpec::EdtHashed == m_pds->Edt()) { pds = CDistributionSpecHashed::PdsConvert(m_pds)->PdshashedExcludeColumns(mp, outer_refs); if (NULL == pds) { // if the hashed distribution spec is empty after excluding the outer ref columns, // eliminate Partition clause by creating a singleton spec pds = GPOS_NEW(mp) CDistributionSpecSingleton(); } } else { pds->AddRef(); } COrderSpecArray *pdrgpos = COrderSpec::PdrgposExclude(mp, m_pdrgpos, outer_refs); // for window frame edges, outer references cannot be removed since this can change // the semantics of frame edge from delayed-bounding to unbounded, // we re-use the frame edges without changing here m_pdrgpwf->AddRef(); return GPOS_NEW(mp) CLogicalSequenceProject(mp, pds, pdrgpos, m_pdrgpwf); }
//--------------------------------------------------------------------------- // @function: // CNormalizer::FPushableThruSeqPrjChild // // @doc: // Check if a predicate can be pushed through the child of a sequence // project expression // //--------------------------------------------------------------------------- BOOL CNormalizer::FPushableThruSeqPrjChild ( CExpression *pexprSeqPrj, CExpression *pexprPred ) { GPOS_ASSERT(NULL != pexprSeqPrj); GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(CLogical::EopLogicalSequenceProject == pexprSeqPrj->Pop()->Eopid()); CDistributionSpec *pds = CLogicalSequenceProject::PopConvert(pexprSeqPrj->Pop())->Pds(); BOOL fPushable = false; if (CDistributionSpec::EdtHashed == pds->Edt()) { CAutoMemoryPool amp; IMemoryPool *pmp = amp.Pmp(); CColRefSet *pcrsUsed = CDrvdPropScalar::Pdpscalar(pexprPred->PdpDerive())->PcrsUsed(); CColRefSet *pcrsPartCols = CUtils::PcrsExtractColumns(pmp, CDistributionSpecHashed::PdsConvert(pds)->Pdrgpexpr()); if (pcrsPartCols->FSubset(pcrsUsed)) { // predicate is pushable if used columns are included in partition-by expression fPushable = true; } pcrsPartCols->Release(); } return fPushable; }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdsDerive // // @doc: // Derive distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalUnionAll::PdsDerive ( IMemoryPool *mp, CExpressionHandle &exprhdl ) const { CDistributionSpecHashed *pdshashed = PdshashedDerive(mp, exprhdl); if (NULL != pdshashed) { return pdshashed; } CDistributionSpec *pds = PdsDeriveFromChildren(mp, exprhdl); if (NULL != pds) { // succeeded in deriving output distribution from child distributions pds->AddRef(); return pds; } // derive strict random spec, if parallel union all enforces strict random CDistributionSpecRandom *random_dist_spec = PdsStrictRandomParallelUnionAllChildren(mp, exprhdl); if (NULL != random_dist_spec) { return random_dist_spec; } // output has unknown distribution on all segments return GPOS_NEW(mp) CDistributionSpecRandom(); }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdshashedDerive // // @doc: // Derive hashed distribution from child hashed distributions // //--------------------------------------------------------------------------- CDistributionSpecHashed * CPhysicalUnionAll::PdshashedDerive ( IMemoryPool *mp, CExpressionHandle &exprhdl ) const { BOOL fSuccess = true; const ULONG arity = exprhdl.Arity(); // (1) check that all children deliver a hashed distribution that satisfies their input columns for (ULONG ulChild = 0; fSuccess && ulChild < arity; ulChild++) { CDistributionSpec *pdsChild = exprhdl.Pdpplan(ulChild)->Pds(); CDistributionSpec::EDistributionType edtChild = pdsChild->Edt(); fSuccess = (CDistributionSpec::EdtHashed == edtChild || CDistributionSpec::EdtHashedNoOp == edtChild || CDistributionSpec::EdtStrictHashed == edtChild) && pdsChild->FSatisfies((*m_pdrgpds)[ulChild]); } if (!fSuccess) { // a child does not deliver hashed distribution return NULL; } // (2) check that child hashed distributions map to the same output columns // map outer child hashed distribution to corresponding UnionAll column positions ULongPtrArray *pdrgpulOuter = PdrgpulMap(mp, CDistributionSpecHashed::PdsConvert(exprhdl.Pdpplan(0 /*child_index*/)->Pds())->Pdrgpexpr(), 0/*child_index*/); if (NULL == pdrgpulOuter) { return NULL; } ULongPtrArray *pdrgpulChild = NULL; for (ULONG ulChild = 1; fSuccess && ulChild < arity; ulChild++) { pdrgpulChild = PdrgpulMap(mp, CDistributionSpecHashed::PdsConvert(exprhdl.Pdpplan(ulChild)->Pds())->Pdrgpexpr(), ulChild); // match mapped column positions of current child with outer child fSuccess = (NULL != pdrgpulChild) && Equals(pdrgpulOuter, pdrgpulChild); CRefCount::SafeRelease(pdrgpulChild); } CDistributionSpecHashed *pdsOutput = NULL; if (fSuccess) { pdsOutput = PdsMatching(mp, pdrgpulOuter); } pdrgpulOuter->Release(); return pdsOutput; }
//--------------------------------------------------------------------------- // @function: // CPhysicalJoin::PdsRequired // // @doc: // Compute required distribution of the n-th child; // this function creates a request for ANY distribution on the outer // child, then matches the delivered distribution on the inner child, // this results in sending either a broadcast or singleton distribution // requests to the inner child // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalJoin::PdsRequired ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDistributionSpec *pdsRequired, ULONG ulChildIndex, DrgPdp *pdrgpdpCtxt, ULONG // ulOptReq ) const { GPOS_ASSERT(2 > ulChildIndex); // if expression has to execute on master then we need a gather if (exprhdl.FMasterOnly()) { return PdsEnforceMaster(pmp, exprhdl, pdsRequired, ulChildIndex); } if (exprhdl.FHasOuterRefs()) { if (CDistributionSpec::EdtSingleton == pdsRequired->Edt() || CDistributionSpec::EdtReplicated == pdsRequired->Edt()) { return PdsPassThru(pmp, exprhdl, pdsRequired, ulChildIndex); } return GPOS_NEW(pmp) CDistributionSpecReplicated(); } if (1 == ulChildIndex) { // compute a matching distribution based on derived distribution of outer child CDistributionSpec *pdsOuter = CDrvdPropPlan::Pdpplan((*pdrgpdpCtxt)[0])->Pds(); if (CDistributionSpec::EdtUniversal == pdsOuter->Edt()) { // first child is universal, request second child to execute on the master to avoid duplicates return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } if (CDistributionSpec::EdtSingleton == pdsOuter->Edt() || CDistributionSpec::EdtStrictSingleton == pdsOuter->Edt()) { // require inner child to have matching singleton distribution return CPhysical::PdssMatching(pmp, CDistributionSpecSingleton::PdssConvert(pdsOuter)); } // otherwise, require inner child to be replicated return GPOS_NEW(pmp) CDistributionSpecReplicated(); } // no distribution requirement on the outer side return GPOS_NEW(pmp) CDistributionSpecAny(this->Eopid()); }
//--------------------------------------------------------------------------- // @function: // CPhysical::PdsDerivePassThruOuter // // @doc: // Helper for common case of distribution derivation // //--------------------------------------------------------------------------- CDistributionSpec * CPhysical::PdsDerivePassThruOuter ( CExpressionHandle &exprhdl ) { CDistributionSpec *pds = exprhdl.Pdpplan(0 /*child_index*/)->Pds(); pds->AddRef(); return pds; }
//--------------------------------------------------------------------------- // @function: // CPhysicalHashJoin::PdsRequiredReplicate // // @doc: // Create (hashed/non-singleton, broadcast) optimization request // // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalHashJoin::PdsRequiredReplicate ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDistributionSpec *pdsInput, ULONG ulChildIndex, DrgPdp *pdrgpdpCtxt, ULONG ulOptReq ) const { EChildExecOrder eceo = Eceo(); if (EceoLeftToRight == eceo) { // if optimization order is left to right, fall-back to implementation of parent Join operator return CPhysicalJoin::PdsRequired(pmp, exprhdl, pdsInput, ulChildIndex, pdrgpdpCtxt, 0 /*ulOptReq*/); } GPOS_ASSERT(EceoRightToLeft == eceo); if (1 == ulChildIndex) { // require inner child to be replicated return GPOS_NEW(pmp) CDistributionSpecReplicated(); } GPOS_ASSERT(0 == ulChildIndex); // require a matching distribution from outer child CDistributionSpec *pdsInner = CDrvdPropPlan::Pdpplan((*pdrgpdpCtxt)[0])->Pds(); GPOS_ASSERT(NULL != pdsInner); if (CDistributionSpec::EdtUniversal == pdsInner->Edt()) { // first child is universal, request second child to execute on the master to avoid duplicates return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } if (ulOptReq == m_pdrgpdsRedistributeRequests->UlLength() && CDistributionSpec::EdtHashed == pdsInput->Edt()) { // attempt to propagate hashed request to child CDistributionSpecHashed *pdshashed = PdshashedPassThru(pmp, exprhdl, CDistributionSpecHashed::PdsConvert(pdsInput), ulChildIndex, pdrgpdpCtxt, ulOptReq); if (NULL != pdshashed) { return pdshashed; } } // otherwise, require second child to deliver non-singleton distribution GPOS_ASSERT(CDistributionSpec::EdtReplicated == pdsInner->Edt()); return GPOS_NEW(pmp) CDistributionSpecNonSingleton(); }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdsDeriveFromChildren // // @doc: // Derive output distribution based on child distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalUnionAll::PdsDeriveFromChildren ( IMemoryPool * #ifdef GPOS_DEBUG pmp #endif // GPOS_DEBUG , CExpressionHandle &exprhdl ) const { const ULONG ulArity = exprhdl.UlArity(); CDistributionSpec *pdsOuter = exprhdl.Pdpplan(0 /*ulChildIndex*/)->Pds(); CDistributionSpec *pds = pdsOuter; BOOL fUniversalOuterChild = (CDistributionSpec::EdtUniversal == pdsOuter->Edt()); BOOL fSingletonChild = false; BOOL fReplicatedChild = false; for (ULONG ul = 0; ul < ulArity; ul++) { CDistributionSpec *pdsChild = exprhdl.Pdpplan(ul /*ulChildIndex*/)->Pds(); CDistributionSpec::EDistributionType edtChild = pdsChild->Edt(); if (CDistributionSpec::EdtSingleton == edtChild || CDistributionSpec::EdtStrictSingleton == edtChild) { fSingletonChild = true; pds = pdsChild; break; } if (CDistributionSpec::EdtReplicated == edtChild) { fReplicatedChild = true; pds = pdsChild; break; } } #ifdef GPOS_DEBUG CheckChildDistributions(pmp, exprhdl, fSingletonChild, fReplicatedChild, fUniversalOuterChild); #endif // GPOS_DEBUG if (!(fSingletonChild || fReplicatedChild || fUniversalOuterChild)) { // failed to derive distribution from children pds = NULL; } return pds; }
//--------------------------------------------------------------------------- // @function: // CPhysicalInnerNLJoin::PdsRequired // // @doc: // Compute required distribution of the n-th child; // this function creates two distribution requests: // // (0) Outer child is requested for ANY distribution, and inner child is // requested for a Replicated (or a matching) distribution, // this request is created by calling CPhysicalJoin::PdsRequired() // // (1) Outer child is requested for Replicated distribution, and inner child // is requested for Non-Singleton (or Singleton if outer delivered Universal distribution) // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalInnerNLJoin::PdsRequired ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDistributionSpec *pdsRequired, ULONG ulChildIndex, DrgPdp *pdrgpdpCtxt, ULONG ulOptReq ) const { GPOS_ASSERT(2 > ulChildIndex); GPOS_ASSERT(ulOptReq < UlDistrRequests()); // if expression has to execute on master then we need a gather if (exprhdl.FMasterOnly()) { return PdsEnforceMaster(pmp, exprhdl, pdsRequired, ulChildIndex); } if (exprhdl.FHasOuterRefs()) { if (CDistributionSpec::EdtSingleton == pdsRequired->Edt() || CDistributionSpec::EdtReplicated == pdsRequired->Edt()) { return PdsPassThru(pmp, exprhdl, pdsRequired, ulChildIndex); } return GPOS_NEW(pmp) CDistributionSpecReplicated(); } if (GPOS_FTRACE(EopttraceDisableReplicateInnerNLJOuterChild) || 0 == ulOptReq) { return CPhysicalJoin::PdsRequired(pmp, exprhdl, pdsRequired, ulChildIndex, pdrgpdpCtxt, ulOptReq); } GPOS_ASSERT(1 == ulOptReq); if (0 == ulChildIndex) { return GPOS_NEW(pmp) CDistributionSpecReplicated(); } // compute a matching distribution based on derived distribution of outer child CDistributionSpec *pdsOuter = CDrvdPropPlan::Pdpplan((*pdrgpdpCtxt)[0])->Pds(); if (CDistributionSpec::EdtUniversal == pdsOuter->Edt()) { // first child is universal, request second child to execute on the master to avoid duplicates return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } return GPOS_NEW(pmp) CDistributionSpecNonSingleton(); }
//--------------------------------------------------------------------------- // @function: // CPhysicalMotion::PdsDerive // // @doc: // Derive distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalMotion::PdsDerive ( IMemoryPool */*pmp*/, CExpressionHandle &/*exprhdl*/ ) const { CDistributionSpec *pds = Pds(); pds->AddRef(); return pds; }
//--------------------------------------------------------------------------- // @function: // CPhysicalHashJoin::PdshashedRequired // // @doc: // Compute required hashed distribution of the n-th child // //--------------------------------------------------------------------------- CDistributionSpecHashed * CPhysicalHashJoin::PdshashedRequired ( IMemoryPool *, // pmp ULONG, // ulChildIndex ULONG ulReqIndex ) const { GPOS_ASSERT(ulReqIndex < m_pdrgpdsRedistributeRequests->UlLength()); CDistributionSpec *pds = (*m_pdrgpdsRedistributeRequests)[ulReqIndex]; pds->AddRef(); return CDistributionSpecHashed::PdsConvert(pds); }
//--------------------------------------------------------------------------- // @function: // CCostContext::DRowsPerHost // // @doc: // Return the number of rows per host // //--------------------------------------------------------------------------- CDouble CCostContext::DRowsPerHost() const { DOUBLE dRows = Pstats()->DRows().DVal(); COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS(); const ULONG ulHosts = poptctxt->Pcm()->UlHosts(); CDistributionSpec *pds = Pdpplan()->Pds(); if (CDistributionSpec::EdtHashed == pds->Edt()) { CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(pds); DrgPexpr *pdrgpexpr = pdshashed->Pdrgpexpr(); CColRefSet *pcrsUsed = CUtils::PcrsExtractColumns(m_pmp, pdrgpexpr); const CColRefSet *pcrsReqdStats = this->Poc()->Prprel()->PcrsStat(); if (!pcrsReqdStats->FSubset(pcrsUsed)) { // statistics not available for distribution columns, therefore // assume uniform distribution across hosts // clean up pcrsUsed->Release(); return CDouble(dRows / ulHosts); } DrgPul *pdrgpul = GPOS_NEW(m_pmp) DrgPul(m_pmp); pcrsUsed->ExtractColIds(m_pmp, pdrgpul); pcrsUsed->Release(); CStatisticsConfig *pstatsconf = poptctxt->Poconf()->Pstatsconf(); CDouble dNDVs = CStatisticsUtils::DGroups(m_pmp, Pstats(), pstatsconf, pdrgpul, NULL /*pbsKeys*/); pdrgpul->Release(); if (dNDVs < ulHosts) { // estimated number of distinct values of distribution columns is smaller than number of hosts. // We assume data is distributed across a subset of hosts in this case. This results in a larger // number of rows per host compared to the uniform case, allowing us to capture data skew in // cost computation return CDouble(dRows / dNDVs.DVal()); } } return CDouble(dRows / ulHosts); }
//--------------------------------------------------------------------------- // @function: // CPhysicalSequence::PdsDerive // // @doc: // Derive distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalSequence::PdsDerive ( IMemoryPool *, // pmp, CExpressionHandle &exprhdl ) const { // pass through distribution from last child const ULONG ulArity = exprhdl.UlArity(); GPOS_ASSERT(1 <= ulArity); CDistributionSpec *pds = exprhdl.Pdpplan(ulArity - 1 /*ulChildIndex*/)->Pds(); pds->AddRef(); return pds; }
//--------------------------------------------------------------------------- // @function: // CPhysicalJoin::PdsRequiredCorrelatedJoin // // @doc: // Helper to compute required distribution of correlated join's children // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalJoin::PdsRequiredCorrelatedJoin ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDistributionSpec *pdsRequired, ULONG ulChildIndex, DrgPdp *pdrgpdpCtxt, ULONG ulOptReq ) const { GPOS_ASSERT(3 == exprhdl.UlArity()); GPOS_ASSERT(2 > ulChildIndex); GPOS_ASSERT(CUtils::FCorrelatedNLJoin(exprhdl.Pop())); if (0 == ulOptReq && pdsRequired->FSingletonOrStrictSingleton()) { // propagate Singleton request to children to comply with // correlated execution requirements return PdsPassThru(pmp, exprhdl, pdsRequired, ulChildIndex); } if (exprhdl.PfpChild(1)->FHasVolatileFunctionScan() && exprhdl.FHasOuterRefs(1)) { // if the inner child has a volatile TVF and has outer refs then request // gather from both children return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } if (1 == ulChildIndex) { CDistributionSpec *pdsOuter = CDrvdPropPlan::Pdpplan((*pdrgpdpCtxt)[0])->Pds(); if (CDistributionSpec::EdtUniversal == pdsOuter->Edt()) { // if outer child delivers a universal distribution, request inner child // to match Singleton distribution to detect more than one row generated // at runtime, for example: 'select (select 1 union select 2)' return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } } return CPhysicalJoin::PdsRequired(pmp, exprhdl, pdsRequired, ulChildIndex, pdrgpdpCtxt, ulOptReq); }
//--------------------------------------------------------------------------- // @function: // CPhysicalComputeScalar::PdsDerive // // @doc: // Derive distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalComputeScalar::PdsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl ) const { CDistributionSpec *pds = exprhdl.Pdpplan(0 /*ulChildIndex*/)->Pds(); if (CDistributionSpec::EdtUniversal == pds->Edt() && IMDFunction::EfsVolatile == exprhdl.Pdpscalar(1 /*ulChildIndex*/)->Pfp()->Efs()) { return GPOS_NEW(pmp) CDistributionSpecStrictSingleton(CDistributionSpecSingleton::EstMaster); } pds->AddRef(); return pds; }
//--------------------------------------------------------------------------- // @function: // CPhysicalInnerHashJoin::PdsDerive // // @doc: // Derive distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalInnerHashJoin::PdsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl ) const { CDistributionSpec *pdsOuter = exprhdl.Pdpplan(0 /*ulChildIndex*/)->Pds(); CDistributionSpec *pdsInner = exprhdl.Pdpplan(1 /*ulChildIndex*/)->Pds(); if (CDistributionSpec::EdtUniversal == pdsOuter->Edt()) { // if outer is universal, pass through inner distribution pdsInner->AddRef(); return pdsInner; } if (CDistributionSpec::EdtHashed == pdsOuter->Edt() && CDistributionSpec::EdtHashed == pdsInner->Edt()) { CDistributionSpec *pdsDerived = PdsDeriveFromHashedChildren(pmp, pdsOuter, pdsInner); if (NULL != pdsDerived) { return pdsDerived; } } if (CDistributionSpec::EdtReplicated == pdsOuter->Edt()) { return PdsDeriveFromReplicatedOuter(pmp, pdsOuter, pdsInner); } if (CDistributionSpec::EdtHashed == pdsOuter->Edt()) { CDistributionSpec *pdsDerived = PdsDeriveFromHashedOuter(pmp, pdsOuter, pdsInner); if (NULL != pdsDerived) { return pdsDerived; } } // otherwise, pass through outer distribution pdsOuter->AddRef(); return pdsOuter; }
//--------------------------------------------------------------------------- // @function: // CPhysicalJoin::PdsDerive // // @doc: // Derive distribution // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalJoin::PdsDerive ( IMemoryPool *, // pmp, CExpressionHandle &exprhdl ) const { CDistributionSpec *pdsOuter = exprhdl.Pdpplan(0 /*ulChildIndex*/)->Pds(); CDistributionSpec *pdsInner = exprhdl.Pdpplan(1 /*ulChildIndex*/)->Pds(); if (CDistributionSpec::EdtReplicated == pdsOuter->Edt() || CDistributionSpec::EdtUniversal == pdsOuter->Edt()) { // if outer is replicated/universal, return inner distribution pdsInner->AddRef(); return pdsInner; } // otherwise, return outer distribution pdsOuter->AddRef(); return pdsOuter; }
//--------------------------------------------------------------------------- // @function: // CPhysicalSequence::PdsRequired // // @doc: // Compute required distribution of the n-th child // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalSequence::PdsRequired ( IMemoryPool *pmp, CExpressionHandle & #ifdef GPOS_DEBUG exprhdl #endif // GPOS_DEBUG , CDistributionSpec *pdsRequired, ULONG ulChildIndex, DrgPdp *pdrgpdpCtxt, ULONG ulOptReq ) const { GPOS_ASSERT(2 == exprhdl.UlArity()); GPOS_ASSERT(ulChildIndex < exprhdl.UlArity()); GPOS_ASSERT(ulOptReq < UlDistrRequests()); if (0 == ulOptReq) { if (CDistributionSpec::EdtSingleton == pdsRequired->Edt() || CDistributionSpec::EdtStrictSingleton == pdsRequired->Edt()) { // incoming request is a singleton, request singleton on all children CDistributionSpecSingleton *pdss = CDistributionSpecSingleton::PdssConvert(pdsRequired); return GPOS_NEW(pmp) CDistributionSpecSingleton(pdss->Est()); } // incoming request is a non-singleton, request non-singleton on all children return GPOS_NEW(pmp) CDistributionSpecNonSingleton(); } GPOS_ASSERT(1 == ulOptReq); if (0 == ulChildIndex) { // no distribution requirement on first child return GPOS_NEW(pmp) CDistributionSpecAny(this->Eopid()); } // get derived plan properties of first child CDrvdPropPlan *pdpplan = CDrvdPropPlan::Pdpplan((*pdrgpdpCtxt)[0]); CDistributionSpec *pds = pdpplan->Pds(); if (pds->FSingletonOrStrictSingleton()) { // first child is singleton, request singleton distribution on second child CDistributionSpecSingleton *pdss = CDistributionSpecSingleton::PdssConvert(pds); return GPOS_NEW(pmp) CDistributionSpecSingleton(pdss->Est()); } if (CDistributionSpec::EdtUniversal == pds->Edt()) { // first child is universal, impose no requirements on second child return GPOS_NEW(pmp) CDistributionSpecAny(this->Eopid()); } // first child is non-singleton, request a non-singleton distribution on second child return GPOS_NEW(pmp) CDistributionSpecNonSingleton(); }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdsRequired // // @doc: // Compute required distribution of the n-th child // //--------------------------------------------------------------------------- CDistributionSpec * CPhysicalUnionAll::PdsRequired ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDistributionSpec *pdsRequired, ULONG ulChildIndex, DrgPdp *pdrgpdpCtxt, ULONG ulOptReq ) const { GPOS_ASSERT(NULL != m_pdrgpdrgpcrInput); GPOS_ASSERT(ulChildIndex < m_pdrgpdrgpcrInput->UlLength()); GPOS_ASSERT(2 > ulOptReq); CDistributionSpec *pds = PdsMasterOnlyOrReplicated(pmp, exprhdl, pdsRequired, ulChildIndex, ulOptReq); if (NULL != pds) { return pds; } if (0 == ulOptReq && CDistributionSpec::EdtHashed == pdsRequired->Edt()) { // attempt passing requested hashed distribution to children CDistributionSpecHashed *pdshashed = PdshashedPassThru(pmp, CDistributionSpecHashed::PdsConvert(pdsRequired), ulChildIndex); if (NULL != pdshashed) { return pdshashed; } } if (0 == ulChildIndex) { // otherwise, ANY distribution is requested from outer child return GPOS_NEW(pmp) CDistributionSpecAny(); } // inspect distribution delivered by outer child CDistributionSpec *pdsOuter = CDrvdPropPlan::Pdpplan((*pdrgpdpCtxt)[0])->Pds(); if (CDistributionSpec::EdtSingleton == pdsOuter->Edt() || CDistributionSpec::EdtStrictSingleton == pdsOuter->Edt()) { // outer child is Singleton, require inner child to have matching Singleton distribution return CPhysical::PdssMatching(pmp, CDistributionSpecSingleton::PdssConvert(pdsOuter)); } if (CDistributionSpec::EdtUniversal == pdsOuter->Edt()) { // require inner child to be on the master segment in order to avoid // duplicate values when doing UnionAll operation with Universal outer child // Example: select 1 union all select i from x; return GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } if (CDistributionSpec::EdtReplicated == pdsOuter->Edt()) { // outer child is replicated, require inner child to be replicated return GPOS_NEW(pmp) CDistributionSpecReplicated(); } // outer child is non-replicated and is distributed across segments, // we need to the inner child to be distributed across segments that does // not generate duplicate results. That is, inner child should not be replicated. return GPOS_NEW(pmp) CDistributionSpecNonSingleton(false /*fAllowReplicated*/); }