//--------------------------------------------------------------------------- // @function: // CLogicalUnion::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalUnion::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); // union is transformed into a group by over an union all // we follow the same route to compute statistics IStatistics *pstatsUnionAll = CLogicalUnionAll::PstatsDeriveUnionAll(pmp, exprhdl); // computed columns DrgPul *pdrgpulComputedCols = GPOS_NEW(pmp) DrgPul(pmp); IStatistics *pstats = CLogicalGbAgg::PstatsDerive ( pmp, pstatsUnionAll, m_pdrgpcrOutput, // we group by the output columns pdrgpulComputedCols, // no computed columns for set ops NULL // no keys, use all grouping cols ); // clean up pdrgpulComputedCols->Release(); pstatsUnionAll->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CLogicalGbAggDeduplicate::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalGbAggDeduplicate::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); IStatistics *pstatsChild = exprhdl.Pstats(0); // extract computed columns DrgPul *pdrgpulComputedCols = GPOS_NEW(pmp) DrgPul(pmp); exprhdl.Pdpscalar(1 /*ulChildIndex*/)->PcrsDefined()->ExtractColIds(pmp, pdrgpulComputedCols); // construct bitset with keys of join child CBitSet *pbsKeys = GPOS_NEW(pmp) CBitSet(pmp); const ULONG ulKeys = m_pdrgpcrKeys->UlLength(); for (ULONG ul = 0; ul < ulKeys; ul++) { CColRef *pcr = (*m_pdrgpcrKeys)[ul]; pbsKeys->FExchangeSet(pcr->UlId()); } IStatistics *pstats = CLogicalGbAgg::PstatsDerive(pmp, pstatsChild, Pdrgpcr(), pdrgpulComputedCols, pbsKeys); pbsKeys->Release(); pdrgpulComputedCols->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CLogical::PstatsDeriveDummy // // @doc: // Derive dummy statistics // //--------------------------------------------------------------------------- IStatistics * CLogical::PstatsDeriveDummy ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CDouble dRows ) const { GPOS_CHECK_ABORT; // return a dummy stats object that has a histogram for every // required-stats column GPOS_ASSERT(Esp(exprhdl) > EspNone); CReqdPropRelational *prprel = CReqdPropRelational::Prprel(exprhdl.Prp()); CColRefSet *pcrs = prprel->PcrsStat(); DrgPul *pdrgpulColIds = GPOS_NEW(pmp) DrgPul(pmp); pcrs->ExtractColIds(pmp, pdrgpulColIds); IStatistics *pstats = CStatistics::PstatsDummy(pmp, pdrgpulColIds, dRows); // clean up pdrgpulColIds->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CLogicalConstTableGet::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalConstTableGet::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); CReqdPropRelational *prprel = CReqdPropRelational::Prprel(exprhdl.Prp()); CColRefSet *pcrs = prprel->PcrsStat(); DrgPul *pdrgpulColIds = GPOS_NEW(pmp) DrgPul(pmp); pcrs->ExtractColIds(pmp, pdrgpulColIds); DrgPul *pdrgpulColWidth = CUtils::Pdrgpul(pmp, m_pdrgpcrOutput); IStatistics *pstats = CStatistics::PstatsDummy ( pmp, pdrgpulColIds, pdrgpulColWidth, m_pdrgpdrgpdatum->UlLength() ); // clean up pdrgpulColIds->Release(); pdrgpulColWidth->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdshashedDerive // // @doc: // Derive hashed distribution from child hashed distributions // //--------------------------------------------------------------------------- CDistributionSpecHashed * CPhysicalUnionAll::PdshashedDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl ) const { BOOL fSuccess = true; const ULONG ulArity = exprhdl.UlArity(); // (1) check that all children deliver a hashed distribution that satisfies their input columns for (ULONG ulChild = 0; fSuccess && ulChild < ulArity; ulChild++) { CDistributionSpec *pdsChild = exprhdl.Pdpplan(ulChild)->Pds(); CDistributionSpec::EDistributionType edtChild = pdsChild->Edt(); fSuccess = (CDistributionSpec::EdtHashed == edtChild) && pdsChild->FSatisfies((*m_pdrgpds)[ulChild]); } if (!fSuccess) { // a child does not deliver hashed distribution return NULL; } // (2) check that child hashed distributions map to the same output columns // map outer child hashed distribution to corresponding UnionAll column positions DrgPul *pdrgpulOuter = PdrgpulMap(pmp, CDistributionSpecHashed::PdsConvert(exprhdl.Pdpplan(0 /*ulChildIndex*/)->Pds())->Pdrgpexpr(), 0/*ulChildIndex*/); if (NULL == pdrgpulOuter) { return NULL; } DrgPul *pdrgpulChild = NULL; for (ULONG ulChild = 1; fSuccess && ulChild < ulArity; ulChild++) { pdrgpulChild = PdrgpulMap(pmp, CDistributionSpecHashed::PdsConvert(exprhdl.Pdpplan(ulChild)->Pds())->Pdrgpexpr(), ulChild); // match mapped column positions of current child with outer child fSuccess = (NULL != pdrgpulChild) && FEqual(pdrgpulOuter, pdrgpulChild); CRefCount::SafeRelease(pdrgpulChild); } CDistributionSpecHashed *pdsOutput = NULL; if (fSuccess) { pdsOutput = PdsMatching(pmp, pdrgpulOuter); } pdrgpulOuter->Release(); return pdsOutput; }
//--------------------------------------------------------------------------- // @function: // CDynamicPtrArrayTest::EresUnittest_PdrgpulSubsequenceIndexes // // @doc: // Finding the first occurrences of the elements of the first array // in the second one. // //--------------------------------------------------------------------------- GPOS_RESULT CDynamicPtrArrayTest::EresUnittest_PdrgpulSubsequenceIndexes() { typedef CDynamicPtrArray<ULONG, CleanupNULL<ULONG> > DrgULONG; CAutoMemoryPool amp; IMemoryPool *pmp = amp.Pmp(); // the array containing elements to look up DrgULONG *pdrgULONGLookup = GPOS_NEW(pmp) DrgULONG(pmp); // the array containing the target elements that will give the positions DrgULONG *pdrgULONGTarget = GPOS_NEW(pmp) DrgULONG(pmp); ULONG *pul1 = GPOS_NEW(pmp) ULONG(10); ULONG *pul2 = GPOS_NEW(pmp) ULONG(20); ULONG *pul3 = GPOS_NEW(pmp) ULONG(30); pdrgULONGLookup->Append(pul1); pdrgULONGLookup->Append(pul2); pdrgULONGLookup->Append(pul3); pdrgULONGLookup->Append(pul3); // since target is empty, there are elements in lookup with no match, so the function // should return NULL GPOS_ASSERT(NULL == CDynamicPtrArrayUtils::PdrgpulSubsequenceIndexes(pmp, pdrgULONGLookup, pdrgULONGTarget)); pdrgULONGTarget->Append(pul1); pdrgULONGTarget->Append(pul3); pdrgULONGTarget->Append(pul3); pdrgULONGTarget->Append(pul3); pdrgULONGTarget->Append(pul2); DrgPul *pdrgpulIndexes = CDynamicPtrArrayUtils::PdrgpulSubsequenceIndexes(pmp, pdrgULONGLookup, pdrgULONGTarget); GPOS_ASSERT(NULL != pdrgpulIndexes); GPOS_ASSERT(4 == pdrgpulIndexes->UlLength()); GPOS_ASSERT(0 == *(*pdrgpulIndexes)[0]); GPOS_ASSERT(4 == *(*pdrgpulIndexes)[1]); GPOS_ASSERT(1 == *(*pdrgpulIndexes)[2]); GPOS_ASSERT(1 == *(*pdrgpulIndexes)[3]); GPOS_DELETE(pul1); GPOS_DELETE(pul2); GPOS_DELETE(pul3); pdrgpulIndexes->Release(); pdrgULONGTarget->Release(); pdrgULONGLookup->Release(); return GPOS_OK; }
//--------------------------------------------------------------------------- // @function: // CPhysicalMotion::PppsRequired // // @doc: // Compute required partition propagation of the n-th child // //--------------------------------------------------------------------------- CPartitionPropagationSpec * CPhysicalMotion::PppsRequired ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CPartitionPropagationSpec *pppsRequired, ULONG #ifdef GPOS_DEBUG ulChildIndex #endif // GPOS_DEBUG , DrgPdp *, //pdrgpdpCtxt, ULONG //ulOptReq ) { GPOS_ASSERT(0 == ulChildIndex); GPOS_ASSERT(NULL != pppsRequired); CPartIndexMap *ppimReqd = pppsRequired->Ppim(); CPartFilterMap *ppfmReqd = pppsRequired->Ppfm(); DrgPul *pdrgpul = ppimReqd->PdrgpulScanIds(pmp); CPartIndexMap *ppimResult = GPOS_NEW(pmp) CPartIndexMap(pmp); CPartFilterMap *ppfmResult = GPOS_NEW(pmp) CPartFilterMap(pmp); /// get derived part consumers CPartInfo *ppartinfo = exprhdl.Pdprel(0)->Ppartinfo(); const ULONG ulPartIndexSize = pdrgpul->UlLength(); for (ULONG ul = 0; ul < ulPartIndexSize; ul++) { ULONG ulPartIndexId = *((*pdrgpul)[ul]); if (!ppartinfo->FContainsScanId(ulPartIndexId)) { // part index id does not exist in child nodes: do not push it below // the motion continue; } ppimResult->AddRequiredPartPropagation(ppimReqd, ulPartIndexId, CPartIndexMap::EppraPreservePropagators); (void) ppfmResult->FCopyPartFilter(m_pmp, ulPartIndexId, ppfmReqd); } pdrgpul->Release(); return GPOS_NEW(pmp) CPartitionPropagationSpec(ppimResult, ppfmResult); }
//--------------------------------------------------------------------------- // @function: // CColRef::Pdrgpul // // @doc: // Extract array of colids from array of colrefs // //--------------------------------------------------------------------------- DrgPul * CColRef::Pdrgpul ( IMemoryPool *pmp, DrgPcr *pdrgpcr ) { DrgPul *pdrgpul = GPOS_NEW(pmp) DrgPul(pmp); const ULONG ulLen = pdrgpcr->UlLength(); for (ULONG ul = 0; ul < ulLen; ul++) { CColRef *pcr = (*pdrgpcr)[ul]; pdrgpul->Append(GPOS_NEW(pmp) ULONG(pcr->UlId())); } return pdrgpul; }
//--------------------------------------------------------------------------- // @function: // CCostContext::DRowsPerHost // // @doc: // Return the number of rows per host // //--------------------------------------------------------------------------- CDouble CCostContext::DRowsPerHost() const { DOUBLE dRows = Pstats()->DRows().DVal(); COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS(); const ULONG ulHosts = poptctxt->Pcm()->UlHosts(); CDistributionSpec *pds = Pdpplan()->Pds(); if (CDistributionSpec::EdtHashed == pds->Edt()) { CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(pds); DrgPexpr *pdrgpexpr = pdshashed->Pdrgpexpr(); CColRefSet *pcrsUsed = CUtils::PcrsExtractColumns(m_pmp, pdrgpexpr); const CColRefSet *pcrsReqdStats = this->Poc()->Prprel()->PcrsStat(); if (!pcrsReqdStats->FSubset(pcrsUsed)) { // statistics not available for distribution columns, therefore // assume uniform distribution across hosts // clean up pcrsUsed->Release(); return CDouble(dRows / ulHosts); } DrgPul *pdrgpul = GPOS_NEW(m_pmp) DrgPul(m_pmp); pcrsUsed->ExtractColIds(m_pmp, pdrgpul); pcrsUsed->Release(); CStatisticsConfig *pstatsconf = poptctxt->Poconf()->Pstatsconf(); CDouble dNDVs = CStatisticsUtils::DGroups(m_pmp, Pstats(), pstatsconf, pdrgpul, NULL /*pbsKeys*/); pdrgpul->Release(); if (dNDVs < ulHosts) { // estimated number of distinct values of distribution columns is smaller than number of hosts. // We assume data is distributed across a subset of hosts in this case. This results in a larger // number of rows per host compared to the uniform case, allowing us to capture data skew in // cost computation return CDouble(dRows / dNDVs.DVal()); } } return CDouble(dRows / ulHosts); }
//--------------------------------------------------------------------------- // @function: // CParseHandlerAgg::EndElement // // @doc: // Invoked by Xerces to process a closing tag // //--------------------------------------------------------------------------- void CParseHandlerAgg::EndElement ( const XMLCh* const, // xmlszUri, const XMLCh* const xmlszLocalname, const XMLCh* const // xmlszQname ) { if(0 != XMLString::compareString(CDXLTokens::XmlstrToken(EdxltokenPhysicalAggregate), xmlszLocalname)) { CWStringDynamic *pstr = CDXLUtils::PstrFromXMLCh(m_pphm->Pmm(), xmlszLocalname); GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLUnexpectedTag, pstr->Wsz()); } // construct node from the created child nodes GPOS_ASSERT(5 == this->UlLength()); CParseHandlerProperties *pphProp = dynamic_cast<CParseHandlerProperties *>((*this)[0]); CParseHandlerGroupingColList *pphGrpColList = dynamic_cast<CParseHandlerGroupingColList*>((*this)[1]); CParseHandlerProjList *pphPrL = dynamic_cast<CParseHandlerProjList*>((*this)[2]); CParseHandlerFilter *pphFilter = dynamic_cast<CParseHandlerFilter *>((*this)[3]); CParseHandlerPhysicalOp *pphChild = dynamic_cast<CParseHandlerPhysicalOp *>((*this)[4]); // set grouping cols list GPOS_ASSERT(NULL != pphGrpColList->PdrgpulGroupingCols()); DrgPul *pdrgpul = pphGrpColList->PdrgpulGroupingCols(); pdrgpul->AddRef(); m_pdxlop->SetGroupingCols(pdrgpul); m_pdxln = GPOS_NEW(m_pmp) CDXLNode(m_pmp, m_pdxlop); // set physical properties CParseHandlerUtils::SetProperties(m_pdxln, pphProp); // add children AddChildFromParseHandler(pphPrL); AddChildFromParseHandler(pphFilter); AddChildFromParseHandler(pphChild); // deactivate handler m_pphm->DeactivateHandler(); }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdrgpulMap // // @doc: // Map given array of scalar identifier expressions to positions of // UnionAll input columns in the given child; // the function returns NULL if no mapping could be constructed // //--------------------------------------------------------------------------- DrgPul * CPhysicalUnionAll::PdrgpulMap ( IMemoryPool *pmp, DrgPexpr *pdrgpexpr, ULONG ulChildIndex ) const { GPOS_ASSERT(NULL != pdrgpexpr); DrgPcr *pdrgpcr = (*m_pdrgpdrgpcrInput)[ulChildIndex]; const ULONG ulExprs = pdrgpexpr->UlLength(); const ULONG ulCols = pdrgpcr->UlLength(); DrgPul *pdrgpul = GPOS_NEW(pmp) DrgPul(pmp); for (ULONG ulExpr = 0; ulExpr < ulExprs; ulExpr++) { CExpression *pexpr = (*pdrgpexpr)[ulExpr]; if (COperator::EopScalarIdent != pexpr->Pop()->Eopid()) { continue; } const CColRef *pcr = CScalarIdent::PopConvert(pexpr->Pop())->Pcr(); for (ULONG ulCol = 0; ulCol < ulCols; ulCol++) { if ((*pdrgpcr)[ulCol] == pcr) { pdrgpul->Append(GPOS_NEW(pmp) ULONG(ulCol)); } } } if (0 == pdrgpul->UlLength()) { // mapping failed pdrgpul->Release(); pdrgpul = NULL; } return pdrgpul; }
//--------------------------------------------------------------------------- // @function: // CParseHandlerTraceFlags::StartElement // // @doc: // Invoked by Xerces to process an opening tag // //--------------------------------------------------------------------------- void CParseHandlerTraceFlags::StartElement ( const XMLCh* const , //xmlszUri, const XMLCh* const xmlszLocalname, const XMLCh* const , //xmlszQname, const Attributes& attrs ) { if(0 != XMLString::compareString(CDXLTokens::XmlstrToken(EdxltokenTraceFlags), xmlszLocalname)) { CWStringDynamic *pstr = CDXLUtils::PstrFromXMLCh(m_pphm->Pmm(), xmlszLocalname); GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLUnexpectedTag, pstr->Wsz()); } // parse and tokenize traceflags const XMLCh *xmlszTraceFlags = CDXLOperatorFactory::XmlstrFromAttrs ( attrs, EdxltokenValue, EdxltokenTraceFlags ); DrgPul *pdrgpul = CDXLOperatorFactory::PdrgpulFromXMLCh ( m_pphm->Pmm(), xmlszTraceFlags, EdxltokenDistrColumns, EdxltokenRelation ); for (ULONG ul = 0; ul < pdrgpul->UlLength(); ul++) { ULONG *pul = (*pdrgpul)[ul]; m_pbs->FExchangeSet(*pul); } pdrgpul->Release(); }
//--------------------------------------------------------------------------- // @function: // CPartIndexMap::PdrgpulScanIds // // @doc: // Extract scan ids // //--------------------------------------------------------------------------- DrgPul * CPartIndexMap::PdrgpulScanIds ( IMemoryPool *pmp, BOOL fConsumersOnly ) const { DrgPul *pdrgpul = GPOS_NEW(pmp) DrgPul(pmp); PartIndexMapIter pimi(m_pim); while (pimi.FAdvance()) { const CPartTableInfo *ppti = pimi.Pt(); if (fConsumersOnly && EpimConsumer != ppti->Epim()) { continue; } pdrgpul->Append(GPOS_NEW(pmp) ULONG(ppti->UlScanId())); } return pdrgpul; }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::EpetPartitionPropagation // // @doc: // Compute the enforcing type for the operator // //--------------------------------------------------------------------------- CEnfdProp::EPropEnforcingType CPhysicalUnionAll::EpetPartitionPropagation ( CExpressionHandle &exprhdl, const CEnfdPartitionPropagation *pepp ) const { CPartIndexMap *ppimReqd = pepp->PppsRequired()->Ppim(); if (!ppimReqd->FContainsUnresolved()) { // no unresolved partition consumers left return CEnfdProp::EpetUnnecessary; } CPartIndexMap *ppimDrvd = CDrvdPropPlan::Pdpplan(exprhdl.Pdp())->Ppim(); GPOS_ASSERT(NULL != ppimDrvd); BOOL fInScope = pepp->FInScope(m_pmp, ppimDrvd); BOOL fResolved = pepp->FResolved(m_pmp, ppimDrvd); if (fResolved) { // all required partition consumers are resolved return CEnfdProp::EpetUnnecessary; } if (!fInScope) { // some partition consumers are not covered downstream return CEnfdProp::EpetRequired; } DrgPul *pdrgpul = ppimReqd->PdrgpulScanIds(m_pmp); const ULONG ulScanIds = pdrgpul->UlLength(); const ULONG ulArity = exprhdl.UlNonScalarChildren(); for (ULONG ul = 0; ul < ulScanIds; ul++) { ULONG ulScanId = *((*pdrgpul)[ul]); ULONG ulChildrenWithConsumers = 0; for (ULONG ulChildIdx = 0; ulChildIdx < ulArity; ulChildIdx++) { if (exprhdl.Pdprel(ulChildIdx)->Ppartinfo()->FContainsScanId(ulScanId)) { ulChildrenWithConsumers++; } } if (1 < ulChildrenWithConsumers) { // partition consumer exists in more than one child, so enforce it here pdrgpul->Release(); return CEnfdProp::EpetRequired; } } pdrgpul->Release(); // required part propagation can be enforced here or passed to the children return CEnfdProp::EpetOptional; }
//--------------------------------------------------------------------------- // @function: // CPartitionPropagationSpec::AppendEnforcers // // @doc: // Add required enforcers to dynamic array // //--------------------------------------------------------------------------- void CPartitionPropagationSpec::AppendEnforcers ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CReqdPropPlan * #ifdef GPOS_DEBUG prpp #endif // GPOS_DEBUG , DrgPexpr *pdrgpexpr, CExpression *pexpr ) { GPOS_ASSERT(NULL != prpp); GPOS_ASSERT(NULL != pmp); GPOS_ASSERT(NULL != pdrgpexpr); GPOS_ASSERT(NULL != pexpr); DrgPul *pdrgpul = m_ppim->PdrgpulScanIds(pmp); const ULONG ulSize = pdrgpul->UlLength(); for (ULONG ul = 0; ul < ulSize; ul++) { ULONG ulScanId = *((*pdrgpul)[ul]); GPOS_ASSERT(m_ppim->FContains(ulScanId)); if (CPartIndexMap::EpimConsumer != m_ppim->Epim(ulScanId) || 0 < m_ppim->UlExpectedPropagators(ulScanId)) { continue; } if (!FRequiresPartitionPropagation(pmp, pexpr, exprhdl, ulScanId)) { continue; } CExpression *pexprResolver = NULL; IMDId *pmdid = m_ppim->PmdidRel(ulScanId); DrgDrgPcr *pdrgpdrgpcrKeys = NULL; DrgPpartkeys *pdrgppartkeys = m_ppim->Pdrgppartkeys(ulScanId); CPartConstraint *ppartcnstr = m_ppim->PpartcnstrRel(ulScanId); PartCnstrMap *ppartcnstrmap = m_ppim->Ppartcnstrmap(ulScanId); pmdid->AddRef(); ppartcnstr->AddRef(); ppartcnstrmap->AddRef(); pexpr->AddRef(); // check if there is a predicate on this part index id HMUlExpr *phmulexprEqFilter = GPOS_NEW(pmp) HMUlExpr(pmp); HMUlExpr *phmulexprFilter = GPOS_NEW(pmp) HMUlExpr(pmp); CExpression *pexprResidual = NULL; if (m_ppfm->FContainsScanId(ulScanId)) { CExpression *pexprScalar = PexprFilter(pmp, ulScanId); // find out which keys are used in the predicate, in case there are multiple // keys at this point (e.g. from a union of multiple CTE consumers) CColRefSet *pcrsUsed = CDrvdPropScalar::Pdpscalar(pexprScalar->PdpDerive())->PcrsUsed(); const ULONG ulKeysets = pdrgppartkeys->UlLength(); for (ULONG ulKey = 0; NULL == pdrgpdrgpcrKeys && ulKey < ulKeysets; ulKey++) { // get partition key CPartKeys *ppartkeys = (*pdrgppartkeys)[ulKey]; if (ppartkeys->FOverlap(pcrsUsed)) { pdrgpdrgpcrKeys = ppartkeys->Pdrgpdrgpcr(); } } // if we cannot find partition keys mapping the partition predicates, fall back to planner if (NULL == pdrgpdrgpcrKeys) { GPOS_RAISE(gpopt::ExmaGPOPT, gpopt::ExmiUnsatisfiedRequiredProperties); } pdrgpdrgpcrKeys->AddRef(); // split predicates and put them in the appropriate hashmaps SplitPartPredicates(pmp, pexprScalar, pdrgpdrgpcrKeys, phmulexprEqFilter, phmulexprFilter, &pexprResidual); pexprScalar->Release(); } else { // doesn't matter which keys we use here since there is no filter GPOS_ASSERT(1 <= pdrgppartkeys->UlLength()); pdrgpdrgpcrKeys = (*pdrgppartkeys)[0]->Pdrgpdrgpcr(); pdrgpdrgpcrKeys->AddRef(); } pexprResolver = GPOS_NEW(pmp) CExpression ( pmp, GPOS_NEW(pmp) CPhysicalPartitionSelector ( pmp, ulScanId, pmdid, pdrgpdrgpcrKeys, ppartcnstrmap, ppartcnstr, phmulexprEqFilter, phmulexprFilter, pexprResidual ), pexpr ); pdrgpexpr->Append(pexprResolver); } pdrgpul->Release(); }
//--------------------------------------------------------------------------- // @function: // CLogicalDifference::PstatsDerive // // @doc: // Derive statistics // //--------------------------------------------------------------------------- IStatistics * CLogicalDifference::PstatsDerive ( IMemoryPool *pmp, CExpressionHandle &exprhdl, DrgPstat * // not used ) const { GPOS_ASSERT(Esp(exprhdl) > EspNone); // difference is transformed into an aggregate over a LASJ, // we follow the same route to compute statistics DrgPcrs *pdrgpcrsOutput = GPOS_NEW(pmp) DrgPcrs(pmp); const ULONG ulSize = m_pdrgpdrgpcrInput->UlLength(); for (ULONG ul = 0; ul < ulSize; ul++) { CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp, (*m_pdrgpdrgpcrInput)[ul]); pdrgpcrsOutput->Append(pcrs); } IStatistics *pstatsOuter = exprhdl.Pstats(0); IStatistics *pstatsInner = exprhdl.Pstats(1); // construct the scalar condition for the LASJ CExpression *pexprScCond = CUtils::PexprConjINDFCond(pmp, m_pdrgpdrgpcrInput); // compute the statistics for LASJ CColRefSet *pcrsOuterRefs = exprhdl.Pdprel()->PcrsOuter(); DrgPstatsjoin *pdrgpstatsjoin = CStatsPredUtils::Pdrgpstatsjoin ( pmp, exprhdl, pexprScCond, pdrgpcrsOutput, pcrsOuterRefs ); IStatistics *pstatsLASJ = pstatsOuter->PstatsLASJoin ( pmp, pstatsInner, pdrgpstatsjoin, true /* fIgnoreLasjHistComputation */ ); // clean up pexprScCond->Release(); pdrgpstatsjoin->Release(); // computed columns DrgPul *pdrgpulComputedCols = GPOS_NEW(pmp) DrgPul(pmp); IStatistics *pstats = CLogicalGbAgg::PstatsDerive ( pmp, pstatsLASJ, (*m_pdrgpdrgpcrInput)[0], // we group by the columns of the first child pdrgpulComputedCols, // no computed columns for set ops NULL // no keys, use all grouping cols ); // clean up pdrgpulComputedCols->Release(); pstatsLASJ->Release(); pdrgpcrsOutput->Release(); return pstats; }
//--------------------------------------------------------------------------- // @function: // CPhysicalHashJoin::PppsRequiredCompute // // @doc: // Compute required partition propagation of the n-th child // //--------------------------------------------------------------------------- CPartitionPropagationSpec * CPhysicalHashJoin::PppsRequiredCompute ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CPartitionPropagationSpec *pppsRequired, ULONG ulChildIndex ) { CPartIndexMap *ppim = pppsRequired->Ppim(); CPartFilterMap *ppfm = pppsRequired->Ppfm(); DrgPul *pdrgpul = ppim->PdrgpulScanIds(pmp); CPartIndexMap *ppimResult = GPOS_NEW(pmp) CPartIndexMap(pmp); CPartFilterMap *ppfmResult = GPOS_NEW(pmp) CPartFilterMap(pmp); // get outer partition consumers CPartInfo *ppartinfo = exprhdl.Pdprel(0)->Ppartinfo(); CColRefSet *pcrsOutputOuter = exprhdl.Pdprel(0)->PcrsOutput(); CColRefSet *pcrsOutputInner = exprhdl.Pdprel(1)->PcrsOutput(); const ULONG ulPartIndexIds = pdrgpul->UlLength(); for (ULONG ul = 0; ul < ulPartIndexIds; ul++) { ULONG ulPartIndexId = *((*pdrgpul)[ul]); if (ppfm->FContainsScanId(ulPartIndexId)) { GPOS_ASSERT(NULL != ppfm->Pexpr(ulPartIndexId)); // a selection-based propagation request pushed from above: do not propagate any // further as the join will reduce cardinality and thus may select more partitions // for scanning continue; } BOOL fOuterPartConsumer = ppartinfo->FContainsScanId(ulPartIndexId); // in order to find interesting join predicates that can be used for DPE, // one side of the predicate must be the partition key, while the other side must only contain // references from the join child that does not have the partition consumer CColRefSet *pcrsAllowedRefs = pcrsOutputOuter; if (fOuterPartConsumer) { pcrsAllowedRefs = pcrsOutputInner; } if (1 == ulChildIndex && !fOuterPartConsumer) { // always push through required partition propagation for consumers on the // inner side of the hash join DrgPpartkeys *pdrgppartkeys = exprhdl.Pdprel(1 /*ulChildIndex*/)->Ppartinfo()->PdrgppartkeysByScanId(ulPartIndexId); GPOS_ASSERT(NULL != pdrgppartkeys); pdrgppartkeys->AddRef(); ppimResult->AddRequiredPartPropagation(ppim, ulPartIndexId, CPartIndexMap::EppraPreservePropagators, pdrgppartkeys); } else { // look for a filter on the part key CExpression *pexprScalar = exprhdl.PexprScalarChild(2 /*ulChildIndex*/); AddFilterOnPartKey(pmp, false /*fNLJoin*/, pexprScalar, ppim, ppfm, ulChildIndex, ulPartIndexId, fOuterPartConsumer, ppimResult, ppfmResult, pcrsAllowedRefs); } } pdrgpul->Release(); return GPOS_NEW(pmp) CPartitionPropagationSpec(ppimResult, ppfmResult); }
//--------------------------------------------------------------------------- // @function: // CPhysicalNLJoin::PppsRequiredNLJoinChild // // @doc: // Compute required partition propagation of the n-th child // //--------------------------------------------------------------------------- CPartitionPropagationSpec * CPhysicalNLJoin::PppsRequiredNLJoinChild ( IMemoryPool *pmp, CExpressionHandle &exprhdl, CPartitionPropagationSpec *pppsRequired, ULONG ulChildIndex, DrgPdp *, //pdrgpdpCtxt, ULONG ulOptReq ) { GPOS_ASSERT(NULL != pppsRequired); if (1 == ulOptReq) { // request (1): push partition propagation requests to join's children, // do not consider possible dynamic partition elimination using join predicate here, // this is handled by optimization request (0) below return CPhysical::PppsRequiredPushThruNAry(pmp, exprhdl, pppsRequired, ulChildIndex); } GPOS_ASSERT(0 == ulOptReq); CPartIndexMap *ppim = pppsRequired->Ppim(); CPartFilterMap *ppfm = pppsRequired->Ppfm(); DrgPul *pdrgpul = ppim->PdrgpulScanIds(pmp); CPartIndexMap *ppimResult = GPOS_NEW(pmp) CPartIndexMap(pmp); CPartFilterMap *ppfmResult = GPOS_NEW(pmp) CPartFilterMap(pmp); CPartInfo *ppartinfoOuter = exprhdl.Pdprel(0)->Ppartinfo(); CColRefSet *pcrsOutputOuter = exprhdl.Pdprel(0)->PcrsOutput(); CColRefSet *pcrsOutputInner = exprhdl.Pdprel(1)->PcrsOutput(); const ULONG ulPartIndexIds = pdrgpul->UlLength(); for (ULONG ul = 0; ul < ulPartIndexIds; ul++) { ULONG ulPartIndexId = *((*pdrgpul)[ul]); if (ppfm->FContainsScanId(ulPartIndexId)) { GPOS_ASSERT(NULL != ppfm->Pexpr(ulPartIndexId)); // a selection-based propagation request pushed from above: do not propagate any // further as the join will reduce cardinality and thus may select more partitions // for scanning continue; } BOOL fOuterPartConsumer = ppartinfoOuter->FContainsScanId(ulPartIndexId); // in order to find interesting join predicates that can be used for DPE, // one side of the predicate must be the partition key, while the other side must only contain // references from the join child that does not have the partition consumer CColRefSet *pcrsAllowedRefs = pcrsOutputOuter; if (fOuterPartConsumer) { pcrsAllowedRefs = pcrsOutputInner; } if (0 == ulChildIndex && fOuterPartConsumer) { // always push through required partition propagation for consumers on the // outer side of the nested loop join DrgPpartkeys *pdrgppartkeys = ppartinfoOuter->PdrgppartkeysByScanId(ulPartIndexId); GPOS_ASSERT(NULL != pdrgppartkeys); pdrgppartkeys->AddRef(); ppimResult->AddRequiredPartPropagation(ppim, ulPartIndexId, CPartIndexMap::EppraPreservePropagators, pdrgppartkeys); } else { // check if there is an interesting condition involving the partition key CExpression *pexprScalar = exprhdl.PexprScalarChild(2 /*ulChildIndex*/); AddFilterOnPartKey(pmp, true /*fNLJoin*/, pexprScalar, ppim, ppfm, ulChildIndex, ulPartIndexId, fOuterPartConsumer, ppimResult, ppfmResult, pcrsAllowedRefs); } } pdrgpul->Release(); return GPOS_NEW(pmp) CPartitionPropagationSpec(ppimResult, ppfmResult); }
//--------------------------------------------------------------------------- // @function: // CCTEReq::PcterUnresolvedSequence // // @doc: // Unresolved CTE requirements given a derived CTE map for a sequence // operator // //--------------------------------------------------------------------------- CCTEReq * CCTEReq::PcterUnresolvedSequence ( IMemoryPool *pmp, CCTEMap *pcm, DrgPdp *pdrgpdpCtxt // context contains derived plan properties of producer tree ) { GPOS_ASSERT(NULL != pcm); CCTEReq *pcterUnresolved = GPOS_NEW(pmp) CCTEReq(pmp); HMCteReqIter hmcri(m_phmcter); while (hmcri.FAdvance()) { const CCTEReqEntry *pcre = hmcri.Pt(); ULONG ulId = pcre->UlId(); CCTEMap::ECteType ect = pcre->Ect(); BOOL fRequired = pcre->FRequired(); CCTEMap::ECteType ectDrvd = pcm->Ect(ulId); if (fRequired && CCTEMap::EctSentinel != ectDrvd) { GPOS_ASSERT(CCTEMap::EctConsumer == ect); GPOS_ASSERT(CCTEMap::EctConsumer == ectDrvd); // already found, so mark it as optional CDrvdPropPlan *pdpplan = pcre->PdpplanProducer(); GPOS_ASSERT(NULL != pdpplan); pdpplan->AddRef(); pcterUnresolved->Insert(ulId, ect, false /*fReqiored*/, pdpplan); } else if (!fRequired && CCTEMap::EctProducer == ect && CCTEMap::EctSentinel != ectDrvd) { GPOS_ASSERT(CCTEMap::EctProducer == ectDrvd); // found a producer. require the corresponding consumer and // extract producer plan properties from passed context pcterUnresolved->InsertConsumer(ulId, pdrgpdpCtxt); } else { // either required and not found yet, or optional // in both cases, pass it down as is CDrvdPropPlan *pdpplan = pcre->PdpplanProducer(); GPOS_ASSERT_IMP(NULL == pdpplan, CCTEMap::EctProducer == ect); if (NULL != pdpplan) { pdpplan->AddRef(); } pcterUnresolved->Insert(ulId, ect, fRequired, pdpplan); } } // if something is in pcm and not in the requirments, it has to be a producer // in which case, add the corresponding consumer as unresolved DrgPul *pdrgpulProducers = pcm->PdrgpulAdditionalProducers(pmp, this); const ULONG ulLen = pdrgpulProducers->UlLength(); for (ULONG ul = 0; ul < ulLen; ul++) { ULONG *pulId = (*pdrgpulProducers)[ul]; pcterUnresolved->InsertConsumer(*pulId, pdrgpdpCtxt); } pdrgpulProducers->Release(); return pcterUnresolved; }
//--------------------------------------------------------------------------- // @function: // CPhysicalPartitionSelector::PppsRequired // // @doc: // Compute required partition propagation of the n-th child // //--------------------------------------------------------------------------- CPartitionPropagationSpec * CPhysicalPartitionSelector::PppsRequired ( IMemoryPool *pmp, CExpressionHandle & exprhdl, CPartitionPropagationSpec *pppsRequired, ULONG #ifdef GPOS_DEBUG ulChildIndex #endif // GPOS_DEBUG , DrgPdp *, //pdrgpdpCtxt, ULONG //ulOptReq ) { GPOS_ASSERT(0 == ulChildIndex); GPOS_ASSERT(NULL != pppsRequired); CPartIndexMap *ppimInput = pppsRequired->Ppim(); CPartFilterMap *ppfmInput = pppsRequired->Ppfm(); DrgPul *pdrgpulInputScanIds = ppimInput->PdrgpulScanIds(pmp); CPartIndexMap *ppim = GPOS_NEW(pmp) CPartIndexMap(pmp); CPartFilterMap *ppfm = GPOS_NEW(pmp) CPartFilterMap(pmp); CPartInfo *ppartinfo = exprhdl.Pdprel(0)->Ppartinfo(); const ULONG ulScanIds = pdrgpulInputScanIds->UlLength(); for (ULONG ul = 0; ul < ulScanIds; ul++) { ULONG ulScanId = *((*pdrgpulInputScanIds)[ul]); ULONG ulExpectedPropagators = ppimInput->UlExpectedPropagators(ulScanId); if (ulScanId == m_ulScanId) { // partition propagation resolved - do not need to require from children continue; } if (!ppartinfo->FContainsScanId(ulScanId) && ppartinfo->FContainsScanId(m_ulScanId)) { // dynamic scan for the required id not defined below, but the current one is: do not push request down continue; } IMDId *pmdid = ppimInput->PmdidRel(ulScanId); DrgPpartkeys *pdrgppartkeys = ppimInput->Pdrgppartkeys(ulScanId); PartCnstrMap *ppartcnstrmap = ppimInput->Ppartcnstrmap(ulScanId); CPartConstraint *ppartcnstr = ppimInput->PpartcnstrRel(ulScanId); CPartIndexMap::EPartIndexManipulator epim = ppimInput->Epim(ulScanId); pmdid->AddRef(); pdrgppartkeys->AddRef(); ppartcnstrmap->AddRef(); ppartcnstr->AddRef(); ppim->Insert(ulScanId, ppartcnstrmap, epim, ulExpectedPropagators, pmdid, pdrgppartkeys, ppartcnstr); (void) ppfm->FCopyPartFilter(m_pmp, ulScanId, ppfmInput); } // cleanup pdrgpulInputScanIds->Release(); return GPOS_NEW(pmp) CPartitionPropagationSpec(ppim, ppfm); }