//--------------------------------------------------------------------------- // @function: // CPhysical::GetSkew // // @doc: // Helper to compute skew estimate based on given stats and // distribution spec // //--------------------------------------------------------------------------- CDouble CPhysical::GetSkew ( IStatistics *stats, CDistributionSpec *pds ) { CDouble dSkew = 1.0; if (CDistributionSpec::EdtHashed == pds->Edt()) { CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(pds); const CExpressionArray *pdrgpexpr = pdshashed->Pdrgpexpr(); const ULONG size = pdrgpexpr->Size(); for (ULONG ul = 0; ul < size; ul++) { CExpression *pexpr = (*pdrgpexpr)[ul]; if (COperator::EopScalarIdent == pexpr->Pop()->Eopid()) { // consider only hashed distribution direct columns for now CScalarIdent *popScId = CScalarIdent::PopConvert(pexpr->Pop()); ULONG colid = popScId->Pcr()->Id(); CDouble dSkewCol = stats->GetSkew(colid); if (dSkewCol > dSkew) { dSkew = dSkewCol; } } } } return CDouble(dSkew); }
//--------------------------------------------------------------------------- // @function: // CXformInnerJoin2DynamicBitmapIndexGetApply::Transform // // @doc: // Actual transformation // //--------------------------------------------------------------------------- void CXformInnerJoin2DynamicBitmapIndexGetApply::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); // extract components CExpression *pexprOuter = (*pexpr)[0]; CExpression *pexprInner = (*pexpr)[1]; CExpression *pexprScalar = (*pexpr)[2]; GPOS_ASSERT(COperator::EopLogicalDynamicGet == pexprInner->Pop()->Eopid()); CLogicalDynamicGet *popDynamicGet = CLogicalDynamicGet::PopConvert(pexprInner->Pop()); CreateHomogeneousIndexApplyAlternatives ( pmp, pexpr->Pop()->UlOpId(), pexprOuter, pexprInner, pexprScalar, popDynamicGet->Ptabdesc(), popDynamicGet, pxfres, IMDIndex::EmdindBitmap ); }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdshashedPassThru // // @doc: // Compute required hashed distribution of the n-th child // //--------------------------------------------------------------------------- CDistributionSpecHashed * CPhysicalUnionAll::PdshashedPassThru ( IMemoryPool *pmp, CDistributionSpecHashed *pdshashedRequired, ULONG ulChildIndex ) const { DrgPexpr *pdrgpexprRequired = pdshashedRequired->Pdrgpexpr(); DrgPcr *pdrgpcrChild = (*m_pdrgpdrgpcrInput)[ulChildIndex]; const ULONG ulExprs = pdrgpexprRequired->UlLength(); const ULONG ulOutputCols = m_pdrgpcrOutput->UlLength(); DrgPexpr *pdrgpexprChildRequired = GPOS_NEW(pmp) DrgPexpr(pmp); for (ULONG ulExpr = 0; ulExpr < ulExprs; ulExpr++) { CExpression *pexpr = (*pdrgpexprRequired)[ulExpr]; if (COperator::EopScalarIdent != pexpr->Pop()->Eopid()) { // skip expressions that are not in form of scalar identifiers continue; } const CColRef *pcrHashed = CScalarIdent::PopConvert(pexpr->Pop())->Pcr(); const IMDType *pmdtype = pcrHashed->Pmdtype(); if (!pmdtype->FHashable()) { // skip non-hashable columns continue; } for (ULONG ulCol = 0; ulCol < ulOutputCols; ulCol++) { const CColRef *pcrOutput = (*m_pdrgpcrOutput)[ulCol]; if (pcrOutput == pcrHashed) { const CColRef *pcrInput = (*pdrgpcrChild)[ulCol]; pdrgpexprChildRequired->Append(CUtils::PexprScalarIdent(pmp, pcrInput)); } } } if (0 < pdrgpexprChildRequired->UlLength()) { return GPOS_NEW(pmp) CDistributionSpecHashed(pdrgpexprChildRequired, true /* fNullsCollocated */); } // failed to create a matching hashed distribution pdrgpexprChildRequired->Release(); if (NULL != pdshashedRequired->PdshashedEquiv()) { // try again with equivalent distribution return PdshashedPassThru(pmp, pdshashedRequired->PdshashedEquiv(), ulChildIndex); } // failed to create hashed distribution return NULL; }
//--------------------------------------------------------------------------- // @function: // CStatsPredUtils::PstatspredLikeHandleCasting // // @doc: // Create a LIKE statistics filter //--------------------------------------------------------------------------- CStatsPred * CStatsPredUtils::PstatspredLike ( IMemoryPool *pmp, CExpression *pexprPred, CColRefSet *//pcrsOuterRefs, ) { GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(CPredicateUtils::FLikePredicate(pexprPred)); CExpression *pexprLeft = (*pexprPred)[0]; CExpression *pexprRight = (*pexprPred)[1]; // we support LIKE predicate of the following patterns // CAST(ScIdent) LIKE Const // CAST(ScIdent) LIKE CAST(Const) // ScIdent LIKE Const // ScIdent LIKE CAST(Const) // CAST(Const) LIKE ScIdent // CAST(Const) LIKE CAST(ScIdent) // const LIKE ScIdent // const LIKE CAST(ScIdent) CExpression *pexprScIdent = NULL; CExpression *pexprScConst = NULL; CPredicateUtils::ExtractLikePredComponents(pexprPred, &pexprScIdent, &pexprScConst); if (NULL == pexprScIdent || NULL == pexprScConst) { return GPOS_NEW(pmp) CStatsPredUnsupported(ULONG_MAX, CStatsPred::EstatscmptLike); } CScalarIdent *popScalarIdent = CScalarIdent::PopConvert(pexprScIdent->Pop()); ULONG ulColId = popScalarIdent->Pcr()->UlId(); CScalarConst *popScalarConst = CScalarConst::PopConvert(pexprScConst->Pop()); IDatum *pdatumLiteral = popScalarConst->Pdatum(); const CColRef *pcr = popScalarIdent->Pcr(); if (!IMDType::FStatsComparable(pcr->Pmdtype(), pdatumLiteral)) { // unsupported stats comparison between the column and datum return GPOS_NEW(pmp) CStatsPredUnsupported(pcr->UlId(), CStatsPred::EstatscmptLike); } CDouble dDefaultScaleFactor(1.0); if (pdatumLiteral->FSupportLikePredicate()) { dDefaultScaleFactor = pdatumLiteral->DLikePredicateScaleFactor(); } pexprLeft->AddRef(); pexprRight->AddRef(); return GPOS_NEW(pmp) CStatsPredLike(ulColId, pexprLeft, pexprRight, dDefaultScaleFactor); }
//--------------------------------------------------------------------------- // @function: // CConstraint::PcnstrFromScalarCmp // // @doc: // Create constraint from scalar comparison // //--------------------------------------------------------------------------- CConstraint * CConstraint::PcnstrFromScalarCmp ( IMemoryPool *pmp, CExpression *pexpr, DrgPcrs **ppdrgpcrs // output equivalence classes ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(CUtils::FScalarCmp(pexpr)); GPOS_ASSERT(NULL != ppdrgpcrs); GPOS_ASSERT(NULL == *ppdrgpcrs); CExpression *pexprLeft = (*pexpr)[0]; CExpression *pexprRight = (*pexpr)[1]; // check if the scalar comparison is over scalar idents if (COperator::EopScalarIdent == pexprLeft->Pop()->Eopid() && COperator::EopScalarIdent == pexprRight->Pop()->Eopid()) { CScalarIdent *popScIdLeft = CScalarIdent::PopConvert((*pexpr)[0]->Pop()); const CColRef *pcrLeft = popScIdLeft->Pcr(); CScalarIdent *popScIdRight = CScalarIdent::PopConvert((*pexpr)[1]->Pop()); const CColRef *pcrRight = popScIdRight->Pcr(); if (!CUtils::FConstrainableType(pcrLeft->Pmdtype()->Pmdid()) || !CUtils::FConstrainableType(pcrRight->Pmdtype()->Pmdid())) { return NULL; } *ppdrgpcrs = GPOS_NEW(pmp) DrgPcrs(pmp); if (CPredicateUtils::FEquality(pexpr)) { // col1 = col2 CColRefSet *pcrsNew = GPOS_NEW(pmp) CColRefSet(pmp); pcrsNew->Include(pcrLeft); pcrsNew->Include(pcrRight); (*ppdrgpcrs)->Append(pcrsNew); } // create NOT NULL constraints to both columns DrgPcnstr *pdrgpcnstr = GPOS_NEW(pmp) DrgPcnstr(pmp); pdrgpcnstr->Append(CConstraintInterval::PciUnbounded(pmp, pcrLeft, false /*fIncludesNull*/)); pdrgpcnstr->Append(CConstraintInterval::PciUnbounded(pmp, pcrRight, false /*fIncludesNull*/)); return CConstraint::PcnstrConjunction(pmp, pdrgpcnstr); } // TODO: , May 28, 2012; add support for other cases besides (col cmp col) return NULL; }
//--------------------------------------------------------------------------- // @function: // CNormalizer::PexprSelect // // @doc: // Return a Select expression, if needed, with a scalar condition made of // given array of conjuncts // //--------------------------------------------------------------------------- CExpression * CNormalizer::PexprSelect ( IMemoryPool *pmp, CExpression *pexpr, DrgPexpr *pdrgpexpr ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(NULL != pdrgpexpr); if (0 == pdrgpexpr->UlLength()) { // no predicate, return given expression pdrgpexpr->Release(); return pexpr; } // result expression is a select over predicates CExpression *pexprConjunction = CPredicateUtils::PexprConjunction(pmp, pdrgpexpr); CExpression *pexprSelect = CUtils::PexprSafeSelect(pmp, pexpr, pexprConjunction); if (COperator::EopLogicalSelect != pexprSelect->Pop()->Eopid()) { // Select node was pruned, return created expression return pexprSelect; } CExpression *pexprLogicalChild = (*pexprSelect)[0]; COperator::EOperatorId eopidChild = pexprLogicalChild->Pop()->Eopid(); if (COperator::EopLogicalLeftOuterJoin != eopidChild) { // child of Select is not an outer join, return created Select expression return pexprSelect; } // we have a Select on top of Outer Join expression, attempt simplifying expression into InnerJoin CExpression *pexprSimplified = NULL; if (FSimplifySelectOnOuterJoin(pmp, pexprLogicalChild, (*pexprSelect)[1], &pexprSimplified)) { // simplification succeeded, normalize resulting expression pexprSelect->Release(); CExpression *pexprResult = PexprNormalize(pmp, pexprSimplified); pexprSimplified->Release(); return pexprResult; } // simplification failed, return created Select expression return pexprSelect; }
//--------------------------------------------------------------------------- // @function: // CXformPushGbWithHavingBelowJoin::Transform // // @doc: // Actual transformation // //--------------------------------------------------------------------------- void CXformPushGbWithHavingBelowJoin::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *mp = pxfctxt->Pmp(); CExpression *pexprGb = (*pexpr)[0]; CLogicalGbAgg *popGbAgg = CLogicalGbAgg::PopConvert(pexprGb->Pop()); if (!popGbAgg->FGlobal()) { // xform only applies to global aggs return; } CExpression *pexprResult = CXformUtils::PexprPushGbBelowJoin(mp, pexpr); if (NULL != pexprResult) { // add alternative to results pxfres->Add(pexprResult); } }
//--------------------------------------------------------------------------- // @function: // CXformCollapseGbAgg::Transform // // @doc: // Actual transformation to collapse two cascaded group by operators; // if the top Gb grouping columns are subset of bottom Gb grouping // columns AND both Gb operators do not define agg functions, we can // remove the bottom group by operator // // //--------------------------------------------------------------------------- void CXformCollapseGbAgg::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(NULL != pxfres); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); // extract components CLogicalGbAgg *popTopGbAgg = CLogicalGbAgg::PopConvert(pexpr->Pop()); GPOS_ASSERT(0 < popTopGbAgg->Pdrgpcr()->UlLength()); GPOS_ASSERT(popTopGbAgg->FGlobal()); CExpression *pexprRelational = (*pexpr)[0]; CExpression *pexprTopProjectList = (*pexpr)[1]; CLogicalGbAgg *popBottomGbAgg = CLogicalGbAgg::PopConvert(pexprRelational->Pop()); CExpression *pexprChild = (*pexprRelational)[0]; CExpression *pexprBottomProjectList = (*pexprRelational)[1]; if (!popBottomGbAgg->FGlobal()) { // bottom GbAgg must be global to prevent xform from getting applied to splitted GbAggs return; } if (0 < pexprTopProjectList->UlArity() || 0 < pexprBottomProjectList->UlArity()) { // exit if any of the Gb operators has an aggregate function return; } #ifdef GPOS_DEBUG // for two cascaded GbAgg ops with no agg functions, top grouping // columns must be a subset of bottom grouping columns CColRefSet *pcrsTopGrpCols = GPOS_NEW(pmp) CColRefSet(pmp, popTopGbAgg->Pdrgpcr()); CColRefSet *pcrsBottomGrpCols = GPOS_NEW(pmp) CColRefSet(pmp, popBottomGbAgg->Pdrgpcr()); GPOS_ASSERT(pcrsBottomGrpCols->FSubset(pcrsTopGrpCols)); pcrsTopGrpCols->Release(); pcrsBottomGrpCols->Release(); #endif // GPOS_DEBUG pexprChild->AddRef(); CExpression *pexprSelect = CUtils::PexprLogicalSelect(pmp, pexprChild, CPredicateUtils::PexprConjunction(pmp, NULL /*pdrgpexpr*/)); popTopGbAgg->AddRef(); pexprTopProjectList->AddRef(); CExpression *pexprGbAggNew = GPOS_NEW(pmp) CExpression(pmp, popTopGbAgg, pexprSelect, pexprTopProjectList); pxfres->Add(pexprGbAggNew); }
// test cardinality for predicates of the form: a + c = b // for such predicates, NDV based cardinality estimation is not applicable GPOS_RESULT CJoinCardinalityNDVBasedEqPredTest::EresUnittest_NDVCardEstimationNotApplicableMultipleIdents() { // cartesian product / 2.5 // 2.5 = 1/.4 -- where .4 is default selectivity CDouble dRowsExpected(76004000); Fixture f(file_name); IMemoryPool *mp = f.Pmp(); IStatisticsArray *statistics_array = f.PdrgPstat(); CExpression *pexprLgGet = CTestUtils::PexprLogicalGet(mp); CLogicalGet *popGet = CLogicalGet::PopConvert(pexprLgGet->Pop()); CColRefArray *colref_array = popGet->PdrgpcrOutput(); // use the colid available in the input xml file CColRef *pcrLeft1 = (*colref_array)[2]; CColRef *pcrLeft2 = (*colref_array)[1]; CColRef *pcrRight = (*colref_array)[0]; // create a scalar ident // CScalarIdent "column_0000" (0) CExpression *pexprScalarIdentRight = CUtils::PexprScalarIdent(mp, pcrRight); CExpression *pexprScalarIdentLeft2 = CUtils::PexprScalarIdent(mp, pcrLeft2); // create a scalar op expression column_0002 + column_0001 // CScalarOp (+) // |--CScalarIdent "column_0002" (2) // +--CScalarIdent "column_0001" (1) CExpression *pexprScOp = CUtils::PexprScalarOp(mp, pcrLeft1, pexprScalarIdentLeft2, CWStringConst(GPOS_WSZ_LIT("+")), GPOS_NEW(mp) CMDIdGPDB(GPDB_INT4_ADD_OP)); // create a scalar comparision operator // +--CScalarCmp (=) // |--CScalarOp (+) // | |--CScalarIdent "column_0002" (2) // | +--CScalarIdent "column_0001" (1) // +--CScalarIdent "column_0000" (0) CExpression *pScalarCmp = CUtils::PexprScalarEqCmp(mp, pexprScOp, pexprScalarIdentRight); IStatistics *join_stats = CJoinStatsProcessor::CalcAllJoinStats(mp, statistics_array, pScalarCmp, IStatistics::EsjtInnerJoin); GPOS_ASSERT(NULL != join_stats); CDouble dRowsActual(join_stats->Rows()); GPOS_RESULT eres = GPOS_OK; if (floor(dRowsActual.Get()) != dRowsExpected) { eres = GPOS_FAILED; } join_stats->Release(); pexprLgGet->Release(); pScalarCmp->Release(); return eres; }
//--------------------------------------------------------------------------- // @function: // CQueryContext::PopTop // // @doc: // Return top level operator in the given expression // //--------------------------------------------------------------------------- COperator * CQueryContext::PopTop ( CExpression *pexpr ) { GPOS_ASSERT(NULL != pexpr); // skip CTE anchors if any CExpression *pexprCurr = pexpr; while (COperator::EopLogicalCTEAnchor == pexprCurr->Pop()->Eopid()) { pexprCurr = (*pexprCurr)[0]; GPOS_ASSERT(NULL != pexprCurr); } return pexprCurr->Pop(); }
//--------------------------------------------------------------------------- // @function: // CNormalizer::PushThruSelect // // @doc: // Push a conjunct through a select // //--------------------------------------------------------------------------- void CNormalizer::PushThruSelect ( IMemoryPool *pmp, CExpression *pexprSelect, CExpression *pexprConj, CExpression **ppexprResult ) { GPOS_ASSERT(NULL != pexprConj); GPOS_ASSERT(NULL != ppexprResult); CExpression *pexprLogicalChild = (*pexprSelect)[0]; CExpression *pexprScalarChild = (*pexprSelect)[1]; CExpression *pexprPred = CPredicateUtils::PexprConjunction(pmp, pexprScalarChild, pexprConj); if (CUtils::FScalarConstTrue(pexprPred)) { pexprPred->Release(); *ppexprResult = PexprNormalize(pmp, pexprLogicalChild); return; } COperator::EOperatorId eopid = pexprLogicalChild->Pop()->Eopid(); if (COperator::EopLogicalLeftOuterJoin == eopid) { CExpression *pexprSimplified = NULL; if (FSimplifySelectOnOuterJoin(pmp, pexprLogicalChild, pexprPred, &pexprSimplified)) { // simplification succeeded, normalize resulting expression *ppexprResult = PexprNormalize(pmp, pexprSimplified); pexprPred->Release(); pexprSimplified->Release(); return; } } if (FPushThruOuterChild(pexprLogicalChild)) { PushThruOuterChild(pmp, pexprLogicalChild, pexprPred, ppexprResult); } else { // logical child may not pass all predicates through, we need to collect // unpushable predicates, if any, into a top Select node DrgPexpr *pdrgpexprConjuncts = CPredicateUtils::PdrgpexprConjuncts(pmp, pexprPred); DrgPexpr *pdrgpexprRemaining = NULL; CExpression *pexpr = NULL; PushThru(pmp, pexprLogicalChild, pdrgpexprConjuncts, &pexpr, &pdrgpexprRemaining); *ppexprResult = PexprSelect(pmp, pexpr, pdrgpexprRemaining); pdrgpexprConjuncts->Release(); } pexprPred->Release(); }
// test cardinality for predicates of the form: a + 1 = b // for such predicates, NDV based cardinality estimation is applicable GPOS_RESULT CJoinCardinalityNDVBasedEqPredTest::EresUnittest_NDVEqCardEstimation() { CDouble dRowsExpected(10000); // the minimum cardinality is min(NDV a, NDV b) Fixture f(file_name); IMemoryPool *mp = f.Pmp(); IStatisticsArray *statistics_array = f.PdrgPstat(); CExpression *pexprLgGet = CTestUtils::PexprLogicalGet(mp); CLogicalGet *popGet = CLogicalGet::PopConvert(pexprLgGet->Pop()); CColRefArray *colref_array = popGet->PdrgpcrOutput(); // use the colid available in the input xml file CColRef *pcrLeft = (*colref_array)[2]; CColRef *pcrRight = (*colref_array)[0]; // create a scalar ident // CScalarIdent "column_0000" (0) CExpression *pexprScalarIdentRight = CUtils::PexprScalarIdent(mp, pcrRight); // create a scalar op expression column_0002 + 10 // CScalarOp (+) // |--CScalarIdent "column_0002" (2) // +--CScalarConst (10) CExpression *pexprScConst = CUtils::PexprScalarConstInt4(mp, 10 /* val */); CExpression *pexprScOp = CUtils::PexprScalarOp(mp, pcrLeft, pexprScConst, CWStringConst(GPOS_WSZ_LIT("+")), GPOS_NEW(mp) CMDIdGPDB(GPDB_INT4_ADD_OP)); // create a scalar comparision operator // +--CScalarCmp (=) // |--CScalarOp (+) // | |--CScalarIdent "column_0002" (2) // | +--CScalarConst (10) // +--CScalarIdent "column_0000" (0) CExpression *pScalarCmp = CUtils::PexprScalarEqCmp(mp, pexprScOp, pexprScalarIdentRight); IStatistics *join_stats = CJoinStatsProcessor::CalcAllJoinStats(mp, statistics_array, pScalarCmp, IStatistics::EsjtInnerJoin); GPOS_ASSERT(NULL != join_stats); CDouble dRowsActual(join_stats->Rows()); GPOS_RESULT eres = GPOS_OK; if (std::floor(dRowsActual.Get()) != dRowsExpected) { eres = GPOS_FAILED; } join_stats->Release(); pexprLgGet->Release(); pScalarCmp->Release(); return eres; }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdrgpulMap // // @doc: // Map given array of scalar identifier expressions to positions of // UnionAll input columns in the given child; // the function returns NULL if no mapping could be constructed // //--------------------------------------------------------------------------- ULongPtrArray * CPhysicalUnionAll::PdrgpulMap ( IMemoryPool *mp, CExpressionArray *pdrgpexpr, ULONG child_index ) const { GPOS_ASSERT(NULL != pdrgpexpr); CColRefArray *colref_array = (*PdrgpdrgpcrInput())[child_index]; const ULONG ulExprs = pdrgpexpr->Size(); const ULONG num_cols = colref_array->Size(); ULongPtrArray *pdrgpul = GPOS_NEW(mp) ULongPtrArray(mp); for (ULONG ulExpr = 0; ulExpr < ulExprs; ulExpr++) { CExpression *pexpr = (*pdrgpexpr)[ulExpr]; if (COperator::EopScalarIdent != pexpr->Pop()->Eopid()) { continue; } const CColRef *colref = CScalarIdent::PopConvert(pexpr->Pop())->Pcr(); for (ULONG ulCol = 0; ulCol < num_cols; ulCol++) { if ((*colref_array)[ulCol] == colref) { pdrgpul->Append(GPOS_NEW(mp) ULONG(ulCol)); } } } if (0 == pdrgpul->Size()) { // mapping failed pdrgpul->Release(); pdrgpul = NULL; } return pdrgpul; }
//--------------------------------------------------------------------------- // @function: // CPhysicalUnionAll::PdrgpulMap // // @doc: // Map given array of scalar identifier expressions to positions of // UnionAll input columns in the given child; // the function returns NULL if no mapping could be constructed // //--------------------------------------------------------------------------- DrgPul * CPhysicalUnionAll::PdrgpulMap ( IMemoryPool *pmp, DrgPexpr *pdrgpexpr, ULONG ulChildIndex ) const { GPOS_ASSERT(NULL != pdrgpexpr); DrgPcr *pdrgpcr = (*m_pdrgpdrgpcrInput)[ulChildIndex]; const ULONG ulExprs = pdrgpexpr->UlLength(); const ULONG ulCols = pdrgpcr->UlLength(); DrgPul *pdrgpul = GPOS_NEW(pmp) DrgPul(pmp); for (ULONG ulExpr = 0; ulExpr < ulExprs; ulExpr++) { CExpression *pexpr = (*pdrgpexpr)[ulExpr]; if (COperator::EopScalarIdent != pexpr->Pop()->Eopid()) { continue; } const CColRef *pcr = CScalarIdent::PopConvert(pexpr->Pop())->Pcr(); for (ULONG ulCol = 0; ulCol < ulCols; ulCol++) { if ((*pdrgpcr)[ulCol] == pcr) { pdrgpul->Append(GPOS_NEW(pmp) ULONG(ulCol)); } } } if (0 == pdrgpul->UlLength()) { // mapping failed pdrgpul->Release(); pdrgpul = NULL; } return pdrgpul; }
//--------------------------------------------------------------------------- // @function: // CXformInnerJoinWithInnerSelect2IndexGetApply::Transform // // @doc: // Actual transformation // //--------------------------------------------------------------------------- void CXformInnerJoinWithInnerSelect2IndexGetApply::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); // extract components CExpression *pexprOuter = (*pexpr)[0]; CExpression *pexprInner = (*pexpr)[1]; CExpression *pexprScalar = (*pexpr)[2]; GPOS_ASSERT(COperator::EopLogicalSelect == pexprInner->Pop()->Eopid()); CExpression *pexprGet = (*pexprInner)[0]; GPOS_ASSERT(COperator::EopLogicalGet == pexprGet->Pop()->Eopid()); CTableDescriptor *ptabdescInner = CLogicalGet::PopConvert (pexprGet->Pop ())->Ptabdesc(); CExpression *pexprAllPredicates = CPredicateUtils::PexprConjunction(pmp, pexprScalar, (*pexprInner)[1]); CreateHomogeneousIndexApplyAlternatives ( pmp, pexpr->Pop()->UlOpId(), pexprOuter, pexprGet, pexprAllPredicates, ptabdescInner, NULL, // popDynamicGet pxfres, IMDIndex::EmdindBtree ); pexprAllPredicates->Release(); }
//--------------------------------------------------------------------------- // @function: // CCNFConverter::PexprNot2CNF // // @doc: // Convert a NOT tree into CNF // //--------------------------------------------------------------------------- CExpression * CCNFConverter::PexprNot2CNF ( IMemoryPool *pmp, CExpression *pexpr ) { GPOS_ASSERT(NULL != pmp); GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(1 == pexpr->UlArity()); CExpression *pexprNotChild = (*pexpr)[0]; if (!FScalarBoolOp(pexprNotChild)) { pexpr->AddRef(); return pexpr; } CScalarBoolOp::EBoolOperator eboolopChild = CScalarBoolOp::PopConvert(pexprNotChild->Pop())->Eboolop(); // apply DeMorgan laws // NOT(NOT(A)) ==> A if (CScalarBoolOp::EboolopNot == eboolopChild) { return Pexpr2CNF(pmp, (*pexprNotChild)[0]); } // Not child must be either an AND or an OR // NOT(A AND B) ==> NOT(A) OR NOT(B) // NOT(A OR B) ==> NOT(A) AND NOT(B) DrgPexpr *pdrgpexpr = GPOS_NEW(pmp) DrgPexpr(pmp); const ULONG ulArity = pexprNotChild->UlArity(); for (ULONG ul = 0; ul < ulArity; ul++) { (*pexprNotChild)[ul]->AddRef(); pdrgpexpr->Append(CUtils::PexprNegate(pmp, (*pexprNotChild)[ul])); } CScalarBoolOp::EBoolOperator eboolop = CScalarBoolOp::EboolopAnd; if (CScalarBoolOp::EboolopAnd == eboolopChild) { eboolop = CScalarBoolOp::EboolopOr; } CExpression *pexprScalarBoolOp = CUtils::PexprScalarBoolOp(pmp, eboolop, pdrgpexpr); CExpression *pexprResult = Pexpr2CNF(pmp, pexprScalarBoolOp); pexprScalarBoolOp->Release(); return pexprResult; }
//--------------------------------------------------------------------------- // @function: // CXformPushDownLeftOuterJoin::Exfp // // @doc: // Xform promise // //--------------------------------------------------------------------------- CXform::EXformPromise CXformPushDownLeftOuterJoin::Exfp ( CExpressionHandle &exprhdl ) const { CExpression *pexprScalar = exprhdl.PexprScalarChild(2); if (COperator::EopScalarConst == pexprScalar->Pop()->Eopid()) { return CXform::ExfpNone; } return CXform::ExfpHigh; }
//--------------------------------------------------------------------------- // @function: // CExpressionHandle::DeriveProducerStats // // @doc: // If the child (ulChildIndex) is a CTE consumer, then derive is corresponding // producer statistics. // //--------------------------------------------------------------------------- void CExpressionHandle::DeriveProducerStats ( ULONG ulChildIndex, CColRefSet *pcrsStats ) { // check to see if there are any CTE consumers in the group whose properties have // to be pushed to its corresponding CTE producer CGroupExpression *pgexpr = Pgexpr(); if (NULL != pgexpr) { CGroup *pgroupChild = (*pgexpr)[ulChildIndex]; if (pgroupChild->FHasAnyCTEConsumer()) { CGroupExpression *pgexprCTEConsumer = pgroupChild->PgexprAnyCTEConsumer(); CLogicalCTEConsumer *popConsumer = CLogicalCTEConsumer::PopConvert(pgexprCTEConsumer->Pop()); COptCtxt::PoctxtFromTLS()->Pcteinfo()->DeriveProducerStats(popConsumer, pcrsStats); } return; } // statistics are also derived on expressions representing the producer that may have // multiple CTE consumers. We should ensure that their properties are to pushed to their // corresponding CTE producer CExpression *pexpr = Pexpr(); if (NULL != pexpr) { CExpression *pexprChild = (*pexpr)[ulChildIndex]; if (COperator::EopLogicalCTEConsumer == pexprChild->Pop()->Eopid()) { CLogicalCTEConsumer *popConsumer = CLogicalCTEConsumer::PopConvert(pexprChild->Pop()); COptCtxt::PoctxtFromTLS()->Pcteinfo()->DeriveProducerStats(popConsumer, pcrsStats); } } }
//--------------------------------------------------------------------------- // @function: // CPartitionPropagationSpec::PdrgpexprPredicatesOnKey // // @doc: // Returns an array of predicates on the given partitioning key given // an array of predicates on all keys // //--------------------------------------------------------------------------- DrgPexpr * CPartitionPropagationSpec::PdrgpexprPredicatesOnKey ( IMemoryPool *pmp, DrgPexpr *pdrgpexpr, CColRef *pcr, CColRefSet *pcrsKeys, CBitSet **ppbs ) { GPOS_ASSERT(NULL != pdrgpexpr); GPOS_ASSERT(NULL != pcr); GPOS_ASSERT(NULL != ppbs); GPOS_ASSERT(NULL != *ppbs); DrgPexpr *pdrgpexprResult = GPOS_NEW(pmp) DrgPexpr(pmp); const ULONG ulLen = pdrgpexpr->UlLength(); for (ULONG ul = 0; ul < ulLen; ul++) { if ((*ppbs)->FBit(ul)) { // this expression has already been added for another column continue; } CExpression *pexpr = (*pdrgpexpr)[ul]; GPOS_ASSERT(pexpr->Pop()->FScalar()); CColRefSet *pcrsUsed = CDrvdPropScalar::Pdpscalar(pexpr->PdpDerive())->PcrsUsed(); CColRefSet *pcrsUsedKeys = GPOS_NEW(pmp) CColRefSet(pmp, *pcrsUsed); pcrsUsedKeys->Intersection(pcrsKeys); if (1 == pcrsUsedKeys->CElements() && pcrsUsedKeys->FMember(pcr)) { pexpr->AddRef(); pdrgpexprResult->Append(pexpr); (*ppbs)->FExchangeSet(ul); } pcrsUsedKeys->Release(); } return pdrgpexprResult; }
//--------------------------------------------------------------------------- // @function: // CPartitionPropagationSpec::PdrgpexprPredicatesOnKey // // @doc: // Returns an array of predicates on the given partitioning key given // an array of predicates on all keys // //--------------------------------------------------------------------------- CExpressionArray * CPartitionPropagationSpec::PdrgpexprPredicatesOnKey ( IMemoryPool *mp, CExpressionArray *pdrgpexpr, CColRef *colref, CColRefSet *pcrsKeys, CBitSet **ppbs ) { GPOS_ASSERT(NULL != pdrgpexpr); GPOS_ASSERT(NULL != colref); GPOS_ASSERT(NULL != ppbs); GPOS_ASSERT(NULL != *ppbs); CExpressionArray *pdrgpexprResult = GPOS_NEW(mp) CExpressionArray(mp); const ULONG length = pdrgpexpr->Size(); for (ULONG ul = 0; ul < length; ul++) { if ((*ppbs)->Get(ul)) { // this expression has already been added for another column continue; } CExpression *pexpr = (*pdrgpexpr)[ul]; GPOS_ASSERT(pexpr->Pop()->FScalar()); CColRefSet *pcrsUsed = CDrvdPropScalar::GetDrvdScalarProps(pexpr->PdpDerive())->PcrsUsed(); CColRefSet *pcrsUsedKeys = GPOS_NEW(mp) CColRefSet(mp, *pcrsUsed); pcrsUsedKeys->Intersection(pcrsKeys); if (1 == pcrsUsedKeys->Size() && pcrsUsedKeys->FMember(colref)) { pexpr->AddRef(); pdrgpexprResult->Append(pexpr); (*ppbs)->ExchangeSet(ul); } pcrsUsedKeys->Release(); } return pdrgpexprResult; }
//--------------------------------------------------------------------------- // @function: // CStatsPredUtils::PstatspredBoolean // // @doc: // Extract statistics filtering information from boolean predicate // in the form of scalar id or negated scalar id //--------------------------------------------------------------------------- CStatsPred * CStatsPredUtils::PstatspredBoolean ( IMemoryPool *pmp, CExpression *pexprPred, CColRefSet * //pcrsOuterRefs ) { GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(CPredicateUtils::FBooleanScalarIdent(pexprPred) || CPredicateUtils::FNegatedBooleanScalarIdent(pexprPred)); COperator *pop = pexprPred->Pop(); CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda(); IDatum *pdatum = NULL; ULONG ulColId = ULONG_MAX; if (CPredicateUtils::FBooleanScalarIdent(pexprPred)) { CScalarIdent *popScIdent = CScalarIdent::PopConvert(pop); pdatum = pmda->PtMDType<IMDTypeBool>()->PdatumBool(pmp, true /* fValue */, false /* fNull */); ulColId = popScIdent->Pcr()->UlId(); } else { CExpression *pexprChild = (*pexprPred)[0]; pdatum = pmda->PtMDType<IMDTypeBool>()->PdatumBool(pmp, false /* fValue */, false /* fNull */); ulColId = CScalarIdent::PopConvert(pexprChild->Pop())->Pcr()->UlId(); } if (!pdatum->FStatsComparable(pdatum)) { // stats calculations on such datums unsupported pdatum->Release(); return GPOS_NEW(pmp) CStatsPredUnsupported(ulColId, CStatsPred::EstatscmptEq); } GPOS_ASSERT(NULL != pdatum && ULONG_MAX != ulColId); return GPOS_NEW(pmp) CStatsPredPoint(ulColId, CStatsPred::EstatscmptEq, GPOS_NEW(pmp) CPoint(pdatum)); }
//--------------------------------------------------------------------------- // @function: // CLogicalLimit::Maxcard // // @doc: // Derive max card // //--------------------------------------------------------------------------- CMaxCard CLogicalLimit::Maxcard ( IMemoryPool *, // pmp CExpressionHandle &exprhdl ) const { CExpression *pexprCount = exprhdl.PexprScalarChild(2 /*ulChildIndex*/); if (CUtils::FScalarConstInt<IMDTypeInt8>(pexprCount)) { CScalarConst *popScalarConst = CScalarConst::PopConvert(pexprCount->Pop()); IDatumInt8 *pdatumInt8 = dynamic_cast<IDatumInt8 *>(popScalarConst->Pdatum()); return CMaxCard(pdatumInt8->LValue()); } // pass on max card of first child return exprhdl.Pdprel(0)->Maxcard(); }
//--------------------------------------------------------------------------- // @function: // CDistributionSpecHashed::PdshashedExcludeColumns // // @doc: // Return a copy of the distribution spec after excluding the given // columns, return NULL if all distribution expressions are excluded // //--------------------------------------------------------------------------- CDistributionSpecHashed * CDistributionSpecHashed::PdshashedExcludeColumns ( IMemoryPool *pmp, CColRefSet *pcrs ) { GPOS_ASSERT(NULL != pcrs); DrgPexpr *pdrgpexprNew = GPOS_NEW(pmp) DrgPexpr(pmp); const ULONG ulExprs = m_pdrgpexpr->UlLength(); for (ULONG ul = 0; ul < ulExprs; ul++) { CExpression *pexpr = (*m_pdrgpexpr)[ul]; COperator *pop = pexpr->Pop(); if (COperator::EopScalarIdent == pop->Eopid()) { // we only care here about column identifiers, // any more complicated expressions are copied to output const CColRef *pcr = CScalarIdent::PopConvert(pop)->Pcr(); if (pcrs->FMember(pcr)) { continue; } } pexpr->AddRef(); pdrgpexprNew->Append(pexpr); } if (0 == pdrgpexprNew->UlLength()) { pdrgpexprNew->Release(); return NULL; } return GPOS_NEW(pmp) CDistributionSpecHashed(pdrgpexprNew, m_fNullsColocated); }
//--------------------------------------------------------------------------- // @function: // CXformSubqueryUnnest::PexprSubqueryUnnest // // @doc: // Helper for unnesting subquery under a given context // //--------------------------------------------------------------------------- CExpression * CXformSubqueryUnnest::PexprSubqueryUnnest ( IMemoryPool *pmp, CExpression *pexpr, BOOL fEnforceCorrelatedApply ) { GPOS_ASSERT(NULL != pexpr); if (GPOS_FTRACE(EopttraceEnforceCorrelatedExecution) && !fEnforceCorrelatedApply) { // if correlated execution is enforced, we cannot generate an expression // that does not use correlated Apply return NULL; } // extract components CExpression *pexprOuter = (*pexpr)[0]; CExpression *pexprScalar = (*pexpr)[1]; // we add-ref the logical child since the resulting expression must re-use it pexprOuter->AddRef(); CExpression *pexprNewOuter = NULL; CExpression *pexprResidualScalar = NULL; CSubqueryHandler::ESubqueryCtxt esqctxt = CSubqueryHandler::EsqctxtFilter; if (COperator::EopScalarProjectList == pexprScalar->Pop()->Eopid()) { esqctxt = CSubqueryHandler::EsqctxtValue; } // calling the handler removes subqueries and sets new logical and scalar expressions CSubqueryHandler sh(pmp, fEnforceCorrelatedApply); if (!CSubqueryHandler::FProcess ( sh, pexprOuter, pexprScalar, false /* fDisjunctionOrNegation */, esqctxt, &pexprNewOuter, &pexprResidualScalar ) ) { CRefCount::SafeRelease(pexprNewOuter); CRefCount::SafeRelease(pexprResidualScalar); return NULL; } // create a new alternative using the new logical and scalar expressions CExpression *pexprResult = NULL; if (CSubqueryHandler::EsqctxtFilter == esqctxt) { pexprResult = CUtils::PexprLogicalSelect(pmp, pexprNewOuter, pexprResidualScalar); } else { GPOS_ASSERT(CSubqueryHandler::EsqctxtValue == esqctxt); CLogicalSequenceProject *popSeqPrj = NULL; CLogicalGbAgg *popGbAgg = NULL; COperator::EOperatorId eopid = pexpr->Pop()->Eopid(); switch (eopid) { case COperator::EopLogicalProject: pexprResult = CUtils::PexprLogicalProject(pmp, pexprNewOuter, pexprResidualScalar, false /*fNewComputedCol*/); break; case COperator::EopLogicalGbAgg: popGbAgg = CLogicalGbAgg::PopConvert(pexpr->Pop()); popGbAgg->Pdrgpcr()->AddRef(); pexprResult = CUtils::PexprLogicalGbAgg(pmp, popGbAgg->Pdrgpcr(), pexprNewOuter, pexprResidualScalar, popGbAgg->Egbaggtype()); break; case COperator::EopLogicalSequenceProject: popSeqPrj = CLogicalSequenceProject::PopConvert(pexpr->Pop()); popSeqPrj->Pds()->AddRef(); popSeqPrj->Pdrgpos()->AddRef(); popSeqPrj->Pdrgpwf()->AddRef(); pexprResult = CUtils::PexprLogicalSequenceProject(pmp, popSeqPrj->Pds(), popSeqPrj->Pdrgpos(), popSeqPrj->Pdrgpwf(), pexprNewOuter, pexprResidualScalar); break; default: GPOS_ASSERT(!"Unnesting subqueries for an invalid operator"); break; } } // normalize resulting expression CExpression *pexprNormalized = CNormalizer::PexprNormalize(pmp, pexprResult); pexprResult->Release(); // pull up projections CExpression *pexprPullUpProjections = CNormalizer::PexprPullUpProjections(pmp, pexprNormalized); pexprNormalized->Release(); return pexprPullUpProjections; }
//--------------------------------------------------------------------------- // @function: // CStatsPredUtils::ProcessArrayCmp // // @doc: // Extract statistics filtering information from scalar array comparison //--------------------------------------------------------------------------- void CStatsPredUtils::ProcessArrayCmp ( IMemoryPool *pmp, CExpression *pexprPred, DrgPstatspred *pdrgpstatspred ) { GPOS_ASSERT(NULL != pdrgpstatspred); GPOS_ASSERT(NULL != pexprPred); GPOS_ASSERT(2 == pexprPred->UlArity()); CScalarArrayCmp *popScArrayCmp = CScalarArrayCmp::PopConvert(pexprPred->Pop()); CExpression *pexprLeft = (*pexprPred)[0]; CExpression *pexprRight = (*pexprPred)[1]; BOOL fCompareToConst = ((COperator::EopScalarIdent == pexprLeft->Pop()->Eopid()) && (COperator::EopScalarArray == pexprRight->Pop()->Eopid())); if (!fCompareToConst) { // unsupported predicate for stats calculations pdrgpstatspred->Append(GPOS_NEW(pmp) CStatsPredUnsupported(ULONG_MAX, CStatsPred::EstatscmptOther)); return; } BOOL fAny = (CScalarArrayCmp::EarrcmpAny == popScArrayCmp->Earrcmpt()); DrgPstatspred *pdrgpstatspredChild = pdrgpstatspred; if (fAny) { pdrgpstatspredChild = GPOS_NEW(pmp) DrgPstatspred(pmp); } const ULONG ulConstants = pexprRight->UlArity(); // comparison semantics for statistics purposes is looser than regular comparison. CStatsPred::EStatsCmpType escmpt = Estatscmptype(popScArrayCmp->PmdidOp()); CScalarIdent *popScalarIdent = CScalarIdent::PopConvert(pexprLeft->Pop()); const CColRef *pcr = popScalarIdent->Pcr(); if (!CHistogram::FSupportsFilter(escmpt)) { // unsupported predicate for stats calculations pdrgpstatspred->Append(GPOS_NEW(pmp) CStatsPredUnsupported(pcr->UlId(), escmpt)); return; } for (ULONG ul = 0; ul < ulConstants; ul++) { CExpression *pexprConst = (*pexprRight)[ul]; if (COperator::EopScalarConst == pexprConst->Pop()->Eopid()) { CScalarConst *popScalarConst = CScalarConst::PopConvert(pexprConst->Pop()); IDatum *pdatumLiteral = popScalarConst->Pdatum(); CStatsPred *pstatspredChild = NULL; if (!pdatumLiteral->FStatsComparable(pdatumLiteral)) { // stats calculations on such datums unsupported pstatspredChild = GPOS_NEW(pmp) CStatsPredUnsupported(pcr->UlId(), escmpt); } else { pstatspredChild = GPOS_NEW(pmp) CStatsPredPoint(pmp, pcr, escmpt, pdatumLiteral); } pdrgpstatspredChild->Append(pstatspredChild); } } if (fAny) { CStatsPredDisj *pstatspredOr = GPOS_NEW(pmp) CStatsPredDisj(pdrgpstatspredChild); pdrgpstatspred->Append(pstatspredOr); } }
//--------------------------------------------------------------------------- // @function: // CXformInnerApplyWithOuterKey2InnerJoin::Transform // // @doc: // Actual transformation // //--------------------------------------------------------------------------- void CXformInnerApplyWithOuterKey2InnerJoin::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); // extract components CExpression *pexprOuter = (*pexpr)[0]; CExpression *pexprGb = (*pexpr)[1]; CExpression *pexprScalar = (*pexpr)[2]; if (0 < CLogicalGbAgg::PopConvert(pexprGb->Pop())->Pdrgpcr()->UlLength()) { // xform is not applicable if inner Gb has grouping columns return; } if (CUtils::FHasSubqueryOrApply((*pexprGb)[0])) { // Subquery/Apply must be unnested before reaching here return; } // decorrelate Gb's relational child (*pexprGb)[0]->ResetDerivedProperties(); CExpression *pexprInner = NULL; DrgPexpr *pdrgpexpr = GPOS_NEW(pmp) DrgPexpr(pmp); if (!CDecorrelator::FProcess(pmp, (*pexprGb)[0], false /*fEqualityOnly*/, &pexprInner, pdrgpexpr)) { pdrgpexpr->Release(); return; } GPOS_ASSERT(NULL != pexprInner); CExpression *pexprPredicate = CPredicateUtils::PexprConjunction(pmp, pdrgpexpr); // join outer child with Gb's decorrelated child pexprOuter->AddRef(); CExpression *pexprInnerJoin = GPOS_NEW(pmp) CExpression ( pmp, GPOS_NEW(pmp) CLogicalInnerJoin(pmp), pexprOuter, pexprInner, pexprPredicate ); // create grouping columns from the output of outer child DrgPcr *pdrgpcrKey = NULL; DrgPcr *pdrgpcr = CUtils::PdrgpcrGroupingKey(pmp, pexprOuter, &pdrgpcrKey); pdrgpcrKey->Release(); // key is not used here CLogicalGbAgg *popGbAgg = GPOS_NEW(pmp) CLogicalGbAgg(pmp, pdrgpcr, COperator::EgbaggtypeGlobal /*egbaggtype*/); CExpression *pexprPrjList = (*pexprGb)[1]; pexprPrjList->AddRef(); CExpression *pexprNewGb = GPOS_NEW(pmp) CExpression (pmp, popGbAgg, pexprInnerJoin, pexprPrjList); // add Apply predicate in a top Select node pexprScalar->AddRef(); CExpression *pexprSelect = CUtils::PexprLogicalSelect(pmp, pexprNewGb, pexprScalar); pxfres->Add(pexprSelect); }
//--------------------------------------------------------------------------- // @function: // CXformSelect2PartialDynamicIndexGet::Transform // // @doc: // Actual transformation // //--------------------------------------------------------------------------- void CXformSelect2PartialDynamicIndexGet::Transform ( CXformContext *pxfctxt, CXformResult *pxfres, CExpression *pexpr ) const { GPOS_ASSERT(NULL != pxfctxt); GPOS_ASSERT(FPromising(pxfctxt->Pmp(), this, pexpr)); GPOS_ASSERT(FCheckPattern(pexpr)); IMemoryPool *pmp = pxfctxt->Pmp(); // extract components CExpression *pexprRelational = (*pexpr)[0]; CExpression *pexprScalar = (*pexpr)[1]; // get the indexes on this relation CLogicalDynamicGet *popGet = CLogicalDynamicGet::PopConvert(pexprRelational->Pop()); if (popGet->FPartial()) { // already a partial dynamic get; do not try to split further return; } CTableDescriptor *ptabdesc = popGet->Ptabdesc(); CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda(); const IMDRelation *pmdrel = pmda->Pmdrel(ptabdesc->Pmdid()); const ULONG ulIndices = pmdrel->UlIndices(); if (0 == ulIndices) { // no indexes on the table return; } // array of expressions in the scalar expression DrgPexpr *pdrgpexpr = CPredicateUtils::PdrgpexprConjuncts(pmp, pexprScalar); GPOS_ASSERT(0 < pdrgpexpr->UlLength()); // derive the scalar and relational properties to build set of required columns CColRefSet *pcrsOutput = CDrvdPropRelational::Pdprel(pexpr->PdpDerive())->PcrsOutput(); CColRefSet *pcrsScalarExpr = CDrvdPropScalar::Pdpscalar(pexprScalar->PdpDerive())->PcrsUsed(); CColRefSet *pcrsReqd = GPOS_NEW(pmp) CColRefSet(pmp); pcrsReqd->Include(pcrsOutput); pcrsReqd->Include(pcrsScalarExpr); CPartConstraint *ppartcnstr = popGet->Ppartcnstr(); ppartcnstr->AddRef(); // find a candidate set of partial index combinations DrgPdrgPpartdig *pdrgpdrgppartdig = CXformUtils::PdrgpdrgppartdigCandidates ( pmp, pmda, pdrgpexpr, popGet->PdrgpdrgpcrPart(), pmdrel, ppartcnstr, popGet->PdrgpcrOutput(), pcrsReqd, pcrsScalarExpr, NULL // pcrsAcceptedOuterRefs ); // construct alternative partial index scan plans const ULONG ulCandidates = pdrgpdrgppartdig->UlLength(); for (ULONG ul = 0; ul < ulCandidates; ul++) { DrgPpartdig *pdrgppartdig = (*pdrgpdrgppartdig)[ul]; CreatePartialIndexGetPlan(pmp, pexpr, pdrgppartdig, pmdrel, pxfres); } ppartcnstr->Release(); pcrsReqd->Release(); pdrgpexpr->Release(); pdrgpdrgppartdig->Release(); }
//--------------------------------------------------------------------------- // @function: // CXformSelect2PartialDynamicIndexGet::CreatePartialIndexGetPlan // // @doc: // Create a plan as a union of the given partial index get candidates and // possibly a dynamic table scan // //--------------------------------------------------------------------------- void CXformSelect2PartialDynamicIndexGet::CreatePartialIndexGetPlan ( IMemoryPool *pmp, CExpression *pexpr, DrgPpartdig *pdrgppartdig, const IMDRelation *pmdrel, CXformResult *pxfres ) const { CExpression *pexprRelational = (*pexpr)[0]; CExpression *pexprScalar = (*pexpr)[1]; CLogicalDynamicGet *popGet = CLogicalDynamicGet::PopConvert(pexprRelational->Pop()); DrgPcr *pdrgpcrGet = popGet->PdrgpcrOutput(); const ULONG ulPartialIndexes = pdrgppartdig->UlLength(); DrgDrgPcr *pdrgpdrgpcrInput = GPOS_NEW(pmp) DrgDrgPcr(pmp); DrgPexpr *pdrgpexprInput = GPOS_NEW(pmp) DrgPexpr(pmp); for (ULONG ul = 0; ul < ulPartialIndexes; ul++) { SPartDynamicIndexGetInfo *ppartdig = (*pdrgppartdig)[ul]; const IMDIndex *pmdindex = ppartdig->m_pmdindex; CPartConstraint *ppartcnstr = ppartdig->m_ppartcnstr; DrgPexpr *pdrgpexprIndex = ppartdig->m_pdrgpexprIndex; DrgPexpr *pdrgpexprResidual = ppartdig->m_pdrgpexprResidual; DrgPcr *pdrgpcrNew = pdrgpcrGet; if (0 < ul) { pdrgpcrNew = CUtils::PdrgpcrCopy(pmp, pdrgpcrGet); } else { pdrgpcrNew->AddRef(); } CExpression *pexprDynamicScan = NULL; if (NULL != pmdindex) { pexprDynamicScan = CXformUtils::PexprPartialDynamicIndexGet ( pmp, popGet, pexpr->Pop()->UlOpId(), pdrgpexprIndex, pdrgpexprResidual, pdrgpcrNew, pmdindex, pmdrel, ppartcnstr, NULL, // pcrsAcceptedOuterRefs NULL, // pdrgpcrOuter NULL // pdrgpcrNewOuter ); } else { pexprDynamicScan = PexprSelectOverDynamicGet ( pmp, popGet, pexprScalar, pdrgpcrNew, ppartcnstr ); } GPOS_ASSERT(NULL != pexprDynamicScan); pdrgpdrgpcrInput->Append(pdrgpcrNew); pdrgpexprInput->Append(pexprDynamicScan); } ULONG ulInput = pdrgpexprInput->UlLength(); if (0 < ulInput) { CExpression *pexprResult = NULL; if (1 < ulInput) { pdrgpcrGet->AddRef(); DrgPcr *pdrgpcrOuter = pdrgpcrGet; // construct a new union all operator pexprResult = GPOS_NEW(pmp) CExpression ( pmp, GPOS_NEW(pmp) CLogicalUnionAll(pmp, pdrgpcrOuter, pdrgpdrgpcrInput, popGet->UlScanId()), pdrgpexprInput ); } else { pexprResult = (*pdrgpexprInput)[0]; pexprResult->AddRef(); // clean up pdrgpexprInput->Release(); pdrgpdrgpcrInput->Release(); } // if scalar expression involves the partitioning key, keep a SELECT node // on top for the purposes of partition selection DrgDrgPcr *pdrgpdrgpcrPartKeys = popGet->PdrgpdrgpcrPart(); CExpression *pexprPredOnPartKey = CPredicateUtils::PexprExtractPredicatesOnPartKeys ( pmp, pexprScalar, pdrgpdrgpcrPartKeys, NULL, /*pcrsAllowedRefs*/ true /*fUseConstraints*/ ); if (NULL != pexprPredOnPartKey) { pexprResult = GPOS_NEW(pmp) CExpression(pmp, GPOS_NEW(pmp) CLogicalSelect(pmp), pexprResult, pexprPredOnPartKey); } pxfres->Add(pexprResult); return; } // clean up pdrgpdrgpcrInput->Release(); pdrgpexprInput->Release(); }
//--------------------------------------------------------------------------- // @function: // CXformSubqJoin2Apply::PexprSubqueryPushdown // // @doc: // Push down subquery below join // //--------------------------------------------------------------------------- CExpression * CXformSubqJoin2Apply::PexprSubqueryPushDown ( IMemoryPool *mp, CExpression *pexpr, BOOL fEnforceCorrelatedApply ) { GPOS_ASSERT(NULL != pexpr); GPOS_ASSERT(COperator::EopLogicalSelect == pexpr->Pop()->Eopid()); CExpression *pexprJoin = (*pexpr)[0]; const ULONG arity = pexprJoin->Arity(); CExpression *pexprScalar = (*pexpr)[1]; CExpression *join_pred_expr = (*pexprJoin)[arity - 1]; // collect output columns of all logical children CColRefSetArray *pdrgpcrs = GPOS_NEW(mp) CColRefSetArray(mp); CExpressionArrays *pdrgpdrgpexprSubqs = GPOS_NEW(mp) CExpressionArrays(mp); for (ULONG ul = 0; ul < arity - 1; ul++) { CExpression *pexprChild = (*pexprJoin)[ul]; CColRefSet *pcrsOutput = CDrvdPropRelational::GetRelationalProperties(pexprChild->PdpDerive())->PcrsOutput(); pcrsOutput->AddRef(); pdrgpcrs->Append(pcrsOutput); pdrgpdrgpexprSubqs->Append(GPOS_NEW(mp) CExpressionArray(mp)); } // collect subqueries that exclusively use columns from each join child CollectSubqueries(mp, pexprScalar, pdrgpcrs, pdrgpdrgpexprSubqs); // create new join children by pushing subqueries to Project nodes on top // of corresponding join children CExpressionArray *pdrgpexprNewChildren = GPOS_NEW(mp) CExpressionArray(mp); ExprToColRefMap *phmexprcr = GPOS_NEW(mp) ExprToColRefMap(mp); for (ULONG ulChild = 0; ulChild < arity - 1; ulChild++) { CExpression *pexprChild = (*pexprJoin)[ulChild]; pexprChild->AddRef(); CExpression *pexprNewChild = pexprChild; CExpressionArray *pdrgpexprSubqs = (*pdrgpdrgpexprSubqs)[ulChild]; const ULONG ulSubqs = pdrgpexprSubqs->Size(); if (0 < ulSubqs) { // join child has pushable subqueries pexprNewChild = CUtils::PexprAddProjection(mp, pexprChild, pdrgpexprSubqs); CExpression *pexprPrjList = (*pexprNewChild)[1]; // add pushed subqueries to map for (ULONG ulSubq = 0; ulSubq < ulSubqs; ulSubq++) { CExpression *pexprSubq = (*pdrgpexprSubqs)[ulSubq]; pexprSubq->AddRef(); CColRef *colref = CScalarProjectElement::PopConvert((*pexprPrjList)[ulSubq]->Pop())->Pcr(); #ifdef GPOS_DEBUG BOOL fInserted = #endif // GPOS_DEBUG phmexprcr->Insert(pexprSubq, colref); GPOS_ASSERT(fInserted); } // unnest subqueries in newly created child CExpression *pexprUnnested = PexprSubqueryUnnest(mp, pexprNewChild, fEnforceCorrelatedApply); if (NULL != pexprUnnested) { pexprNewChild->Release(); pexprNewChild = pexprUnnested; } } pdrgpexprNewChildren->Append(pexprNewChild); } join_pred_expr->AddRef(); pdrgpexprNewChildren->Append(join_pred_expr); // replace subqueries in the original scalar expression with // scalar identifiers based on constructed map CExpression *pexprNewScalar = PexprReplaceSubqueries(mp, pexprScalar, phmexprcr); phmexprcr->Release(); pdrgpcrs->Release(); pdrgpdrgpexprSubqs->Release(); // build the new join expression COperator *pop = pexprJoin->Pop(); pop->AddRef(); CExpression *pexprNewJoin = GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprNewChildren); // return a new Select expression pop = pexpr->Pop(); pop->AddRef(); return GPOS_NEW(mp) CExpression(mp, pop, pexprNewJoin, pexprNewScalar); }
//--------------------------------------------------------------------------- // @function: // CSubqueryHandlerTest::EresUnittest_Subquery2Apply // // @doc: // Test of subquery handler // //--------------------------------------------------------------------------- GPOS_RESULT CSubqueryHandlerTest::EresUnittest_Subquery2Apply() { CAutoMemoryPool amp; IMemoryPool *mp = amp.Pmp(); // setup a file-based provider CMDProviderMemory *pmdp = CTestUtils::m_pmdpf; pmdp->AddRef(); CMDAccessor mda(mp, CMDCache::Pcache(), CTestUtils::m_sysidDefault, pmdp); typedef CExpression *(*Pfpexpr)(IMemoryPool*, BOOL); Pfpexpr rgpf[] = { CSubqueryTestUtils::PexprSelectWithAggSubquery, CSubqueryTestUtils::PexprSelectWithAggSubqueryConstComparison, CSubqueryTestUtils::PexprProjectWithAggSubquery, CSubqueryTestUtils::PexprSelectWithAnySubquery, CSubqueryTestUtils::PexprProjectWithAnySubquery, CSubqueryTestUtils::PexprSelectWithAllSubquery, CSubqueryTestUtils::PexprProjectWithAllSubquery, CSubqueryTestUtils::PexprSelectWithExistsSubquery, CSubqueryTestUtils::PexprProjectWithExistsSubquery, CSubqueryTestUtils::PexprSelectWithNotExistsSubquery, CSubqueryTestUtils::PexprProjectWithNotExistsSubquery, CSubqueryTestUtils::PexprSelectWithNestedCmpSubquery, CSubqueryTestUtils::PexprSelectWithCmpSubqueries, CSubqueryTestUtils::PexprSelectWithSubqueryConjuncts, CSubqueryTestUtils::PexprProjectWithSubqueries, CSubqueryTestUtils::PexprSelectWith2LevelsCorrSubquery, CSubqueryTestUtils::PexprJoinWithAggSubquery, CSubqueryTestUtils::PexprSelectWithAggSubqueryOverJoin, CSubqueryTestUtils::PexprSelectWithNestedSubquery, CSubqueryTestUtils::PexprSubqueriesInNullTestContext, CSubqueryTestUtils::PexprSubqueriesInDifferentContexts, CSubqueryTestUtils::PexprSelectWithSubqueryDisjuncts, CSubqueryTestUtils::PexprSelectWithNestedAnySubqueries, CSubqueryTestUtils::PexprSelectWithNestedAllSubqueries, CSubqueryTestUtils::PexprUndecorrelatableAnySubquery, CSubqueryTestUtils::PexprUndecorrelatableAllSubquery, CSubqueryTestUtils::PexprUndecorrelatableExistsSubquery, CSubqueryTestUtils::PexprUndecorrelatableNotExistsSubquery, CSubqueryTestUtils::PexprUndecorrelatableScalarSubquery, }; // xforms to test CXformSet *xform_set = GPOS_NEW(mp) CXformSet(mp); (void) xform_set->ExchangeSet(CXform::ExfSubqJoin2Apply); (void) xform_set->ExchangeSet(CXform::ExfSelect2Apply); (void) xform_set->ExchangeSet(CXform::ExfProject2Apply); BOOL fCorrelated = true; // we generate two expressions using each generator const ULONG size = 2 * GPOS_ARRAY_SIZE(rgpf); for (ULONG ul = 0; ul < size; ul++) { ULONG ulIndex = ul / 2; // install opt context in TLS CAutoOptCtxt aoc ( mp, &mda, NULL, /* pceeval */ CTestUtils::GetCostModel(mp) ); // generate expression CExpression *pexpr = rgpf[ulIndex](mp, fCorrelated); // check for subq xforms CXformSet *pxfsCand = CLogical::PopConvert(pexpr->Pop())->PxfsCandidates(mp); pxfsCand->Intersection(xform_set); CXformSetIter xsi(*pxfsCand); while (xsi.Advance()) { CXform *pxform = CXformFactory::Pxff()->Pxf(xsi.TBit()); GPOS_ASSERT(NULL != pxform); CWStringDynamic str(mp); COstreamString oss(&str); oss << std::endl << "INPUT:" << std::endl << *pexpr << std::endl; CXformContext *pxfctxt = GPOS_NEW(mp) CXformContext(mp); CXformResult *pxfres = GPOS_NEW(mp) CXformResult(mp); // calling the xform to perform subquery to Apply transformation pxform->Transform(pxfctxt, pxfres, pexpr); CExpression *pexprResult = pxfres->PexprNext(); oss << std::endl << "OUTPUT:" << std::endl; if (NULL != pexprResult) { oss << *pexprResult << std::endl; } else { oss << "\tNo subquery unnesting output" << std::endl; } GPOS_TRACE(str.GetBuffer()); str.Reset(); pxfres->Release(); pxfctxt->Release(); } pxfsCand->Release(); pexpr->Release(); fCorrelated = !fCorrelated; } xform_set->Release(); return GPOS_OK; }