//--------------------------------------------------------------------------- // @function: // CLogical::PosFromIndex // // @doc: // Compute an order spec based on an index // //--------------------------------------------------------------------------- COrderSpec * CLogical::PosFromIndex ( IMemoryPool *pmp, const IMDIndex *pmdindex, DrgPcr *pdrgpcr ) { // compute the order spec COrderSpec *pos = GPOS_NEW(pmp) COrderSpec(pmp); const ULONG ulLenIncludeCols = pmdindex->UlKeys(); for (ULONG ul = 0; ul < ulLenIncludeCols; ul++) { ULONG ulPos = pmdindex->UlKey(ul); CColRef *pcr = (*pdrgpcr)[ulPos]; IMDId *pmdid = pcr->Pmdtype()->PmdidCmp(IMDType::EcmptL); pmdid->AddRef(); // TODO: March 27th 2012; we hard-code NULL treatment // need to revisit pos->Append(pmdid, pcr, COrderSpec::EntLast); } return pos; }
//--------------------------------------------------------------------------- // @function: // CPhysicalStreamAgg::PosCovering // // @doc: // Construct order spec on grouping column so that it covers required // order spec, the function returns NULL if no covering order spec // can be created // //--------------------------------------------------------------------------- COrderSpec * CPhysicalStreamAgg::PosCovering ( IMemoryPool *mp, COrderSpec *posRequired, CColRefArray *pdrgpcrGrp ) const { GPOS_ASSERT(NULL != posRequired); if (0 == posRequired->UlSortColumns()) { // required order must be non-empty return NULL; } // create a set of required sort columns CColRefSet *pcrsReqd = posRequired->PcrsUsed(mp); COrderSpec *pos = NULL; CColRefSet *pcrsGrpCols = GPOS_NEW(mp) CColRefSet(mp, pdrgpcrGrp); if (pcrsGrpCols->ContainsAll(pcrsReqd)) { // required order columns are included in grouping columns, we can // construct a covering order spec pos = GPOS_NEW(mp) COrderSpec(mp); // extract order expressions from required order const ULONG ulReqdSortCols = posRequired->UlSortColumns(); for (ULONG ul = 0; ul < ulReqdSortCols; ul++) { CColRef *colref = const_cast<CColRef *>(posRequired->Pcr(ul)); IMDId *mdid = posRequired->GetMdIdSortOp(ul); COrderSpec::ENullTreatment ent = posRequired->Ent(ul); mdid->AddRef(); pos->Append(mdid, colref, ent); } // augment order with remaining grouping columns const ULONG size = pdrgpcrGrp->Size(); for (ULONG ul = 0; ul < size; ul++) { CColRef *colref = (*pdrgpcrGrp)[ul]; if (!pcrsReqd->FMember(colref)) { IMDId *mdid = colref->RetrieveType()->GetMdidForCmpType(IMDType::EcmptL); mdid->AddRef(); pos->Append(mdid, colref, COrderSpec::EntLast); } } } pcrsGrpCols->Release(); pcrsReqd->Release(); return pos; }
//--------------------------------------------------------------------------- // @function: // CPhysical::PosDerivePassThruOuter // // @doc: // Helper for common case of sort order derivation // //--------------------------------------------------------------------------- COrderSpec * CPhysical::PosDerivePassThruOuter ( CExpressionHandle &exprhdl ) { COrderSpec *pos = exprhdl.Pdpplan(0 /*child_index*/)->Pos(); pos->AddRef(); return pos; }
//--------------------------------------------------------------------------- // @function: // CPhysicalDML::PosComputeRequired // // @doc: // Compute required sort order based on the key information in the table // descriptor: // 1. If a table has no keys, no sort order is necessary. // // 2. If a table has keys, but they are not modified in the update, no sort // order is necessary. This relies on the fact that Split always produces // Delete tuples before Insert tuples, so we cannot have two versions of the // same tuple on the same time. Consider for example tuple (A: 1, B: 2), where // A is key and an update "set B=B+1". Since there cannot be any other tuple // with A=1, and the tuple (1,2) is deleted before tuple (1,3) gets inserted, // we don't need to enforce specific order of deletes and inserts. // // 3. If the update changes a key column, enforce order on the Action column // to deliver Delete tuples before Insert tuples. This is done to avoid a // conflict between a newly inserted tuple and an old tuple that is about to be // deleted. Consider table with tuples (A: 1),(A: 2), where A is key, and // update "set A=A+1". Split will generate tuples (1,"D"), (2,"I"), (2,"D"), (3,"I"). // If (2,"I") happens before (2,"D") we will have a violation of the key constraint. // Therefore we need to enforce sort order on Action to get all old tuples // tuples deleted before the new ones are inserted. // //--------------------------------------------------------------------------- COrderSpec * CPhysicalDML::PosComputeRequired ( IMemoryPool *pmp, CTableDescriptor *ptabdesc ) { COrderSpec *pos = GPOS_NEW(pmp) COrderSpec(pmp); const DrgPbs *pdrgpbsKeys = ptabdesc->PdrgpbsKeys(); if (1 < pdrgpbsKeys->UlLength() && CLogicalDML::EdmlUpdate == m_edmlop) { // if this is an update on the target table's keys, enforce order on // the action column, see explanation in function's comment const ULONG ulKeySets = pdrgpbsKeys->UlLength(); BOOL fNeedsSort = false; for (ULONG ul = 0; ul < ulKeySets && !fNeedsSort; ul++) { CBitSet *pbs = (*pdrgpbsKeys)[ul]; if (!pbs->FDisjoint(m_pbsModified)) { fNeedsSort = true; break; } } if (fNeedsSort) { IMDId *pmdid = m_pcrAction->Pmdtype()->PmdidCmp(IMDType::EcmptL); pmdid->AddRef(); pos->Append(pmdid, m_pcrAction, COrderSpec::EntAuto); } } else if (m_ptabdesc->FPartitioned()) { COptimizerConfig *poconf = COptCtxt::PoctxtFromTLS()->Poconf(); BOOL fInsertSortOnParquet = FInsertSortOnParquet(); BOOL fInsertSortOnRows = FInsertSortOnRows(poconf); if (fInsertSortOnParquet || fInsertSortOnRows) { GPOS_ASSERT(CLogicalDML::EdmlInsert == m_edmlop); m_fInputSorted = true; // if this is an INSERT over a partitioned Parquet or Row-oriented table, // sort tuples by their table oid IMDId *pmdid = m_pcrTableOid->Pmdtype()->PmdidCmp(IMDType::EcmptL); pmdid->AddRef(); pos->Append(pmdid, m_pcrTableOid, COrderSpec::EntAuto); } } return pos; }
//--------------------------------------------------------------------------- // @function: // CPhysicalStreamAgg::PosRequiredStreamAgg // // @doc: // Compute required sort columns of the n-th child // //--------------------------------------------------------------------------- COrderSpec * CPhysicalStreamAgg::PosRequiredStreamAgg ( IMemoryPool *mp, CExpressionHandle &exprhdl, COrderSpec *posRequired, ULONG #ifdef GPOS_DEBUG child_index #endif // GPOS_DEBUG , CColRefArray *pdrgpcrGrp ) const { GPOS_ASSERT(0 == child_index); COrderSpec *pos = PosCovering(mp, posRequired, pdrgpcrGrp); if (NULL == pos) { // failed to find a covering order spec, use local order spec m_pos->AddRef(); pos = m_pos; } // extract sort columns from order spec CColRefSet *pcrs = pos->PcrsUsed(mp); // get key collection of the relational child CKeyCollection *pkc = exprhdl.GetRelationalProperties(0)->Pkc(); if (NULL != pkc && pkc->FKey(pcrs, false /*fExactMatch*/)) { CColRefSet *pcrsReqd = posRequired->PcrsUsed(m_mp); BOOL fUsesDefinedCols = FUnaryUsesDefinedColumns(pcrsReqd, exprhdl); pcrsReqd->Release(); if (!fUsesDefinedCols) { // we are grouping on child's key, // stream agg does not need to sort child and we can pass through input spec pos->Release(); posRequired->AddRef(); pos = posRequired; } } pcrs->Release(); return pos; }
//--------------------------------------------------------------------------- // @function: // CPhysicalSequence::PosDerive // // @doc: // Derive sort order // //--------------------------------------------------------------------------- COrderSpec * CPhysicalSequence::PosDerive ( IMemoryPool *, // pmp, CExpressionHandle &exprhdl ) const { // pass through sort order from last child const ULONG ulArity = exprhdl.UlArity(); GPOS_ASSERT(1 <= ulArity); COrderSpec *pos = exprhdl.Pdpplan(ulArity - 1 /*ulChildIndex*/)->Pos(); pos->AddRef(); return pos; }
//--------------------------------------------------------------------------- // @function: // CQueryContext::PqcGenerate // // @doc: // Generate the query context for the given expression and array of // output column ref ids // //--------------------------------------------------------------------------- CQueryContext * CQueryContext::PqcGenerate ( IMemoryPool *pmp, CExpression * pexpr, DrgPul *pdrgpulQueryOutputColRefId, DrgPmdname *pdrgpmdname, BOOL fDeriveStats ) { GPOS_ASSERT(NULL != pexpr && NULL != pdrgpulQueryOutputColRefId); CColRefSet *pcrs = GPOS_NEW(pmp) CColRefSet(pmp); DrgPcr *pdrgpcr = GPOS_NEW(pmp) DrgPcr(pmp); COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS(); CColumnFactory *pcf = poptctxt->Pcf(); GPOS_ASSERT(NULL != pcf); const ULONG ulLen = pdrgpulQueryOutputColRefId->UlLength(); for (ULONG ul = 0; ul < ulLen; ul++) { ULONG *pul = (*pdrgpulQueryOutputColRefId)[ul]; GPOS_ASSERT(NULL != pul); CColRef *pcr = pcf->PcrLookup(*pul); GPOS_ASSERT(NULL != pcr); pcrs->Include(pcr); pdrgpcr->Append(pcr); } COrderSpec *pos = NULL; CExpression *pexprResult = pexpr; COperator *popTop = PopTop(pexpr); if (COperator::EopLogicalLimit == popTop->Eopid()) { // top level operator is a limit, copy order spec to query context pos = CLogicalLimit::PopConvert(popTop)->Pos(); pos->AddRef(); } else { // no order required pos = GPOS_NEW(pmp) COrderSpec(pmp); } CDistributionSpec *pds = NULL; BOOL fDML = CUtils::FLogicalDML(pexpr->Pop()); poptctxt->MarkDMLQuery(fDML); if (fDML) { pds = GPOS_NEW(pmp) CDistributionSpecAny(COperator::EopSentinel); } else { pds = GPOS_NEW(pmp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } CRewindabilitySpec *prs = GPOS_NEW(pmp) CRewindabilitySpec(CRewindabilitySpec::ErtNone /*ert*/); CEnfdOrder *peo = GPOS_NEW(pmp) CEnfdOrder(pos, CEnfdOrder::EomSatisfy); // we require satisfy matching on distribution since final query results must be sent to master CEnfdDistribution *ped = GPOS_NEW(pmp) CEnfdDistribution(pds, CEnfdDistribution::EdmSatisfy); CEnfdRewindability *per = GPOS_NEW(pmp) CEnfdRewindability(prs, CEnfdRewindability::ErmSatisfy); CCTEReq *pcter = poptctxt->Pcteinfo()->PcterProducers(pmp); CReqdPropPlan *prpp = GPOS_NEW(pmp) CReqdPropPlan(pcrs, peo, ped, per, pcter); pdrgpmdname->AddRef(); return GPOS_NEW(pmp) CQueryContext(pmp, pexprResult, prpp, pdrgpcr, pdrgpmdname, fDeriveStats); }
//--------------------------------------------------------------------------- // @function: // CQueryContext::PqcGenerate // // @doc: // Generate the query context for the given expression and array of // output column ref ids // //--------------------------------------------------------------------------- CQueryContext * CQueryContext::PqcGenerate ( IMemoryPool *mp, CExpression * pexpr, ULongPtrArray *pdrgpulQueryOutputColRefId, CMDNameArray *pdrgpmdname, BOOL fDeriveStats ) { GPOS_ASSERT(NULL != pexpr && NULL != pdrgpulQueryOutputColRefId); CColRefSet *pcrs = GPOS_NEW(mp) CColRefSet(mp); CColRefArray *colref_array = GPOS_NEW(mp) CColRefArray(mp); COptCtxt *poptctxt = COptCtxt::PoctxtFromTLS(); CColumnFactory *col_factory = poptctxt->Pcf(); GPOS_ASSERT(NULL != col_factory); // Collect required column references (colref_array) const ULONG length = pdrgpulQueryOutputColRefId->Size(); for (ULONG ul = 0; ul < length; ul++) { ULONG *pul = (*pdrgpulQueryOutputColRefId)[ul]; GPOS_ASSERT(NULL != pul); CColRef *colref = col_factory->LookupColRef(*pul); GPOS_ASSERT(NULL != colref); pcrs->Include(colref); colref_array->Append(colref); } // Collect required properties (prpp) at the top level: // By default no sort order requirement is added, unless the root operator in // the input logical expression is a LIMIT. This is because Orca always // attaches top level Sort to a LIMIT node. COrderSpec *pos = NULL; CExpression *pexprResult = pexpr; COperator *popTop = PopTop(pexpr); if (COperator::EopLogicalLimit == popTop->Eopid()) { // top level operator is a limit, copy order spec to query context pos = CLogicalLimit::PopConvert(popTop)->Pos(); pos->AddRef(); } else { // no order required pos = GPOS_NEW(mp) COrderSpec(mp); } CDistributionSpec *pds = NULL; BOOL fDML = CUtils::FLogicalDML(pexpr->Pop()); poptctxt->MarkDMLQuery(fDML); // DML commands do not have distribution requirement. Otherwise the // distribution requirement is Singleton. if (fDML) { pds = GPOS_NEW(mp) CDistributionSpecAny(COperator::EopSentinel); } else { pds = GPOS_NEW(mp) CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster); } // By default, no rewindability requirement needs to be satisfied at the top level CRewindabilitySpec *prs = GPOS_NEW(mp) CRewindabilitySpec(CRewindabilitySpec::ErtNotRewindable, CRewindabilitySpec::EmhtNoMotion); // Ensure order, distribution and rewindability meet 'satisfy' matching at the top level CEnfdOrder *peo = GPOS_NEW(mp) CEnfdOrder(pos, CEnfdOrder::EomSatisfy); CEnfdDistribution *ped = GPOS_NEW(mp) CEnfdDistribution(pds, CEnfdDistribution::EdmSatisfy); CEnfdRewindability *per = GPOS_NEW(mp) CEnfdRewindability(prs, CEnfdRewindability::ErmSatisfy); // Required CTEs are obtained from the CTEInfo global information in the optimizer context CCTEReq *pcter = poptctxt->Pcteinfo()->PcterProducers(mp); // NB: Partition propagation requirements are not initialized here. They are // constructed later based on derived relation properties (CPartInfo) by // CReqdPropPlan::InitReqdPartitionPropagation(). CReqdPropPlan *prpp = GPOS_NEW(mp) CReqdPropPlan(pcrs, peo, ped, per, pcter); // Finally, create the CQueryContext pdrgpmdname->AddRef(); return GPOS_NEW(mp) CQueryContext(mp, pexprResult, prpp, colref_array, pdrgpmdname, fDeriveStats); }
//--------------------------------------------------------------------------- // @function: // CPhysicalSequenceProject::CreateOrderSpec // // @doc: // Create local order spec that we request relational child to satisfy // //--------------------------------------------------------------------------- void CPhysicalSequenceProject::CreateOrderSpec ( IMemoryPool *pmp ) { GPOS_ASSERT(NULL == m_pos); GPOS_ASSERT(NULL != m_pds); GPOS_ASSERT(NULL != m_pdrgpos); m_pos = GPOS_NEW(pmp) COrderSpec(pmp); // add partition by keys to order spec if (CDistributionSpec::EdtHashed == m_pds->Edt()) { CDistributionSpecHashed *pdshashed = CDistributionSpecHashed::PdsConvert(m_pds); const DrgPexpr *pdrgpexpr = pdshashed->Pdrgpexpr(); const ULONG ulSize = pdrgpexpr->UlLength(); for (ULONG ul = 0; ul < ulSize; ul++) { CExpression *pexpr = (*pdrgpexpr)[ul]; // we assume partition-by keys are always scalar idents CScalarIdent *popScId = CScalarIdent::PopConvert(pexpr->Pop()); const CColRef *pcr = popScId->Pcr(); gpmd::IMDId *pmdid = pcr->Pmdtype()->PmdidCmp(IMDType::EcmptL); pmdid->AddRef(); m_pos->Append(pmdid, pcr, COrderSpec::EntLast); } } if (0 == m_pdrgpos->UlLength()) { return; } COrderSpec *posFirst = (*m_pdrgpos)[0]; #ifdef GPOS_DEBUG const ULONG ulLength = m_pdrgpos->UlLength(); for (ULONG ul = 1; ul < ulLength; ul++) { COrderSpec *posCurrent = (*m_pdrgpos)[ul]; GPOS_ASSERT(posFirst->FSatisfies(posCurrent) && "first order spec must satisfy all other order specs"); } #endif // GPOS_DEBUG // we assume here that the first order spec in the children array satisfies all other // order specs in the array, this happens as part of the initial normalization // so we need to add columns only from the first order spec const ULONG ulSize = posFirst->UlSortColumns(); for (ULONG ul = 0; ul < ulSize; ul++) { const CColRef *pcr = posFirst->Pcr(ul); gpmd::IMDId *pmdid = posFirst->PmdidSortOp(ul); pmdid->AddRef(); COrderSpec::ENullTreatment ent = posFirst->Ent(ul); m_pos->Append(pmdid, pcr, ent); } }