//--------------------------------------------------------------------------- // @function: // CBitSetTest::EresUnittest_SetOps // // @doc: // Test for set operations // //--------------------------------------------------------------------------- GPOS_RESULT CBitSetTest::EresUnittest_SetOps() { // create memory pool CAutoMemoryPool amp; IMemoryPool *pmp = amp.Pmp(); ULONG cSizeBits = 32; ULONG cInserts = 10; CBitSet *pbs1 = GPOS_NEW(pmp) CBitSet(pmp, cSizeBits); for (ULONG i = 0; i < cInserts; i += 2) { pbs1->FExchangeSet(i * cSizeBits); } CBitSet *pbs2 = GPOS_NEW(pmp) CBitSet(pmp, cSizeBits); for (ULONG i = 1; i < cInserts; i += 2) { pbs2->FExchangeSet(i * cSizeBits); } CBitSet *pbs = GPOS_NEW(pmp) CBitSet(pmp, cSizeBits); pbs->Union(pbs1); GPOS_ASSERT(pbs->FEqual(pbs1)); pbs->Intersection(pbs1); GPOS_ASSERT(pbs->FEqual(pbs1)); GPOS_ASSERT(pbs->FEqual(pbs)); GPOS_ASSERT(pbs1->FEqual(pbs1)); pbs->Union(pbs2); GPOS_ASSERT(!pbs->FEqual(pbs1) && !pbs->FEqual(pbs2)); GPOS_ASSERT(pbs->FSubset(pbs1) && pbs->FSubset(pbs2)); pbs->Difference(pbs2); GPOS_ASSERT(pbs->FEqual(pbs1)); pbs1->Release(); pbs->Union(pbs2); pbs->Intersection(pbs2); GPOS_ASSERT(pbs->FEqual(pbs2)); GPOS_ASSERT(pbs->FSubset(pbs2)); GPOS_ASSERT(pbs->CElements() == pbs2->CElements()); pbs2->Release(); pbs->Release(); return GPOS_OK; }
//--------------------------------------------------------------------------- // @function: // CJoinOrderDP::PexprBestJoinOrder // // @doc: // find best join order for a given set of elements; // //--------------------------------------------------------------------------- CExpression * CJoinOrderDP::PexprBestJoinOrder ( CBitSet *pbs ) { GPOS_CHECK_STACK_SIZE; GPOS_CHECK_ABORT; GPOS_ASSERT(NULL != pbs); // start by looking-up cost in the DP map CExpression *pexpr = PexprLookup(pbs); if (pexpr == m_pexprDummy) { // no join order could be created return NULL; } if (NULL != pexpr) { // join order is found by looking up map return pexpr; } // find maximal covered subset CBitSet *pbsCovered = PbsCovered(pbs); if (0 == pbsCovered->Size()) { // set is not covered, return a cross product pbsCovered->Release(); return PexprCross(pbs); } if (!pbsCovered->Equals(pbs)) { // create a cross product for uncovered subset CBitSet *pbsUncovered = GPOS_NEW(m_mp) CBitSet(m_mp, *pbs); pbsUncovered->Difference(pbsCovered); CExpression *pexprResult = PexprJoinCoveredSubsetWithUncoveredSubset(pbs, pbsCovered, pbsUncovered); pbsCovered->Release(); pbsUncovered->Release(); return pexprResult; } pbsCovered->Release(); // if set has size 2, there is only one possible solution if (2 == pbs->Size()) { return PexprJoin(pbs); } // otherwise, compute best join order using dynamic programming CExpression *pexprBestJoinOrder = PexprBestJoinOrderDP(pbs); if (pexprBestJoinOrder == m_pexprDummy) { // no join order could be created return NULL; } return pexprBestJoinOrder; }
//--------------------------------------------------------------------------- // @function: // CJoinOrderDP::PexprBestJoinOrderDP // // @doc: // Find the best join order of a given set of elements using dynamic // programming; // given a set of elements (e.g., {A, B, C}), we find all possible splits // of the set (e.g., {A}, {B, C}) where at least one edge connects the // two subsets resulting from the split, // for each split, we find the best join orders of left and right subsets // recursively, // the function finds the split with the least cost, and stores the join // of its two subsets as the best join order of the given set // // //--------------------------------------------------------------------------- CExpression * CJoinOrderDP::PexprBestJoinOrderDP ( CBitSet *pbs // set of elements to be joined ) { CDouble dMinCost(0.0); CExpression *pexprResult = NULL; CBitSetArray *pdrgpbsSubsets = PdrgpbsSubsets(m_mp, pbs); const ULONG ulSubsets = pdrgpbsSubsets->Size(); for (ULONG ul = 0; ul < ulSubsets; ul++) { CBitSet *pbsCurrent = (*pdrgpbsSubsets)[ul]; CBitSet *pbsRemaining = GPOS_NEW(m_mp) CBitSet(m_mp, *pbs); pbsRemaining->Difference(pbsCurrent); // check if subsets are connected with one or more edges CExpression *pexprPred = PexprPred(pbsCurrent, pbsRemaining); if (NULL != pexprPred) { // compute solutions of left and right subsets recursively CExpression *pexprLeft = PexprBestJoinOrder(pbsCurrent); CExpression *pexprRight = PexprBestJoinOrder(pbsRemaining); if (NULL != pexprLeft && NULL != pexprRight) { // we found solutions of left and right subsets, we check if // this gives a better solution for the input set CExpression *pexprJoin = PexprJoin(pbsCurrent, pbsRemaining); CDouble dCost = DCost(pexprJoin); if (NULL == pexprResult || dCost < dMinCost) { // this is the first solution, or we found a better solution dMinCost = dCost; CRefCount::SafeRelease(pexprResult); pexprJoin->AddRef(); pexprResult = pexprJoin; } if (m_ulComps == pbs->Size()) { AddJoinOrder(pexprJoin, dCost); } pexprJoin->Release(); } } pbsRemaining->Release(); } pdrgpbsSubsets->Release(); // store solution in DP table if (NULL == pexprResult) { m_pexprDummy->AddRef(); pexprResult = m_pexprDummy; } DeriveStats(pexprResult); pbs->AddRef(); #ifdef GPOS_DEBUG BOOL fInserted = #endif // GPOS_DEBUG m_phmbsexpr->Insert(pbs, pexprResult); GPOS_ASSERT(fInserted); // add expression cost to cost map InsertExpressionCost(pexprResult, dMinCost, false /*fValidateInsert*/); return pexprResult; }