// Collect the values that the given JBBCs can potentially produce.
//
// For every node in jbbcsNodeSet that has a NodeAnalysis:
//   - if the JBBC has no original parent join, or that join is an inner
//     non-semi join, the characteristic outputs of the JBBC's original
//     expression are added;
//   - else if the parent join is a left join, the join's null-instantiated
//     outputs are added;
//   - any other parent join contributes nothing.
// Nodes without a NodeAnalysis are skipped.
ValueIdSet AppliedStatMan::getPotentialOutputs(
     const CANodeIdSet & jbbcsNodeSet)
{
  ValueIdSet result;

  for (CANodeId nodeId = jbbcsNodeSet.init();
       jbbcsNodeSet.next(nodeId);
       jbbcsNodeSet.advance(nodeId))
  {
    NodeAnalysis * nodeAnalysis = nodeId.getNodeAnalysis();
    if (nodeAnalysis == NULL)
      continue;

    const Join * parentJoin =
      nodeAnalysis->getJBBC()->getOriginalParentJoin();

    // stays empty for parent joins that are neither inner nor left joins
    ValueIdSet nodeOutputs;

    if (parentJoin == NULL || parentJoin->isInnerNonSemiJoin())
    {
      nodeOutputs = nodeAnalysis->getOriginalExpr()->
                      getGroupAttr()->getCharacteristicOutputs();
    }
    else if (parentJoin->isLeftJoin())
    {
      nodeOutputs = parentJoin->nullInstantiatedOutput();
    }

    result.insert(nodeOutputs);
  }

  return result;
} // AppliedStatMan::getPotentialOutputs
// Construct a MultiJoin over the JBBCs of jbbSubset.
// One childrenMap_ entry is created per JBBC, each with a NULL expression;
// the real children are filled in later (see setChildren() and
// setChildrenFromOrigExprs()).
MultiJoin::MultiJoin(const JBBSubset & jbbSubset,
                     CollHeap *oHeap)
  : RelExpr(REL_MULTI_JOIN, NULL, NULL, oHeap)
  , jbbSubset_(jbbSubset)
  , childrenMap_(oHeap)
  , scheduledLSRs_(oHeap)
{
  CANodeIdSet jbbcs = jbbSubset_.getJBBCs();

  // Populate the children map with one placeholder entry per JBBC.
  Lng32 slot = 0;
  for (CANodeId jbbcId = jbbcs.init();
       jbbcs.next(jbbcId);
       jbbcs.advance(jbbcId))
  {
    childrenMap_.insertAt(
         slot++,
         new (oHeap) JBBCExprGroupEntry(jbbcId, (RelExpr*)NULL, oHeap));
  }

  lsrC_ = new (oHeap) LSRConfidence(oHeap);

  // The arity reported by this node must equal the number of JBBCs.
#pragma warning (disable : 4018)   //warning elimination
  CMPASSERT (getArity() == jbbcs.entries());
#pragma warning (default : 4018)   //warning elimination
}
// this method assume jbbNodeSet contains nodes from the same JBB EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeIdSet( const CANodeIdSet & jbbNodeSet) { EstLogPropSharedPtr outputEstLogProp; CANodeIdSet combinedNodeSet = jbbNodeSet; combinedNodeSet += *(jbbNodeSet.getJBBInput()->getNodeSet()); EstLogPropSharedPtr jBBInput = jbbNodeSet.getJBBInput(); if ((outputEstLogProp = getCachedStatistics(&combinedNodeSet)) == NULL) outputEstLogProp = synthesizeLogProp(&jbbNodeSet, jBBInput); return outputEstLogProp; }
// Write a textual dump of the query analysis to "<fileNamePrefix_>.analysis".
//
// The dump consists of the unparsed expression, the text of the
// QueryAnalysis, and a "Join Predicates" section which lists, per JBB and
// per JBBC, the join predicates of the JBBC followed by their value ids.
void MvQueryRewriteHandler::dumpAnalysisToFile(QueryAnalysis* qa,
                                               RelExpr* expr)
{
  NAString analysisFileName = fileNamePrefix_ + ".analysis";

  // Expression text followed by the analysis text.
  NAString str;
  expr->unparse(str, OPTIMIZER_PHASE, MVINFO_FORMAT);
  str += "\n";
  str += qa->getText();

  // Section header for the join predicates of the JBBCs.
  str += "Join Predicates\n";
  str += "===============";

  // scratch buffer for the decimal text of indexes and ids
  char buffer[20];

  ARRAY(JBB*) jbbs = qa->getJBBs();
  for (CollIndex jbbInx = 0; jbbInx < jbbs.entries(); jbbInx++)
  {
    str_itoa(jbbInx, buffer);
    str += "\nJBB #";
    str += NAString(buffer);
    str += ":\n";

    CANodeIdSet jbbcs = jbbs[jbbInx]->getJBBCs();
    for (CANodeId jbbcId = jbbcs.init();
         jbbcs.next(jbbcId);
         jbbcs.advance(jbbcId))
    {
      str_itoa(jbbcId, buffer);
      str += "\nJBBC with CANodeId ";
      str += NAString(buffer);
      str += ":\n";

      ValueIdSet joinPreds =
        jbbcId.getNodeAnalysis()->getJBBC()->getJoinPreds();
      str += valueIdSetGetText(joinPreds);

      if (joinPreds.entries() == 0)
        continue;

      // Comma-separated list of the predicates' value ids.
      str.append("\n(value ids of predicates are ");
      NABoolean first = true;
      for (ValueId jpVid = joinPreds.init();
           joinPreds.next(jpVid);
           joinPreds.advance(jpVid))
      {
        if (first)
          first = FALSE;
        else
          str.append(", ");

        str_itoa(jpVid, buffer);
        str.append(buffer);
      }
      str.append(")\n");
    }

    str += '\n';
  }

  dumpToFile(analysisFileName.data(), str.data());
} // dumpAnalysisToFile()
// Produce the estimated logical properties of the JBBCs in nodeSet for the
// input properties inLP.
//
// 1. If inLP is cacheable and carries a node set, the ASM cache is probed
//    with (nodeSet + input node set); a hit is returned immediately.
//    If inLP is cacheable but has no node set, this asserts in debug mode
//    and marks inLP as not cacheable.
// 2. A single-node set is delegated to getStatsForCANodeId().
// 3. Otherwise the properties are taken from the group attributes of the
//    preferred join of the JBBSubset that corresponds to nodeSet.
EstLogPropSharedPtr AppliedStatMan::synthesizeLogProp(
     const CANodeIdSet * nodeSet,
     EstLogPropSharedPtr &inLP)
{
  if (inLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inLP->getNodeSet();

    if (inNodeSet != NULL)
    {
      // key = requested nodes plus the nodes the input came from
      CANodeIdSet cacheKey = *nodeSet;
      cacheKey.insert(*inNodeSet);

      EstLogPropSharedPtr cachedLP = getCachedStatistics(&cacheKey);
      if (cachedLP != NULL)
        return cachedLP;
    }
    else
    {
      // Cacheable properties are expected to have a node set attached.
      // Assert in debug mode; otherwise downgrade inLP to non-cacheable.
      CCMPASSERT(inNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  if (nodeSet->entries() == 1)
    return getStatsForCANodeId(nodeSet->getFirst(), inLP);

  Join * preferredJoin = nodeSet->jbbcsToJBBSubset()->getPreferredJoin();

  //CMPASSERT(preferredJoin->isJoinFromMJSynthLogProp());

  return preferredJoin->getGroupAttr()->outputLogProp(inLP);
} // AppliedStatMan::synthesizeLogProp
// -------------------------------------------------------------------- // use origExprs from NodeAnalysis to set this MultiJoin childrenMap_ // -------------------------------------------------------------------- void MultiJoin::setChildrenFromOrigExprs(QueryAnalysis * qa) { CollHeap* outHeap = qa->outHeap(); CANodeIdSet jbbcs = jbbSubset_.getJBBCs(); CMPASSERT (qa->getJBBCs().contains(jbbcs)); Lng32 index = 0; for (CANodeId x= jbbcs.init(); jbbcs.next(x); jbbcs.advance(x) ) { JBBCExprGroupEntry* entry = new (outHeap) JBBCExprGroupEntry(x, qa->getNodeAnalysis(x)->getOriginalExpr(), outHeap); childrenMap_.insertAt(index, entry); index++; } return; }
// Return the estimated logical properties of joining the JBBCs in
// leftChildren with the JBBCs in rightChildren for the input inLP.
//
// If inLP is the global empty input, the JBB input of leftChildren is used
// instead. When the input is cacheable and has a node set, the ASM cache is
// probed with (left + right + input nodes). On a miss, or when no cache key
// can be built, the properties are synthesized by synthesizeLogProp() for
// the combined (left + right) node set.
EstLogPropSharedPtr AppliedStatMan::joinJBBChildren(
     const CANodeIdSet & leftChildren,
     const CANodeIdSet & rightChildren,
     EstLogPropSharedPtr & inLP)
{
  EstLogPropSharedPtr inputLP = inLP;
  EstLogPropSharedPtr outputEstLogProp;

  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = leftChildren.getJBBInput();

  // Because there exists a node set for the left, the right and the outer
  // child, these properties are cacheable. Check whether the output
  // properties of the join for the given input exist in the cache.
  CANodeIdSet combinedNodeSet = leftChildren;
  combinedNodeSet.insert(rightChildren);

  if (inputLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inputLP->getNodeSet();

    // A cacheable input is expected to carry a node set. If it does not,
    // skip the lookup instead of dereferencing NULL; synthesizeLogProp()
    // below asserts in debug mode and clears the cacheable flag for
    // exactly this situation.
    if (inNodeSet != NULL)
    {
      CANodeIdSet combinedWithInputNodeSet = combinedNodeSet;
      combinedWithInputNodeSet.insert(*inNodeSet);

      outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet);
    }
  }

  if (outputEstLogProp == NULL)
    outputEstLogProp = synthesizeLogProp(&combinedNodeSet, inputLP);

  return outputEstLogProp;
} // AppliedStatMan::joinJBBChildren
// -------------------------------------------------------------------- // use the input JBBCExprGroupMap to set this MultiJoin childrenMap_ // -------------------------------------------------------------------- void MultiJoin::setChildren(const JBBCExprGroupMap & map) { // everything here goes to statement heap CollHeap* outHeap = CmpCommon::statementHeap(); CANodeIdSet jbbcs = jbbSubset_.getJBBCs(); CMPASSERT (map.getJBBCs().contains(jbbcs)); Lng32 index = 0; for (CANodeId x= jbbcs.init(); jbbcs.next(x); jbbcs.advance(x) ) { JBBCExprGroupEntry* entry = new (outHeap) JBBCExprGroupEntry(x, map.getExprGroupIdOfJBBC(x), outHeap); childrenMap_.insertAt(index, entry); index++; } return; }
// Compute the reduction obtained by joining rightChildren to leftChildren.
//
// Let L' be the subset of leftChildren that is connected to rightChildren
// (joined to, predecessor of or successor of some right child), extended by
// the predecessors, within leftChildren, of those nodes. The result is
//
//      cardinality(L' join rightChildren) / cardinality(L')
//
// If no left child is connected to the right children, the result is the
// cardinality of rightChildren.
//
// Assumption: leftChildren represents a legal JBBSubset.
CostScalar AppliedStatMan::computeJoinReduction(
          const CANodeIdSet & leftChildren,
          const CANodeIdSet & rightChildren)
{
  CostScalar result = 0;

  // get stats for left
  // The returned properties are not used here. The call is kept because
  // getStatsForCANodeIdSet() may synthesize the left-side properties as a
  // side effect (presumably caching them - confirm before removing).
  getStatsForCANodeIdSet(leftChildren);

  // get stats for right
  EstLogPropSharedPtr rightCard = getStatsForCANodeIdSet(rightChildren);

  // all JBBCs that are connected to any of the right children
  CANodeIdSet jbbcsJoinedToRight;

  for (CANodeId rChild = rightChildren.init();
       rightChildren.next(rChild);
       rightChildren.advance(rChild))
  {
    JBBC * rChildJBBC = rChild.getNodeAnalysis()->getJBBC();

    jbbcsJoinedToRight += rChildJBBC->getJoinedJBBCs();
    jbbcsJoinedToRight += rChildJBBC->getPredecessorJBBCs();
    jbbcsJoinedToRight += rChildJBBC->getSuccessorJBBCs();
  }

  CANodeIdSet leftNodesJoinedToRight = leftChildren;
  leftNodesJoinedToRight.intersectSet(jbbcsJoinedToRight);

  // nothing on the left is connected to the right
  if (!leftNodesJoinedToRight.entries())
  {
    result = rightCard->getResultCardinality();
    return result;
  }

  // Transitively collect the predecessors, restricted to leftChildren, of
  // the left nodes that are connected to the right.
  CANodeIdSet leftSetPredecessors;
  CANodeIdSet newNodes = leftNodesJoinedToRight;
  CANodeIdSet nodesConsidered;

  while (newNodes.entries())
  {
    for (CANodeId lChild = newNodes.init();
         newNodes.next(lChild);
         newNodes.advance(lChild))
    {
      JBBC * lChildJBBC = lChild.getNodeAnalysis()->getJBBC();
      leftSetPredecessors += lChildJBBC->getPredecessorJBBCs();
      nodesConsidered += lChild;
    }

    leftSetPredecessors.intersectSet(leftChildren);
    newNodes = leftSetPredecessors;
    newNodes -= nodesConsidered;
  }

  leftNodesJoinedToRight += leftSetPredecessors;

  // for a JBBSubset to be legal it has to have at least one
  // independent jbbc i.e. a jbbc connected via an innerNonSemiNonTsjJoin.
  // If the set built so far has none, add the largest independent node of
  // leftChildren.
  CANodeIdSet independentJBBCsInLeftNodesJoinedToRight =
    QueryAnalysis::Instance()->getInnerNonSemiNonTSJJBBCs();

  independentJBBCsInLeftNodesJoinedToRight.intersectSet(leftNodesJoinedToRight);

  if (!independentJBBCsInLeftNodesJoinedToRight.entries())
    leftNodesJoinedToRight +=
      leftChildren.jbbcsToJBBSubset()->
                     getJBBSubsetAnalysis()->
                       getLargestIndependentNode();

  EstLogPropSharedPtr cardLeftNodesJoinedToRight =
    getStatsForCANodeIdSet(leftNodesJoinedToRight);

  // properties of all nodes connected via a join
  EstLogPropSharedPtr cardConnectedNodes =
    joinJBBChildren(leftNodesJoinedToRight, rightChildren);

  result = cardConnectedNodes->getResultCardinality() /
             cardLeftNodesJoinedToRight->getResultCardinality();

  return result;
}
// Build a join expression whose children stand for the JBBCs in
// leftNodeSet and rightNodeSet, for the input properties inLP.
//
// Node ids that occur on both sides are removed from one side first. The
// estimated logical properties of each side are then taken from the ASM
// cache when possible, otherwise synthesized, and the join is formed by
// formJoinExprWithEstLogProps() together with the JBBSubset that combines
// both (unmodified) node sets.
Join * AppliedStatMan::formJoinExprWithCANodeSets(
     const CANodeIdSet & leftNodeSet,
     const CANodeIdSet & rightNodeSet,
     EstLogPropSharedPtr& inLP,
     const ValueIdSet * joinPreds,
     const NABoolean cacheable)
{
  EstLogPropSharedPtr leftEstLogProp = NULL;
  EstLogPropSharedPtr rightEstLogProp = NULL;

  CANodeIdSet * inputNodeSet = NULL;
  if (inLP->isCacheable())
  {
    inputNodeSet = inLP->getNodeSet();

    // Cacheable inLP should have a node set attached. If it does not,
    // assert in debug mode. In release mode do not look for properties in
    // the ASM cache; get them from the group attr cache instead.
    if (inputNodeSet == NULL)
    {
      CCMPASSERT(inputNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  CANodeIdSet sharedNodes = leftNodeSet;
  sharedNodes.intersectSet(rightNodeSet);

  CANodeIdSet leftOnly = leftNodeSet;
  CANodeIdSet rightOnly = rightNodeSet;

  // Remove the node ids common to both children from the child whose
  // estLogProps are not cached. If neither child is cached, remove them
  // from the child with the larger node set (from the right on a tie).
  if (sharedNodes.entries() > 0)
  {
    CANodeIdSet * sideToTrim = &rightOnly;

    if (!lookup(leftNodeSet) &&
        (lookup(rightNodeSet) ||
         leftNodeSet.entries() > rightNodeSet.entries()))
      sideToTrim = &leftOnly;

    sideToTrim->subtractSet(sharedNodes);
  }

  // Probe the ASM cache for both children; this needs the input node set
  // as part of the key.
  if (inputNodeSet)
  {
    CANodeIdSet leftKey = leftOnly;
    leftKey.insert(*inputNodeSet);
    leftEstLogProp = getCachedStatistics(&leftKey);

    CANodeIdSet rightKey = rightOnly;
    rightKey.insert(*inputNodeSet);
    rightEstLogProp = getCachedStatistics(&rightKey);
  }

  // Left child: synthesize on a cache miss.
  if (leftEstLogProp == NULL)
    leftEstLogProp = synthesizeLogProp(&leftOnly, inLP);

  // Properties computed for a non-cacheable inLP carry no node set, but the
  // node set is needed to compute potential output values, so attach it.
  if (!leftEstLogProp->getNodeSet())
    leftEstLogProp->setNodeSet(new (STMTHEAP) CANodeIdSet (leftOnly));

  // Right child: same treatment.
  if (rightEstLogProp == NULL)
    rightEstLogProp = synthesizeLogProp(&rightOnly, inLP);

  if (!rightEstLogProp->getNodeSet())
    rightEstLogProp->setNodeSet(new (STMTHEAP) CANodeIdSet (rightOnly));

  // The join expression consists of the left and the right JBBSubsets, so
  // its JBBSubset is the combination of the two.
  JBBSubset * combinedSet = leftNodeSet.jbbcsToJBBSubset();
  combinedSet->addSubset(*(rightNodeSet.jbbcsToJBBSubset()));

  // Form the join expression with these estLogProps; inLP and joinPreds
  // are the ones for which the estLogProps are to be synthesized.
  return formJoinExprWithEstLogProps(leftEstLogProp,
                                     rightEstLogProp,
                                     inLP,
                                     joinPreds,
                                     cacheable,
                                     combinedSet);
} // AppliedStatMan::formJoinExprWithCANodeSets
// LCOV_EXCL_START :cnu EstLogPropSharedPtr AppliedStatMan::joinEstLogProps ( const EstLogPropSharedPtr& leftEstLogProp, const EstLogPropSharedPtr& rightEstLogProp, const EstLogPropSharedPtr& inLP) { EstLogPropSharedPtr outputEstLogProp; NABoolean cacheable = FALSE; CANodeIdSet * inputNodeSet = inLP->getNodeSet(); // These nodesets could be NULL, if the estLogProps to which they // belong are not cacheable CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet(); CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet(); if ((leftEstLogProp->isCacheable()) && (rightEstLogProp->isCacheable()) && (inLP->isCacheable()) ) { CCMPASSERT(leftNodeSet != NULL); CCMPASSERT(rightNodeSet != NULL); CCMPASSERT(inputNodeSet != NULL); if (leftNodeSet && rightNodeSet && inputNodeSet) { cacheable = TRUE; } } if (cacheable) { // check the ASM cache to see if outputEstLogProp for these // NodeSets appear for the given inputEstLogProp CANodeIdSet combineNodeSet = *leftNodeSet; combineNodeSet.insert(*rightNodeSet); CANodeIdSet combinedWithInputNodeSet = combineNodeSet; combinedWithInputNodeSet.insert(*inputNodeSet); outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet); if (outputEstLogProp != NULL) return outputEstLogProp; } JBBSubset * newJBBSubset = NULL; ValueIdSet setOfPredicates; if (leftNodeSet && rightNodeSet) { // join predicates can be obtained from EstLogProp, only // if these corresponded to complete set of predicates - // all local or complete join. Also, we need a // combinedJBBSubset to set in the fake join expression // that we will be creating. 
newJBBSubset = leftNodeSet->computeJBBSubset(); JBBSubset rightJBBSubset = *(rightNodeSet->computeJBBSubset()); setOfPredicates = newJBBSubset->joinPredsWithOther(rightJBBSubset); // Since the properties from this group are cacheable, hence the // group attributes for the new join expression should contain // the combined JBBsubset of the left and the right children newJBBSubset->addSubset(rightJBBSubset); } // inputEstLogProp would be either empty input estLogProp or from the // outer child. If cacheable is TRUE, then newJBBsubset should // contain the combined left and the right JBB subset. But if // cacheable is FALSE, newJBBsubset should be NULL Join * joinExpr = formJoinExprWithEstLogProps( leftEstLogProp, rightEstLogProp, inLP, &setOfPredicates, cacheable, newJBBSubset); // Now do the actual synthesis and cache statistics in the cache outputEstLogProp = joinExpr->getGroupAttr()->outputLogProp(inLP); return outputEstLogProp; }
// This method forms the join expression for join on JBBC specified by jbbcId // inputEstLogProp should not be cacheable Join * AppliedStatMan::formJoinExprForJoinOnJBBC( CANodeIdSet jbbSubset, CANodeId jbbcId, const ValueIdSet * jbbcLocalPreds, const ValueIdSet * joinPreds, const EstLogPropSharedPtr& inputEstLogProp, const NABoolean cacheable) { NABoolean origInputIsCacheable = inputEstLogProp->isCacheable(); if(origInputIsCacheable) { inputEstLogProp->setCacheableFlag(FALSE); CCMPASSERT("Expecting Non Cacheable Input"); } RelExpr * jbbcExpr = getExprForCANodeId(jbbcId, inputEstLogProp, jbbcLocalPreds); jbbcExpr->getGroupAttr()->outputLogProp(inputEstLogProp); RelExpr * jbbSubsetExpr = jbbSubset.jbbcsToJBBSubset()->getPreferredJoin(); if(!jbbSubsetExpr) if(jbbSubset.entries()==1) if(!inputEstLogProp->isCacheable()) { inputEstLogProp->setCacheableFlag(TRUE); jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); inputEstLogProp->setCacheableFlag(FALSE); } else jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); else { CCMPASSERT("No Subset expression, need at least one entry in set"); } RelExpr * leftChildExpr = jbbSubsetExpr; RelExpr * rightChildExpr = jbbcExpr; GroupAttributes * galeft = jbbSubsetExpr->getGroupAttr(); GroupAttributes * garight = jbbcExpr->getGroupAttr(); // xxx JBBC * jbbc = jbbcId.getNodeAnalysis()->getJBBC(); Join * jbbcParentJoin = jbbc->getOriginalParentJoin(); ValueIdSet leftOuterJoinFilterPreds; Join * joinExpr = NULL; if(jbbcParentJoin) { if(jbbcParentJoin->isSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL); if(jbbcParentJoin->isAntiSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL); if(jbbcParentJoin->isLeftJoin()) { joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL); leftOuterJoinFilterPreds += jbbc->getLeftJoinFilterPreds(); } if(joinExpr) { 
joinExpr->setJoinPred(jbbc->getPredsWithPredecessors()); joinExpr->nullInstantiatedOutput().insert(jbbc->nullInstantiatedOutput()); } } if(!joinExpr) { // now form a JoinExpr with these left and right children. joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_JOIN, NULL); } ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds; selPredsAndLOJFilter += (*joinPreds); joinExpr->setSelectionPredicates(selPredsAndLOJFilter); // set groupAttr of this Join expression GroupAttributes * gaJoin = new STMTHEAP GroupAttributes(); // set required outputs of Join as sum of characteristic // outputs of the left and the right children ValueIdSet requiredOutputs; requiredOutputs.addSet(getPotentialOutputs(jbbSubset)); requiredOutputs.addSet(getPotentialOutputs(jbbcId)); gaJoin->setCharacteristicOutputs(requiredOutputs); // set JBBSubset for this group, if all estLogProps are cacheable. // Else JBBSubset is NULL CANodeIdSet combinedSet = jbbSubset; combinedSet += jbbcId; if (cacheable) gaJoin->getGroupAnalysis()->setLocalJBBView(combinedSet.jbbcsToJBBSubset()); gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) ); // if there are some probes coming into the join // then join type = tsj. if ((inputEstLogProp->getResultCardinality() > 1) || (inputEstLogProp->getColStats().entries() > 1)) { if (cacheable) { CANodeIdSet inputNodeSet = *(inputEstLogProp->getNodeSet()); gaJoin->setCharacteristicInputs(getPotentialOutputs(inputNodeSet)); } } joinExpr->setGroupAttr(gaJoin); gaJoin->setLogExprForSynthesis(joinExpr); joinExpr->synthLogProp(); inputEstLogProp->setCacheableFlag(origInputIsCacheable); return joinExpr; } // AppliedStatMan::formJoinExprForJoinOnJBBC
// Hash function for CANodeIdSet keys: returns the hash value computed by
// the key itself.
ULng32 AppliedStatMan::hashASM(const CANodeIdSet &key)
{
  const ULng32 hashValue = key.hash();
  return hashValue;
} // AppliedStatMan::hashASM
// This method forms the join expression with the estLogProps. Join * AppliedStatMan::formJoinExprWithEstLogProps( const EstLogPropSharedPtr& leftEstLogProp, const EstLogPropSharedPtr& rightEstLogProp, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet * setOfPredicates, const NABoolean cacheable, JBBSubset * combinedJBBSubset) { // Form a join expression with these estLogProps. // form the left child. Since the estLogProps of the left and the // right children exist, these can be treated as Scan expressions Scan * leftChildExpr = new STMTHEAP Scan(); GroupAttributes * galeft = new STMTHEAP GroupAttributes(); // set GroupAttr of the leftChild galeft->inputLogPropList().insert(inputEstLogProp); galeft->outputLogPropList().insert(leftEstLogProp); CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet(); CANodeId nodeId; if (leftNodeSet) { if (leftNodeSet->entries() == 1) { nodeId = leftNodeSet->getFirst(); if(nodeId.getNodeAnalysis()->getTableAnalysis()) leftChildExpr->setTableAttributes(nodeId); } CostScalar minEstCard = leftNodeSet->getMinChildEstRowCount(); galeft->setMinChildEstRowCount(minEstCard); } leftChildExpr->setGroupAttr(galeft); galeft->setLogExprForSynthesis(leftChildExpr); // form the right child and set its groupAttr Scan * rightChildExpr = new STMTHEAP Scan(); GroupAttributes * garight = new STMTHEAP GroupAttributes(); garight->inputLogPropList().insert(inputEstLogProp); garight->outputLogPropList().insert(rightEstLogProp); CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet(); // xxx JBBC * singleRightChild = NULL; Join * singleRightChildParentJoin = NULL; ValueIdSet leftOuterJoinFilterPreds; if (rightNodeSet) { if (rightNodeSet->entries() == 1) { nodeId = rightNodeSet->getFirst(); if(nodeId.getNodeAnalysis()->getTableAnalysis()) rightChildExpr->setTableAttributes(nodeId); if(nodeId.getNodeAnalysis()->getJBBC()) { singleRightChild = nodeId.getNodeAnalysis()->getJBBC(); if(singleRightChild) singleRightChildParentJoin = 
singleRightChild->getOriginalParentJoin(); } } CostScalar minEstCard = rightNodeSet->getMinChildEstRowCount(); garight->setMinChildEstRowCount(minEstCard); } rightChildExpr->setGroupAttr(garight); garight->setLogExprForSynthesis(rightChildExpr); Join * joinExpr = NULL; if(singleRightChild && singleRightChildParentJoin) { if(singleRightChildParentJoin->isSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL); if(singleRightChildParentJoin->isAntiSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL); if(singleRightChildParentJoin->isLeftJoin()) { joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL); leftOuterJoinFilterPreds += singleRightChild->getLeftJoinFilterPreds(); } if(joinExpr) { joinExpr->setJoinPred(singleRightChild->getPredsWithPredecessors()); joinExpr->nullInstantiatedOutput().insert(singleRightChild-> nullInstantiatedOutput()); } } if(!joinExpr) { // now form a JoinExpr with these left and right children. joinExpr = new STMTHEAP Join(leftChildExpr, // left child rightChildExpr, // right child REL_JOIN, // join type NULL); // join predicates } ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds; selPredsAndLOJFilter += (*setOfPredicates); joinExpr->setSelectionPredicates(selPredsAndLOJFilter); // set groupAttr of this Join expression GroupAttributes * gaJoin = new STMTHEAP GroupAttributes(); // set required outputs of Join as sum of characteristic // outputs of the left and the right children ValueIdSet requiredOutputs; if (leftNodeSet) requiredOutputs.addSet(getPotentialOutputs(*(leftNodeSet))); if (rightNodeSet) requiredOutputs.addSet(getPotentialOutputs(*(rightNodeSet))); gaJoin->setCharacteristicOutputs(requiredOutputs); // set JBBSubset for this group, if all estLogProps are cacheable. 
// Else JBBSubset is NULL if (cacheable) gaJoin->getGroupAnalysis()->setLocalJBBView(combinedJBBSubset); gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) ); joinExpr->setGroupAttr(gaJoin); // if there are some probes coming into the join // then join type = tsj. if ((inputEstLogProp->getResultCardinality() > 1) || (inputEstLogProp->getColStats().entries() > 1)) { if (cacheable) { CANodeIdSet inputNodeSet = *(inputEstLogProp->getNodeSet()); gaJoin->setCharacteristicInputs(getPotentialOutputs(inputNodeSet)); } } joinExpr->setGroupAttr(gaJoin); gaJoin->setLogExprForSynthesis(joinExpr); return joinExpr; } // AppliedStatMan::formJoinExprWithEstLogProps
// Build a left linear join tree for this MultiJoin.
//
// leftDeepJoinSequence lists the node sets of the join children; entry 0
// becomes the right child of the top join, entry 1 the right child of the
// join below it, and so on, with the last entry forming the left-most
// child. joinDirectives[i] is applied to the join created for entry i.
//
// Returns the top join of the tree after synthesizing its logical
// properties and those of its children. Returns NULL when the sequence has
// fewer than two entries, since no join can be formed; without this guard
// the loop bound (numJoinChildren-1) would wrap around for an empty
// sequence, and a one-entry sequence would dereference a NULL result.
Join* MultiJoin::createLeftLinearJoinTree
                   (const NAList<CANodeIdSet> * const leftDeepJoinSequence,
                    NAList<MJJoinDirective *> * joinDirectives) const
{
  Join* result = NULL;

  Join* currentJoin=NULL;
  NABoolean reUseMultiJoins = FALSE;

  //Set of all JBBCs in this multi-join.
  //This set will be broken up to make the join tree
  //representing the substitute.
  //The loop below will construct the join tree,
  //starting from the top join.
  CANodeIdSet childSet = getJBBSubset().getJBBCs();

  // variables used in loop below
  MultiJoin * currentMJoin = (MultiJoin *) this;

  // in an iteration this is the parent join
  // e.g. when we are creating JOIN3, this will
  // be JOIN2
  Join * parentJoin = NULL;

#ifdef _DEBUG
  if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON  &&
       CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
  {
    // LCOV_EXCL_START - dpm
    CURRCONTEXT_OPTDEBUG->stream() << "Following is left deep join sequence: " << endl;
    CURRCONTEXT_OPTDEBUG->stream() << endl;
    // LCOV_EXCL_STOP
  }
#endif

  UInt32 numJoinChildren = leftDeepJoinSequence->entries();

  // A join needs at least two children. Bail out before the unsigned
  // subtraction below can wrap around.
  if (numJoinChildren < 2)
    return result;

  for (UInt32 i = 0; i < (numJoinChildren-1); i++)
  {
    //create JBBSubset representing a comprising component of the
    //leftDeepJoinSequence.
    JBBSubset * joinRightChild = ((*leftDeepJoinSequence)[i]).computeJBBSubset();

    MJJoinDirective * joinDirective = (*joinDirectives)[i];

    //remove all tables that will become right side of join
    childSet.remove((*leftDeepJoinSequence)[i]);

#ifdef _DEBUG
    //print the right child of the current join
    if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON  &&
         CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
    {
      CURRCONTEXT_OPTDEBUG->stream() << ((*leftDeepJoinSequence)[i]).getText() << endl; // LCOV_EXCL_LINE - dpm
    }
#endif

    //Get JBBSubset for left side of join
    JBBSubset * joinLeftChild = childSet.computeJBBSubset();

    //create the join by doing a split of the multi-join
    currentJoin = currentMJoin->splitSubset(*joinLeftChild, *joinRightChild, reUseMultiJoins);

    joinDirective->setupJoin(currentJoin);

    if ( CmpCommon::getDefault(COMP_BOOL_120) == DF_OFF)
      currentJoin->updatePotential(-3);

    // if this is the first iteration
    // set the result to be the top join
    if (i == 0)
      result = currentJoin;

    //set the current multi-join to the left child of the
    //join just created
    //change getChild to child().getPointer
    currentMJoin = (MultiJoin*) currentJoin->getChild(0);

    //if there was a parent join, set the left child to
    //point to the new join we just created i.e. currentJoin.
    if (parentJoin)
      parentJoin->setChild(0,currentJoin);

    //set currentJoin to be the parent for the next iteration
    parentJoin = currentJoin;
  }

#ifdef _DEBUG
  //print the left most child
  if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON  &&
       CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
  {
    // LCOV_EXCL_START - dpm
    CURRCONTEXT_OPTDEBUG->stream() << ((*leftDeepJoinSequence)[(numJoinChildren-1)]).getText() << endl;
    CURRCONTEXT_OPTDEBUG->stream() << endl;
    // LCOV_EXCL_STOP
  }
#endif

  // end - construct the join tree

  // synth the join
  result->synthLogProp();

  //if the right child of the top-most join is a multi-Join,
  //synthesize it
  if(result->child(1))
    if(result->child(1)->getOperatorType()==REL_MULTI_JOIN)
      result->child(1)->synthLogProp();

  // synth the left child too
  result->child(0)->synthLogProp();

  return result;
} // MultiJoin::createLeftLinearJoinTree()
// AppliedStatMan::setupASMCacheForJBB method will be called from // Query::Analyze after connectivity analysis has been done and // empty logical properties have been set. void AppliedStatMan::setupASMCacheForJBB(JBB & jbb) { EstLogPropSharedPtr myEstLogProp; // get all JBBCs of JBB const CANodeIdSet jbbcNodeIdSet = jbb.getMainJBBSubset().getJBBCs(); CANodeId jbbcId; // for all jbbcs for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { if (NodeAnalysis * jbbcNode = jbbcId.getNodeAnalysis()) { // Evaluate local predicates only if it is a table. RelExpr * jbbcExpr = jbbcNode->getOriginalExpr(); if ((jbbcNode->getTableAnalysis() != NULL) && (jbbcExpr->getOperatorType() == REL_SCAN)) { // get the original expression of the jbbc Scan * scanExpr = (Scan *) jbbcExpr; ValueIdSet localPreds = scanExpr->getSelectionPredicates(); // if local predicates have already been computed, then skip if ((localPreds.entries() > 0) || !(lookup(jbbcId))) { // check to see this GA has already been associated with // a logExpr for synthesis. If not, then synthesize // log. 
expression, and then apply local predicates to it if (NOT scanExpr->getGroupAttr()->existsLogExprForSynthesis()) scanExpr->synthLogProp(); myEstLogProp = getStatsForCANodeId(jbbcId); } } } } // Now do a second traversal of the JBB looking for join reducers for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { // now look for all two way joins for this child if (jbbcId.getNodeAnalysis()) { // get all JBBCs connected to this JBBC, and do a two-way // join with all of them CANodeIdSet connectedNodes = jbbcId.getNodeAnalysis()->\ getJBBC()->getJoinedJBBCs(); for (CANodeId connectedTable = connectedNodes.init(); connectedNodes.next(connectedTable); connectedNodes.advance(connectedTable)) { if (connectedTable.getNodeAnalysis()) { // ASM does not concern itself with the order of the tables, // hence it is possible that the join has already been computed CANodeIdSet tableSet = jbbcId; tableSet.insert(connectedTable); if ((myEstLogProp = getCachedStatistics(&tableSet)) == NULL) { CANodeIdSet setForjbbcId(jbbcId); CANodeIdSet setForConnectedTable(connectedTable); myEstLogProp = joinJBBChildren(setForjbbcId, setForConnectedTable); } } } } } } // AppliedStatMan::setupASMCacheForJBB
// Return the estimated logical properties of a single JBBC.
//
// jbbc      : the node whose properties are requested
// inLP      : input properties; if this is the global empty input, the JBB
//             input of the node is used instead
// predIdSet : if not NULL and the node is a table, these predicates are
//             applied to a freshly built Scan instead of the predicates of
//             the node's own expression
//
// 1. If the input is cacheable, predIdSet is NULL and the input has a node
//    set, the ASM cache is probed with (jbbc + input node set).
// 2. Otherwise, or on a miss, the properties are synthesized: either from a
//    new Scan with predIdSet applied (tables only), or from the node's
//    modified expression, falling back to its original expression.
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
     CANodeId jbbc,
     const EstLogPropSharedPtr &inLP,
     const ValueIdSet * predIdSet)
{
  EstLogPropSharedPtr inputLP = inLP;
  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = jbbc.getJBBInput();

  // ---- 1. cache lookup, only when all local predicates are wanted ----
  if (inputLP->isCacheable() && (predIdSet == NULL))
  {
    // node set of the outer child; with jbbc it forms the cache key
    CANodeIdSet * inputNodeSet = inputLP->getNodeSet();

    // cacheable inLP should have a node set attached
    CCMPASSERT(inputNodeSet != NULL);

    if (inputNodeSet)
    {
      CANodeIdSet cacheKey = jbbc;
      cacheKey.insert(*inputNodeSet);

      EstLogPropSharedPtr cachedLP = getCachedStatistics(&cacheKey);
      if (cachedLP != NULL)
        return cachedLP;
    }
  }

  // ---- 2. not in the cache: synthesize ----
  NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis();
  TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis();

  if (!(tableAnalysis && predIdSet))
  {
    // apply the predicates of the node's own expression
    RelExpr * relExpr = jbbcNode->getModifiedExpr();
    if (relExpr == NULL)
      relExpr = jbbcNode->getOriginalExpr();

    // synthesize estLogProp for the given input
    return relExpr->getGroupAttr()->outputLogProp(inputLP);
  }

  // Caller-specified predicates on a table: build a Scan for them.
  TableDesc * tableDesc = tableAnalysis->getTableDesc();
  const QualifiedName& qualName =
    tableDesc->getNATable()->getTableName();

  CorrName name(qualName, STMTHEAP);

  Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP);

  // Base cardinality: the table's estimated row count, overridden by a
  // cardinality hint if present, else (for HBase tables) by the row count
  // of the first single-column statistic when one exists.
  Cardinality rc = tableDesc->getNATable()->getEstRowCount();

  const CardinalityHint* cardHint = tableDesc->getCardinalityHint();
  if ( cardHint )
  {
    rc = (cardHint->getScanCardinality()).getValue();
  }
  else if ( tableDesc->getNATable()->isHbaseTable() )
  {
    NATable* nt = (NATable*)(tableDesc->getNATable());
    StatsList* statsList = nt->getColStats();
    if ( statsList && statsList->entries() > 0 )
    {
      ColStatsSharedPtr cStatsPtr = statsList->getSingleColumnColStats(0);
      if ( cStatsPtr )
        rc = (cStatsPtr->getRowcount()).getValue();
    }
  }

  scanExpr->setBaseCardinality(MIN_ONE (rc));

  GroupAttributes * gaExpr = new STMTHEAP GroupAttributes();

  scanExpr->setSelectionPredicates(*predIdSet);

  // the new Scan produces the same outputs as the original expression
  ValueIdSet requiredOutputs = jbbcNode->getOriginalExpr()->
                                 getGroupAttr()->getCharacteristicOutputs();
  gaExpr->setCharacteristicOutputs(requiredOutputs);

  scanExpr->setGroupAttr(gaExpr);
  gaExpr->setLogExprForSynthesis(scanExpr);

  // synthesize against a copy of the input that is marked non-cacheable
  EstLogPropSharedPtr nonCacheableInLP(new (HISTHEAP) EstLogProp (*inputLP));
  nonCacheableInLP->setCacheableFlag(FALSE);

  scanExpr->synthLogProp();

  return scanExpr->getGroupAttr()->outputLogProp(nonCacheableInLP);
} // getStatsForCANodeId