// -----------------------------------------------------------------------
// AppliedStatMan::joinJBBChildren
//
// Returns the estimated logical properties for the join of the JBBCs in
// leftChildren with the JBBCs in rightChildren, for the given input
// estimated logical properties.
//
//   leftChildren  - CANodeIds that form the left side of the join
//   rightChildren - CANodeIds that form the right side of the join
//   inLP          - input estimated logical properties. If this is the
//                   global empty input logprop, the JBB input of
//                   leftChildren is used instead.
//
// The ASM cache is probed first, using the union of both child sets and
// the node set of the input logprop as the key. On a miss (or when the
// cache cannot be probed) the properties come from synthesizeLogProp().
// -----------------------------------------------------------------------
EstLogPropSharedPtr AppliedStatMan::joinJBBChildren(
                                    const CANodeIdSet & leftChildren,
                                    const CANodeIdSet & rightChildren,
                                    EstLogPropSharedPtr & inLP)
{
  EstLogPropSharedPtr inputLP = inLP;
  EstLogPropSharedPtr outputEstLogProp;

  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = leftChildren.getJBBInput();

  // Because there exist a nodeSet for the left, right and the outer
  // child, these properties are cacheable. Check to see if the
  // outputEstLogProp of the join for the given inLP exists in the cache.
  CANodeIdSet combinedNodeSet = leftChildren;
  combinedNodeSet.insert(rightChildren);

  if (inputLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inputLP->getNodeSet();

    // Cacheable input properties are expected to carry a nodeSet. If it
    // is missing, do not dereference it here: skip the cache probe and
    // let synthesizeLogProp() below deal with the inconsistency (it
    // asserts in debug mode and marks inputLP as not cacheable).
    if (inNodeSet != NULL)
    {
      CANodeIdSet combinedWithInputNodeSet = combinedNodeSet;
      combinedWithInputNodeSet.insert(*inNodeSet);

      outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet);
    }
  }

  if (outputEstLogProp == NULL)
    outputEstLogProp = synthesizeLogProp(&combinedNodeSet, inputLP);

  return outputEstLogProp;
} // AppliedStatMan::joinJBBChildren
// -----------------------------------------------------------------------
// AppliedStatMan::synthesizeLogProp
//
// Returns the estimated logical properties of the JBBCs in nodeSet for
// the given input estimated logical properties.
//
//   nodeSet - the CANodeIds whose combined properties are wanted
//   inLP    - input estimated logical properties. Its cacheable flag is
//             cleared here if it claims to be cacheable but carries no
//             nodeSet.
//
// Lookup order:
//   1. the ASM cache, when inLP is cacheable and has a nodeSet
//   2. getStatsForCANodeId(), when nodeSet holds exactly one node
//   3. the group attributes of the preferred join of the JBBSubset
//      built from nodeSet
// -----------------------------------------------------------------------
EstLogPropSharedPtr AppliedStatMan::synthesizeLogProp(
                                    const CANodeIdSet * nodeSet,
                                    EstLogPropSharedPtr &inLP)
{
  if (inLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inLP->getNodeSet();

    if (inNodeSet != NULL)
    {
      // Probe the ASM cache with the requested nodes plus the nodes of
      // the input properties as the key.
      CANodeIdSet cacheKey = *nodeSet;
      cacheKey.insert(*inNodeSet);

      EstLogPropSharedPtr cachedLogProp = getCachedStatistics(&cacheKey);
      if (cachedLogProp != NULL)
        return cachedLogProp;
    }
    else
    {
      // Cacheable inLP should have a nodeSet attached. If not, assert
      // in debug mode. In release mode, mark the properties as not
      // cacheable; they will then be looked up in the group attr cache.
      CCMPASSERT(inNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  // A single node needs no join; get its statistics directly.
  if (nodeSet->entries() == 1)
    return getStatsForCANodeId(nodeSet->getFirst(), inLP);

  JBBSubset * subsetForNodes = nodeSet->jbbcsToJBBSubset();
  Join * joinToUse = subsetForNodes->getPreferredJoin();

  //CMPASSERT(joinToUse->isJoinFromMJSynthLogProp());

  return joinToUse->getGroupAttr()->outputLogProp(inLP);
} // AppliedStatMan::synthesizeLogProp
// AppliedStatMan::setupASMCacheForJBB method will be called from // Query::Analyze after connectivity analysis has been done and // empty logical properties have been set. void AppliedStatMan::setupASMCacheForJBB(JBB & jbb) { EstLogPropSharedPtr myEstLogProp; // get all JBBCs of JBB const CANodeIdSet jbbcNodeIdSet = jbb.getMainJBBSubset().getJBBCs(); CANodeId jbbcId; // for all jbbcs for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { if (NodeAnalysis * jbbcNode = jbbcId.getNodeAnalysis()) { // Evaluate local predicates only if it is a table. RelExpr * jbbcExpr = jbbcNode->getOriginalExpr(); if ((jbbcNode->getTableAnalysis() != NULL) && (jbbcExpr->getOperatorType() == REL_SCAN)) { // get the original expression of the jbbc Scan * scanExpr = (Scan *) jbbcExpr; ValueIdSet localPreds = scanExpr->getSelectionPredicates(); // if local predicates have already been computed, then skip if ((localPreds.entries() > 0) || !(lookup(jbbcId))) { // check to see this GA has already been associated with // a logExpr for synthesis. If not, then synthesize // log. 
expression, and then apply local predicates to it if (NOT scanExpr->getGroupAttr()->existsLogExprForSynthesis()) scanExpr->synthLogProp(); myEstLogProp = getStatsForCANodeId(jbbcId); } } } } // Now do a second traversal of the JBB looking for join reducers for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { // now look for all two way joins for this child if (jbbcId.getNodeAnalysis()) { // get all JBBCs connected to this JBBC, and do a two-way // join with all of them CANodeIdSet connectedNodes = jbbcId.getNodeAnalysis()->\ getJBBC()->getJoinedJBBCs(); for (CANodeId connectedTable = connectedNodes.init(); connectedNodes.next(connectedTable); connectedNodes.advance(connectedTable)) { if (connectedTable.getNodeAnalysis()) { // ASM does not concern itself with the order of the tables, // hence it is possible that the join has already been computed CANodeIdSet tableSet = jbbcId; tableSet.insert(connectedTable); if ((myEstLogProp = getCachedStatistics(&tableSet)) == NULL) { CANodeIdSet setForjbbcId(jbbcId); CANodeIdSet setForConnectedTable(connectedTable); myEstLogProp = joinJBBChildren(setForjbbcId, setForConnectedTable); } } } } } } // AppliedStatMan::setupASMCacheForJBB
// LCOV_EXCL_START :cnu EstLogPropSharedPtr AppliedStatMan::joinEstLogProps ( const EstLogPropSharedPtr& leftEstLogProp, const EstLogPropSharedPtr& rightEstLogProp, const EstLogPropSharedPtr& inLP) { EstLogPropSharedPtr outputEstLogProp; NABoolean cacheable = FALSE; CANodeIdSet * inputNodeSet = inLP->getNodeSet(); // These nodesets could be NULL, if the estLogProps to which they // belong are not cacheable CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet(); CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet(); if ((leftEstLogProp->isCacheable()) && (rightEstLogProp->isCacheable()) && (inLP->isCacheable()) ) { CCMPASSERT(leftNodeSet != NULL); CCMPASSERT(rightNodeSet != NULL); CCMPASSERT(inputNodeSet != NULL); if (leftNodeSet && rightNodeSet && inputNodeSet) { cacheable = TRUE; } } if (cacheable) { // check the ASM cache to see if outputEstLogProp for these // NodeSets appear for the given inputEstLogProp CANodeIdSet combineNodeSet = *leftNodeSet; combineNodeSet.insert(*rightNodeSet); CANodeIdSet combinedWithInputNodeSet = combineNodeSet; combinedWithInputNodeSet.insert(*inputNodeSet); outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet); if (outputEstLogProp != NULL) return outputEstLogProp; } JBBSubset * newJBBSubset = NULL; ValueIdSet setOfPredicates; if (leftNodeSet && rightNodeSet) { // join predicates can be obtained from EstLogProp, only // if these corresponded to complete set of predicates - // all local or complete join. Also, we need a // combinedJBBSubset to set in the fake join expression // that we will be creating. 
newJBBSubset = leftNodeSet->computeJBBSubset(); JBBSubset rightJBBSubset = *(rightNodeSet->computeJBBSubset()); setOfPredicates = newJBBSubset->joinPredsWithOther(rightJBBSubset); // Since the properties from this group are cacheable, hence the // group attributes for the new join expression should contain // the combined JBBsubset of the left and the right children newJBBSubset->addSubset(rightJBBSubset); } // inputEstLogProp would be either empty input estLogProp or from the // outer child. If cacheable is TRUE, then newJBBsubset should // contain the combined left and the right JBB subset. But if // cacheable is FALSE, newJBBsubset should be NULL Join * joinExpr = formJoinExprWithEstLogProps( leftEstLogProp, rightEstLogProp, inLP, &setOfPredicates, cacheable, newJBBSubset); // Now do the actual synthesis and cache statistics in the cache outputEstLogProp = joinExpr->getGroupAttr()->outputLogProp(inLP); return outputEstLogProp; }
// -----------------------------------------------------------------------
// AppliedStatMan::formJoinExprWithCANodeSets
//
// Builds a join expression whose children are described by two sets of
// CANodeIds, by first obtaining the estimated logical properties of each
// side and then delegating to formJoinExprWithEstLogProps().
//
//   leftNodeSet  - CANodeIds of the left child
//   rightNodeSet - CANodeIds of the right child
//   inLP         - input estimated logical properties. Its cacheable
//                  flag is cleared here if it claims to be cacheable but
//                  carries no nodeSet.
//   joinPreds    - join predicates, passed through unchanged
//   cacheable    - passed through unchanged
// -----------------------------------------------------------------------
Join * AppliedStatMan::formJoinExprWithCANodeSets(
                                    const CANodeIdSet & leftNodeSet,
                                    const CANodeIdSet & rightNodeSet,
                                    EstLogPropSharedPtr& inLP,
                                    const ValueIdSet * joinPreds,
                                    const NABoolean cacheable)
{
  CANodeIdSet * inputNodeSet = NULL;

  if (inLP->isCacheable())
  {
    inputNodeSet = inLP->getNodeSet();

    // Cacheable inLP should have a nodeSet attached. If it does not for
    // some reason, assert in debug mode. In release mode do not look
    // for properties in the ASM cache; get them from the group attr
    // cache instead.
    if (inputNodeSet == NULL)
    {
      CCMPASSERT(inputNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  // CANodeIds that appear on both sides.
  CANodeIdSet sharedNodes = leftNodeSet;
  sharedNodes.intersectSet(rightNodeSet);

  // Remove the CANodeIds which are common to both children from the
  // child whose estLogProps are not cached. If the estLogProps of both
  // children are not cached, remove them from the child which has the
  // larger CANodeIdSet associated with it.
  CANodeIdSet leftSide  = leftNodeSet;
  CANodeIdSet rightSide = rightNodeSet;

  if (sharedNodes.entries() > 0)
  {
    NABoolean trimLeftSide = FALSE;

    // When the left set is found by lookup() the right side is trimmed.
    // Otherwise the left side is trimmed if the right set is found, or
    // if neither is found and the left set is the larger one.
    if (!lookup(leftNodeSet))
      trimLeftSide = lookup(rightNodeSet) ||
                     (leftNodeSet.entries() > rightNodeSet.entries());

    if (trimLeftSide)
      leftSide.subtractSet(sharedNodes);
    else
      rightSide.subtractSet(sharedNodes);
  }

  // Get the estLogProps for the left and the right child. If these are
  // not in the cache, synthesize them incrementally starting from the
  // left most JBBC in the JBBSubset.
  EstLogPropSharedPtr leftEstLogProp;
  EstLogPropSharedPtr rightEstLogProp;

  if (inputNodeSet)
  {
    CANodeIdSet leftCacheKey = leftSide;
    leftCacheKey.insert(*inputNodeSet);
    leftEstLogProp = getCachedStatistics(&leftCacheKey);

    CANodeIdSet rightCacheKey = rightSide;
    rightCacheKey.insert(*inputNodeSet);
    rightEstLogProp = getCachedStatistics(&rightCacheKey);
  }

  if (leftEstLogProp == NULL)
    leftEstLogProp = synthesizeLogProp(&leftSide, inLP);

  // If the estimated logical properties have been computed for a
  // non-cacheable inLP, they would not contain a nodeSet. But the
  // nodeSet is needed to compute potential output values, so add it now.
  if (!leftEstLogProp->getNodeSet())
  {
    CANodeIdSet * copyLeftNodeSet = new (STMTHEAP) CANodeIdSet (leftSide);
    leftEstLogProp->setNodeSet(copyLeftNodeSet);
  }

  if (rightEstLogProp == NULL)
    rightEstLogProp = synthesizeLogProp(&rightSide, inLP);

  if (!rightEstLogProp->getNodeSet())
  {
    CANodeIdSet * copyRightNodeSet = new (STMTHEAP) CANodeIdSet (rightSide);
    rightEstLogProp->setNodeSet(copyRightNodeSet);
  }

  // Since the join expression consists of the left and the right
  // JBBSubsets, the JBBSubset for this join expression is the superset
  // of the left and right JBBSubset. Note that the untrimmed input sets
  // are used here.
  JBBSubset * combinedSet = leftNodeSet.jbbcsToJBBSubset();
  combinedSet->addSubset(*(rightNodeSet.jbbcsToJBBSubset()));

  // Now form the join expression with these EstLogProps. inLP and the
  // joinPreds are the same as those for which the estLogProp are to be
  // synthesized. The cacheable flag depends on whether the left, right
  // and the outer child are cacheable, or on whether the join is on all
  // columns or not.
  return formJoinExprWithEstLogProps(leftEstLogProp,
                                     rightEstLogProp,
                                     inLP,
                                     joinPreds,
                                     cacheable,
                                     combinedSet);
} // AppliedStatMan::formJoinExprWithCANodeSets
// -----------------------------------------------------------------------
// AppliedStatMan::getStatsForCANodeId
//
// Returns the estimated logical properties of a single JBBC.
//
//   jbbc      - the CANodeId whose statistics are wanted
//   inLP      - input estimated logical properties. If this is the
//               global empty input logprop, the JBB input of jbbc is
//               used instead.
//   predIdSet - predicates to apply. When NULL, the predicates of the
//               node's own expression are used and the result may come
//               from the ASM cache.
//
// When predIdSet is given and the node has a table analysis, a fresh
// Scan with exactly those predicates is built and synthesized against a
// non-cacheable copy of the input properties.
// -----------------------------------------------------------------------
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
                                    CANodeId jbbc,
                                    const EstLogPropSharedPtr &inLP,
                                    const ValueIdSet * predIdSet)
{
  EstLogPropSharedPtr inputLP = inLP;
  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = jbbc.getJBBInput();

  // 1. Try to find the logical properties in the cache, if cacheable.
  //    The estimated logical properties can be cacheable if all local
  //    predicates are to be applied and the inLP are cacheable.
  if (inputLP->isCacheable() && (predIdSet == NULL))
  {
    // The nodeIdSet of the outer child, along with the present jbbc,
    // is used as the key in the cache.
    CANodeIdSet * inputNodeSet = inputLP->getNodeSet();

    // If inLP are cacheable these should have a nodeSet attached.
    CCMPASSERT(inputNodeSet != NULL);

    if (inputNodeSet)
    {
      CANodeIdSet combinedSet = jbbc;
      combinedSet.insert(*inputNodeSet);

      // If the estLogProp for all local predicates is required, it
      // might already exist in the cache.
      EstLogPropSharedPtr cachedLogProp = getCachedStatistics(&combinedSet);
      if (cachedLogProp != NULL)
        return cachedLogProp;
    }
  }

  // 2. The properties do not exist in the cache, so synthesize them.
  //    If predicates were specified by the caller apply those, else
  //    apply the predicates in the node's expression.
  NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis();
  TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis();

  if (!(tableAnalysis && predIdSet))
  {
    // Prefer the modified expression; fall back to the original one.
    RelExpr * relExpr = jbbcNode->getModifiedExpr();
    if (relExpr == NULL)
      relExpr = jbbcNode->getOriginalExpr();

    // Synthesize and cache estLogProp for the given inLP.
    return relExpr->getGroupAttr()->outputLogProp(inputLP);
  }

  // Build a stand-alone scan on the table, carrying only the given
  // predicates.
  TableDesc * tableDesc = tableAnalysis->getTableDesc();
  const QualifiedName& qualName = tableDesc->getNATable()->getTableName();
  CorrName name(qualName, STMTHEAP);

  Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP);

  // Base row count: start from the table's estimated row count; a
  // cardinality hint overrides it; without a hint, an HBase table takes
  // the row count of its first single-column statistics, if available.
  Cardinality rc = tableDesc->getNATable()->getEstRowCount();

  const CardinalityHint* cardHint = tableDesc->getCardinalityHint();
  if (cardHint)
  {
    rc = (cardHint->getScanCardinality()).getValue();
  }
  else if (tableDesc->getNATable()->isHbaseTable())
  {
    NATable* nt = (NATable*)(tableDesc->getNATable());
    StatsList* statsList = nt->getColStats();

    if (statsList && statsList->entries() > 0)
    {
      ColStatsSharedPtr cStatsPtr = statsList->getSingleColumnColStats(0);
      if (cStatsPtr)
        rc = (cStatsPtr->getRowcount()).getValue();
    }
  }

  scanExpr->setBaseCardinality(MIN_ONE (rc));

  GroupAttributes * gaExpr = new STMTHEAP GroupAttributes();

  scanExpr->setSelectionPredicates(*predIdSet);

  // The new scan produces the same characteristic outputs as the
  // node's original expression.
  ValueIdSet requiredOutputs =
    jbbcNode->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();

  gaExpr->setCharacteristicOutputs(requiredOutputs);

  scanExpr->setGroupAttr(gaExpr);
  gaExpr->setLogExprForSynthesis(scanExpr);

  // Synthesize against a copy of the input properties that is marked as
  // not cacheable.
  EstLogPropSharedPtr nonCacheableInLP(new (HISTHEAP) EstLogProp (*inputLP));
  nonCacheableInLP->setCacheableFlag(FALSE);

  scanExpr->synthLogProp();

  return scanExpr->getGroupAttr()->outputLogProp(nonCacheableInLP);
} // getStatsForCANodeId