// ---------------------------------------------------------------------
// AppliedStatMan::synthesizeLogProp
//
// Returns the estimated logical properties for the JBBCs in nodeSet
// under the input logical properties inLP.
//
//  - If inLP is cacheable and carries a node set, the ASM cache is
//    probed with (nodeSet + input node set); a hit is returned as is.
//  - If inLP claims to be cacheable but has no node set, this asserts
//    (CCMPASSERT) and marks inLP as not cacheable.
//  - A single-entry nodeSet is delegated to getStatsForCANodeId().
//  - Otherwise the properties come from the group attributes of the
//    preferred join of the JBBSubset built from nodeSet.
// ---------------------------------------------------------------------
EstLogPropSharedPtr AppliedStatMan::synthesizeLogProp(
     const CANodeIdSet * nodeSet,
     EstLogPropSharedPtr &inLP)
{
  // cache key: the requested nodes, later extended by the input nodes
  CANodeIdSet cacheKey = *nodeSet;

  if (inLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inLP->getNodeSet();

    if (inNodeSet != NULL)
    {
      // check ASM cache for the estLogProps of nodeSet for the given inLP
      cacheKey.insert(*inNodeSet);

      EstLogPropSharedPtr cachedLP = getCachedStatistics(&cacheKey);
      if (cachedLP != NULL)
        return cachedLP;
    }
    else
    {
      // cacheable inLP should have a nodeSet attached; if not, assert
      // and fall back to treating the properties as not cacheable
      CCMPASSERT(inNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  // a single node needs no join
  if (nodeSet->entries() == 1)
    return getStatsForCANodeId(nodeSet->getFirst(), inLP);

  JBBSubset * jbbSubset = nodeSet->jbbcsToJBBSubset();
  Join * preferredJoin = jbbSubset->getPreferredJoin();

  //CMPASSERT(preferredJoin->isJoinFromMJSynthLogProp());

  return preferredJoin->getGroupAttr()->outputLogProp(inLP);
} // AppliedStatMan::synthesizeLogProp
// LCOV_EXCL_START // Used for other RelExpr but not MultiJoin void MultiJoin::synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp) { CMPASSERT(inputEstLogProp->getNodeSet()); Join * preferredJoin = jbbSubset_.getPreferredJoin(); CMPASSERT(preferredJoin->isJoinFromMJSynthLogProp()); EstLogPropSharedPtr myEstLogProp = preferredJoin->getGroupAttr()->outputLogProp(inputEstLogProp); getGroupAttr()->addInputOutputLogProp (inputEstLogProp, myEstLogProp, NULL); } // MultiJoin::synthEstLogProp
// ---------------------------------------------------------------------
// AppliedStatMan::insertCachePredStatEntry
//
// Inserts estLogProp into cacheASM_ under a copy of jbbcNodeSet that is
// allocated with STMTHEAP.
//
// Returns TRUE when cacheASM_->insert() hands back a non-NULL key,
// FALSE otherwise.
// ---------------------------------------------------------------------
NABoolean AppliedStatMan::insertCachePredStatEntry(
     const CANodeIdSet & jbbcNodeSet,
     const EstLogPropSharedPtr& estLogProp)
{
  // the cache keeps a pointer to its key, so hand it a private copy
  CANodeIdSet * cacheKey = new (STMTHEAP) CANodeIdSet (jbbcNodeSet);

  // FIXME!!! Must properly create cacheASM_
  CANodeIdSet * insertedKey = cacheASM_->insert(cacheKey, estLogProp.get());

  // NULL means the insert failed
  return (insertedKey != NULL) ? TRUE : FALSE;
} // AppliedStatMan::insertCachePredStatEntry
// ---------------------------------------------------------------------
// AppliedStatMan::joinJBBChildren
//
// Returns the estimated logical properties of joining the JBBCs in
// leftChildren with those in rightChildren for the given input
// logical properties.
//
// Parameters:
//   leftChildren   node ids on the left side of the join
//   rightChildren  node ids on the right side of the join
//   inLP           input est. logical properties; when equal to
//                  *GLOBAL_EMPTY_INPUT_LOGPROP the JBB input of
//                  leftChildren is used instead
//
// The ASM cache is probed first, keyed by left + right + input node
// set. On a miss the properties are synthesized via synthesizeLogProp().
// ---------------------------------------------------------------------
EstLogPropSharedPtr AppliedStatMan::joinJBBChildren(
     const CANodeIdSet & leftChildren,
     const CANodeIdSet & rightChildren,
     EstLogPropSharedPtr & inLP)
{
  EstLogPropSharedPtr inputLP = inLP;
  EstLogPropSharedPtr outputEstLogProp;

  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = leftChildren.getJBBInput();

  // Because there exists a nodeSet for the left, right and the outer
  // child, these properties are cacheable. Check whether the
  // outputEstLogProp of the join for the given inLP is in the cache.
  CANodeIdSet combinedNodeSet = leftChildren;
  combinedNodeSet.insert(rightChildren);

  if (inputLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inputLP->getNodeSet();

    // Cacheable input properties should have a nodeSet attached.
    // Assert in debug mode; otherwise skip the cache probe instead of
    // dereferencing a NULL pointer. synthesizeLogProp() below performs
    // its own handling of a cacheable inLP that has no nodeSet.
    CCMPASSERT(inNodeSet != NULL);
    if (inNodeSet != NULL)
    {
      CANodeIdSet combinedWithInputNodeSet = combinedNodeSet;
      combinedWithInputNodeSet.insert(*inNodeSet);

      outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet);
    }
  }

  // cache miss (or no usable cache key): synthesize the properties
  if (outputEstLogProp == NULL)
    outputEstLogProp = synthesizeLogProp(&combinedNodeSet, inputLP);

  return outputEstLogProp;
} // AppliedStatMan::joinJBBChildren
// ---------------------------------------------------------------------
// EstLogProp::compareEstLogProp
//
// Compares this EstLogProp with other and returns SAME or INCOMPATIBLE.
//
//  1. Identical objects are SAME.
//  2. If both sides carry a CANodeIdSet, the node sets alone decide.
//  3. Otherwise the result cardinalities must be equal and either both
//     column statistics lists are empty, or the column statistics,
//     unresolved predicates and inputForSemiTSJ_ all match.
// ---------------------------------------------------------------------
COMPARE_RESULT EstLogProp::compareEstLogProp (const EstLogPropSharedPtr &other) const
{
  if (this == other.get())
    return SAME;

  // First compare the CANodeSets of the two EstLogProps, provided both
  // are non-NULL (i.e. the Query Analyzer created nodeSet_ for both).
  if ((nodeSet_ != NULL) && (other->nodeSet_ != NULL))
    return ((*nodeSet_) == (*(other->nodeSet_))) ? SAME : INCOMPATIBLE;

  // This is the old logic after removing the heuristic that returned
  // SAME for "close" EstLogProps (resultCardinality_ ratio within
  // [0.8,1.2]). That heuristic was incompatible with the Cascades
  // assumption that, with pruning on, two different optimization
  // contexts must never compare as SAME.
  if (NOT (resultCardinality_ == other->resultCardinality_))
    return INCOMPATIBLE;

  // Two "empty" input logical properties.
  if (columnStats_.entries() == 0 AND other->columnStats_.entries() == 0)
    return SAME;

  if (columnStats_ == other->columnStats_ AND
      unresolvedPreds_ == other->unresolvedPreds_ AND
      inputForSemiTSJ_ == other->inputForSemiTSJ_)
    return SAME;

  return INCOMPATIBLE;
}
// --------------------------------------------------------------------- // Utility Routine: pickOutputs // // From the given ColStatDescList, populate columnStats_ with column // descriptors that are useful based on the characteristic outputs for // the group. // // Always include in the output the current histograms of the input data, // and, if the histogram is contained in the required output list, then // this is a useful histogram and will also be output. // // --------------------------------------------------------------------- void EstLogProp::pickOutputs( ColStatDescList & columnStats, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet specifiedOutputs, const ValueIdSet predSet) { const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats(); ValueIdSet colsRequiringHistograms = specifiedOutputs; // (i) see if the selection predicates contain any constant value or a // constant expression // (ii) check if there are any columns of this table being joined to some other // columns, which do not appear as characteristics outputs. There should be // histograms available for these columns, as these might be needed later. // This problem was seen for temporary tables created as normal_tables by the // triggers. colsRequiringHistograms.addSet(predSet.getColumnsForHistogram()); colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ; NABoolean colStatDescAdded = FALSE; for (CollIndex i=0; i < columnStats.entries(); i++) { // we probably don't need 'em all, but this is the easiest way to // grab all of the multi-column uec information we'll need later colStats().insertIntoUecList (columnStats.getUecList()) ; colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint()); NABoolean found = FALSE; // Note: The following inserts into a ColStatDescList should not // have to be deep copies. From this point on, ColStatDescs that // describe the output of the calling operator are read-only. 
ColStatDescSharedPtr colStatDesc = columnStats[i]; // the value-id we're looking for const ValueId columnId = colStatDesc->getVEGColumn() ; for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++) { if (columnId == outerColStatsList[j]->getVEGColumn() OR (CmpCommon::context()->showQueryStats())) { colStats().insert(colStatDesc) ; found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; break ; // jump to next ColStatDesc } } // OK, the valueid doesn't match directly -- but there are still a // couple of things to check in order to verify whether or not we're // interested in keeping the i'th ColStatDesc ... ValueId throwaway ; // used by the second clause below if ( NOT found AND (columnId != NULL_VALUE_ID) AND (colsRequiringHistograms.contains (columnId) OR colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR columnId.isInvolvedInJoinAndConst() OR CmpCommon::context()->showQueryStats() ) ) { colStats().insert(colStatDesc); found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; } if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting()) { // if the column is referenced for histogram, but is // not needed beyond this time , then we shall save its // max freq, which might be used later in costing if this // column is a part of the partitioning key ColStatsSharedPtr stat = colStatDesc->getColStats(); if (!(stat->isVirtualColForHist() ) && NOT found && !(stat->isOrigFakeHist() ) ) { const ValueId col = colStatDesc->getColumn(); ColAnalysis * colAnalysis = col.colAnalysis(); if (colAnalysis) { NAColumn * column = stat->getStatColumns()[0]; if (column->isReferencedForHistogram()) { CostScalar maxFreq = columnStats.getMaxFreq(columnId); colAnalysis->setMaxFreq(maxFreq); colAnalysis->setFinalUec(stat->getTotalUec()); colAnalysis->setFinalRC(stat->getRowcount()); } } } } } // for columnStats.entries() if(!colStatDescAdded && columnStats.entries() > 0) colStats().insert(columnStats[0]) ; } // pickOutputs
// ---------------------------------------------------------------------
// AppliedStatMan::computeJoinReduction
//
// Computes the ratio
//
//   card(leftNodesJoinedToRight JOIN rightChildren)
//   ------------------------------------------------
//          card(leftNodesJoinedToRight)
//
// where leftNodesJoinedToRight is the part of leftChildren that is
// connected to rightChildren (joined, predecessor or successor JBBCs),
// extended by the predecessors of those nodes that are themselves in
// leftChildren.
//
// If no left node is connected to the right side, the result
// cardinality of rightChildren is returned instead.
//
// Parameters:
//   leftChildren   node ids on the left side; assumed to represent a
//                  legal JBBSubset
//   rightChildren  node ids on the right side
// ---------------------------------------------------------------------
CostScalar AppliedStatMan::computeJoinReduction(
          const CANodeIdSet & leftChildren,
          const CANodeIdSet & rightChildren)
{
  // get stats for left. The result is not used below.
  // NOTE(review): the call is kept in case it has side effects (e.g.
  // populating a cache) that callers rely on -- confirm and drop it if
  // it is pure.
  getStatsForCANodeIdSet(leftChildren);

  // get stats for right
  EstLogPropSharedPtr rightCard = getStatsForCANodeIdSet(rightChildren);

  // collect every JBBC that is connected to some right child
  CANodeIdSet jbbcsJoinedToRight;

  for ( CANodeId rChild = rightChildren.init();
        rightChildren.next(rChild);
        rightChildren.advance(rChild))
  {
    JBBC * rChildJBBC = rChild.getNodeAnalysis()->getJBBC();

    jbbcsJoinedToRight += rChildJBBC->getJoinedJBBCs();
    jbbcsJoinedToRight += rChildJBBC->getPredecessorJBBCs();
    jbbcsJoinedToRight += rChildJBBC->getSuccessorJBBCs();
  }

  CANodeIdSet leftNodesJoinedToRight = leftChildren;
  leftNodesJoinedToRight.intersectSet(jbbcsJoinedToRight);

  // nothing on the left is connected to the right side
  if (!leftNodesJoinedToRight.entries())
    return rightCard->getResultCardinality();

  // Pull in, transitively, the predecessors of the connected left
  // nodes, restricted to nodes that belong to leftChildren.
  CANodeIdSet leftSetPredecessors;
  CANodeIdSet newNodes = leftNodesJoinedToRight;
  CANodeIdSet nodesConsidered;

  while (newNodes.entries())
  {
    for ( CANodeId lChild = newNodes.init();
          newNodes.next(lChild);
          newNodes.advance(lChild))
    {
      JBBC * lChildJBBC = lChild.getNodeAnalysis()->getJBBC();
      leftSetPredecessors += lChildJBBC->getPredecessorJBBCs();
      nodesConsidered += lChild;
    }

    leftSetPredecessors.intersectSet(leftChildren);
    newNodes = leftSetPredecessors;
    newNodes -= nodesConsidered;
  }

  leftNodesJoinedToRight += leftSetPredecessors;

  // for a JBBSubset to be legal it has to have at least one
  // independent jbbc, i.e. a jbbc connected via an
  // innerNonSemiNonTsjJoin.
  // Assumption: leftChildren represents a legal JBBSubset
  CANodeIdSet independentJBBCsInLeftNodesJoinedToRight =
    QueryAnalysis::Instance()->getInnerNonSemiNonTSJJBBCs();

  independentJBBCsInLeftNodesJoinedToRight.intersectSet(leftNodesJoinedToRight);

  if (!independentJBBCsInLeftNodesJoinedToRight.entries())
    leftNodesJoinedToRight +=
      leftChildren.jbbcsToJBBSubset()->
        getJBBSubsetAnalysis()->
          getLargestIndependentNode();

  EstLogPropSharedPtr cardLeftNodesJoinedToRight =
    getStatsForCANodeIdSet(leftNodesJoinedToRight);

  // cardinality of all nodes connected via a join
  EstLogPropSharedPtr cardConnectedNodes =
    joinJBBChildren(leftNodesJoinedToRight, rightChildren);

  // NOTE(review): there is no explicit guard against a zero left
  // cardinality here -- confirm that CostScalar division handles it.
  return cardConnectedNodes->getResultCardinality() /
         cardLeftNodesJoinedToRight->getResultCardinality();
}
// LCOV_EXCL_START :cnu EstLogPropSharedPtr AppliedStatMan::joinEstLogProps ( const EstLogPropSharedPtr& leftEstLogProp, const EstLogPropSharedPtr& rightEstLogProp, const EstLogPropSharedPtr& inLP) { EstLogPropSharedPtr outputEstLogProp; NABoolean cacheable = FALSE; CANodeIdSet * inputNodeSet = inLP->getNodeSet(); // These nodesets could be NULL, if the estLogProps to which they // belong are not cacheable CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet(); CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet(); if ((leftEstLogProp->isCacheable()) && (rightEstLogProp->isCacheable()) && (inLP->isCacheable()) ) { CCMPASSERT(leftNodeSet != NULL); CCMPASSERT(rightNodeSet != NULL); CCMPASSERT(inputNodeSet != NULL); if (leftNodeSet && rightNodeSet && inputNodeSet) { cacheable = TRUE; } } if (cacheable) { // check the ASM cache to see if outputEstLogProp for these // NodeSets appear for the given inputEstLogProp CANodeIdSet combineNodeSet = *leftNodeSet; combineNodeSet.insert(*rightNodeSet); CANodeIdSet combinedWithInputNodeSet = combineNodeSet; combinedWithInputNodeSet.insert(*inputNodeSet); outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet); if (outputEstLogProp != NULL) return outputEstLogProp; } JBBSubset * newJBBSubset = NULL; ValueIdSet setOfPredicates; if (leftNodeSet && rightNodeSet) { // join predicates can be obtained from EstLogProp, only // if these corresponded to complete set of predicates - // all local or complete join. Also, we need a // combinedJBBSubset to set in the fake join expression // that we will be creating. 
newJBBSubset = leftNodeSet->computeJBBSubset(); JBBSubset rightJBBSubset = *(rightNodeSet->computeJBBSubset()); setOfPredicates = newJBBSubset->joinPredsWithOther(rightJBBSubset); // Since the properties from this group are cacheable, hence the // group attributes for the new join expression should contain // the combined JBBsubset of the left and the right children newJBBSubset->addSubset(rightJBBSubset); } // inputEstLogProp would be either empty input estLogProp or from the // outer child. If cacheable is TRUE, then newJBBsubset should // contain the combined left and the right JBB subset. But if // cacheable is FALSE, newJBBsubset should be NULL Join * joinExpr = formJoinExprWithEstLogProps( leftEstLogProp, rightEstLogProp, inLP, &setOfPredicates, cacheable, newJBBSubset); // Now do the actual synthesis and cache statistics in the cache outputEstLogProp = joinExpr->getGroupAttr()->outputLogProp(inLP); return outputEstLogProp; }
// ---------------------------------------------------------------------
// AppliedStatMan::formJoinExprWithCANodeSets
//
// Builds a Join expression for joining the JBBCs in leftNodeSet with
// those in rightNodeSet.
//
// The estimated logical properties of each side are taken from the ASM
// cache when inLP carries a node set and the entry exists; otherwise
// they are synthesized with synthesizeLogProp(). Node ids common to
// both sides are removed from one side first (see below). The actual
// expression is created by formJoinExprWithEstLogProps().
//
// If inLP claims to be cacheable but has no node set, this asserts
// (CCMPASSERT) and marks inLP as not cacheable.
// ---------------------------------------------------------------------
Join * AppliedStatMan::formJoinExprWithCANodeSets(
          const CANodeIdSet & leftNodeSet,
          const CANodeIdSet & rightNodeSet,
          EstLogPropSharedPtr& inLP,
          const ValueIdSet * joinPreds,
          const NABoolean cacheable)
{
  CANodeIdSet * inputNodeSet = NULL;

  if (inLP->isCacheable())
  {
    inputNodeSet = inLP->getNodeSet();

    // if inLP are cacheable these should have a nodeSet attached.
    // If it is not for some reason, assert. Otherwise do not look for
    // properties in the ASM cache; get them from the group attr cache.
    if (inputNodeSet == NULL)
    {
      CCMPASSERT(inputNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  CANodeIdSet commonNodeSet = leftNodeSet;
  commonNodeSet.intersectSet(rightNodeSet);

  // Remove CANodeIds which are common to both children from the child
  // whose estLogProps are not cached. If neither child is cached,
  // remove them from the child with the larger CANodeIdSet.
  CANodeIdSet tempLeftNodeSet = leftNodeSet;
  CANodeIdSet tempRightNodeSet = rightNodeSet;

  if (commonNodeSet.entries() > 0)
  {
    NABoolean trimLeft = FALSE;

    if (lookup(leftNodeSet))
      trimLeft = FALSE;                 // left is cached: trim right
    else if (lookup(rightNodeSet))
      trimLeft = TRUE;                  // right is cached: trim left
    else
      trimLeft = (leftNodeSet.entries() > rightNodeSet.entries());

    if (trimLeft)
      tempLeftNodeSet.subtractSet(commonNodeSet);
    else
      tempRightNodeSet.subtractSet(commonNodeSet);
  }

  // get the estLogProps for the left and the right child.
  // If these are not in the cache, then synthesize them incrementally
  // starting from the left most JBBC in the JBBSubset
  EstLogPropSharedPtr leftEstLogProp = NULL;
  EstLogPropSharedPtr rightEstLogProp = NULL;

  if (inputNodeSet)
  {
    // leftEstLogProp cached?
    CANodeIdSet leftCacheKey = tempLeftNodeSet;
    leftCacheKey.insert(*inputNodeSet);
    leftEstLogProp = getCachedStatistics(&leftCacheKey);

    // rightEstLogProp cached?
    CANodeIdSet rightCacheKey = tempRightNodeSet;
    rightCacheKey.insert(*inputNodeSet);
    rightEstLogProp = getCachedStatistics(&rightCacheKey);
  }

  if (leftEstLogProp == NULL)
    leftEstLogProp = synthesizeLogProp(&tempLeftNodeSet, inLP);

  // if the estimated logical properties have been computed for
  // non-cacheable inLP, then these would not contain a nodeSet. But we
  // do need the nodeSet to compute potential output values, so add it.
  if (!leftEstLogProp->getNodeSet())
  {
    CANodeIdSet * copyLeftNodeSet = new (STMTHEAP) CANodeIdSet (tempLeftNodeSet);
    leftEstLogProp->setNodeSet(copyLeftNodeSet);
  }

  if (rightEstLogProp == NULL)
    rightEstLogProp = synthesizeLogProp(&tempRightNodeSet, inLP);

  if (!rightEstLogProp->getNodeSet())
  {
    CANodeIdSet * copyRightNodeSet = new (STMTHEAP) CANodeIdSet (tempRightNodeSet);
    rightEstLogProp->setNodeSet(copyRightNodeSet);
  }

  // Since the join expression consists of the left and the right
  // JBBSubsets, the JBBSubset for this Join expression is the superset
  // of the left and right JBBSubset. Note that this uses the original
  // (untrimmed) node sets.
  JBBSubset * combinedSet = leftNodeSet.jbbcsToJBBSubset();
  combinedSet->addSubset(*(rightNodeSet.jbbcsToJBBSubset()));

  // Now form the join expression with these EstLogProps. inLP and
  // joinPreds are the ones for which the estLogProp is to be
  // synthesized; the cacheable flag is passed through from the caller.
  return formJoinExprWithEstLogProps(leftEstLogProp, rightEstLogProp,
                                     inLP, joinPreds, cacheable, combinedSet);

} // AppliedStatMan::formJoinExprWithCANodeSets
// ---------------------------------------------------------------------
// AppliedStatMan::getStatsForCANodeId
//
// Returns the estimated logical properties of a single JBBC.
//
// Parameters:
//   jbbc       node id of the JBBC
//   inLP       input est. logical properties; when equal to
//              *GLOBAL_EMPTY_INPUT_LOGPROP the JBB input of jbbc is
//              used instead
//   predIdSet  predicates to apply; when NULL the predicates of the
//              node's own expression are used
//
// 1. If the input properties are cacheable and no predIdSet is given,
//    the ASM cache is probed with (jbbc + input node set).
// 2. Otherwise, or on a miss, the properties are synthesized:
//    - for a table node with an explicit predIdSet, from a Scan
//      expression built here with those predicates and a
//      non-cacheable copy of the input properties;
//    - else from the node's modified expression, or its original
//      expression when there is no modified one.
// ---------------------------------------------------------------------
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
          CANodeId jbbc,
          const EstLogPropSharedPtr &inLP,
          const ValueIdSet * predIdSet)
{
  EstLogPropSharedPtr inputLP = inLP;

  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = jbbc.getJBBInput();

  EstLogPropSharedPtr outputEstLogProp = NULL;

  // 1. Try to find Logical Properties from cache if cacheable.
  // The estimated Logical Properties are cacheable if all local
  // predicates are to be applied and the inLP are cacheable.
  if ((inputLP->isCacheable()) && (predIdSet == NULL))
  {
    // the node set of the outer child together with the present jbbc
    // is used as the key in the cache
    CANodeIdSet combinedSet = jbbc;
    CANodeIdSet * inputNodeSet = inputLP->getNodeSet();

    // if inLP are cacheable these should have a nodeSet attached
    CCMPASSERT(inputNodeSet != NULL);

    if (inputNodeSet)
    {
      combinedSet.insert(*inputNodeSet);

      // estLogProp for all local predicates might already be cached
      outputEstLogProp = getCachedStatistics(&combinedSet);
    }
  }

  if (outputEstLogProp != NULL)
    return outputEstLogProp;

  // 2. properties do not exist in the cache, so synthesize them.
  // If specified by the user apply those predicates, else apply the
  // predicates in the original expr.
  NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis();
  TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis();

  if (!(tableAnalysis && predIdSet))
  {
    NodeAnalysis * nodeAnalysis = jbbc.getNodeAnalysis();

    RelExpr * relExpr = nodeAnalysis->getModifiedExpr();
    if (relExpr == NULL)
      relExpr = nodeAnalysis->getOriginalExpr();

    // synthesize and cache estLogProp for the given inLP.
    return relExpr->getGroupAttr()->outputLogProp(inputLP);
  }

  // Table node with explicit predicates: build a scan over the table.
  TableDesc * tableDesc = tableAnalysis->getTableDesc();
  const QualifiedName& qualName = tableDesc->getNATable()->getTableName();
  CorrName name(qualName, STMTHEAP);

  Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP);

  // Base row count: the table's estimated row count, overridden by a
  // cardinality hint if present, else (HBase tables only) by the row
  // count of the first single-column statistics entry, if any.
  Cardinality rc = tableDesc->getNATable()->getEstRowCount();

  const CardinalityHint* cardHint = tableDesc->getCardinalityHint();
  if ( cardHint )
  {
    rc = (cardHint->getScanCardinality()).getValue();
  }
  else if ( tableDesc->getNATable()->isHbaseTable() )
  {
    NATable* nt = (NATable*)(tableDesc->getNATable());
    StatsList* statsList = nt->getColStats();

    if ( statsList && statsList->entries() > 0 )
    {
      ColStatsSharedPtr cStatsPtr = statsList->getSingleColumnColStats(0);

      if ( cStatsPtr )
        rc = (cStatsPtr->getRowcount()).getValue();
    }
  }

  scanExpr->setBaseCardinality(MIN_ONE (rc));

  GroupAttributes * gaExpr = new STMTHEAP GroupAttributes();

  scanExpr->setSelectionPredicates(*predIdSet);

  // the scan produces the same characteristic outputs as the node's
  // original expression
  ValueIdSet requiredOutputs =
    jbbc.getNodeAnalysis()->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();

  gaExpr->setCharacteristicOutputs(requiredOutputs);

  scanExpr->setGroupAttr(gaExpr);
  gaExpr->setLogExprForSynthesis(scanExpr);

  // synthesize against a copy of the input properties that is flagged
  // as not cacheable
  EstLogPropSharedPtr nonCacheableInLP(new (HISTHEAP) EstLogProp (*inputLP));
  nonCacheableInLP->setCacheableFlag(FALSE);

  scanExpr->synthLogProp();

  return scanExpr->getGroupAttr()->outputLogProp(nonCacheableInLP);

} // getStatsForCANodeId