// Is there any column which has a local predicates and no stats NABoolean TableDesc::isAnyHistWithPredsFakeOrSmallSample(const ValueIdSet &localPreds) { // if there are no local predicates return FALSE; if (localPreds.isEmpty()) return FALSE; const ColStatDescList & colStatsList = getTableColStats(); // for each predicate, check to see if stats exist for (ValueId id = localPreds.init(); localPreds.next(id); localPreds.advance(id)) { ColStatsSharedPtr colStats = colStatsList.getColStatsPtrForPredicate(id); if (colStats == NULL) return FALSE; if (colStats->isOrigFakeHist() || colStats->isSmallSampleHistogram()) return TRUE; } return FALSE; }
void computeAndAddSelParamIfPossible( CacheWA& cwa, BindWA& bindWA, ExprValueId& child, BaseColumn *base, ConstValue *val) { ColStatsSharedPtr cStatsPtr = (base->getTableDesc()->tableColStats()). getColStatsPtrForColumn(base->getValueId()); if (cStatsPtr == NULL ) return; HistogramSharedPtr hist = cStatsPtr->getHistogram(); if ( hist == NULL ) return; CostScalar sel; NABoolean canComputeSelectivity = hist -> computeSelectivityForEquality( val, cStatsPtr->getRowcount(), cStatsPtr->getTotalUec(), sel); if ( canComputeSelectivity == TRUE ) { const NAType * newType = base->getNAColumn()->getType(); // for char datatypes, assign the caseinsensitive attribute from // the const node. if (newType->getTypeQualifier() == NA_CHARACTER_TYPE) { newType = base->getNAColumn()->getType()->newCopy(cwa.wHeap()); ((CharType*)newType)->setCaseinsensitive(((CharType*)val->getType())->isCaseinsensitive()); } cwa.replaceWithNewOrOldSelParam(val, newType, Selectivity(sel), child, bindWA); } }
// --------------------------------------------------------------------- // Utility Routine: pickOutputs // // From the given ColStatDescList, populate columnStats_ with column // descriptors that are useful based on the characteristic outputs for // the group. // // Always include in the output the current histograms of the input data, // and, if the histogram is contained in the required output list, then // this is a useful histogram and will also be output. // // --------------------------------------------------------------------- void EstLogProp::pickOutputs( ColStatDescList & columnStats, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet specifiedOutputs, const ValueIdSet predSet) { const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats(); ValueIdSet colsRequiringHistograms = specifiedOutputs; // (i) see if the selection predicates contain any constant value or a // constant expression // (ii) check if there are any columns of this table being joined to some other // columns, which do not appear as characteristics outputs. There should be // histograms available for these columns, as these might be needed later. // This problem was seen for temporary tables created as normal_tables by the // triggers. colsRequiringHistograms.addSet(predSet.getColumnsForHistogram()); colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ; NABoolean colStatDescAdded = FALSE; for (CollIndex i=0; i < columnStats.entries(); i++) { // we probably don't need 'em all, but this is the easiest way to // grab all of the multi-column uec information we'll need later colStats().insertIntoUecList (columnStats.getUecList()) ; colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint()); NABoolean found = FALSE; // Note: The following inserts into a ColStatDescList should not // have to be deep copies. From this point on, ColStatDescs that // describe the output of the calling operator are read-only. 
ColStatDescSharedPtr colStatDesc = columnStats[i]; // the value-id we're looking for const ValueId columnId = colStatDesc->getVEGColumn() ; for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++) { if (columnId == outerColStatsList[j]->getVEGColumn() OR (CmpCommon::context()->showQueryStats())) { colStats().insert(colStatDesc) ; found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; break ; // jump to next ColStatDesc } } // OK, the valueid doesn't match directly -- but there are still a // couple of things to check in order to verify whether or not we're // interested in keeping the i'th ColStatDesc ... ValueId throwaway ; // used by the second clause below if ( NOT found AND (columnId != NULL_VALUE_ID) AND (colsRequiringHistograms.contains (columnId) OR colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR columnId.isInvolvedInJoinAndConst() OR CmpCommon::context()->showQueryStats() ) ) { colStats().insert(colStatDesc); found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; } if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting()) { // if the column is referenced for histogram, but is // not needed beyond this time , then we shall save its // max freq, which might be used later in costing if this // column is a part of the partitioning key ColStatsSharedPtr stat = colStatDesc->getColStats(); if (!(stat->isVirtualColForHist() ) && NOT found && !(stat->isOrigFakeHist() ) ) { const ValueId col = colStatDesc->getColumn(); ColAnalysis * colAnalysis = col.colAnalysis(); if (colAnalysis) { NAColumn * column = stat->getStatColumns()[0]; if (column->isReferencedForHistogram()) { CostScalar maxFreq = columnStats.getMaxFreq(columnId); colAnalysis->setMaxFreq(maxFreq); colAnalysis->setFinalUec(stat->getTotalUec()); colAnalysis->setFinalRC(stat->getRowcount()); } } } } } // for columnStats.entries() if(!colStatDescAdded && columnStats.entries() > 0) colStats().insert(columnStats[0]) ; } // pickOutputs
// compress the histograms based on query predicates on this table void TableDesc::compressHistogramsForCurrentQuery() { // if there are some column statistics if ((colStats_.entries() != 0) && (table_) && (table_->getExtendedQualName().getSpecialType() == ExtendedQualName::NORMAL_TABLE)) { // if 1 // check if query analysis info is available if(QueryAnalysis::Instance()->isAnalysisON()) { // if 2 // get a handle to the query analysis QueryAnalysis* queryAnalysis = QueryAnalysis::Instance(); // get a handle to the table analysis const TableAnalysis * tableAnalysis = getTableAnalysis(); if(!tableAnalysis) return; // iterate over statistics for each column for(CollIndex i = 0; i < colStats_.entries(); i++) { // for 1 // Get a handle to the column's statistics descriptor ColStatDescSharedPtr columnStatDesc = colStats_[i]; // get a handle to the ColStats ColStatsSharedPtr colStats = columnStatDesc->getColStats(); // if this is a single column, as opposed to a multicolumn if(colStats->getStatColumns().entries() == 1) { // if 3 // get column's value id const ValueId columnId = columnStatDesc->getColumn(); // get column analysis ColAnalysis* colAnalysis = queryAnalysis->getColAnalysis(columnId); if(!colAnalysis) continue; ValueIdSet predicatesOnColumn = colAnalysis->getReferencingPreds(); // we can compress this column's histogram if there // is a equality predicate against a constant ItemExpr *constant = NULL; NABoolean colHasEqualityAgainstConst = colAnalysis->getConstValue(constant); // if a equality predicate with a constant was found // i.e. predicate of the form col = 5 if (colHasEqualityAgainstConst) { // if 4 if (constant) // compress the histogram columnStatDesc->compressColStatsForQueryPreds(constant,constant); } // if 4 else { // else 4 // since there is no equality predicates we might still // be able to compress the column's histogram based on // range predicates against a constant. 
Following are // examples of such predicates // * col > 1 <-- predicate defines a lower bound // * col < 3 <-- predicate defines a upper bound // * col >1 and col < 30 <-- window predicate, define both bounds ItemExpr * lowerBound = NULL; ItemExpr * upperBound = NULL; // Extract predicates from range spec and add it to the // original predicate set otherwise isARangePredicate() will // return FALSE, so histgram compression won't happen. ValueIdSet rangeSpecPred(predicatesOnColumn); for (ValueId predId= rangeSpecPred.init(); rangeSpecPred.next(predId); rangeSpecPred.advance(predId)) { ItemExpr * pred = predId.getItemExpr(); if ( pred->getOperatorType() == ITM_RANGE_SPEC_FUNC ) { ValueIdSet vs; ((RangeSpecRef *)pred)->getValueIdSetForReconsItemExpr(vs); // remove rangespec vid from the original set predicatesOnColumn.remove(predId); // add preds extracted from rangespec to the original set predicatesOnColumn.insert(vs); } } // in the following loop we iterate over all the predicates // on this column. If there is a range predicate e.g. a > 2 // or a < 3, then we use that to define upper and lower bounds. // Given predicate a > 2, we get a lower bound of 2. // Given predicate a < 3, we get a upper bound of 3. // The bound are then passed down to the histogram // compression methods. // iterate over predicates to see if any of them is a range // predicate e.g. a > 2 for (ValueId predId= predicatesOnColumn.init(); predicatesOnColumn.next(predId); predicatesOnColumn.advance(predId)) { // for 2 // check if this predicate is a range predicate ItemExpr * predicateOnColumn = predId.getItemExpr(); if (predicateOnColumn->isARangePredicate()) { // if 5 // if a predicate is a range predicate we need to find out more // information regarding the predicate to see if it can be used // to compress the columns histogram. We look for the following: // * The predicate is against a constant e.g. a > 3 and not against // another column e.g. 
a > b // Also give a predicate we need to find out what side is the column // and what side is the constant. Normally people write a range predicate // as a > 3, but the same could be written as 3 < a. // Also either on of the operands of the range predicate might be // a VEG, if so then we need to dig into the VEG to see where is // the constant and where is the column. // check the right and left children of this predicate to // see if one of them is a constant ItemExpr * leftChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(0); ItemExpr * rightChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(1); // by default assume the literal is at right i.e. predicate of // the form a > 2 NABoolean columnAtRight = FALSE; // check if right child of predicate is a VEG if ( rightChildItemExpr->getOperatorType() == ITM_VEG_REFERENCE) { // if 6 // if child is a VEG VEGReference * rightChildVEG = (VEGReference *) rightChildItemExpr; // check if the VEG contains the current column // if it does contain the current column then // the predicate has the column on right and potentially // a constant on the left. if(rightChildVEG->getVEG()->getAllValues().contains(columnId)) { // if 7 // column is at right i.e. predicate is of the form // 2 < a columnAtRight = TRUE; } // if 7 } // if 6 else { // else 6 // child is not a VEG if ( columnId == rightChildItemExpr->getValueId() ) { // if 8 // literals are at left i.e. predicate is of the form // (1,2) < (a, b) columnAtRight = TRUE; } // if 8 } // else 6 ItemExpr * potentialConstantExpr = NULL; // check if the range predicate is against a constant if (columnAtRight) { // if 9 // the left child is potentially a constant potentialConstantExpr = leftChildItemExpr; } // if 9 else { // else 9 // the right child is potentially a constant potentialConstantExpr = rightChildItemExpr; } // else 9 // initialize constant to NULL before // looking for next constant constant = NULL; // check if potentialConstantExpr contains a constant. 
// we need to see if this range predicate is a predicate // against a constant e.g col > 1 and not a predicate // against another column e.g. col > anothercol // if the expression is a VEG if ( potentialConstantExpr->getOperatorType() == ITM_VEG_REFERENCE) { // if 10 // expression is a VEG, dig into the VEG to // get see if it contains a constant VEGReference * potentialConstantExprVEG = (VEGReference *) potentialConstantExpr; potentialConstantExprVEG->getVEG()->\ getAllValues().referencesAConstValue(&constant); } // if 10 else { // else 10 // express is not a VEG, it is a constant if ( potentialConstantExpr->getOperatorType() == ITM_CONSTANT ) constant = potentialConstantExpr; } // else 10 // if predicate involves a constant, does the constant imply // a upper bound or lower bound if (constant) { // if 11 // if range predicate has column at right e.g. 3 > a if (columnAtRight) { // if 12 if ( predicateOnColumn->getOperatorType() == ITM_GREATER || predicateOnColumn->getOperatorType() == ITM_GREATER_EQ) { // if 13 if (!upperBound) upperBound = constant; } // if 13 else { // else 13 if (!lowerBound) lowerBound = constant; } // else 13 } // if 12 else { // else 12 // range predicate has column at left e.g. a < 3 if ( predicateOnColumn->getOperatorType() == ITM_LESS || predicateOnColumn->getOperatorType() == ITM_LESS_EQ) { // if 14 if (!upperBound) upperBound = constant; } // if 14 else { // else 14 if (!lowerBound) lowerBound = constant; } // else 14 } // else 12 } // if 11 } // if 5 } // for 2 // if we found a upper bound or a lower bound if (lowerBound || upperBound) { // compress the histogram based on range predicates columnStatDesc->compressColStatsForQueryPreds(lowerBound, upperBound); } } // else 4 } // if 3 } // for 1 } // if 2 } // if 1 // All histograms compressed. Set the histCompressed flag to TRUE histsCompressed(TRUE); }
// Returns the estimated logical properties of the node `jbbc` for the
// input logical properties `inLP`.
//
//   jbbc       node whose statistics are requested
//   inLP       input logical properties; when equal to the global empty
//              input logical properties, the node's own JBB input is used
//   predIdSet  optional predicates to apply instead of those of the
//              original expression; NULL means "use the expression as is"
//
// A cache lookup is attempted when the input properties are cacheable and
// no predicate set was supplied.  On a miss the properties are
// synthesized: through a temporary Scan carrying *predIdSet when the node
// has a table analysis and predicates were supplied, otherwise from the
// node's modified (or, failing that, original) expression.
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
                                        CANodeId jbbc,
                                        const EstLogPropSharedPtr &inLP,
                                        const ValueIdSet * predIdSet)
{
  EstLogPropSharedPtr inputLP = inLP;
  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = jbbc.getJBBInput();

  EstLogPropSharedPtr outputEstLogProp = NULL;

  // 1. Cache lookup, possible only when all local predicates are to be
  //    applied and the input properties are cacheable.
  if ((inputLP->isCacheable()) && (predIdSet == NULL))
  {
    // cache key: this node plus the node set of the outer child
    CANodeIdSet cacheKey = jbbc;

    CANodeIdSet * inputNodeSet;
    inputNodeSet = inputLP->getNodeSet();

    // cacheable input properties are expected to carry a node set
    CCMPASSERT(inputNodeSet != NULL);
    if (inputNodeSet)
    {
      cacheKey.insert(*inputNodeSet);
      outputEstLogProp = getCachedStatistics(&cacheKey);
    }
  }

  if (!(outputEstLogProp == NULL))
    return outputEstLogProp;

  // 2. Nothing cached: synthesize the properties.
  NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis();
  TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis();

  if (!(tableAnalysis && predIdSet))
  {
    // No caller-supplied predicates (or not a table): use the node's
    // expression, preferring the modified one, and let outputLogProp()
    // synthesize and cache for the given input.
    NodeAnalysis * nodeAnalysis = jbbc.getNodeAnalysis();
    RelExpr * relExpr = nodeAnalysis->getModifiedExpr();
    if (relExpr == NULL)
      relExpr = nodeAnalysis->getOriginalExpr();

    outputEstLogProp = relExpr->getGroupAttr()->outputLogProp(inputLP);
    return outputEstLogProp;
  }

  // Caller-supplied predicates on a table: build a temporary scan over
  // the table and apply exactly those predicates.
  TableDesc * tableDesc = tableAnalysis->getTableDesc();
  const QualifiedName& qualName =
    tableDesc->getNATable()->getTableName();
  CorrName name(qualName, STMTHEAP);
  Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP);

  // Base cardinality: estimated row count, overridden by a cardinality
  // hint if present, else (HBase tables) by the row count of the first
  // single-column statistics entry when there is one.
  Cardinality rc = tableDesc->getNATable()->getEstRowCount();
  const CardinalityHint* cardHint = tableDesc->getCardinalityHint();
  if (cardHint)
  {
    rc = (cardHint->getScanCardinality()).getValue();
  }
  else if (tableDesc->getNATable()->isHbaseTable())
  {
    NATable* naTable = (NATable*)(tableDesc->getNATable());
    StatsList* statsList = naTable->getColStats();
    if (statsList && statsList->entries() > 0)
    {
      ColStatsSharedPtr firstColStats =
        statsList->getSingleColumnColStats(0);
      if (firstColStats)
        rc = (firstColStats->getRowcount()).getValue();
    }
  }
  scanExpr->setBaseCardinality(MIN_ONE (rc));

  // Give the scan the predicates and the characteristic outputs of the
  // node's original expression.
  GroupAttributes * gaExpr = new STMTHEAP GroupAttributes();
  scanExpr->setSelectionPredicates(*predIdSet);

  ValueIdSet requiredOutputs =
    jbbc.getNodeAnalysis()->
      getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();
  gaExpr->setCharacteristicOutputs(requiredOutputs);

  scanExpr->setGroupAttr(gaExpr);
  gaExpr->setLogExprForSynthesis(scanExpr);

  // Work on a copy of the input properties with the cacheable flag
  // turned off.
  EstLogPropSharedPtr nonCacheableInLP(new (HISTHEAP) EstLogProp (*inputLP));
  nonCacheableInLP->setCacheableFlag(FALSE);

  scanExpr->synthLogProp();
  outputEstLogProp =
    scanExpr->getGroupAttr()->outputLogProp(nonCacheableInLP);

  return outputEstLogProp;
} // getStatsForCANodeId