void computeAndAddSelParamIfPossible( CacheWA& cwa, BindWA& bindWA, ExprValueId& child, BaseColumn *base, ConstValue *val) { ColStatsSharedPtr cStatsPtr = (base->getTableDesc()->tableColStats()). getColStatsPtrForColumn(base->getValueId()); if (cStatsPtr == NULL ) return; HistogramSharedPtr hist = cStatsPtr->getHistogram(); if ( hist == NULL ) return; CostScalar sel; NABoolean canComputeSelectivity = hist -> computeSelectivityForEquality( val, cStatsPtr->getRowcount(), cStatsPtr->getTotalUec(), sel); if ( canComputeSelectivity == TRUE ) { const NAType * newType = base->getNAColumn()->getType(); // for char datatypes, assign the caseinsensitive attribute from // the const node. if (newType->getTypeQualifier() == NA_CHARACTER_TYPE) { newType = base->getNAColumn()->getType()->newCopy(cwa.wHeap()); ((CharType*)newType)->setCaseinsensitive(((CharType*)val->getType())->isCaseinsensitive()); } cwa.replaceWithNewOrOldSelParam(val, newType, Selectivity(sel), child, bindWA); } }
// --------------------------------------------------------------------- // Utility Routine: pickOutputs // // From the given ColStatDescList, populate columnStats_ with column // descriptors that are useful based on the characteristic outputs for // the group. // // Always include in the output the current histograms of the input data, // and, if the histogram is contained in the required output list, then // this is a useful histogram and will also be output. // // --------------------------------------------------------------------- void EstLogProp::pickOutputs( ColStatDescList & columnStats, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet specifiedOutputs, const ValueIdSet predSet) { const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats(); ValueIdSet colsRequiringHistograms = specifiedOutputs; // (i) see if the selection predicates contain any constant value or a // constant expression // (ii) check if there are any columns of this table being joined to some other // columns, which do not appear as characteristics outputs. There should be // histograms available for these columns, as these might be needed later. // This problem was seen for temporary tables created as normal_tables by the // triggers. colsRequiringHistograms.addSet(predSet.getColumnsForHistogram()); colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ; NABoolean colStatDescAdded = FALSE; for (CollIndex i=0; i < columnStats.entries(); i++) { // we probably don't need 'em all, but this is the easiest way to // grab all of the multi-column uec information we'll need later colStats().insertIntoUecList (columnStats.getUecList()) ; colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint()); NABoolean found = FALSE; // Note: The following inserts into a ColStatDescList should not // have to be deep copies. From this point on, ColStatDescs that // describe the output of the calling operator are read-only. 
ColStatDescSharedPtr colStatDesc = columnStats[i]; // the value-id we're looking for const ValueId columnId = colStatDesc->getVEGColumn() ; for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++) { if (columnId == outerColStatsList[j]->getVEGColumn() OR (CmpCommon::context()->showQueryStats())) { colStats().insert(colStatDesc) ; found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; break ; // jump to next ColStatDesc } } // OK, the valueid doesn't match directly -- but there are still a // couple of things to check in order to verify whether or not we're // interested in keeping the i'th ColStatDesc ... ValueId throwaway ; // used by the second clause below if ( NOT found AND (columnId != NULL_VALUE_ID) AND (colsRequiringHistograms.contains (columnId) OR colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR columnId.isInvolvedInJoinAndConst() OR CmpCommon::context()->showQueryStats() ) ) { colStats().insert(colStatDesc); found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; } if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting()) { // if the column is referenced for histogram, but is // not needed beyond this time , then we shall save its // max freq, which might be used later in costing if this // column is a part of the partitioning key ColStatsSharedPtr stat = colStatDesc->getColStats(); if (!(stat->isVirtualColForHist() ) && NOT found && !(stat->isOrigFakeHist() ) ) { const ValueId col = colStatDesc->getColumn(); ColAnalysis * colAnalysis = col.colAnalysis(); if (colAnalysis) { NAColumn * column = stat->getStatColumns()[0]; if (column->isReferencedForHistogram()) { CostScalar maxFreq = columnStats.getMaxFreq(columnId); colAnalysis->setMaxFreq(maxFreq); colAnalysis->setFinalUec(stat->getTotalUec()); colAnalysis->setFinalRC(stat->getRowcount()); } } } } } // for columnStats.entries() if(!colStatDescAdded && columnStats.entries() > 0) colStats().insert(columnStats[0]) ; } // pickOutputs
// ---------------------------------------------------------------------
// AppliedStatMan::getStatsForCANodeId
//
// Returns the estimated logical properties of the JBBC 'jbbc' for the
// given input logical properties.
//
//   jbbc      - node whose statistics are requested
//   inLP      - input logical properties; when equal to the global empty
//               input, the JBB input of 'jbbc' is used instead
//   predIdSet - optional predicates to apply.  NULL means "apply the
//               predicates of the original expression".
//
// With cacheable input and no explicit predicate set the statistics
// cache is consulted first.  On a miss (or when the cache cannot be
// used) the properties are synthesized.
// ---------------------------------------------------------------------
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
     CANodeId jbbc,
     const EstLogPropSharedPtr &inLP,
     const ValueIdSet * predIdSet)
{
  EstLogPropSharedPtr inputLP = inLP;
  if (inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = jbbc.getJBBInput();

  EstLogPropSharedPtr outputEstLogProp = NULL;

  // Step 1: cache lookup.  Only possible when the input properties are
  // cacheable and all local predicates are to be applied (no predIdSet).
  if ((inputLP->isCacheable()) && (predIdSet == NULL))
  {
    // The cache key is this jbbc combined with the node set of the
    // outer child.
    CANodeIdSet cacheKey = jbbc;

    CANodeIdSet * outerNodeSet;
    outerNodeSet = inputLP->getNodeSet();

    // Cacheable input properties are expected to carry a node set.
    CCMPASSERT(outerNodeSet != NULL);
    if (outerNodeSet)
    {
      cacheKey.insert(*outerNodeSet);
      outputEstLogProp = getCachedStatistics(&cacheKey);
    }
  }

  // Step 2: nothing came from the cache, so synthesize the properties.
  if (outputEstLogProp == NULL)
  {
    NodeAnalysis * nodeAnalysis = jbbc.getNodeAnalysis();
    TableAnalysis * tableAnalysis = nodeAnalysis->getTableAnalysis();

    if (!tableAnalysis || !predIdSet)
    {
      // Use the predicates already present in the expression: prefer the
      // modified expression, fall back to the original one.
      RelExpr * nodeExpr = nodeAnalysis->getModifiedExpr();
      if (nodeExpr == NULL)
        nodeExpr = nodeAnalysis->getOriginalExpr();

      // synthesize and cache estLogProp for the given input
      outputEstLogProp = nodeExpr->getGroupAttr()->outputLogProp(inputLP);
    }
    else
    {
      // The caller specified the predicates: build a temporary Scan on
      // the table that carries exactly those predicates.
      TableDesc * tableDesc = tableAnalysis->getTableDesc();
      const QualifiedName& tableName = tableDesc->getNATable()->getTableName();
      CorrName corrName(tableName, STMTHEAP);
      Scan *tmpScan = new STMTHEAP Scan(corrName, tableDesc, REL_SCAN, STMTHEAP);

      // Base cardinality: the table's estimated row count, overridden by
      // a cardinality hint if there is one; otherwise, for an HBase
      // table, by the row count of its first single-column statistics
      // (when available).
      Cardinality baseRowCount = tableDesc->getNATable()->getEstRowCount();
      const CardinalityHint* hint = tableDesc->getCardinalityHint();
      if (hint)
      {
        baseRowCount = (hint->getScanCardinality()).getValue();
      }
      else if (tableDesc->getNATable()->isHbaseTable())
      {
        NATable* naTable = (NATable*)(tableDesc->getNATable());
        StatsList* tableStats = naTable->getColStats();
        if (tableStats && tableStats->entries() > 0)
        {
          ColStatsSharedPtr firstColStats =
            tableStats->getSingleColumnColStats(0);
          if (firstColStats)
            baseRowCount = (firstColStats->getRowcount()).getValue();
        }
      }
      tmpScan->setBaseCardinality(MIN_ONE (baseRowCount));

      // Give the scan its own group attributes, with the characteristic
      // outputs of the original expression.
      GroupAttributes * scanGA = new STMTHEAP GroupAttributes();
      tmpScan->setSelectionPredicates(*predIdSet);

      ValueIdSet requiredOutputs =
        nodeAnalysis->getOriginalExpr()->getGroupAttr()->
          getCharacteristicOutputs();
      scanGA->setCharacteristicOutputs(requiredOutputs);

      tmpScan->setGroupAttr(scanGA);
      scanGA->setLogExprForSynthesis(tmpScan);

      // Work on a copy of the input properties flagged as not cacheable.
      EstLogPropSharedPtr uncachedInLP(new (HISTHEAP) EstLogProp (*inputLP));
      uncachedInLP->setCacheableFlag(FALSE);

      tmpScan->synthLogProp();
      outputEstLogProp = tmpScan->getGroupAttr()->outputLogProp(uncachedInLP);
    }
  }

  return outputEstLogProp;
} // getStatsForCANodeId