/********************************************************************
* IndexDesc::pruneMdam
*
* Decides whether MDAM access is worth considering for this index and,
* for an index join, whether the join is likely to cost more than a
* scan of the base table.
*
* Input:
*   preds           selection predicates of the scan node
*   indexOnlyIndex  TRUE when the index is an index-only index
*   groupAttr       group attributes of the group; only dereferenced
*                   when indexOnlyIndex is FALSE
*   inputValues     characteristic inputs; only dereferenced when
*                   indexOnlyIndex is FALSE and the group has at least
*                   one input logical property
*
* Output:
*   selectivityEnum (out) INDEX_ONLY_INDEX for an index-only index;
*                   otherwise INDEX_JOIN_VIABLE, or EXCEEDS_BT_SCAN when
*                   the per-probe estimate is larger than the estimate
*                   for reading the base table
*   return value    MDAM_ON when the estimated number of MDAM skips
*                   (plus probes per input probe, for an index join) is
*                   within the mdamSelectionDefault() fraction of the
*                   relevant block count; MDAM_OFF otherwise, and always
*                   MDAM_OFF when preds is empty
********************************************************************/
MdamFlags IndexDesc::pruneMdam(const ValueIdSet& preds,
                               NABoolean indexOnlyIndex,
                               IndexJoinSelectivityEnum& selectivityEnum /* out */,
                               const GroupAttributes * groupAttr,
                               const ValueIdSet * inputValues) const
{
  selectivityEnum = indexOnlyIndex ? INDEX_ONLY_INDEX : INDEX_JOIN_VIABLE;

  // Without predicates there is nothing MDAM could exploit.
  if (preds.isEmpty())
    return MDAM_OFF;

  // Count the leading key columns that no predicate references; the
  // count stops at the first key column that is referenced.
  ValueId referencedVid;
  CollIndex leadingEmptyCols = 0;
  while (leadingEmptyCols < indexKey_.entries() AND
         NOT preds.referencesTheGivenValue(indexKey_[leadingEmptyCols],
                                           referencedVid))
    leadingEmptyCols++;

  // An index-only index whose first key column already has a predicate
  // needs no further analysis.
  if (leadingEmptyCols == 0 AND indexOnlyIndex)
    return MDAM_ON;

  const CollIndex histogramColumns =
    indexOnlyIndex ? leadingEmptyCols : indexKey_.entries();
  IndexDescHistograms ixHistogram(*this, histogramColumns);
  NABoolean haveMultiColUec = ixHistogram.isMultiColUecInfoAvail();
  ColumnOrderList keyColumnOrder(indexKey_);

  // Estimate the number of MDAM skips caused by the predicate-less
  // leading columns.
  CostScalar skipCount = csOne;
  ValueIdSet skippedColumns;
  for (CollIndex keyPos = 0; keyPos < leadingEmptyCols; keyPos++)
    {
      skippedColumns.insert(indexKey_[keyPos]);

      if (keyPos != 0 AND haveMultiColUec != FALSE)
        {
          // Prefer a multi-column UEC that covers every column skipped
          // so far: when present it *is* the number of skips.
          CostScalar combinedUec =
            ixHistogram.getUecCountForColumns(skippedColumns);
          if (combinedUec > 0)
            {
              skipCount = combinedUec;
              continue;
            }

          // Otherwise ask for the best multi-column based estimate of
          // this column's UEC.
          CostScalar estimatedUec = csOne;
          if (ixHistogram.estimateUecUsingMultiColUec(keyColumnOrder,
                                                      keyPos,
                                                      estimatedUec) == TRUE)
            {
              skipCount *= estimatedUec;
              continue;
            }
        }

      // First column, no multi-column UEC information, or no usable
      // estimate: multiply by the single-column UEC.
      skipCount *= (ixHistogram.getColStatsForColumn(indexKey_[keyPos])).
                     getTotalUec().getCeiling();
    }

  CostScalar rowCount = ixHistogram.getRowCount();
  CostScalar indexBlocks = rowCount / getEstimatedRecordsPerBlock();
  CostScalar probeRows = csOne;
  CostScalar baseTableBlocks = csOne;
  CostScalar outerProbes = csOne;

  // Hints supplied by the user, handed on to predicate application.
  const SelectivityHint * selHint = tableDesc_->getSelectivityHint();
  const CardinalityHint * cardHint = tableDesc_->getCardinalityHint();

  // For an index join, estimate the number of probes into the base
  // table. An insufficiently selective alternate index produces many
  // probes, which makes the index expensive.
  if (NOT indexOnlyIndex)
    {
      if ((groupAttr->getInputLogPropList()).entries() > 0)
        {
          // There are incoming probes, i.e. the index join sits under
          // another nested join or TSJ, so compute the result for all
          // probes using the first input estimated logical property.
          // A different input property for the same group could give
          // fewer rows per probe; per the original note,
          // FileScanRule::nextSubstitute() makes sure the context's
          // input property is within the error range of this one.
          // Example:
          //   select * from lineitem, customer, nation
          //   where l_custkey < c_custkey and c_custkey = n_nationkey;
          // With customer as the outer of lineitem the alternate index
          // on custkey should be excluded, whereas with nation pushed
          // below customer the value range is smaller and the same
          // index becomes attractive.
          ixHistogram.applyPredicatesWhenMultipleProbes(
                         preds,
                         *((groupAttr->getInputLogPropList())[0]),
                         *inputValues,
                         TRUE,
                         selHint,
                         cardHint,
                         NULL,
                         REL_SCAN);
          outerProbes =
            MIN_ONE((groupAttr->getInputLogPropList())[0]->getResultCardinality());
        }
      else
        {
          // No incoming probes: apply the predicates once, using a
          // placeholder expression allocated on the statement heap.
          RelExpr * placeholderExpr =
            new (STMTHEAP) RelExpr(ITM_FIRST_ITEM_OP, NULL, NULL, STMTHEAP);
          ixHistogram.applyPredicates(preds, *placeholderExpr,
                                      selHint, cardHint, REL_SCAN);
        }

      probeRows = ixHistogram.getRowCount();
      baseTableBlocks = rowCount /
        tableDesc_->getClusteringIndex()->getEstimatedRecordsPerBlock();

      double readAhead = CURRSTMT_OPTDEFAULTS->readAheadMaxBlocks();

      // Cardinality was computed for all probes, but the comparison is
      // made per probe; the assumed per-probe upper bound is a scan of
      // the whole base table.
      if (probeRows/outerProbes + MINOF((indexBlocks / readAhead),skipCount)
            > (baseTableBlocks/readAhead))
        selectivityEnum = EXCEEDS_BT_SCAN;
    }

  // Turn MDAM on only when the skips stay within the configured
  // fraction of the blocks that would otherwise be read.
  if (indexOnlyIndex)
    {
      if (skipCount <=
            (indexBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
        return MDAM_ON;
    }
  else if (skipCount + probeRows/outerProbes <=
             (baseTableBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
    {
      return MDAM_ON;
    }

  return MDAM_OFF;
}
// --------------------------------------------------------------------- // Utility Routine: pickOutputs // // From the given ColStatDescList, populate columnStats_ with column // descriptors that are useful based on the characteristic outputs for // the group. // // Always include in the output the current histograms of the input data, // and, if the histogram is contained in the required output list, then // this is a useful histogram and will also be output. // // --------------------------------------------------------------------- void EstLogProp::pickOutputs( ColStatDescList & columnStats, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet specifiedOutputs, const ValueIdSet predSet) { const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats(); ValueIdSet colsRequiringHistograms = specifiedOutputs; // (i) see if the selection predicates contain any constant value or a // constant expression // (ii) check if there are any columns of this table being joined to some other // columns, which do not appear as characteristics outputs. There should be // histograms available for these columns, as these might be needed later. // This problem was seen for temporary tables created as normal_tables by the // triggers. colsRequiringHistograms.addSet(predSet.getColumnsForHistogram()); colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ; NABoolean colStatDescAdded = FALSE; for (CollIndex i=0; i < columnStats.entries(); i++) { // we probably don't need 'em all, but this is the easiest way to // grab all of the multi-column uec information we'll need later colStats().insertIntoUecList (columnStats.getUecList()) ; colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint()); NABoolean found = FALSE; // Note: The following inserts into a ColStatDescList should not // have to be deep copies. From this point on, ColStatDescs that // describe the output of the calling operator are read-only. 
ColStatDescSharedPtr colStatDesc = columnStats[i]; // the value-id we're looking for const ValueId columnId = colStatDesc->getVEGColumn() ; for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++) { if (columnId == outerColStatsList[j]->getVEGColumn() OR (CmpCommon::context()->showQueryStats())) { colStats().insert(colStatDesc) ; found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; break ; // jump to next ColStatDesc } } // OK, the valueid doesn't match directly -- but there are still a // couple of things to check in order to verify whether or not we're // interested in keeping the i'th ColStatDesc ... ValueId throwaway ; // used by the second clause below if ( NOT found AND (columnId != NULL_VALUE_ID) AND (colsRequiringHistograms.contains (columnId) OR colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR columnId.isInvolvedInJoinAndConst() OR CmpCommon::context()->showQueryStats() ) ) { colStats().insert(colStatDesc); found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; } if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting()) { // if the column is referenced for histogram, but is // not needed beyond this time , then we shall save its // max freq, which might be used later in costing if this // column is a part of the partitioning key ColStatsSharedPtr stat = colStatDesc->getColStats(); if (!(stat->isVirtualColForHist() ) && NOT found && !(stat->isOrigFakeHist() ) ) { const ValueId col = colStatDesc->getColumn(); ColAnalysis * colAnalysis = col.colAnalysis(); if (colAnalysis) { NAColumn * column = stat->getStatColumns()[0]; if (column->isReferencedForHistogram()) { CostScalar maxFreq = columnStats.getMaxFreq(columnId); colAnalysis->setMaxFreq(maxFreq); colAnalysis->setFinalUec(stat->getTotalUec()); colAnalysis->setFinalRC(stat->getRowcount()); } } } } } // for columnStats.entries() if(!colStatDescAdded && columnStats.entries() > 0) colStats().insert(columnStats[0]) ; } // pickOutputs