// Get the scan expression for given jbbc // if jbbc is not a scan or predIdSet is NULL // then the original JBBC expression is returned RelExpr * AppliedStatMan::getExprForCANodeId( CANodeId jbbc, const EstLogPropSharedPtr &inLP, const ValueIdSet * predIdSet) { RelExpr * jbbcExpr = NULL; // should not happen but a check just in case CCMPASSERT(jbbc.getNodeAnalysis()); //if specified by the user apply those predicates, // else apply predicates in the original expr NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis(); TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis(); if (tableAnalysis && predIdSet) { TableDesc * tableDesc = tableAnalysis->getTableDesc(); const CorrName& name = tableDesc->getNATable()->getTableName(); Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP); scanExpr->setBaseCardinality(MIN_ONE (tableDesc->getNATable()->getEstRowCount())) ; GroupAttributes * gaExpr = new STMTHEAP GroupAttributes(); scanExpr->setSelectionPredicates(*predIdSet); ValueIdSet requiredOutputs = jbbc.getNodeAnalysis()->\ getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs(); gaExpr->setCharacteristicOutputs(requiredOutputs); ValueIdSet requiredInputs = jbbc.getNodeAnalysis()->\ getOriginalExpr()->getGroupAttr()->getCharacteristicInputs(); gaExpr->setCharacteristicInputs(requiredInputs); scanExpr->setGroupAttr(gaExpr); gaExpr->setLogExprForSynthesis(scanExpr); scanExpr->synthLogProp(); jbbcExpr = scanExpr; } else { NodeAnalysis * nodeAnalysis = jbbc.getNodeAnalysis(); RelExpr * relExpr = nodeAnalysis->getModifiedExpr(); if (relExpr == NULL) relExpr = nodeAnalysis->getOriginalExpr(); jbbcExpr = relExpr; } return jbbcExpr; } // getExprForCANodeId
/********************************************************************
* IndexDesc::pruneMdam
*
* Input:
*   preds           - selection predicates for the scan node
*   indexOnlyIndex  - TRUE if this index is used as an index-only index
*   selectivityEnum - (out) tells the caller whether an index join is
*                     viable
*   groupAttr       - GroupAttributes of the group; its input logical
*                     property list is consulted for index joins
*   inputValues     - characteristic inputs; dereferenced only when the
*                     group has input logical properties
*
* Output:
*   MdamFlags telling whether the index key access is good enough for
*   MDAM access (if an index does not have good MDAM access we have to
*   scan the whole index, because single subset will not have any keys
*   to apply either).
*   selectivityEnum is set to INDEX_ONLY_INDEX for an index-only index;
*   otherwise to INDEX_JOIN_VIABLE, or to EXCEEDS_BT_SCAN when the index
*   join cost would exceed base table access.
********************************************************************/
MdamFlags IndexDesc::pruneMdam(const ValueIdSet& preds,
                               NABoolean indexOnlyIndex,
                               IndexJoinSelectivityEnum& selectivityEnum /* out*/ ,
                               const GroupAttributes * groupAttr,
                               const ValueIdSet * inputValues) const
{
  selectivityEnum = indexOnlyIndex ? INDEX_ONLY_INDEX : INDEX_JOIN_VIABLE;

  // Without predicates there is nothing MDAM could use.
  if (preds.isEmpty())
    return MDAM_OFF;

  // Count the leading key columns that are not referenced by any
  // predicate. The scan stops at the first referenced column.
  ValueId referencingVid;  // second argument of the lookup; not used here
  CollIndex numEmptyColumns = 0;
  while (numEmptyColumns < indexKey_.entries() AND
         NOT preds.referencesTheGivenValue(indexKey_[numEmptyColumns],
                                           referencingVid))
    numEmptyColumns++;

  // No empty leading columns and no index join to evaluate:
  // nothing more to compute.
  if (numEmptyColumns < 1 AND indexOnlyIndex)
    return MDAM_ON;

  IndexDescHistograms ixHistogram(*this,
                                  (indexOnlyIndex ? numEmptyColumns
                                                  : indexKey_.entries()));

  NABoolean multiColUecAvail = ixHistogram.isMultiColUecInfoAvail();
  ColumnOrderList keyPredsByCol(indexKey_);

  // Estimate the number of MDAM skips caused by the empty leading columns.
  CostScalar numSkips = csOne;
  ValueIdSet emptyColumns;

  for (CollIndex col = 0; col < numEmptyColumns; col++)
  {
    emptyColumns.insert(indexKey_[col]);

    // Default: no usable MCUec, so multiply by the single column UEC.
    NABoolean useSingleColumnUec = TRUE;

    if (col != 0 AND multiColUecAvail != FALSE)
    {
      // First see if there is a multi-column UEC count for all columns
      // skipped so far; if so, that is the number of skips.
      CostScalar combinedUECCount = ixHistogram.getUecCountForColumns(emptyColumns);

      if (combinedUECCount > 0)
      {
        numSkips = combinedUECCount;
        useSingleColumnUec = FALSE;
      }
      else
      {
        // Otherwise get the best UEC estimate for the current column
        // using MCUec, if that is possible.
        CostScalar correctUec = csOne;
        NABoolean uecFound =
          ixHistogram.estimateUecUsingMultiColUec(keyPredsByCol, col, correctUec);

        if (uecFound == TRUE)
        {
          numSkips *= correctUec;
          useSingleColumnUec = FALSE;
        }
      }
    }

    if (useSingleColumnUec)
    {
      numSkips *= (ixHistogram.getColStatsForColumn(indexKey_[col])).
                    getTotalUec().getCeiling();
    }
  }

  // Row count is read BEFORE any predicate is applied to the histograms.
  CostScalar rowCount = ixHistogram.getRowCount();
  CostScalar numIndexBlocks = rowCount / getEstimatedRecordsPerBlock();

  // Pass any selectivity hint provided by the user
  const SelectivityHint * selHint = tableDesc_->getSelectivityHint();
  const CardinalityHint * cardHint = tableDesc_->getCardinalityHint();

  if (indexOnlyIndex)
  {
    // Does the number of skips exceed the cost of scanning the index?
    if (numSkips <=
        (numIndexBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
      return MDAM_ON;

    return MDAM_OFF;
  }

  // Index join: compute the number of probes into the base table. If the
  // alternate index is not selective enough, there will be lots of them,
  // making the index quite expensive.
  CostScalar inputProbes = csOne;

  if ((groupAttr->getInputLogPropList()).entries() > 0)
  {
    // There are incoming probes to the index, i.e. the index join is under
    // another nested join or TSJ, so compute the result for all probes.
    // The initial inputEstLogProp is used to compute the resulting
    // cardinality. For the same group a different inputEstLogProp could
    // give fewer rows per probe, so FileScanRule::nextSubstitute() makes
    // sure the context inputEstLogProp is in the error range of this one.
    // Ex.
    //   select * from lineitem, customer, nation
    //   where l_custkey < c_custkey and c_custkey = n_nationkey;
    // When evaluating lineitem indexes with customer as the outer we would
    // want to exclude the alternate index on custkey, whereas if nation
    // got pushed below customer the range of values would be smaller and
    // the lower max value would make that index quite attractive.
    ixHistogram.applyPredicatesWhenMultipleProbes(
                  preds,
                  *((groupAttr->getInputLogPropList())[0]),
                  *inputValues,
                  TRUE,
                  selHint,
                  cardHint,
                  NULL,
                  REL_SCAN);

    inputProbes =
      MIN_ONE((groupAttr->getInputLogPropList())[0]->getResultCardinality());
  }
  else
  {
    RelExpr * dummyExpr = new (STMTHEAP) RelExpr(ITM_FIRST_ITEM_OP,
                                                 NULL,
                                                 NULL,
                                                 STMTHEAP);

    ixHistogram.applyPredicates(preds, *dummyExpr, selHint, cardHint, REL_SCAN);
  }

  // Row count after the predicates have been applied.
  CostScalar numProbes = ixHistogram.getRowCount();

  CostScalar numBaseTableBlocks =
    rowCount / tableDesc_->getClusteringIndex()->getEstimatedRecordsPerBlock();

  double readAhead = CURRSTMT_OPTDEFAULTS->readAheadMaxBlocks();

  // Although cardinality from the index is computed for all probes, the
  // comparison is done per probe. The assumption is that per probe the
  // upper bound of cost is scanning the whole base table.
  if (numProbes/inputProbes + MINOF((numIndexBlocks / readAhead),numSkips)
        > (numBaseTableBlocks/readAhead))
  {
    selectivityEnum = EXCEEDS_BT_SCAN;
  }

  // Do the skips plus the per-probe base table accesses stay within the
  // allowed fraction of the base table blocks?
  if (numSkips + numProbes/inputProbes <=
      (numBaseTableBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
    return MDAM_ON;

  return MDAM_OFF;
}
// Return the estimated logical properties of the given jbbc.
//
// Parameters:
//   jbbc      - node whose statistics are wanted
//   inLP      - input logical properties; when equal to
//               *GLOBAL_EMPTY_INPUT_LOGPROP the jbbc's own JBB input
//               (jbbc.getJBBInput()) is used instead
//   predIdSet - predicates to apply; NULL means "apply the predicates of
//               the existing expression"
//
// The cache is consulted only when the input logical properties are
// cacheable and predIdSet is NULL. On a miss the properties are obtained
// from outputLogProp() of either a temporary Scan (table + predIdSet
// given) or the jbbc's modified/original expression.
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
     CANodeId jbbc,
     const EstLogPropSharedPtr &inLP,
     const ValueIdSet * predIdSet)
{
  EstLogPropSharedPtr effectiveInLP = inLP;

  if (effectiveInLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    effectiveInLP = jbbc.getJBBInput();

  EstLogPropSharedPtr resultLP = NULL;

  // 1. Cache lookup. Only possible when all local predicates are to be
  //    applied (no explicit predIdSet) and the input is cacheable.
  if ((effectiveInLP->isCacheable()) && (predIdSet == NULL))
  {
    // The cache key is this jbbc together with the node set of the
    // outer child.
    CANodeIdSet cacheKey = jbbc;

    CANodeIdSet * inputNodeSet = effectiveInLP->getNodeSet();

    // cacheable input properties should have a nodeSet attached
    CCMPASSERT(inputNodeSet != NULL);

    if (inputNodeSet)
    {
      cacheKey.insert(*inputNodeSet);
      resultLP = getCachedStatistics(&cacheKey);
    }
  }

  if (!(resultLP == NULL))
    return resultLP;

  // 2. Nothing in the cache, so synthesize the properties.
  NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis();
  TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis();

  if (!(tableAnalysis && predIdSet))
  {
    // No table or no user supplied predicates: use the existing
    // expression, preferring the modified one. outputLogProp()
    // synthesizes and caches estLogProp for the given input.
    RelExpr * relExpr = jbbcNode->getModifiedExpr();

    if (relExpr == NULL)
      relExpr = jbbcNode->getOriginalExpr();

    resultLP = relExpr->getGroupAttr()->outputLogProp(effectiveInLP);
    return resultLP;
  }

  // Build a temporary scan over the table that applies exactly the
  // predicates the caller asked for.
  TableDesc * tableDesc = tableAnalysis->getTableDesc();
  const QualifiedName& qualName =
        tableDesc->getNATable()->getTableName();

  CorrName name(qualName, STMTHEAP);

  Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP);

  // Base cardinality: start from the table's estimated row count; a
  // cardinality hint overrides it; without a hint, an HBase table takes
  // the row count of its first single-column statistics entry, if any.
  Cardinality rc = tableDesc->getNATable()->getEstRowCount();

  const CardinalityHint* cardHint = tableDesc->getCardinalityHint();

  if ( cardHint )
  {
    rc = (cardHint->getScanCardinality()).getValue();
  }
  else if ( tableDesc->getNATable()->isHbaseTable() )
  {
    NATable* nt = (NATable*)(tableDesc->getNATable());

    StatsList* statsList = nt->getColStats();

    if ( statsList && statsList->entries() > 0 )
    {
      ColStatsSharedPtr cStatsPtr =
            statsList->getSingleColumnColStats(0);

      if ( cStatsPtr )
        rc = (cStatsPtr->getRowcount()).getValue();
    }
  }

  scanExpr->setBaseCardinality(MIN_ONE (rc));

  GroupAttributes * gaExpr = new STMTHEAP GroupAttributes();

  scanExpr->setSelectionPredicates(*predIdSet);

  // The temporary scan produces the same outputs as the original
  // expression of the jbbc.
  ValueIdSet requiredOutputs =
        jbbcNode->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();

  gaExpr->setCharacteristicOutputs(requiredOutputs);

  scanExpr->setGroupAttr(gaExpr);
  gaExpr->setLogExprForSynthesis(scanExpr);

  // Work on a copy of the input properties that is flagged as
  // non-cacheable.
  EstLogPropSharedPtr nonCacheableInLP(new (HISTHEAP) EstLogProp (*effectiveInLP));
  nonCacheableInLP->setCacheableFlag(FALSE);

  scanExpr->synthLogProp();
  resultLP = scanExpr->getGroupAttr()->outputLogProp(nonCacheableInLP);

  return resultLP;
} // getStatsForCANodeId