// -----------------------------------------------------------------------
// AppliedStatMan::getPotentialOutputs()
//
// Builds the union of the values that the given JBBCs can produce.
//
// For every node in jbbcsNodeSet that has a NodeAnalysis:
//   * if the JBBC has no original parent join, or that join is an inner
//     non-semi join, the characteristic outputs of the node's original
//     expression are added;
//   * otherwise, if the parent join is a left join, the join's
//     null-instantiated output is added;
//   * for any other kind of parent join nothing is added.
// Nodes without a NodeAnalysis are skipped.
//
// Returns the accumulated set (empty if nothing qualified).
// -----------------------------------------------------------------------
ValueIdSet AppliedStatMan::getPotentialOutputs(
     const CANodeIdSet & jbbcsNodeSet)
{
  ValueIdSet result;

  for (CANodeId nodeId = jbbcsNodeSet.init();
       jbbcsNodeSet.next(nodeId);
       jbbcsNodeSet.advance(nodeId))
  {
    NodeAnalysis * nodeAnalysis = nodeId.getNodeAnalysis();
    if (!nodeAnalysis)
      continue;

    const Join * parentJoin =
      nodeAnalysis->getJBBC()->getOriginalParentJoin();

    // stays empty when the parent join is neither inner non-semi nor left
    ValueIdSet nodeOutputs;

    if (!parentJoin || parentJoin->isInnerNonSemiJoin())
    {
      nodeOutputs = nodeAnalysis->getOriginalExpr()->
                      getGroupAttr()->getCharacteristicOutputs();
    }
    else if (parentJoin->isLeftJoin())
    {
      nodeOutputs = parentJoin->nullInstantiatedOutput();
    }

    result.insert(nodeOutputs);
  }

  return result;
} // AppliedStatMan::getPotentialOutputs
// -----------------------------------------------------------------------
// IndexDesc::getNonKeyColumnSet()
//
// Fills nonKeyColumnSet with the index columns that are not part of the
// index key.
//
// nonKeyColumnSet (out): any previous content is discarded. On return
// it holds every value id of getIndexColumns() minus every value id of
// getIndexKey(). For a key column whose item expression is an
// ITM_INDEXCOLUMN, the value id of its definition is removed as well.
// -----------------------------------------------------------------------
void IndexDesc::getNonKeyColumnSet(ValueIdSet& nonKeyColumnSet) const
{
  const ValueIdList & allCols = getIndexColumns();
  const ValueIdList & keyCols = getIndexKey();

  // start from a clean slate
  nonKeyColumnSet.clear();

  // step 1: take every column of the index
  const CollIndex numAllCols = allCols.entries();
  for (CollIndex c = 0; c < numAllCols; c++)
    nonKeyColumnSet.insert(allCols[c]);

  // step 2: drop whatever belongs to the key
  const CollIndex numKeyCols = keyCols.entries();
  for (CollIndex k = 0; k < numKeyCols; k++)
  {
    nonKeyColumnSet.remove(keyCols[k]);

    // The key column of a secondary index is an index column defined
    // on top of a base column; that base column may be in the set too,
    // so remove it as well.
    const ItemExpr * keyExpr = keyCols[k].getItemExpr();
    if (keyExpr->getOperatorType() != ITM_INDEXCOLUMN)
      continue;

    const ValueId & baseColId = ((IndexColumn *)(keyExpr))->getDefinition();
    nonKeyColumnSet.remove(baseColId);
  }
} // IndexDesc::getNonKeyColumnSet(ValueIdSet& nonKeyColumnSet) const
// this method sets the primary key columns. It goes through all the columns // of the table, and collects the columns which are marked as primary keys void TableDesc::setPrimaryKeyColumns() { ValueIdSet primaryColumns; for ( CollIndex j = 0 ; j < colList_.entries() ; j++ ) { ValueId valId = colList_[j]; NAColumn *column = valId.getNAColumn(); if ( column->isPrimaryKey() ) { primaryColumns.insert(valId) ; // mark column as referenced for histogram, as we may need its histogram // during plan generation if ((column->isUserColumn() || column->isSaltColumn() ) && (column->getNATable()->getSpecialType() == ExtendedQualName::NORMAL_TABLE) ) column->setReferencedForMultiIntHist(); } } primaryKeyColumns_ = primaryColumns; }
// -----------------------------------------------------------------------
// IndexDesc::pruneMdam()
//
// Decides whether MDAM access on this index is worth considering for
// the given scan predicates and, for an index that is not index-only,
// whether an index join looks more expensive than a base table scan.
//
// Parameters:
//   preds           - selection predicates of the scan node
//   indexOnlyIndex  - TRUE if this index is an index-only index
//   selectivityEnum - (out) set to INDEX_ONLY_INDEX when indexOnlyIndex
//                     is TRUE, otherwise to INDEX_JOIN_VIABLE; changed
//                     to EXCEEDS_BT_SCAN when the estimated per-probe
//                     index join work exceeds the base table scan
//   groupAttr       - group attributes of the group; dereferenced only
//                     when indexOnlyIndex is FALSE
//   inputValues     - characteristic inputs; dereferenced only when
//                     indexOnlyIndex is FALSE and the input logical
//                     property list of groupAttr is not empty
//
// Returns:
//   MDAM_OFF if preds is empty or the estimated number of MDAM skips is
//   too high compared with the relevant block count scaled by
//   mdamSelectionDefault(); MDAM_ON otherwise. (If an index lacks good
//   MDAM access the whole index has to be scanned, because a single
//   subset will not have any keys to apply either.)
// -----------------------------------------------------------------------
MdamFlags IndexDesc::pruneMdam(const ValueIdSet& preds,
                               NABoolean indexOnlyIndex,
                               IndexJoinSelectivityEnum& selectivityEnum /* out*/ ,
                               const GroupAttributes * groupAttr,
                               const ValueIdSet * inputValues) const
{
  selectivityEnum = indexOnlyIndex ? INDEX_ONLY_INDEX : INDEX_JOIN_VIABLE;

  if (preds.isEmpty())
    return MDAM_OFF;

  // Count the leading key columns that no predicate references; stop at
  // the first key column that is referenced.
  CollIndex numEmptyColumns = 0;
  ValueId vid;
  while (numEmptyColumns < indexKey_.entries() AND
         NOT preds.referencesTheGivenValue(indexKey_[numEmptyColumns], vid))
    numEmptyColumns++;

  // Index-only index whose first key column has a predicate: there is
  // nothing to skip and no index join to evaluate.
  if (numEmptyColumns < 1 AND indexOnlyIndex)
    return MDAM_ON;

  IndexDescHistograms ixHistogram(*this,
       (indexOnlyIndex?numEmptyColumns:indexKey_.entries()));

  NABoolean multiColUecAvail = ixHistogram.isMultiColUecInfoAvail();
  ColumnOrderList keyPredsByCol(indexKey_);

  // Estimate the number of MDAM skips from the UECs of the leading
  // key columns that carry no predicate.
  CostScalar numSkips = csOne;
  ValueIdSet emptyColumns;
  for (CollIndex col = 0; col < numEmptyColumns; col++)
  {
    emptyColumns.insert(indexKey_[col]);

    NABoolean skipsUpdated = FALSE;

    if (col != 0 AND multiColUecAvail != FALSE)
    {
      // Prefer a multi-column UEC covering all columns skipped so far;
      // if there is one, it is the number of skips.
      CostScalar combinedUec = ixHistogram.getUecCountForColumns(emptyColumns);
      if (combinedUec > 0)
      {
        numSkips = combinedUec;
        skipsUpdated = TRUE;
      }
      else
      {
        // Next best: an estimate for the current column derived from
        // multi-column UEC information.
        CostScalar estimatedUec = csOne;
        const NABoolean estimated =
          ixHistogram.estimateUecUsingMultiColUec(keyPredsByCol, col, estimatedUec);
        if (estimated == TRUE)
        {
          numSkips *= estimatedUec;
          skipsUpdated = TRUE;
        }
      }
    }

    // First column, no multi-column UEC info, or no usable estimate:
    // multiply by the single column UEC.
    if (NOT skipsUpdated)
    {
      numSkips *= (ixHistogram.getColStatsForColumn(indexKey_[col])).
                    getTotalUec().getCeiling();
    }
  }

  CostScalar rowCount = ixHistogram.getRowCount();
  CostScalar numIndexBlocks = rowCount / getEstimatedRecordsPerBlock();
  CostScalar numProbes = csOne;
  CostScalar numBaseTableBlocks = csOne;
  CostScalar inputProbes = csOne;

  // Pass along any selectivity or cardinality hint given by the user
  const SelectivityHint * selHint = tableDesc_->getSelectivityHint();
  const CardinalityHint * cardHint = tableDesc_->getCardinalityHint();

  // For an index join, compute the number of probes into the base
  // table. If the alternate index is not selective enough there are
  // many of them, which makes the index expensive.
  if (NOT indexOnlyIndex)
  {
    if ((groupAttr->getInputLogPropList()).entries() > 0)
    {
      // There are incoming probes to the index, i.e. the index join
      // sits under another nested join or TSJ, so the result is
      // computed for all probes using the initial inputEstLogProp.
      // The same group with a different inputEstLogProp could yield
      // fewer rows per probe, so FileScanRule::nextSubstitute() makes
      // sure the context inputEstLogProp is in the error range of
      // this inputEstLogProp.
      // Ex. select * from lineitem, customer, nation
      //     where l_custkey < c_custkey and c_custkey = n_nationkey;
      // When evaluating lineitem indexes with customer as the outer we
      // would want to exclude the alternate index on custkey, whereas
      // with nation pushed below customer the range of values would be
      // smaller and the lower max value would make the alternate index
      // on custkey quite attractive.
      ixHistogram.applyPredicatesWhenMultipleProbes(
           preds,
           *((groupAttr->getInputLogPropList())[0]),
           *inputValues,
           TRUE,
           selHint,
           cardHint,
           NULL,
           REL_SCAN);
      inputProbes =
        MIN_ONE((groupAttr->getInputLogPropList())[0]->getResultCardinality());
    }
    else
    {
      RelExpr * dummyExpr =
        new (STMTHEAP) RelExpr(ITM_FIRST_ITEM_OP, NULL, NULL, STMTHEAP);
      ixHistogram.applyPredicates(preds, *dummyExpr, selHint, cardHint, REL_SCAN);
    }

    numProbes = ixHistogram.getRowCount();
    numBaseTableBlocks =
      rowCount / tableDesc_->getClusteringIndex()->getEstimatedRecordsPerBlock();

    const double readAhead = CURRSTMT_OPTDEFAULTS->readAheadMaxBlocks();

    // The cardinality is computed from the index for all probes, but
    // the comparison is per probe. The assumption is that, per probe,
    // the upper bound of the cost is scanning the whole base table.
    if (numProbes/inputProbes + MINOF((numIndexBlocks / readAhead),numSkips)
          > (numBaseTableBlocks/readAhead))
    {
      selectivityEnum = EXCEEDS_BT_SCAN;
    }
  }

  // Does the number of skips exceed the cost of scanning the index
  // (index-only) or the base table (index join)?
  if (indexOnlyIndex)
  {
    if (numSkips <=
          (numIndexBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
      return MDAM_ON;
  }
  else
  {
    if (numSkips + numProbes/inputProbes <=
          (numBaseTableBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
      return MDAM_ON;
  }

  return MDAM_OFF;
}
// compress the histograms based on query predicates on this table void TableDesc::compressHistogramsForCurrentQuery() { // if there are some column statistics if ((colStats_.entries() != 0) && (table_) && (table_->getExtendedQualName().getSpecialType() == ExtendedQualName::NORMAL_TABLE)) { // if 1 // check if query analysis info is available if(QueryAnalysis::Instance()->isAnalysisON()) { // if 2 // get a handle to the query analysis QueryAnalysis* queryAnalysis = QueryAnalysis::Instance(); // get a handle to the table analysis const TableAnalysis * tableAnalysis = getTableAnalysis(); if(!tableAnalysis) return; // iterate over statistics for each column for(CollIndex i = 0; i < colStats_.entries(); i++) { // for 1 // Get a handle to the column's statistics descriptor ColStatDescSharedPtr columnStatDesc = colStats_[i]; // get a handle to the ColStats ColStatsSharedPtr colStats = columnStatDesc->getColStats(); // if this is a single column, as opposed to a multicolumn if(colStats->getStatColumns().entries() == 1) { // if 3 // get column's value id const ValueId columnId = columnStatDesc->getColumn(); // get column analysis ColAnalysis* colAnalysis = queryAnalysis->getColAnalysis(columnId); if(!colAnalysis) continue; ValueIdSet predicatesOnColumn = colAnalysis->getReferencingPreds(); // we can compress this column's histogram if there // is a equality predicate against a constant ItemExpr *constant = NULL; NABoolean colHasEqualityAgainstConst = colAnalysis->getConstValue(constant); // if a equality predicate with a constant was found // i.e. predicate of the form col = 5 if (colHasEqualityAgainstConst) { // if 4 if (constant) // compress the histogram columnStatDesc->compressColStatsForQueryPreds(constant,constant); } // if 4 else { // else 4 // since there is no equality predicates we might still // be able to compress the column's histogram based on // range predicates against a constant. 
Following are // examples of such predicates // * col > 1 <-- predicate defines a lower bound // * col < 3 <-- predicate defines a upper bound // * col >1 and col < 30 <-- window predicate, define both bounds ItemExpr * lowerBound = NULL; ItemExpr * upperBound = NULL; // Extract predicates from range spec and add it to the // original predicate set otherwise isARangePredicate() will // return FALSE, so histgram compression won't happen. ValueIdSet rangeSpecPred(predicatesOnColumn); for (ValueId predId= rangeSpecPred.init(); rangeSpecPred.next(predId); rangeSpecPred.advance(predId)) { ItemExpr * pred = predId.getItemExpr(); if ( pred->getOperatorType() == ITM_RANGE_SPEC_FUNC ) { ValueIdSet vs; ((RangeSpecRef *)pred)->getValueIdSetForReconsItemExpr(vs); // remove rangespec vid from the original set predicatesOnColumn.remove(predId); // add preds extracted from rangespec to the original set predicatesOnColumn.insert(vs); } } // in the following loop we iterate over all the predicates // on this column. If there is a range predicate e.g. a > 2 // or a < 3, then we use that to define upper and lower bounds. // Given predicate a > 2, we get a lower bound of 2. // Given predicate a < 3, we get a upper bound of 3. // The bound are then passed down to the histogram // compression methods. // iterate over predicates to see if any of them is a range // predicate e.g. a > 2 for (ValueId predId= predicatesOnColumn.init(); predicatesOnColumn.next(predId); predicatesOnColumn.advance(predId)) { // for 2 // check if this predicate is a range predicate ItemExpr * predicateOnColumn = predId.getItemExpr(); if (predicateOnColumn->isARangePredicate()) { // if 5 // if a predicate is a range predicate we need to find out more // information regarding the predicate to see if it can be used // to compress the columns histogram. We look for the following: // * The predicate is against a constant e.g. a > 3 and not against // another column e.g. 
a > b // Also give a predicate we need to find out what side is the column // and what side is the constant. Normally people write a range predicate // as a > 3, but the same could be written as 3 < a. // Also either on of the operands of the range predicate might be // a VEG, if so then we need to dig into the VEG to see where is // the constant and where is the column. // check the right and left children of this predicate to // see if one of them is a constant ItemExpr * leftChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(0); ItemExpr * rightChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(1); // by default assume the literal is at right i.e. predicate of // the form a > 2 NABoolean columnAtRight = FALSE; // check if right child of predicate is a VEG if ( rightChildItemExpr->getOperatorType() == ITM_VEG_REFERENCE) { // if 6 // if child is a VEG VEGReference * rightChildVEG = (VEGReference *) rightChildItemExpr; // check if the VEG contains the current column // if it does contain the current column then // the predicate has the column on right and potentially // a constant on the left. if(rightChildVEG->getVEG()->getAllValues().contains(columnId)) { // if 7 // column is at right i.e. predicate is of the form // 2 < a columnAtRight = TRUE; } // if 7 } // if 6 else { // else 6 // child is not a VEG if ( columnId == rightChildItemExpr->getValueId() ) { // if 8 // literals are at left i.e. predicate is of the form // (1,2) < (a, b) columnAtRight = TRUE; } // if 8 } // else 6 ItemExpr * potentialConstantExpr = NULL; // check if the range predicate is against a constant if (columnAtRight) { // if 9 // the left child is potentially a constant potentialConstantExpr = leftChildItemExpr; } // if 9 else { // else 9 // the right child is potentially a constant potentialConstantExpr = rightChildItemExpr; } // else 9 // initialize constant to NULL before // looking for next constant constant = NULL; // check if potentialConstantExpr contains a constant. 
// we need to see if this range predicate is a predicate // against a constant e.g col > 1 and not a predicate // against another column e.g. col > anothercol // if the expression is a VEG if ( potentialConstantExpr->getOperatorType() == ITM_VEG_REFERENCE) { // if 10 // expression is a VEG, dig into the VEG to // get see if it contains a constant VEGReference * potentialConstantExprVEG = (VEGReference *) potentialConstantExpr; potentialConstantExprVEG->getVEG()->\ getAllValues().referencesAConstValue(&constant); } // if 10 else { // else 10 // express is not a VEG, it is a constant if ( potentialConstantExpr->getOperatorType() == ITM_CONSTANT ) constant = potentialConstantExpr; } // else 10 // if predicate involves a constant, does the constant imply // a upper bound or lower bound if (constant) { // if 11 // if range predicate has column at right e.g. 3 > a if (columnAtRight) { // if 12 if ( predicateOnColumn->getOperatorType() == ITM_GREATER || predicateOnColumn->getOperatorType() == ITM_GREATER_EQ) { // if 13 if (!upperBound) upperBound = constant; } // if 13 else { // else 13 if (!lowerBound) lowerBound = constant; } // else 13 } // if 12 else { // else 12 // range predicate has column at left e.g. a < 3 if ( predicateOnColumn->getOperatorType() == ITM_LESS || predicateOnColumn->getOperatorType() == ITM_LESS_EQ) { // if 14 if (!upperBound) upperBound = constant; } // if 14 else { // else 14 if (!lowerBound) lowerBound = constant; } // else 14 } // else 12 } // if 11 } // if 5 } // for 2 // if we found a upper bound or a lower bound if (lowerBound || upperBound) { // compress the histogram based on range predicates columnStatDesc->compressColStatsForQueryPreds(lowerBound, upperBound); } } // else 4 } // if 3 } // for 1 } // if 2 } // if 1 // All histograms compressed. Set the histCompressed flag to TRUE histsCompressed(TRUE); }