// Gather, over every node in jbbcsNodeSet that has a NodeAnalysis, the
// outputs that node can contribute:
//   * no original parent join, or an inner non-semi join:
//       the characteristic outputs of the node's original expression
//   * left join: the null-instantiated output of the parent join
//   * any other parent join: nothing
ValueIdSet AppliedStatMan::getPotentialOutputs(
			  const CANodeIdSet & jbbcsNodeSet)
{
  ValueIdSet collected;

  for (CANodeId nodeId = jbbcsNodeSet.init();
       jbbcsNodeSet.next(nodeId);
       jbbcsNodeSet.advance(nodeId))
  {
    NodeAnalysis * analysis = nodeId.getNodeAnalysis();

    // nodes without analysis information contribute nothing
    if (!analysis)
      continue;

    const Join * parentJoin =
      analysis->getJBBC()->getOriginalParentJoin();

    // stays empty when the parent join is neither inner non-semi nor left
    ValueIdSet contribution;

    if (!parentJoin || parentJoin->isInnerNonSemiJoin())
    {
      contribution =
        analysis->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();
    }
    else if (parentJoin->isLeftJoin())
    {
      contribution = parentJoin->nullInstantiatedOutput();
    }

    collected.insert(contribution);
  }

  return collected;
} // AppliedStatMan::getPotentialOutputs
// Example #2
// 0
// Fill nonKeyColumnSet with the index columns that are not key columns.
// The set is cleared first, so any previous content is discarded.
void
IndexDesc::getNonKeyColumnSet(ValueIdSet& nonKeyColumnSet) const
{
  const ValueIdList & allIndexCols = getIndexColumns();
  const ValueIdList & keyCols = getIndexKey();

  // start from an empty result
  nonKeyColumnSet.clear();

  // step 1: take every index column
  for (CollIndex colNo = 0; colNo < allIndexCols.entries(); colNo++)
    nonKeyColumnSet.insert(allIndexCols[colNo]);

  // step 2: take the key columns back out
  for (CollIndex keyNo = 0; keyNo < keyCols.entries(); keyNo++)
    {
      ValueId keyVid = keyCols[keyNo];
      nonKeyColumnSet.remove(keyVid);

      const ItemExpr *keyExpr = keyVid.getItemExpr();
      if (keyExpr->getOperatorType() != ITM_INDEXCOLUMN)
        continue;

      // for a secondary index the base column behind the index column
      // may be in the set as well; remove its definition too
      nonKeyColumnSet.remove(((IndexColumn *)(keyExpr))->getDefinition());
    }
} // IndexDesc::getNonKeyColumnSet(ValueIdSet& nonKeyColumnSet) const
// this method sets the primary key columns. It goes through all the columns
// of the table, and collects the columns which are marked as primary keys
void TableDesc::setPrimaryKeyColumns()
{
    ValueIdSet primaryColumns;

    for ( CollIndex j = 0 ; j < colList_.entries() ; j++ )
    {

        ValueId valId = colList_[j];

        NAColumn *column = valId.getNAColumn();

        if ( column->isPrimaryKey() )
        {
            primaryColumns.insert(valId) ;
            // mark column as referenced for histogram, as we may need its histogram
            // during plan generation
            if ((column->isUserColumn() || column->isSaltColumn() ) &&
                    (column->getNATable()->getSpecialType() == ExtendedQualName::NORMAL_TABLE) )
                column->setReferencedForMultiIntHist();
        }
    }

    primaryKeyColumns_ = primaryColumns;
}
// Example #4
// 0
/********************************************************************
* Decides whether MDAM access on this index should be considered and,
* for a non-index-only access, whether the index join is classified as
* viable or as exceeding a base table scan.
*
* Input: Selection predicates for the scan node, boolean indicating if
* it is an indexOnlyIndex, reference parameter that will indicate if
* IndexJoin is viable or not, GroupAttributes for the group and characteristic
* inputs
* Output: MdamFlag indicating if the index key access is good enough for 
* MDAM access (if an index does not have good MDAM access we have to
* scan the whole index because single subset also will not have any
* keys to apply)
* IndexJoin flag indicating if index join cost would exceed base table 
* access or not.
*
* selectivityEnum is always assigned: INDEX_ONLY_INDEX when indexOnlyIndex
* is set, otherwise INDEX_JOIN_VIABLE, which may later be replaced by
* EXCEEDS_BT_SCAN.
*
* Returns MDAM_OFF right away when preds is empty.
*
* NOTE(review): groupAttr is dereferenced whenever indexOnlyIndex is FALSE,
* and inputValues is dereferenced when, in addition, the input log prop
* list is non-empty. Neither is checked for NULL here -- confirm callers
* always supply them.
********************************************************************/
MdamFlags IndexDesc::pruneMdam(const ValueIdSet& preds,
				  NABoolean indexOnlyIndex,
				  IndexJoinSelectivityEnum& 
				  selectivityEnum /* out*/ ,
				  const GroupAttributes * groupAttr,
				  const ValueIdSet * inputValues) const
{
  CollIndex numEmptyColumns=0;
  CostScalar numSkips = csOne;
  ValueIdSet emptyColumns;
  ValueId vid;
  // initial classification; the index join case may be downgraded to
  // EXCEEDS_BT_SCAN further below
  if(indexOnlyIndex)
    selectivityEnum = INDEX_ONLY_INDEX;
  else
    selectivityEnum = INDEX_JOIN_VIABLE;
  if(preds.isEmpty()) return MDAM_OFF;
  //count the leading key columns that are not referenced by any predicate;
  //counting stops at the first key column that is referenced
  for(CollIndex i=0;i<indexKey_.entries();i++)
  {
    if(preds.referencesTheGivenValue(indexKey_[i],vid))
      break;
    else
      numEmptyColumns++;
  }
  
  //do the estimation below if at least one leading key column has no
  //predicate, or if this is an index join whose viability must be evaluated.
  //Otherwise (index-only access whose first key column has a predicate)
  //the else branch at the bottom returns MDAM_ON directly.
  if(numEmptyColumns>=1 OR NOT indexOnlyIndex)
  {
    // presumably the second argument is the number of key columns to load
    // histograms for -- TODO confirm against IndexDescHistograms
    IndexDescHistograms ixHistogram(*this,
      (indexOnlyIndex?numEmptyColumns:indexKey_.entries()));

    NABoolean multiColUecAvail = ixHistogram.isMultiColUecInfoAvail();
    ColumnOrderList keyPredsByCol(indexKey_);
    // estimate the number of MDAM skips from the UECs of the leading
    // key columns that have no predicate
    for(CollIndex j=0;j<numEmptyColumns;j++)
    {
      emptyColumns.insert(indexKey_[j]);
      if(j==0 OR multiColUecAvail == FALSE)
      {
	//first column, or no MCUec: just multiply the empty columns UEC
	//count to calculate MDAM skips
	numSkips *=(ixHistogram.getColStatsForColumn(indexKey_[j])).
	  getTotalUec().getCeiling();
      }
      else // otherwise try to use MCUec
      {
	
	NABoolean uecFound = FALSE;
	CostScalar correctUec = csOne;
	CostScalar combinedUECCount = csOne;
	// first let's see if there is multiColUec count for the skipped columns
	// so far. If there is that will be number of skips. If there isn't then
	// get the best estimate of UEC count for the current column using MCUec
	// if possible otherwise just using single column histograms. 
	combinedUECCount = ixHistogram.getUecCountForColumns(emptyColumns);
	if(combinedUECCount >0)
	{
	  // a combined UEC replaces (not multiplies) the running estimate
	  numSkips = combinedUECCount;
	}
	else
	{
	  uecFound = ixHistogram.estimateUecUsingMultiColUec(keyPredsByCol,j,correctUec);
	  if(uecFound==TRUE)
	  {
	    numSkips *= correctUec;
	  }
	  else
	  {
	    numSkips *=(ixHistogram.getColStatsForColumn(indexKey_[j])).
	    getTotalUec().getCeiling();
	  }
	}
      }
    }


    // row count is read here, before any predicate is applied to
    // ixHistogram below; the block counts are derived from this value
    CostScalar rowCount = ixHistogram.getRowCount();
    CostScalar numIndexBlocks = rowCount /getEstimatedRecordsPerBlock();
    CostScalar numProbes = csOne;
    CostScalar numBaseTableBlocks = csOne;
    CostScalar inputProbes = csOne;

    // Pass any selectivity hint provided by the user
    const SelectivityHint * selHint = tableDesc_->getSelectivityHint();
    const CardinalityHint * cardHint = tableDesc_->getCardinalityHint();

    // If it is an index join then compute the number probes into the base
    // table. If the alternate index is not selective enough, we will have 
    // lots of them making the index quite expensive.
    if(NOT indexOnlyIndex) 
    {
      if((groupAttr->getInputLogPropList()).entries() >0)
      {
	//if there are incoming probes to the index. i.e. if the index join
	//is under another nested join or TSJ then compute result for all 
	//probes. We are using the initial inputEstLogProp to compute the 
	//resulting cardinality. It is possible that for the same group and 
	//different inputEstLogProp would provide less row count per probe.
	//So in FileScanRule::nextSubstitute() we make sure that the context
	//inputEstLogProp is in the error range of this inputEstLogProp. 
	// Ex. select * from lineitem, customer, nation 
	//	  where l_custkey < c_custkey and c_custkey = n_nationkey;
	//Now if we were evaluating lineitem indexes where the outer was customer
	//we would want to exclude alternate index on custkey whereas if nation got
	//pushed below customer then range of values would be fewer and max value
	//being less would make alternate index on custkey quite attractive. 
	
	ixHistogram.
	applyPredicatesWhenMultipleProbes(preds,
					  *((groupAttr->getInputLogPropList())[0]),
					  *inputValues,
 					  TRUE,
					  selHint,
					  cardHint,
					  NULL,
					  REL_SCAN);
	// number of incoming probes, taken from the first input log prop
	inputProbes = MIN_ONE((groupAttr->getInputLogPropList())[0]->getResultCardinality());
      }
      else
      {
	// no incoming probes: apply the predicates once, using a
	// placeholder RelExpr allocated on STMTHEAP (not deleted here)
        RelExpr * dummyExpr = new (STMTHEAP) RelExpr(ITM_FIRST_ITEM_OP,
				    NULL,
				    NULL,
				    STMTHEAP);
	ixHistogram.applyPredicates(preds, *dummyExpr, selHint, cardHint, REL_SCAN);
      }

      // row count read again, now after the predicates were applied;
      // it is used as the number of probes into the base table
      numProbes = ixHistogram.getRowCount();
      numBaseTableBlocks = rowCount / tableDesc_->getClusteringIndex()->
	getEstimatedRecordsPerBlock();
      double readAhead = CURRSTMT_OPTDEFAULTS->readAheadMaxBlocks();

      // although we compute cardinality from the index for all probes we 
      // do the comparison for per probe. The assumption is that per probe
      // the upper bound of cost is scanning the whole base table.
      if(numProbes/inputProbes + MINOF((numIndexBlocks / readAhead),numSkips)
	> (numBaseTableBlocks/readAhead))
      {
	selectivityEnum = EXCEEDS_BT_SCAN;
      }
    }
    
    //MDAM stays on only if the number of skips (plus, for an index join,
    //the probes per input probe) does not exceed the index / base table
    //block count scaled by mdamSelectionDefault().
    if((indexOnlyIndex AND numSkips <= 
      (numIndexBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault())) OR 
      (NOT indexOnlyIndex AND numSkips + numProbes/inputProbes <= 
		  (numBaseTableBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault())))
      return MDAM_ON;
  }
  else 
    // index-only access and the first key column has a predicate
    return MDAM_ON;

  return MDAM_OFF;
}
// compress the histograms based on query predicates on this table
void TableDesc::compressHistogramsForCurrentQuery()
{

    // if there are some column statistics
    if ((colStats_.entries() != 0) &&
            (table_) &&
            (table_->getExtendedQualName().getSpecialType() == ExtendedQualName::NORMAL_TABLE))
    {   // if 1
        // check if query analysis info is available
        if(QueryAnalysis::Instance()->isAnalysisON())
        {   // if 2
            // get a handle to the query analysis
            QueryAnalysis* queryAnalysis = QueryAnalysis::Instance();

            // get a handle to the table analysis
            const TableAnalysis * tableAnalysis = getTableAnalysis();

            if(!tableAnalysis)
                return;

            // iterate over statistics for each column
            for(CollIndex i = 0; i < colStats_.entries(); i++)
            {   // for 1
                // Get a handle to the column's statistics descriptor
                ColStatDescSharedPtr columnStatDesc = colStats_[i];

                // get a handle to the ColStats
                ColStatsSharedPtr colStats = columnStatDesc->getColStats();

                // if this is a single column, as opposed to a multicolumn
                if(colStats->getStatColumns().entries() == 1)
                {   // if 3
                    // get column's value id
                    const ValueId columnId = columnStatDesc->getColumn();

                    // get column analysis
                    ColAnalysis* colAnalysis = queryAnalysis->getColAnalysis(columnId);

                    if(!colAnalysis) continue;

                    ValueIdSet predicatesOnColumn =
                        colAnalysis->getReferencingPreds();

                    // we can compress this column's histogram if there
                    // is a equality predicate against a constant

                    ItemExpr *constant = NULL;

                    NABoolean colHasEqualityAgainstConst =
                        colAnalysis->getConstValue(constant);

                    // if a equality predicate with a constant was found
                    // i.e. predicate of the form col = 5
                    if (colHasEqualityAgainstConst)
                    {   // if 4
                        if (constant)
                            // compress the histogram
                            columnStatDesc->compressColStatsForQueryPreds(constant,constant);
                    } // if 4
                    else { // else 4

                        // since there is no equality predicates we might still
                        // be able to compress the column's histogram based on
                        // range predicates against a constant. Following are
                        // examples of such predicates
                        // * col > 1 <-- predicate defines a lower bound
                        // * col < 3 <-- predicate defines a upper bound
                        // * col >1 and col < 30 <-- window predicate, define both bounds
                        ItemExpr * lowerBound = NULL;
                        ItemExpr * upperBound = NULL;

                        // Extract predicates from range spec and add it to the
                        // original predicate set otherwise isARangePredicate() will
                        // return FALSE, so histgram compression won't happen.
                        ValueIdSet rangeSpecPred(predicatesOnColumn);
                        for (ValueId predId= rangeSpecPred.init();
                                rangeSpecPred.next(predId);
                                rangeSpecPred.advance(predId))
                        {
                            ItemExpr * pred = predId.getItemExpr();
                            if ( pred->getOperatorType() == ITM_RANGE_SPEC_FUNC )
                            {
                                ValueIdSet vs;
                                ((RangeSpecRef *)pred)->getValueIdSetForReconsItemExpr(vs);
                                // remove rangespec vid from the original set
                                predicatesOnColumn.remove(predId);
                                // add preds extracted from rangespec to the original set
                                predicatesOnColumn.insert(vs);
                            }
                        }

                        // in the following loop we iterate over all the predicates
                        // on this column. If there is a range predicate e.g. a > 2
                        // or a < 3, then we use that to define upper and lower bounds.
                        // Given predicate a > 2, we get a lower bound of 2.
                        // Given predicate a < 3, we get a upper bound of 3.
                        // The bound are then passed down to the histogram
                        // compression methods.

                        // iterate over predicates to see if any of them is a range
                        // predicate e.g. a > 2
                        for (ValueId predId= predicatesOnColumn.init();
                                predicatesOnColumn.next(predId);
                                predicatesOnColumn.advance(predId))
                        {   // for 2
                            // check if this predicate is a range predicate
                            ItemExpr * predicateOnColumn = predId.getItemExpr();
                            if (predicateOnColumn->isARangePredicate())
                            {   // if 5

                                // if a predicate is a range predicate we need to find out more
                                // information regarding the predicate to see if it can be used
                                // to compress the columns histogram. We look for the following:
                                // * The predicate is against a constant e.g. a > 3 and not against
                                //   another column e.g. a > b
                                // Also give a predicate we need to find out what side is the column
                                // and what side is the constant. Normally people write a range predicate
                                // as a > 3, but the same could be written as 3 < a.
                                // Also either on of the operands of the range predicate might be
                                // a VEG, if so then we need to dig into the VEG to see where is
                                // the constant and where is the column.

                                // check the right and left children of this predicate to
                                // see if one of them is a constant
                                ItemExpr * leftChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(0);
                                ItemExpr * rightChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(1);

                                // by default assume the literal is at right i.e. predicate of
                                // the form a > 2
                                NABoolean columnAtRight = FALSE;

                                // check if right child of predicate is a VEG
                                if ( rightChildItemExpr->getOperatorType() == ITM_VEG_REFERENCE)
                                {   // if 6
                                    // if child is a VEG
                                    VEGReference * rightChildVEG = (VEGReference *) rightChildItemExpr;

                                    // check if the VEG contains the current column
                                    // if it does contain the current column then
                                    // the predicate has the column on right and potentially
                                    // a constant on the left.
                                    if(rightChildVEG->getVEG()->getAllValues().contains(columnId))
                                    {   // if 7
                                        // column is at right i.e. predicate is of the form
                                        // 2 < a
                                        columnAtRight = TRUE;
                                    } // if 7
                                } // if 6
                                else { // else 6
                                    // child is not a VEG
                                    if ( columnId == rightChildItemExpr->getValueId() )
                                    {   // if 8
                                        // literals are at left i.e. predicate is of the form
                                        // (1,2) < (a, b)
                                        columnAtRight = TRUE;
                                    } // if 8
                                } // else 6

                                ItemExpr * potentialConstantExpr = NULL;

                                // check if the range predicate is against a constant
                                if (columnAtRight)
                                {   // if 9
                                    // the left child is potentially a constant
                                    potentialConstantExpr = leftChildItemExpr;
                                } // if 9
                                else { // else 9
                                    // the right child is potentially a constant
                                    potentialConstantExpr = rightChildItemExpr;
                                } // else 9

                                // initialize constant to NULL before
                                // looking for next constant
                                constant = NULL;

                                // check if potentialConstantExpr contains a constant.
                                // we need to see if this range predicate is a predicate
                                // against a constant e.g col > 1 and not a predicate
                                // against another column e.g. col > anothercol

                                // if the expression is a VEG
                                if ( potentialConstantExpr->getOperatorType() == ITM_VEG_REFERENCE)
                                {   // if 10

                                    // expression is a VEG, dig into the VEG to
                                    // get see if it contains a constant
                                    VEGReference * potentialConstantExprVEG =
                                        (VEGReference *) potentialConstantExpr;

                                    potentialConstantExprVEG->getVEG()->\
                                    getAllValues().referencesAConstValue(&constant);
                                } // if 10
                                else { // else 10

                                    // express is not a VEG, it is a constant
                                    if ( potentialConstantExpr->getOperatorType() == ITM_CONSTANT )
                                        constant = potentialConstantExpr;
                                } // else 10

                                // if predicate involves a constant, does the constant imply
                                // a upper bound or lower bound
                                if (constant)
                                {   // if 11
                                    // if range predicate has column at right e.g. 3 > a
                                    if (columnAtRight)
                                    {   // if 12
                                        if ( predicateOnColumn->getOperatorType() == ITM_GREATER ||
                                                predicateOnColumn->getOperatorType() == ITM_GREATER_EQ)
                                        {   // if 13
                                            if (!upperBound)
                                                upperBound = constant;
                                        } // if 13
                                        else
                                        {   // else 13
                                            if (!lowerBound)
                                                lowerBound = constant;
                                        } // else 13
                                    } // if 12
                                    else { // else 12
                                        // range predicate has column at left e.g. a < 3
                                        if ( predicateOnColumn->getOperatorType() == ITM_LESS ||
                                                predicateOnColumn->getOperatorType() == ITM_LESS_EQ)
                                        {   // if 14
                                            if (!upperBound)
                                                upperBound = constant;
                                        } // if 14
                                        else
                                        {   // else 14
                                            if (!lowerBound)
                                                lowerBound = constant;
                                        } // else 14
                                    } // else 12
                                } // if 11
                            } // if 5
                        } // for 2

                        // if we found a upper bound or a lower bound
                        if (lowerBound || upperBound)
                        {
                            // compress the histogram based on range predicates
                            columnStatDesc->compressColStatsForQueryPreds(lowerBound, upperBound);
                        }
                    } // else 4
                } // if 3
            } // for 1
        } // if 2
    } // if 1
    // All histograms compressed. Set the histCompressed flag to TRUE
    histsCompressed(TRUE);
}