Esempio n. 1
0
/********************************************************************
* Input: Selection predicates for the scan node, boolean indicating if
* it is a indexOnlyIndex, reference parameter that will indicate if 
* IndexJoin is viable or not, GroupAttributes for the group and characteristic
* inputs
* Output: MdamFlag indicating if the index key access is good enough for 
* MDAM access (if a index does not have good MDAM access we have to 
* scan the whole index because single subset also will not have any
* keys to apply)
* IndexJoin flag indicating if index join cost would exceed base table 
* access or not.
********************************************************************/
MdamFlags IndexDesc::pruneMdam(const ValueIdSet& preds,
				  NABoolean indexOnlyIndex,
				  IndexJoinSelectivityEnum& 
				  selectivityEnum /* out*/ ,
				  const GroupAttributes * groupAttr,
				  const ValueIdSet * inputValues) const
{
  CollIndex numEmptyColumns=0;
  CostScalar numSkips = csOne;
  ValueIdSet emptyColumns;
  ValueId vid;
  if(indexOnlyIndex)
    selectivityEnum = INDEX_ONLY_INDEX;
  else
    selectivityEnum = INDEX_JOIN_VIABLE;
  if(preds.isEmpty()) return MDAM_OFF;
  //calculate how many key columns don't have any predicates
  for(CollIndex i=0;i<indexKey_.entries();i++)
  {
    if(preds.referencesTheGivenValue(indexKey_[i],vid))
      break;
    else
      numEmptyColumns++;
  }
  
  //if we don't have any empty columns or we don't have to evaluate if index
  //join is promising or not then just return
  if(numEmptyColumns>=1 OR NOT indexOnlyIndex)
  {
    IndexDescHistograms ixHistogram(*this,
      (indexOnlyIndex?numEmptyColumns:indexKey_.entries()));

    NABoolean multiColUecAvail = ixHistogram.isMultiColUecInfoAvail();
    ColumnOrderList keyPredsByCol(indexKey_);
    for(CollIndex j=0;j<numEmptyColumns;j++)
    {
      emptyColumns.insert(indexKey_[j]);
      if(j==0 OR multiColUecAvail == FALSE)
      {
	//no MCUec so just multiply the empty columns UEC count to 
	//calculate MDAM skips
	numSkips *=(ixHistogram.getColStatsForColumn(indexKey_[j])).
	  getTotalUec().getCeiling();
      }
      else // otherwise try to use MCUec
      {
	
	NABoolean uecFound = FALSE;
	CostScalar correctUec = csOne;
	CostScalar combinedUECCount = csOne;
	// first let's see if there is multiColUec count for the skipped columns
	// so far. If there is that will be number of skips. If there isn't then
	// get the best estimate of UEC count for the current column using MCUec
	// if possible otherwise just using single column histograms. 
	combinedUECCount = ixHistogram.getUecCountForColumns(emptyColumns);
	if(combinedUECCount >0)
	{
	  numSkips = combinedUECCount;
	}
	else
	{
	  uecFound = ixHistogram.estimateUecUsingMultiColUec(keyPredsByCol,j,correctUec);
	  if(uecFound==TRUE)
	  {
	    numSkips *= correctUec;
	  }
	  else
	  {
	    numSkips *=(ixHistogram.getColStatsForColumn(indexKey_[j])).
	    getTotalUec().getCeiling();
	  }
	}
      }
    }


    CostScalar rowCount = ixHistogram.getRowCount();
    CostScalar numIndexBlocks = rowCount /getEstimatedRecordsPerBlock();
    CostScalar numProbes = csOne;
    CostScalar numBaseTableBlocks = csOne;
    CostScalar inputProbes = csOne;

    // Pass any selectivity hint provided by the user
    const SelectivityHint * selHint = tableDesc_->getSelectivityHint();
    const CardinalityHint * cardHint = tableDesc_->getCardinalityHint();

    // If it is an index join then compute the number probes into the base
    // table. If the alternate index is not selective enough, we will have 
    // lots of them making the index quite expensive.
    if(NOT indexOnlyIndex) 
    {
      if((groupAttr->getInputLogPropList()).entries() >0)
      {
	//if there are incoming probes to the index. i.e. if the index join
	//is under another nested join or TSJ then compute result for all 
	//probes. We are using the initial inputEstLogProp to compute the 
	//resulting cardinality. It is possible that for the same group and 
	//different inputEstLogProp would provide less row count per probe.
	//So in FileScanRule::nextSubstitute() we make sure that the context
	//inputEstLogProp is in the error range of this inputEstLogProp. 
	// Ex. select * from lineitem, customer, nation 
	//	  where l_custkey < c_custkey and c_custkey = n_nationkey;
	//Now if we were evaluating lineitem indexes where the outer was customer
	//we would want to exclude alternate index on custkey whereas if nation got
	//pushed below customer then range of values would be fewer and max value
	//being less would make alternate index on custkey quite attractive. 
	
	ixHistogram.
	applyPredicatesWhenMultipleProbes(preds,
					  *((groupAttr->getInputLogPropList())[0]),
					  *inputValues,
 					  TRUE,
					  selHint,
					  cardHint,
					  NULL,
					  REL_SCAN);
	inputProbes = MIN_ONE((groupAttr->getInputLogPropList())[0]->getResultCardinality());
      }
      else
      {
        RelExpr * dummyExpr = new (STMTHEAP) RelExpr(ITM_FIRST_ITEM_OP,
				    NULL,
				    NULL,
				    STMTHEAP);
	ixHistogram.applyPredicates(preds, *dummyExpr, selHint, cardHint, REL_SCAN);
      }

      numProbes = ixHistogram.getRowCount();
      numBaseTableBlocks = rowCount / tableDesc_->getClusteringIndex()->
	getEstimatedRecordsPerBlock();
      double readAhead = CURRSTMT_OPTDEFAULTS->readAheadMaxBlocks();

      // although we compute cardinality from the index for all probes we 
      // do the comparison for per probe. The assumption is that per probe
      // the upper bound of cost is scanning the whole base table.
      if(numProbes/inputProbes + MINOF((numIndexBlocks / readAhead),numSkips)
	> (numBaseTableBlocks/readAhead))
      {
	selectivityEnum = EXCEEDS_BT_SCAN;
      }
    }
    
    //Does the number of skips exceed the cost of scanning the index. 
    if((indexOnlyIndex AND numSkips <= 
      (numIndexBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault())) OR 
      (NOT indexOnlyIndex AND numSkips + numProbes/inputProbes <= 
		  (numBaseTableBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault())))
      return MDAM_ON;
  }
  else 
    return MDAM_ON;

  return MDAM_OFF;
}
// ---------------------------------------------------------------------
// Utility Routine: pickOutputs
//
// From the given ColStatDescList, populate columnStats_ with column
// descriptors that are useful based on the characteristic outputs for
// the group.
//
// Always include in the output the current histograms of the input data,
// and, if the histogram is contained in the required output list, then
// this is a useful histogram and will also be output.
//
// ---------------------------------------------------------------------
void EstLogProp::pickOutputs( ColStatDescList & columnStats,
			      const EstLogPropSharedPtr& inputEstLogProp,
			      const ValueIdSet specifiedOutputs,
			      const ValueIdSet predSet)
{

  const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats();

  ValueIdSet colsRequiringHistograms = specifiedOutputs;
  
  // (i) see if the selection predicates contain any constant value or a 
  // constant expression

  // (ii) check if there are any columns of this table being joined to some other
  // columns, which do not appear as characteristics outputs. There should be
  // histograms available for these columns, as these might be needed later.
  // This problem was seen for temporary tables created as normal_tables by the
  // triggers.


  colsRequiringHistograms.addSet(predSet.getColumnsForHistogram());
  colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ;

  NABoolean colStatDescAdded = FALSE;

  for (CollIndex i=0; i < columnStats.entries(); i++)
    {
      // we probably don't need 'em all, but this is the easiest way to
      // grab all of the multi-column uec information we'll need later
      colStats().insertIntoUecList (columnStats.getUecList()) ;
      colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint());
      NABoolean found = FALSE;

      // Note: The following inserts into a ColStatDescList should not
      // have to be deep copies.  From this point on, ColStatDescs that
      // describe the output of the calling operator are read-only.

      ColStatDescSharedPtr colStatDesc = columnStats[i];

      // the value-id we're looking for
      const ValueId columnId = colStatDesc->getVEGColumn() ;

      for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++)
	{
	  if (columnId == outerColStatsList[j]->getVEGColumn() OR
              (CmpCommon::context()->showQueryStats()))
            {
              colStats().insert(colStatDesc) ;
              found = TRUE;
              if(!colStatDescAdded)
                colStatDescAdded = TRUE;
              break ; // jump to next ColStatDesc
            }
	}

    // OK, the valueid doesn't match directly -- but there are still a
    // couple of things to check in order to verify whether or not we're
    // interested in keeping the i'th ColStatDesc ...

	ValueId throwaway ; // used by the second clause below

    if ( NOT found  AND
	 (columnId != NULL_VALUE_ID) AND
         (colsRequiringHistograms.contains (columnId) OR
          colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR
	  columnId.isInvolvedInJoinAndConst() OR
          CmpCommon::context()->showQueryStats() )
	)
	{
	  colStats().insert(colStatDesc);
	  found = TRUE;
	  if(!colStatDescAdded)
	    colStatDescAdded = TRUE;
	}
	
	if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting())
	{
	  // if the column is referenced for histogram, but is 
	  // not needed beyond this time , then we shall save its  
	  // max freq, which might be used later in costing if this
	  // column is a part of the partitioning key

	  ColStatsSharedPtr stat = colStatDesc->getColStats();
	  if (!(stat->isVirtualColForHist() ) && NOT found &&
                    !(stat->isOrigFakeHist() ) )
	  {
            const ValueId col = colStatDesc->getColumn();
            ColAnalysis * colAnalysis = col.colAnalysis();
            if (colAnalysis)
            {
              NAColumn * column = stat->getStatColumns()[0];

              if (column->isReferencedForHistogram())
              {
                CostScalar maxFreq = columnStats.getMaxFreq(columnId);
                colAnalysis->setMaxFreq(maxFreq);
                colAnalysis->setFinalUec(stat->getTotalUec());
                colAnalysis->setFinalRC(stat->getRowcount());
              }
            }
          }
	}
      } // for columnStats.entries()
      if(!colStatDescAdded && columnStats.entries() > 0)
        colStats().insert(columnStats[0]) ;
} // pickOutputs