// Get the expression that represents the given JBBC.
//
// If the JBBC has a TableAnalysis and predIdSet is supplied, a new Scan is
// allocated on the statement heap with:
//   - *predIdSet as its selection predicates,
//   - the characteristic inputs and outputs of the JBBC's original
//     expression,
//   - a base cardinality of MIN_ONE(estimated row count of the NATable),
// and its logical properties are synthesized before it is returned.
//
// Otherwise (no TableAnalysis, or predIdSet is NULL) the JBBC's modified
// expression is returned, falling back to its original expression when no
// modified expression exists.
//
// inLP is not used by this function.
//
// Returns NULL if jbbc has no NodeAnalysis (which also trips the
// CCMPASSERT below).
RelExpr * AppliedStatMan::getExprForCANodeId(
          CANodeId jbbc,
          const EstLogPropSharedPtr &inLP,
          const ValueIdSet * predIdSet)
{
  NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis();

  // Should not happen, but check just in case. The explicit return keeps
  // us from dereferencing a NULL pointer in case the assert does not stop
  // execution in this build.
  CCMPASSERT(jbbcNode);
  if (jbbcNode == NULL)
    return NULL;

  TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis();

  // Not a table, or no caller-supplied predicates: hand back the existing
  // expression (modified one preferred, original one as fallback).
  if (tableAnalysis == NULL || predIdSet == NULL)
  {
    RelExpr * relExpr = jbbcNode->getModifiedExpr();

    if (relExpr == NULL)
      relExpr = jbbcNode->getOriginalExpr();

    return relExpr;
  }

  // The caller specified predicates: build a scan that applies exactly
  // those predicates instead of the ones in the original expression.
  TableDesc * tableDesc = tableAnalysis->getTableDesc();
  const CorrName& name = tableDesc->getNATable()->getTableName();

  Scan *scanExpr = new STMTHEAP Scan(name, tableDesc, REL_SCAN, STMTHEAP);
  scanExpr->setBaseCardinality(MIN_ONE (tableDesc->getNATable()->getEstRowCount())) ;

  GroupAttributes * gaExpr = new STMTHEAP GroupAttributes();

  scanExpr->setSelectionPredicates(*predIdSet);

  // The new scan must produce and consume the same values as the
  // expression it stands in for.
  ValueIdSet requiredOutputs =
    jbbcNode->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();

  gaExpr->setCharacteristicOutputs(requiredOutputs);

  ValueIdSet requiredInputs =
    jbbcNode->getOriginalExpr()->getGroupAttr()->getCharacteristicInputs();

  gaExpr->setCharacteristicInputs(requiredInputs);

  // Tie the scan and its group attributes together before synthesizing
  // the logical properties.
  scanExpr->setGroupAttr(gaExpr);
  gaExpr->setLogExprForSynthesis(scanExpr);
  scanExpr->synthLogProp();

  return scanExpr;
} // getExprForCANodeId
// Example #2
/********************************************************************
* Decide whether MDAM access on this index is worth considering and,
* for an index that is not index-only, whether an index join is still
* viable compared with scanning the base table.
*
* Input:
*   preds           - selection predicates for the scan node
*   indexOnlyIndex  - boolean indicating if it is an indexOnlyIndex
*   selectivityEnum - (out) reference parameter that will indicate if
*                     IndexJoin is viable or not. It is first set to
*                     INDEX_ONLY_INDEX or INDEX_JOIN_VIABLE and may be
*                     changed to EXCEEDS_BT_SCAN.
*   groupAttr       - GroupAttributes for the group; only dereferenced
*                     when indexOnlyIndex is FALSE
*   inputValues     - characteristic inputs; only dereferenced when
*                     groupAttr has at least one input logical property
*
* Output: MdamFlag indicating if the index key access is good enough for
* MDAM access (if an index does not have good MDAM access we have to
* scan the whole index because single subset also will not have any
* keys to apply)
* IndexJoin flag indicating if index join cost would exceed base table
* access or not.
*
* NOTE(review): groupAttr and inputValues are dereferenced without a
* NULL check on the paths described above - confirm callers always
* pass them when indexOnlyIndex is FALSE.
********************************************************************/
MdamFlags IndexDesc::pruneMdam(const ValueIdSet& preds,
				  NABoolean indexOnlyIndex,
				  IndexJoinSelectivityEnum& 
				  selectivityEnum /* out*/ ,
				  const GroupAttributes * groupAttr,
				  const ValueIdSet * inputValues) const
{
  // number of leading key columns that are not referenced by preds
  CollIndex numEmptyColumns=0;
  // running estimate of the number of MDAM skips
  CostScalar numSkips = csOne;
  // the leading key columns without predicates seen so far
  ValueIdSet emptyColumns;
  // receives the referencing value from referencesTheGivenValue();
  // it is not read afterwards in this function
  ValueId vid;
  // initial value of the out parameter; it may be downgraded to
  // EXCEEDS_BT_SCAN further below
  if(indexOnlyIndex)
    selectivityEnum = INDEX_ONLY_INDEX;
  else
    selectivityEnum = INDEX_JOIN_VIABLE;
  // no predicates at all: MDAM is off
  if(preds.isEmpty()) return MDAM_OFF;
  //calculate how many key columns don't have any predicates
  //(only the leading ones are counted: the loop stops at the first key
  //column that is referenced by preds)
  for(CollIndex i=0;i<indexKey_.entries();i++)
  {
    if(preds.referencesTheGivenValue(indexKey_[i],vid))
      break;
    else
      numEmptyColumns++;
  }
  
  //if we don't have any empty columns or we don't have to evaluate if index
  //join is promising or not then just return
  //(that case is the else branch at the bottom, which returns MDAM_ON; the
  //block below handles the opposite case: at least one empty leading column
  //or an index join)
  if(numEmptyColumns>=1 OR NOT indexOnlyIndex)
  {
    // histograms for the index; the second argument is numEmptyColumns for
    // an index-only index and the full key length otherwise
    IndexDescHistograms ixHistogram(*this,
      (indexOnlyIndex?numEmptyColumns:indexKey_.entries()));

    NABoolean multiColUecAvail = ixHistogram.isMultiColUecInfoAvail();
    ColumnOrderList keyPredsByCol(indexKey_);
    // estimate the number of skips from the UECs of the empty leading columns
    for(CollIndex j=0;j<numEmptyColumns;j++)
    {
      emptyColumns.insert(indexKey_[j]);
      if(j==0 OR multiColUecAvail == FALSE)
      {
	//no MCUec so just multiply the empty columns UEC count to 
	//calculate MDAM skips
	numSkips *=(ixHistogram.getColStatsForColumn(indexKey_[j])).
	  getTotalUec().getCeiling();
      }
      else // otherwise try to use MCUec
      {
	
	NABoolean uecFound = FALSE;
	CostScalar correctUec = csOne;
	CostScalar combinedUECCount = csOne;
	// first let's see if there is multiColUec count for the skipped columns
	// so far. If there is that will be number of skips. If there isn't then
	// get the best estimate of UEC count for the current column using MCUec
	// if possible otherwise just using single column histograms. 
	combinedUECCount = ixHistogram.getUecCountForColumns(emptyColumns);
	if(combinedUECCount >0)
	{
	  // a combined UEC replaces (does not multiply) the running estimate
	  numSkips = combinedUECCount;
	}
	else
	{
	  uecFound = ixHistogram.estimateUecUsingMultiColUec(keyPredsByCol,j,correctUec);
	  if(uecFound==TRUE)
	  {
	    numSkips *= correctUec;
	  }
	  else
	  {
	    // same single-column fallback as in the no-MCUec case above
	    numSkips *=(ixHistogram.getColStatsForColumn(indexKey_[j])).
	    getTotalUec().getCeiling();
	  }
	}
      }
    }


    // rowCount is read here, before any predicates are applied to
    // ixHistogram below
    CostScalar rowCount = ixHistogram.getRowCount();
    CostScalar numIndexBlocks = rowCount /getEstimatedRecordsPerBlock();
    // the next three keep their csOne defaults for an index-only index
    CostScalar numProbes = csOne;
    CostScalar numBaseTableBlocks = csOne;
    CostScalar inputProbes = csOne;

    // Pass any selectivity hint provided by the user
    const SelectivityHint * selHint = tableDesc_->getSelectivityHint();
    const CardinalityHint * cardHint = tableDesc_->getCardinalityHint();

    // If it is an index join then compute the number probes into the base
    // table. If the alternate index is not selective enough, we will have 
    // lots of them making the index quite expensive.
    if(NOT indexOnlyIndex) 
    {
      if((groupAttr->getInputLogPropList()).entries() >0)
      {
	//if there are incoming probes to the index. i.e. if the index join
	//is under another nested join or TSJ then compute result for all 
	//probes. We are using the initial inputEstLogProp to compute the 
	//resulting cardinality. It is possible that for the same group and 
	//different inputEstLogProp would provide less row count per probe.
	//So in FileScanRule::nextSubstitute() we make sure that the context
	//inputEstLogProp is in the error range of this inputEstLogProp. 
	// Ex. select * from lineitem, customer, nation 
	//	  where l_custkey < c_custkey and c_custkey = n_nationkey;
	//Now if we were evaluating lineitem indexes where the outer was customer
	//we would want to exclude alternate index on custkey whereas if nation got
	//pushed below customer then range of values would be fewer and max value
	//being less would make alternate index on custkey quite attractive. 
	
	ixHistogram.
	applyPredicatesWhenMultipleProbes(preds,
					  *((groupAttr->getInputLogPropList())[0]),
					  *inputValues,
 					  TRUE,
					  selHint,
					  cardHint,
					  NULL,
					  REL_SCAN);
	// number of incoming probes, taken from the first input logical
	// property and forced to be at least one
	inputProbes = MIN_ONE((groupAttr->getInputLogPropList())[0]->getResultCardinality());
      }
      else
      {
	// no incoming probes: apply the predicates once, using a placeholder
	// expression allocated on the statement heap
        RelExpr * dummyExpr = new (STMTHEAP) RelExpr(ITM_FIRST_ITEM_OP,
				    NULL,
				    NULL,
				    STMTHEAP);
	ixHistogram.applyPredicates(preds, *dummyExpr, selHint, cardHint, REL_SCAN);
      }

      // row count of the histogram after the predicates were applied above;
      // used as the number of probes into the base table
      numProbes = ixHistogram.getRowCount();
      // base table size in blocks, using the row count captured before the
      // predicates were applied and the clustering index's records per block
      numBaseTableBlocks = rowCount / tableDesc_->getClusteringIndex()->
	getEstimatedRecordsPerBlock();
      // NOTE(review): readAhead is used as a divisor below - confirm the
      // default can never be 0
      double readAhead = CURRSTMT_OPTDEFAULTS->readAheadMaxBlocks();

      // although we compute cardinality from the index for all probes we 
      // do the comparison for per probe. The assumption is that per probe
      // the upper bound of cost is scanning the whole base table.
      if(numProbes/inputProbes + MINOF((numIndexBlocks / readAhead),numSkips)
	> (numBaseTableBlocks/readAhead))
      {
	selectivityEnum = EXCEEDS_BT_SCAN;
      }
    }
    
    //Does the number of skips exceed the cost of scanning the index. 
    //Index-only index: compare the skips with a fraction
    //(mdamSelectionDefault) of the index blocks. Index join: compare skips
    //plus per-probe base table accesses with the same fraction of the base
    //table blocks.
    if((indexOnlyIndex AND numSkips <= 
      (numIndexBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault())) OR 
      (NOT indexOnlyIndex AND numSkips + numProbes/inputProbes <= 
		  (numBaseTableBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault())))
      return MDAM_ON;
  }
  else 
    // index-only index whose first key column is referenced by preds
    return MDAM_ON;

  // the estimates above did not justify MDAM
  return MDAM_OFF;
}
// Return the estimated logical properties of the given JBBC for the given
// input logical properties.
//
//   jbbc      - the node whose statistics are requested
//   inLP      - input logical properties; when it equals the global empty
//               input, the JBB input of jbbc is used instead
//   predIdSet - optional predicates to apply in place of the ones in the
//               node's own expression; NULL means "use the node's own
//               expression"
//
// When predIdSet is NULL and the input is cacheable, the statistics cache
// is consulted first. Otherwise the properties are synthesized: for a
// table node with predIdSet, from a temporary Scan built on the statement
// heap; in every other case, from the node's modified (or else original)
// expression.
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
					CANodeId jbbc,
					const EstLogPropSharedPtr &inLP,
					const ValueIdSet * predIdSet)
{
  // Substitute the JBB input of this node for the global empty input.
  EstLogPropSharedPtr effectiveInLP = inLP;
  if (effectiveInLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    effectiveInLP = jbbc.getJBBInput();

  EstLogPropSharedPtr stats = NULL;

  // Step 1: cache lookup. Only possible when all local predicates are to
  // be applied (no explicit predIdSet) and the input itself is cacheable.
  if ((effectiveInLP->isCacheable()) && (predIdSet == NULL))
  {
    // The cache key is this jbbc plus the node set of the outer child.
    CANodeIdSet cacheKey = jbbc;
    CANodeIdSet * outerNodes = effectiveInLP->getNodeSet();

    // cacheable input properties are expected to carry a node set
    CCMPASSERT(outerNodes != NULL);

    if (outerNodes)
    {
      cacheKey.insert(*outerNodes);
      stats = getCachedStatistics(&cacheKey);
    }
  }

  // Step 2: nothing usable in the cache, so synthesize the properties.
  if (stats == NULL)
  {
    NodeAnalysis * nodeInfo = jbbc.getNodeAnalysis();
    TableAnalysis * tableInfo = nodeInfo->getTableAnalysis();

    if (tableInfo == NULL || predIdSet == NULL)
    {
      // Use the node's own expression: the modified one when it exists,
      // the original one otherwise.
      RelExpr * nodeExpr = nodeInfo->getModifiedExpr();
      if (nodeExpr == NULL)
        nodeExpr = nodeInfo->getOriginalExpr();

      // synthesize and cache estLogProp for the given input
      stats = nodeExpr->getGroupAttr()->outputLogProp(effectiveInLP);
    }
    else
    {
      // The caller supplied predicates for a table: build a temporary scan
      // that applies exactly those predicates.
      TableDesc * tableDesc = tableInfo->getTableDesc();

      const QualifiedName& tableQualName =
        tableDesc->getNATable()->getTableName();
      CorrName tableCorrName(tableQualName, STMTHEAP);

      Scan * tmpScan =
        new STMTHEAP Scan(tableCorrName, tableDesc, REL_SCAN, STMTHEAP);

      // Base cardinality: start from the NATable estimate; a cardinality
      // hint overrides it; without a hint, an HBase table takes the row
      // count of its first single-column statistics entry when available.
      Cardinality baseRows = tableDesc->getNATable()->getEstRowCount();

      const CardinalityHint * hint = tableDesc->getCardinalityHint();
      if (hint)
      {
        baseRows = (hint->getScanCardinality()).getValue();
      }
      else if (tableDesc->getNATable()->isHbaseTable())
      {
        NATable * naTable = (NATable*)(tableDesc->getNATable());
        StatsList * colStatsList = naTable->getColStats();

        if (colStatsList && colStatsList->entries() > 0)
        {
          ColStatsSharedPtr firstColStats =
            colStatsList->getSingleColumnColStats(0);

          if (firstColStats)
            baseRows = (firstColStats->getRowcount()).getValue();
        }
      }

      tmpScan->setBaseCardinality(MIN_ONE (baseRows));

      GroupAttributes * scanGA = new STMTHEAP GroupAttributes();

      tmpScan->setSelectionPredicates(*predIdSet);

      // The scan has to produce the same outputs as the original expression.
      ValueIdSet outputsNeeded =
        nodeInfo->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();
      scanGA->setCharacteristicOutputs(outputsNeeded);

      tmpScan->setGroupAttr(scanGA);
      scanGA->setLogExprForSynthesis(tmpScan);

      // Work on a copy of the input properties that is flagged as not
      // cacheable before asking for the output properties.
      EstLogPropSharedPtr uncachedInLP(new (HISTHEAP) EstLogProp (*effectiveInLP));
      uncachedInLP->setCacheableFlag(FALSE);

      tmpScan->synthLogProp();
      stats = tmpScan->getGroupAttr()->outputLogProp(uncachedInLP);
    }
  }

  return stats;
} // getStatsForCANodeId