// Reports whether any of the given local predicates is backed by column
// statistics that are flagged as a fake histogram or as a small-sample
// histogram.
//
// localPreds - the local predicates to examine.
//
// Returns TRUE as soon as a predicate is found whose column statistics
// report isOrigFakeHist() or isSmallSampleHistogram(). Returns FALSE when
// localPreds is empty, when no such statistics are found, or as soon as a
// predicate is reached for which no column statistics can be located.
//
// NOTE(review): a predicate without statistics ends the scan with FALSE
// instead of moving on to the remaining predicates -- confirm that this
// is the intended behavior.
NABoolean TableDesc::isAnyHistWithPredsFakeOrSmallSample(const ValueIdSet &localPreds)
{
    // without local predicates there is nothing to inspect
    if (localPreds.isEmpty())
        return FALSE;

    const ColStatDescList & tableStats = getTableColStats();

    // walk the predicates one by one, looking up the statistics of each
    ValueId predId = localPreds.init();
    while (localPreds.next(predId))
    {
        ColStatsSharedPtr predStats = tableStats.getColStatsPtrForPredicate(predId);

        // no statistics for this predicate: stop looking
        if (predStats == NULL)
            return FALSE;

        if (predStats->isOrigFakeHist())
            return TRUE;

        if (predStats->isSmallSampleHistogram())
            return TRUE;

        localPreds.advance(predId);
    }

    return FALSE;
}
// Example no. 2
// 0
// Tries to turn the constant 'val' into a selectivity parameter, using the
// histogram of the base column 'base'.
//
// cwa     - cache work area; supplies the heap for the type copy and
//           performs the actual parameter replacement.
// bindWA  - bind work area, forwarded to the replacement call.
// child   - expression slot, forwarded to the replacement call.
// base    - base column whose statistics are consulted.
// val     - constant for which an equality selectivity is computed.
//
// The routine does nothing when the column has no statistics, when the
// statistics carry no histogram, or when the histogram cannot compute a
// selectivity for equality with 'val'.
void computeAndAddSelParamIfPossible(
             CacheWA& cwa, BindWA& bindWA, ExprValueId& child,
             BaseColumn *base, ConstValue *val)
{
    // statistics of the base column; bail out if there are none
    ColStatsSharedPtr columnStats =
        (base->getTableDesc()->tableColStats()).getColStatsPtrForColumn(
            base->getValueId());
    if (columnStats == NULL)
        return;

    // the histogram does the selectivity computation; bail out if missing
    HistogramSharedPtr histogram = columnStats->getHistogram();
    if (histogram == NULL)
        return;

    CostScalar equalitySel;
    NABoolean selComputed = histogram->computeSelectivityForEquality(
        val,
        columnStats->getRowcount(),
        columnStats->getTotalUec(),
        equalitySel);

    if (selComputed != TRUE)
        return;

    const NAType * paramType = base->getNAColumn()->getType();

    // A character column gets a private copy of its type so that the
    // case-insensitive attribute can be taken over from the constant.
    if (paramType->getTypeQualifier() == NA_CHARACTER_TYPE)
    {
        CharType * charCopy =
            (CharType *) base->getNAColumn()->getType()->newCopy(cwa.wHeap());
        charCopy->setCaseinsensitive(
            ((CharType *) val->getType())->isCaseinsensitive());
        paramType = charCopy;
    }

    cwa.replaceWithNewOrOldSelParam(val, paramType, Selectivity(equalitySel),
                                    child, bindWA);
}
// ---------------------------------------------------------------------
// Utility Routine: pickOutputs
//
// Copies into this object's column statistics list (colStats()) those
// entries of 'columnStats' that are worth keeping.
//
// columnStats      - candidate column statistics descriptors.
// inputEstLogProp  - logical properties of the input; a candidate whose
//                    VEG column also appears there is always kept.
// specifiedOutputs - outputs for which histograms are required.
// predSet          - predicates; the columns they need histograms for
//                    are added to the required set.
//
// A candidate is also kept when its VEG column is contained in, or
// referenced by, the required set, when the column is involved in a join
// and a constant, or when query statistics are being shown. If nothing
// was kept and 'columnStats' is not empty, its first entry is kept.
//
// When skew is incorporated in costing, a discarded candidate that is
// neither a virtual column nor a fake histogram, and whose first
// statistics column is referenced for histograms, has its max frequency,
// total UEC and row count saved in the column analysis.
// ---------------------------------------------------------------------
void EstLogProp::pickOutputs( ColStatDescList & columnStats,
			      const EstLogPropSharedPtr& inputEstLogProp,
			      const ValueIdSet specifiedOutputs,
			      const ValueIdSet predSet)
{
  const ColStatDescList & inputStats = inputEstLogProp->getColStats();

  // Columns for which a histogram has to stay available: the specified
  // outputs plus whatever the predicates need (e.g. columns compared with
  // constants, or join columns that are not characteristic outputs and
  // may be needed later).
  ValueIdSet histogramCols = specifiedOutputs;
  histogramCols.addSet(predSet.getColumnsForHistogram());

  colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists());

  NABoolean anyKept = FALSE;

  for (CollIndex idx = 0; idx < columnStats.entries(); idx++)
    {
      // Take over all of the multi-column uec information and the scan
      // row count; not all of it may be needed, but this is the simplest
      // way to have it available later.
      colStats().insertIntoUecList(columnStats.getUecList());
      colStats().setScanRowCountWithoutHint(
          columnStats.getScanRowCountWithoutHint());

      // The inserts below are not deep copies: the descriptor itself is
      // inserted into the output list.
      ColStatDescSharedPtr candidate = columnStats[idx];
      const ValueId vegCol = candidate->getVEGColumn();

      NABoolean kept = FALSE;

      // first chance: the column is already part of the input statistics
      // (or query statistics are being shown)
      for (CollIndex k = 0;
           (NOT kept) AND (k < inputStats.entries());
           k++)
        {
          if (vegCol == inputStats[k]->getVEGColumn() OR
              (CmpCommon::context()->showQueryStats()))
            {
              colStats().insert(candidate);
              kept = TRUE;
            }
        }

      // second chance: no direct match, but the column may still be
      // required for one of the other reasons
      ValueId ignored; // out-parameter of referencesTheGivenValue, unused

      if ( (NOT kept) AND
           (vegCol != NULL_VALUE_ID) AND
           (histogramCols.contains(vegCol) OR
            histogramCols.referencesTheGivenValue(vegCol, ignored) OR
            vegCol.isInvolvedInJoinAndConst() OR
            CmpCommon::context()->showQueryStats()) )
        {
          colStats().insert(candidate);
          kept = TRUE;
        }

      if (kept)
        anyKept = TRUE;

      // The rest of the iteration only matters for skew-aware costing.
      if (NOT CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting())
        continue;

      // A column that is referenced for histograms but dropped here gets
      // its max frequency saved; it may be used later in costing if the
      // column is part of the partitioning key.
      ColStatsSharedPtr stats = candidate->getColStats();
      if (stats->isVirtualColForHist() || kept || stats->isOrigFakeHist())
        continue;

      const ValueId baseCol = candidate->getColumn();
      ColAnalysis * analysis = baseCol.colAnalysis();
      if (!analysis)
        continue;

      NAColumn * firstStatCol = stats->getStatColumns()[0];
      if (!firstStatCol->isReferencedForHistogram())
        continue;

      CostScalar maxFreq = columnStats.getMaxFreq(vegCol);
      analysis->setMaxFreq(maxFreq);
      analysis->setFinalUec(stats->getTotalUec());
      analysis->setFinalRC(stats->getRowcount());
    } // for columnStats.entries()

  // never leave the output list empty if there was something to pick from
  if (!anyKept && columnStats.entries() > 0)
    colStats().insert(columnStats[0]);
} // pickOutputs
// compress the histograms based on query predicates on this table
void TableDesc::compressHistogramsForCurrentQuery()
{

    // if there are some column statistics
    if ((colStats_.entries() != 0) &&
            (table_) &&
            (table_->getExtendedQualName().getSpecialType() == ExtendedQualName::NORMAL_TABLE))
    {   // if 1
        // check if query analysis info is available
        if(QueryAnalysis::Instance()->isAnalysisON())
        {   // if 2
            // get a handle to the query analysis
            QueryAnalysis* queryAnalysis = QueryAnalysis::Instance();

            // get a handle to the table analysis
            const TableAnalysis * tableAnalysis = getTableAnalysis();

            if(!tableAnalysis)
                return;

            // iterate over statistics for each column
            for(CollIndex i = 0; i < colStats_.entries(); i++)
            {   // for 1
                // Get a handle to the column's statistics descriptor
                ColStatDescSharedPtr columnStatDesc = colStats_[i];

                // get a handle to the ColStats
                ColStatsSharedPtr colStats = columnStatDesc->getColStats();

                // if this is a single column, as opposed to a multicolumn
                if(colStats->getStatColumns().entries() == 1)
                {   // if 3
                    // get column's value id
                    const ValueId columnId = columnStatDesc->getColumn();

                    // get column analysis
                    ColAnalysis* colAnalysis = queryAnalysis->getColAnalysis(columnId);

                    if(!colAnalysis) continue;

                    ValueIdSet predicatesOnColumn =
                        colAnalysis->getReferencingPreds();

                    // we can compress this column's histogram if there
                    // is a equality predicate against a constant

                    ItemExpr *constant = NULL;

                    NABoolean colHasEqualityAgainstConst =
                        colAnalysis->getConstValue(constant);

                    // if a equality predicate with a constant was found
                    // i.e. predicate of the form col = 5
                    if (colHasEqualityAgainstConst)
                    {   // if 4
                        if (constant)
                            // compress the histogram
                            columnStatDesc->compressColStatsForQueryPreds(constant,constant);
                    } // if 4
                    else { // else 4

                        // since there is no equality predicates we might still
                        // be able to compress the column's histogram based on
                        // range predicates against a constant. Following are
                        // examples of such predicates
                        // * col > 1 <-- predicate defines a lower bound
                        // * col < 3 <-- predicate defines a upper bound
                        // * col >1 and col < 30 <-- window predicate, define both bounds
                        ItemExpr * lowerBound = NULL;
                        ItemExpr * upperBound = NULL;

                        // Extract predicates from range spec and add it to the
                        // original predicate set otherwise isARangePredicate() will
                        // return FALSE, so histgram compression won't happen.
                        ValueIdSet rangeSpecPred(predicatesOnColumn);
                        for (ValueId predId= rangeSpecPred.init();
                                rangeSpecPred.next(predId);
                                rangeSpecPred.advance(predId))
                        {
                            ItemExpr * pred = predId.getItemExpr();
                            if ( pred->getOperatorType() == ITM_RANGE_SPEC_FUNC )
                            {
                                ValueIdSet vs;
                                ((RangeSpecRef *)pred)->getValueIdSetForReconsItemExpr(vs);
                                // remove rangespec vid from the original set
                                predicatesOnColumn.remove(predId);
                                // add preds extracted from rangespec to the original set
                                predicatesOnColumn.insert(vs);
                            }
                        }

                        // in the following loop we iterate over all the predicates
                        // on this column. If there is a range predicate e.g. a > 2
                        // or a < 3, then we use that to define upper and lower bounds.
                        // Given predicate a > 2, we get a lower bound of 2.
                        // Given predicate a < 3, we get a upper bound of 3.
                        // The bound are then passed down to the histogram
                        // compression methods.

                        // iterate over predicates to see if any of them is a range
                        // predicate e.g. a > 2
                        for (ValueId predId= predicatesOnColumn.init();
                                predicatesOnColumn.next(predId);
                                predicatesOnColumn.advance(predId))
                        {   // for 2
                            // check if this predicate is a range predicate
                            ItemExpr * predicateOnColumn = predId.getItemExpr();
                            if (predicateOnColumn->isARangePredicate())
                            {   // if 5

                                // if a predicate is a range predicate we need to find out more
                                // information regarding the predicate to see if it can be used
                                // to compress the columns histogram. We look for the following:
                                // * The predicate is against a constant e.g. a > 3 and not against
                                //   another column e.g. a > b
                                // Also give a predicate we need to find out what side is the column
                                // and what side is the constant. Normally people write a range predicate
                                // as a > 3, but the same could be written as 3 < a.
                                // Also either on of the operands of the range predicate might be
                                // a VEG, if so then we need to dig into the VEG to see where is
                                // the constant and where is the column.

                                // check the right and left children of this predicate to
                                // see if one of them is a constant
                                ItemExpr * leftChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(0);
                                ItemExpr * rightChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(1);

                                // by default assume the literal is at right i.e. predicate of
                                // the form a > 2
                                NABoolean columnAtRight = FALSE;

                                // check if right child of predicate is a VEG
                                if ( rightChildItemExpr->getOperatorType() == ITM_VEG_REFERENCE)
                                {   // if 6
                                    // if child is a VEG
                                    VEGReference * rightChildVEG = (VEGReference *) rightChildItemExpr;

                                    // check if the VEG contains the current column
                                    // if it does contain the current column then
                                    // the predicate has the column on right and potentially
                                    // a constant on the left.
                                    if(rightChildVEG->getVEG()->getAllValues().contains(columnId))
                                    {   // if 7
                                        // column is at right i.e. predicate is of the form
                                        // 2 < a
                                        columnAtRight = TRUE;
                                    } // if 7
                                } // if 6
                                else { // else 6
                                    // child is not a VEG
                                    if ( columnId == rightChildItemExpr->getValueId() )
                                    {   // if 8
                                        // literals are at left i.e. predicate is of the form
                                        // (1,2) < (a, b)
                                        columnAtRight = TRUE;
                                    } // if 8
                                } // else 6

                                ItemExpr * potentialConstantExpr = NULL;

                                // check if the range predicate is against a constant
                                if (columnAtRight)
                                {   // if 9
                                    // the left child is potentially a constant
                                    potentialConstantExpr = leftChildItemExpr;
                                } // if 9
                                else { // else 9
                                    // the right child is potentially a constant
                                    potentialConstantExpr = rightChildItemExpr;
                                } // else 9

                                // initialize constant to NULL before
                                // looking for next constant
                                constant = NULL;

                                // check if potentialConstantExpr contains a constant.
                                // we need to see if this range predicate is a predicate
                                // against a constant e.g col > 1 and not a predicate
                                // against another column e.g. col > anothercol

                                // if the expression is a VEG
                                if ( potentialConstantExpr->getOperatorType() == ITM_VEG_REFERENCE)
                                {   // if 10

                                    // expression is a VEG, dig into the VEG to
                                    // get see if it contains a constant
                                    VEGReference * potentialConstantExprVEG =
                                        (VEGReference *) potentialConstantExpr;

                                    potentialConstantExprVEG->getVEG()->\
                                    getAllValues().referencesAConstValue(&constant);
                                } // if 10
                                else { // else 10

                                    // express is not a VEG, it is a constant
                                    if ( potentialConstantExpr->getOperatorType() == ITM_CONSTANT )
                                        constant = potentialConstantExpr;
                                } // else 10

                                // if predicate involves a constant, does the constant imply
                                // a upper bound or lower bound
                                if (constant)
                                {   // if 11
                                    // if range predicate has column at right e.g. 3 > a
                                    if (columnAtRight)
                                    {   // if 12
                                        if ( predicateOnColumn->getOperatorType() == ITM_GREATER ||
                                                predicateOnColumn->getOperatorType() == ITM_GREATER_EQ)
                                        {   // if 13
                                            if (!upperBound)
                                                upperBound = constant;
                                        } // if 13
                                        else
                                        {   // else 13
                                            if (!lowerBound)
                                                lowerBound = constant;
                                        } // else 13
                                    } // if 12
                                    else { // else 12
                                        // range predicate has column at left e.g. a < 3
                                        if ( predicateOnColumn->getOperatorType() == ITM_LESS ||
                                                predicateOnColumn->getOperatorType() == ITM_LESS_EQ)
                                        {   // if 14
                                            if (!upperBound)
                                                upperBound = constant;
                                        } // if 14
                                        else
                                        {   // else 14
                                            if (!lowerBound)
                                                lowerBound = constant;
                                        } // else 14
                                    } // else 12
                                } // if 11
                            } // if 5
                        } // for 2

                        // if we found a upper bound or a lower bound
                        if (lowerBound || upperBound)
                        {
                            // compress the histogram based on range predicates
                            columnStatDesc->compressColStatsForQueryPreds(lowerBound, upperBound);
                        }
                    } // else 4
                } // if 3
            } // for 1
        } // if 2
    } // if 1
    // All histograms compressed. Set the histCompressed flag to TRUE
    histsCompressed(TRUE);
}
// Returns the estimated logical properties for the node 'jbbc'.
//
// jbbc      - the node whose statistics are requested.
// inLP      - input logical properties; when equal to the global empty
//             input logical properties, the node's own JBB input is used.
// predIdSet - predicates to apply; NULL means the predicates of the
//             node's expression are used.
//
// Steps:
//   1. If the input is cacheable and no predicate set was given, the
//      statistics cache is consulted with the key {jbbc + node set of the
//      input}.
//   2. Otherwise, or on a cache miss:
//      a. if the node has a table analysis and a predicate set was given,
//         a Scan over the table is built with those predicates and its
//         output logical properties are synthesized against a copy of the
//         input that is marked non-cacheable;
//      b. else the output logical properties of the node's modified
//         expression (or the original one, if there is no modified
//         expression) are returned for the input.
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
					CANodeId jbbc,
					const EstLogPropSharedPtr &inLP,
					const ValueIdSet * predIdSet)
{
  // use the node's JBB input in place of the empty input
  EstLogPropSharedPtr effectiveInLP = inLP;
  if (effectiveInLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    effectiveInLP = jbbc.getJBBInput();

  EstLogPropSharedPtr cachedLP = NULL;

  // 1. Cache lookup: only when every local predicate is to be applied
  //    (no explicit predicate set) and the input itself is cacheable.
  if ((effectiveInLP->isCacheable()) && (predIdSet == NULL))
  {
    // the key is this node combined with the node set of the input
    CANodeIdSet cacheKey = jbbc;
    CANodeIdSet * inputNodes = effectiveInLP->getNodeSet();

    // a cacheable input is expected to carry a node set
    CCMPASSERT(inputNodes != NULL);

    if (inputNodes)
    {
      cacheKey.insert(*inputNodes);
      cachedLP = getCachedStatistics(&cacheKey);
    }
  }

  if (!(cachedLP == NULL))
    return cachedLP;

  // 2. Nothing usable in the cache: synthesize the properties.
  NodeAnalysis * nodeAnalysis = jbbc.getNodeAnalysis();
  TableAnalysis * tableAnalysis = nodeAnalysis->getTableAnalysis();

  if (!(tableAnalysis && predIdSet))
  {
    // 2b. Use the node's expression, preferring the modified one.
    RelExpr * nodeExpr = nodeAnalysis->getModifiedExpr();
    if (nodeExpr == NULL)
      nodeExpr = nodeAnalysis->getOriginalExpr();

    return nodeExpr->getGroupAttr()->outputLogProp(effectiveInLP);
  }

  // 2a. Build a Scan over the table that applies the caller's predicates.
  TableDesc * tableDesc = tableAnalysis->getTableDesc();

  const QualifiedName& tableName = tableDesc->getNATable()->getTableName();
  CorrName corrName(tableName, STMTHEAP);

  Scan * scan = new STMTHEAP Scan(corrName, tableDesc, REL_SCAN, STMTHEAP);

  // Base cardinality: the table's estimated row count, overridden by a
  // cardinality hint if present; for an HBase table without a hint, the
  // row count of the first single-column statistics is used when available.
  Cardinality baseRows = tableDesc->getNATable()->getEstRowCount();

  const CardinalityHint * hint = tableDesc->getCardinalityHint();
  if (hint)
  {
    baseRows = (hint->getScanCardinality()).getValue();
  }
  else if (tableDesc->getNATable()->isHbaseTable())
  {
    StatsList * tableStats =
        ((NATable *)(tableDesc->getNATable()))->getColStats();

    if (tableStats && tableStats->entries() > 0)
    {
      ColStatsSharedPtr firstStats = tableStats->getSingleColumnColStats(0);
      if (firstStats)
        baseRows = (firstStats->getRowcount()).getValue();
    }
  }

  scan->setBaseCardinality(MIN_ONE(baseRows));

  // Group attributes for the scan: the caller's predicates as selection
  // predicates, the original expression's characteristic outputs as
  // required outputs.
  GroupAttributes * scanGA = new STMTHEAP GroupAttributes();

  scan->setSelectionPredicates(*predIdSet);

  ValueIdSet requiredOutputs =
      nodeAnalysis->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();
  scanGA->setCharacteristicOutputs(requiredOutputs);

  scan->setGroupAttr(scanGA);
  scanGA->setLogExprForSynthesis(scan);

  // synthesize against a copy of the input that is marked non-cacheable
  EstLogPropSharedPtr uncachedInLP(new (HISTHEAP) EstLogProp (*effectiveInLP));
  uncachedInLP->setCacheableFlag(FALSE);

  scan->synthLogProp();
  return scan->getGroupAttr()->outputLogProp(uncachedInLP);
} // getStatsForCANodeId