Exemplo n.º 1
0
// Try to turn the constant 'val' into a selectivity parameter, using the
// statistics of the base column 'base'.
//
//   cwa, bindWA, child : passed through to
//                        CacheWA::replaceWithNewOrOldSelParam()
//   base               : column whose histogram supplies the selectivity
//   val                : constant whose equality selectivity is computed
//
// The function silently does nothing when
//   - the table has no column statistics for 'base',
//   - those statistics carry no histogram, or
//   - the histogram cannot compute an equality selectivity for 'val'.
void computeAndAddSelParamIfPossible(
             CacheWA& cwa, BindWA& bindWA, ExprValueId& child,
             BaseColumn *base, ConstValue *val)
{
    // Statistics of the column, looked up by its value id.
    ColStatsSharedPtr columnStats =
        (base->getTableDesc()->tableColStats()).
            getColStatsPtrForColumn(base->getValueId());
    if (columnStats == NULL)
        return;

    HistogramSharedPtr histogram = columnStats->getHistogram();
    if (histogram == NULL)
        return;

    // 'equalitySel' is an output argument of the histogram call below.
    CostScalar equalitySel;
    NABoolean selAvailable = histogram->computeSelectivityForEquality(
            val, columnStats->getRowcount(), columnStats->getTotalUec(),
            equalitySel);
    if (selAvailable == TRUE)
    {
        // By default the parameter takes the column's own type.
        const NAType * paramType = base->getNAColumn()->getType();

        if (paramType->getTypeQualifier() == NA_CHARACTER_TYPE)
        {
            // Character column: use a copy of the column type (allocated
            // on the cache work area's heap) and give it the
            // case-insensitive attribute of the constant.
            // NOTE(review): the constant's type is cast to CharType
            // without a check -- presumably it is always a character type
            // when the column is; confirm against the callers.
            paramType =
                base->getNAColumn()->getType()->newCopy(cwa.wHeap());

            CharType * constCharType  = (CharType*)val->getType();
            CharType * columnCharType = (CharType*)paramType;
            columnCharType->setCaseinsensitive(
                constCharType->isCaseinsensitive());
        }

        cwa.replaceWithNewOrOldSelParam(val, paramType,
                                        Selectivity(equalitySel),
                                        child, bindWA);
    }
}
Exemplo n.º 2
0
// ---------------------------------------------------------------------
// Utility Routine: pickOutputs
//
// Inserts into this object's column statistics (colStats()) the entries
// of 'columnStats' that are considered useful.
//
// An entry is inserted when one of the following holds:
//   (a) the input EstLogProp has at least one column-statistics entry,
//       and either one of them has the same VEG column as this entry or
//       showQueryStats() is on;
//   (b) its VEG column is not NULL_VALUE_ID and
//         - the column is contained in, or referenced by, the set of
//           columns requiring histograms ('specifiedOutputs' plus the
//           histogram columns of 'predSet'), or
//         - the column is involved in a join and a constant, or
//         - showQueryStats() is on.
//
// For an entry that is NOT inserted, and only when skew is incorporated
// in costing, the max frequency, total UEC and row count are recorded on
// the column's ColAnalysis (see the comment in the loop body).
//
// If no entry was inserted at all and 'columnStats' is not empty, its
// first entry is inserted.
// ---------------------------------------------------------------------
void EstLogProp::pickOutputs( ColStatDescList & columnStats,
                              const EstLogPropSharedPtr& inputEstLogProp,
                              const ValueIdSet specifiedOutputs,
                              const ValueIdSet predSet)
{
  const ColStatDescList & inputColStats = inputEstLogProp->getColStats();

  // Columns for which histograms must be kept: the specified outputs plus
  // whatever the predicates need. Per the original author's notes the
  // predicate columns cover (i) predicates containing a constant or a
  // constant expression and (ii) columns joined to other columns that do
  // not appear as characteristic outputs -- a problem seen with temporary
  // tables created as normal tables by triggers.
  ValueIdSet histogramCols = specifiedOutputs;
  histogramCols.addSet(predSet.getColumnsForHistogram());

  colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists());

  // Becomes TRUE as soon as any entry has been inserted.
  NABoolean anyKept = FALSE;

  for (CollIndex statIdx = 0; statIdx < columnStats.entries(); statIdx++)
    {
      // Probably not all of it is needed, but this is the easiest way to
      // carry along the multi-column uec information needed later.
      colStats().insertIntoUecList(columnStats.getUecList());
      colStats().setScanRowCountWithoutHint(
          columnStats.getScanRowCountWithoutHint());

      // Inserting the shared descriptor (no deep copy) is sufficient:
      // from here on the descriptors describing the calling operator's
      // output are treated as read-only.
      ColStatDescSharedPtr candidate = columnStats[statIdx];
      const ValueId vegColumn = candidate->getVEGColumn();

      // TRUE once this particular entry has been inserted.
      NABoolean kept = FALSE;

      // Case (a): compare against the input's column statistics.
      for (CollIndex outerIdx = 0;
           outerIdx < inputColStats.entries();
           outerIdx++)
        {
          if (vegColumn == inputColStats[outerIdx]->getVEGColumn() OR
              (CmpCommon::context()->showQueryStats()))
            {
              colStats().insert(candidate);
              kept = TRUE;
              anyKept = TRUE;
              break; // done with this entry
            }
        }

      // Case (b): no direct match, but the column may still be wanted.
      ValueId unusedRef; // output argument of referencesTheGivenValue()
      if (NOT kept AND (vegColumn != NULL_VALUE_ID))
        {
          if (histogramCols.contains(vegColumn) OR
              histogramCols.referencesTheGivenValue(vegColumn, unusedRef) OR
              vegColumn.isInvolvedInJoinAndConst() OR
              CmpCommon::context()->showQueryStats())
            {
              colStats().insert(candidate);
              kept = TRUE;
              anyKept = TRUE;
            }
        }

      // Everything below only applies when skew is used in costing.
      if (!CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting())
        continue;

      // If the column is referenced for histogram but is not needed
      // beyond this point, save its max frequency; it may be used later
      // in costing if the column is part of the partitioning key.
      ColStatsSharedPtr candidateStats = candidate->getColStats();
      if (candidateStats->isVirtualColForHist() ||
          kept ||
          candidateStats->isOrigFakeHist())
        continue;

      const ValueId statColumnId = candidate->getColumn();
      ColAnalysis * analysis = statColumnId.colAnalysis();
      if (!analysis)
        continue;

      NAColumn * firstStatColumn = candidateStats->getStatColumns()[0];
      if (!firstStatColumn->isReferencedForHistogram())
        continue;

      CostScalar maxFreq = columnStats.getMaxFreq(vegColumn);
      analysis->setMaxFreq(maxFreq);
      analysis->setFinalUec(candidateStats->getTotalUec());
      analysis->setFinalRC(candidateStats->getRowcount());
    } // for columnStats.entries()

  // Nothing was selected: fall back to the first entry, if there is one.
  if (!anyKept && columnStats.entries() > 0)
    colStats().insert(columnStats[0]);
} // pickOutputs
// Returns the estimated logical properties of the node 'jbbc' for the
// input logical properties 'inLP'.
//
//   jbbc      : node whose statistics are requested
//   inLP      : input logical properties; when equal to
//               *GLOBAL_EMPTY_INPUT_LOGPROP, jbbc.getJBBInput() is used
//   predIdSet : predicates to apply; NULL means "use the node's own
//               expression as is"
//
// Outline:
//   1. If the input properties are cacheable and no predicate set was
//      given, look the result up via getCachedStatistics(), keyed by
//      'jbbc' combined with the node set of the input properties.
//   2. Otherwise, or on a cache miss, synthesize the properties:
//        - node has a table analysis and 'predIdSet' was given: build a
//          temporary Scan over the table with exactly those predicates;
//        - anything else: use the node's modified expression, or its
//          original expression when there is no modified one.
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
                                        CANodeId jbbc,
                                        const EstLogPropSharedPtr &inLP,
                                        const ValueIdSet * predIdSet)
{
  EstLogPropSharedPtr effectiveInLP = inLP;
  if (effectiveInLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    effectiveInLP = jbbc.getJBBInput();

  EstLogPropSharedPtr result = NULL;

  // ---- 1. cache lookup ------------------------------------------------
  if ((effectiveInLP->isCacheable()) && (predIdSet == NULL))
  {
    // Cache key: this node plus the node set of the outer child.
    CANodeIdSet cacheKey = jbbc;
    CANodeIdSet * outerNodes = effectiveInLP->getNodeSet();

    // Cacheable input properties should have a node set attached.
    CCMPASSERT(outerNodes != NULL);

    if (outerNodes)
    {
      cacheKey.insert(*outerNodes);
      result = getCachedStatistics(&cacheKey);
    }
  }

  if (!(result == NULL))
    return result;

  // ---- 2. not cached: synthesize --------------------------------------
  NodeAnalysis * jbbcAnalysis = jbbc.getNodeAnalysis();
  TableAnalysis * tableInfo = jbbcAnalysis->getTableAnalysis();

  if (!(tableInfo && predIdSet))
  {
    // No table analysis, or no user-specified predicates: use the node's
    // expression, preferring the modified one.
    NodeAnalysis * nodeAnalysis = jbbc.getNodeAnalysis();

    RelExpr * expr = nodeAnalysis->getModifiedExpr();
    if (expr == NULL)
      expr = nodeAnalysis->getOriginalExpr();

    // Per the original author's note this both synthesizes and caches
    // the properties for the given input.
    result = expr->getGroupAttr()->outputLogProp(effectiveInLP);
    return result;
  }

  // Table node with user-specified predicates: build a temporary Scan
  // that applies exactly 'predIdSet'.
  TableDesc * tableDesc = tableInfo->getTableDesc();

  const QualifiedName& tableName = tableDesc->getNATable()->getTableName();
  CorrName corrName(tableName, STMTHEAP);

  Scan * tempScan = new STMTHEAP Scan(corrName, tableDesc, REL_SCAN, STMTHEAP);

  // Base cardinality: the table's estimated row count, overridden by a
  // cardinality hint when present; without a hint, an HBase table takes
  // the row count of its first single-column statistics, if any.
  Cardinality baseRows = tableDesc->getNATable()->getEstRowCount();

  const CardinalityHint * hint = tableDesc->getCardinalityHint();
  if (hint)
  {
    baseRows = (hint->getScanCardinality()).getValue();
  }
  else if (tableDesc->getNATable()->isHbaseTable())
  {
    NATable * naTable = (NATable*)(tableDesc->getNATable());
    StatsList * tableStats = naTable->getColStats();

    if (tableStats && tableStats->entries() > 0)
    {
      ColStatsSharedPtr firstColStats =
            tableStats->getSingleColumnColStats(0);

      if (firstColStats)
        baseRows = (firstColStats->getRowcount()).getValue();
    }
  }

  tempScan->setBaseCardinality(MIN_ONE (baseRows));

  GroupAttributes * tempGA = new STMTHEAP GroupAttributes();

  tempScan->setSelectionPredicates(*predIdSet);

  // The temporary scan produces the same characteristic outputs as the
  // node's original expression.
  ValueIdSet requiredOutputs =
      jbbc.getNodeAnalysis()->getOriginalExpr()->
          getGroupAttr()->getCharacteristicOutputs();
  tempGA->setCharacteristicOutputs(requiredOutputs);

  tempScan->setGroupAttr(tempGA);
  tempGA->setLogExprForSynthesis(tempScan);

  // Work on a copy of the input properties with the cacheable flag off.
  // NOTE(review): presumably so that results for caller-specified
  // predicates are not cached -- confirm.
  EstLogPropSharedPtr nonCacheableInLP(
      new (HISTHEAP) EstLogProp (*effectiveInLP));
  nonCacheableInLP->setCacheableFlag(FALSE);

  tempScan->synthLogProp();
  result = tempScan->getGroupAttr()->outputLogProp(nonCacheableInLP);

  return result;
} // getStatsForCANodeId