// Collects the value ids that the given set of nodes can produce.
// For every node in jbbcsNodeSet that has a NodeAnalysis:
//   - no original parent join, or an inner non-semi parent join:
//       the characteristic outputs of the node's original expression
//   - left join parent:
//       the null-instantiated outputs of that parent join
//   - any other parent join: nothing is added for that node
ValueIdSet AppliedStatMan::getPotentialOutputs(
			  const CANodeIdSet & jbbcsNodeSet)
{
  ValueIdSet result;

  for (CANodeId nodeId = jbbcsNodeSet.init();
       jbbcsNodeSet.next(nodeId);
       jbbcsNodeSet.advance(nodeId))
  {
    NodeAnalysis * nodeAnalysis = nodeId.getNodeAnalysis();
    if (!nodeAnalysis)
      continue;

    const Join * parentJoin =
      nodeAnalysis->getJBBC()->getOriginalParentJoin();

    ValueIdSet nodeOutputs;
    if (!parentJoin || parentJoin->isInnerNonSemiJoin())
    {
      nodeOutputs = nodeAnalysis->getOriginalExpr()->
                      getGroupAttr()->getCharacteristicOutputs();
    }
    else if (parentJoin->isLeftJoin())
    {
      nodeOutputs = parentJoin->nullInstantiatedOutput();
    }

    result.insert(nodeOutputs);
  }

  return result;
} // AppliedStatMan::getPotentialOutputs
// Constructs a MultiJoin over the JBBCs of jbbSubset.
// childrenMap_ receives one entry per JBBC, each created with a NULL
// child expression, at consecutive positions starting from 0.
MultiJoin::MultiJoin(const JBBSubset & jbbSubset,
                     CollHeap *oHeap)
  : RelExpr(REL_MULTI_JOIN, NULL, NULL, oHeap)
  , jbbSubset_(jbbSubset)
  , childrenMap_(oHeap)
  , scheduledLSRs_(oHeap)
{
  CANodeIdSet jbbcs = jbbSubset_.getJBBCs();

  Lng32 slot = 0;
  for (CANodeId jbbc = jbbcs.init(); jbbcs.next(jbbc); jbbcs.advance(jbbc))
  {
    // placeholder entry: the child expression is NULL for now
    JBBCExprGroupEntry* placeholder = new (oHeap)
      JBBCExprGroupEntry(jbbc, (RelExpr*)NULL, oHeap);

    childrenMap_.insertAt(slot, placeholder);
    ++slot;
  }

  lsrC_ = new (oHeap) LSRConfidence(oHeap);

  // the arity has to agree with the number of JBBCs
#pragma warning (disable : 4018)  //warning elimination
  CMPASSERT (getArity() == jbbcs.entries());
#pragma warning (default : 4018)  //warning elimination
}
// Returns the estimated logical properties for jbbNodeSet, using the JBB
// input of that set as the input logical properties.
// This method assumes jbbNodeSet contains nodes from the same JBB.
//
// The ASM cache is probed with jbbNodeSet combined with the node set that
// is attached to the JBB input. On a cache miss the properties are
// synthesized with synthesizeLogProp().
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeIdSet(
					const CANodeIdSet & jbbNodeSet)
{
  EstLogPropSharedPtr outputEstLogProp;

  // fetch the JBB input once and use it for both the lookup and the
  // synthesis
  EstLogPropSharedPtr jBBInput = jbbNodeSet.getJBBInput();

  // The input properties may come without a node set (other ASM methods
  // check for this case as well). Without it the cache key cannot be
  // built, so skip the lookup rather than dereference a NULL pointer.
  CANodeIdSet * inputNodeSet = jBBInput->getNodeSet();
  if (inputNodeSet != NULL)
  {
    CANodeIdSet combinedNodeSet = jbbNodeSet;
    combinedNodeSet += *inputNodeSet;
    outputEstLogProp = getCachedStatistics(&combinedNodeSet);
  }

  if (outputEstLogProp == NULL)
    outputEstLogProp = synthesizeLogProp(&jbbNodeSet, jBBInput);

  return outputEstLogProp;
}
// Example #4
// 0
// Dumps the query analysis to the file "<fileNamePrefix_>.analysis".
// The text consists of the unparsed expression, the text of the
// QueryAnalysis and, for each JBBC of each JBB, its join predicates
// followed by the value ids of those predicates.
void MvQueryRewriteHandler::dumpAnalysisToFile(QueryAnalysis* qa, RelExpr* expr)
{
  NAString analysisFileName = fileNamePrefix_ + ".analysis";

  // Expression first, then the analysis text.
  NAString str;
  expr->unparse(str, OPTIMIZER_PHASE, MVINFO_FORMAT);
  str += "\n";
  str += qa->getText();

  // Header of the join predicate section.
  str += "Join Predicates\n";
  str += "===============";

  char buffer[20];   // receives the text produced by str_itoa
  ARRAY(JBB*) jbbs = qa->getJBBs();
  for (CollIndex jbbInx = 0; jbbInx < jbbs.entries(); jbbInx++)
    {
      str_itoa(jbbInx, buffer);
      str += "\nJBB #";
      str += NAString(buffer);
      str += ":\n";

      CANodeIdSet jbbcs = jbbs[jbbInx]->getJBBCs();
      for (CANodeId jbbcId = jbbcs.init();
           jbbcs.next(jbbcId);
           jbbcs.advance(jbbcId))
        {
          str_itoa(jbbcId, buffer);
          str += "\nJBBC with CANodeId ";
          str += NAString(buffer);
          str += ":\n";

          ValueIdSet joinPreds =
            jbbcId.getNodeAnalysis()->getJBBC()->getJoinPreds();
          str += valueIdSetGetText(joinPreds);

          if (joinPreds.entries() > 0)
            {
              // comma separated list of the predicate value ids
              str.append("\n(value ids of predicates are ");
              NABoolean needSeparator = FALSE;
              for (ValueId jpVid = joinPreds.init();
                   joinPreds.next(jpVid);
                   joinPreds.advance(jpVid))
                {
                  if (needSeparator)
                    str.append(", ");
                  needSeparator = true;

                  str_itoa(jpVid, buffer);
                  str.append(buffer);
                }
              str.append(")\n");
            }
        }
      str += '\n';
    }

  dumpToFile(analysisFileName.data(), str.data());
}  // dumpAnalysisToFile()
// Returns the estimated logical properties of nodeSet for the input
// logical properties inLP.
//   1. If inLP is cacheable and has a node set attached, the ASM cache is
//      probed with nodeSet combined with that node set; a hit is returned.
//   2. A nodeSet with a single entry is handed to getStatsForCANodeId().
//   3. Otherwise the result comes from the group attributes of the
//      preferred join of the JBBSubset formed from nodeSet.
EstLogPropSharedPtr AppliedStatMan::synthesizeLogProp(
				const CANodeIdSet * nodeSet,
				EstLogPropSharedPtr &inLP)
{
  EstLogPropSharedPtr outputEstLogProp;

  if (inLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inLP->getNodeSet();

    if (inNodeSet != NULL)
    {
      // probe the ASM cache for the estLogProps of nodeSet under the
      // given inLP
      CANodeIdSet combinedNodeSetWithInput = *nodeSet;
      combinedNodeSetWithInput.insert(*inNodeSet);

      outputEstLogProp = getCachedStatistics(&combinedNodeSetWithInput);
      if (outputEstLogProp != NULL)
        return outputEstLogProp;
    }
    else
    {
      // Cacheable inLP should have a nodeSet attached. If they do not,
      // assert in debug mode; in release mode mark the properties as not
      // cacheable.
      CCMPASSERT(inNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  // a single node has its own entry point
  if (nodeSet->entries() == 1)
    return getStatsForCANodeId(nodeSet->getFirst(), inLP);

  Join * preferredJoin = nodeSet->jbbcsToJBBSubset()->getPreferredJoin();

  //CMPASSERT(preferredJoin->isJoinFromMJSynthLogProp());

  outputEstLogProp = preferredJoin->getGroupAttr()->outputLogProp(inLP);
  return outputEstLogProp;
} // AppliedStatMan::synthesizeLogProp
// --------------------------------------------------------------------
// Fill childrenMap_ of this MultiJoin, one entry per JBBC, with the
// original expression taken from the NodeAnalysis of that JBBC.
// Entries are allocated on the output heap of the QueryAnalysis.
// --------------------------------------------------------------------
void MultiJoin::setChildrenFromOrigExprs(QueryAnalysis * qa)
{
  CollHeap* outHeap = qa->outHeap();
  CANodeIdSet jbbcs = jbbSubset_.getJBBCs();

  // all JBBCs of this MultiJoin must be among the JBBCs of qa
  CMPASSERT (qa->getJBBCs().contains(jbbcs));

  Lng32 slot = 0;
  for (CANodeId jbbc = jbbcs.init(); jbbcs.next(jbbc); jbbcs.advance(jbbc))
  {
    JBBCExprGroupEntry* origEntry = new (outHeap)
      JBBCExprGroupEntry(jbbc,
                         qa->getNodeAnalysis(jbbc)->getOriginalExpr(),
                         outHeap);

    childrenMap_.insertAt(slot, origEntry);
    ++slot;
  }
}
// Returns the estimated logical properties of the join between the node
// sets leftChildren and rightChildren for the input properties inLP.
// When inLP is the global empty input logical property, the JBB input of
// leftChildren is used instead.
//
// The ASM cache is consulted first (only possible when the input is
// cacheable and carries a node set); on a miss the properties are
// synthesized for the combined left + right node set.
EstLogPropSharedPtr AppliedStatMan::joinJBBChildren(
					const CANodeIdSet & leftChildren,
					const CANodeIdSet & rightChildren,
					EstLogPropSharedPtr & inLP)
{
  EstLogPropSharedPtr inputLP = inLP;

  EstLogPropSharedPtr outputEstLogProp;

  if(inputLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    inputLP = leftChildren.getJBBInput();

  // Because there exist a nodeSet for the left, right and the outer
  // child, hence these properties are cacheable. Check to see if the
  // outputEstLogProp of the join for the given inLP exist in the cache

  CANodeIdSet combinedNodeSet = leftChildren;
  combinedNodeSet.insert(rightChildren);

  if (inputLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inputLP->getNodeSet();

    // Cacheable input should have a node set attached, but the other ASM
    // methods allow for it being NULL. Skip the cache lookup in that case
    // instead of dereferencing NULL; synthesizeLogProp() below performs
    // its own handling of a cacheable input without a node set.
    if (inNodeSet != NULL)
    {
      CANodeIdSet combinedWithInputNodeSet = combinedNodeSet;
      combinedWithInputNodeSet.insert(*inNodeSet);

      outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet);
    }
  }

  if(outputEstLogProp == NULL)
    outputEstLogProp = synthesizeLogProp(&combinedNodeSet, inputLP);

  return outputEstLogProp;
} // AppliedStatMan::joinJBBChildren
// --------------------------------------------------------------------
// use the input JBBCExprGroupMap to set this MultiJoin childrenMap_
// --------------------------------------------------------------------
void MultiJoin::setChildren(const JBBCExprGroupMap & map)
{
  // everything here goes to statement heap
  CollHeap* outHeap = CmpCommon::statementHeap();

  CANodeIdSet jbbcs = jbbSubset_.getJBBCs();

  CMPASSERT (map.getJBBCs().contains(jbbcs));

  Lng32 index = 0;

  for (CANodeId x= jbbcs.init();
       jbbcs.next(x);
       jbbcs.advance(x) )
  {
    JBBCExprGroupEntry* entry = new (outHeap)
      JBBCExprGroupEntry(x, map.getExprGroupIdOfJBBC(x), outHeap);

    childrenMap_.insertAt(index, entry);
	index++;
  }

  return;
}
// Computes the join reduction of joining rightChildren to leftChildren.
//
// Only the part of leftChildren that is connected to rightChildren is
// considered (leftNodesJoinedToRight, see below). The result is
//
//   cardinality(leftNodesJoinedToRight JOIN rightChildren)
//   ------------------------------------------------------
//             cardinality(leftNodesJoinedToRight)
//
// If no node of leftChildren is connected to rightChildren, the result
// cardinality of rightChildren is returned instead.
CostScalar AppliedStatMan::computeJoinReduction(
          const CANodeIdSet & leftChildren,
          const CANodeIdSet & rightChildren)
{
  CostScalar result = 0;

  // get stats for left
  // The returned properties are not read below; the call is kept exactly
  // as before. NOTE(review): presumably it is relied on for populating
  // the ASM cache - confirm before removing it.
  EstLogPropSharedPtr leftCard =
    getStatsForCANodeIdSet(leftChildren);

  // get stats for right
  EstLogPropSharedPtr rightCard =
    getStatsForCANodeIdSet(rightChildren);

  // all JBBCs that are joined to, precede or succeed some right child
  CANodeIdSet jbbcsJoinedToRight;

  for( CANodeId rChild = rightChildren.init();
       rightChildren.next(rChild);
       rightChildren.advance(rChild))
  {
    JBBC * rChildJBBC = rChild.getNodeAnalysis()->getJBBC();
    jbbcsJoinedToRight += rChildJBBC->getJoinedJBBCs();
    jbbcsJoinedToRight += rChildJBBC->getPredecessorJBBCs();
    jbbcsJoinedToRight += rChildJBBC->getSuccessorJBBCs();
  }

  // the left nodes that are connected to the right side
  CANodeIdSet leftNodesJoinedToRight = leftChildren;
  leftNodesJoinedToRight.intersectSet(jbbcsJoinedToRight);

  // nothing on the left is connected to the right
  if(!leftNodesJoinedToRight.entries())
  {
    result = rightCard->getResultCardinality();
    return result;
  }

  // Add the predecessors (within leftChildren) of the connected left
  // nodes, then the predecessors of those, and so on until no new node
  // shows up.
  CANodeIdSet leftSetPredecessors;
  CANodeIdSet newNodes = leftNodesJoinedToRight;
  CANodeIdSet nodesConsidered;

  while(newNodes.entries())
  {
    for( CANodeId lChild = newNodes.init();
         newNodes.next(lChild);
         newNodes.advance(lChild))
    {
      JBBC * lChildJBBC = lChild.getNodeAnalysis()->getJBBC();
      leftSetPredecessors += lChildJBBC->getPredecessorJBBCs();
      nodesConsidered += lChild;
    }

    leftSetPredecessors.intersectSet(leftChildren);
    newNodes = leftSetPredecessors;
    newNodes -= nodesConsidered;
  }

  leftNodesJoinedToRight += leftSetPredecessors;

  // for a JBBSubset to be legal it has to have at least one
  // independent jbbc i.e. a jbbcs connect via a innerNonSemiNonTsjJoin
  // Assumption: leftChildren represents a legal JBBSubset
  CANodeIdSet independentJBBCsInLeftNodesJoinedToRight =
    QueryAnalysis::Instance()->getInnerNonSemiNonTSJJBBCs();

  independentJBBCsInLeftNodesJoinedToRight.intersectSet(leftNodesJoinedToRight);

  // no independent jbbc so far: add the largest independent node of
  // leftChildren
  if(!independentJBBCsInLeftNodesJoinedToRight.entries())
    leftNodesJoinedToRight +=
      leftChildren.jbbcsToJBBSubset()->
        getJBBSubsetAnalysis()->
          getLargestIndependentNode();

  EstLogPropSharedPtr cardLeftNodesJoinedToRight =
    getStatsForCANodeIdSet(leftNodesJoinedToRight);

  // cardinality of the connected left nodes joined with the right side
  EstLogPropSharedPtr cardConnectedNodes =
    joinJBBChildren(leftNodesJoinedToRight,rightChildren);

  result = cardConnectedNodes->getResultCardinality() /
             cardLeftNodesJoinedToRight->getResultCardinality();

  return result;
}
// Forms a join expression between the node sets leftNodeSet and
// rightNodeSet for the input logical properties inLP.
//
// Nodes that appear on both sides are removed from one side first. The
// estLogProps of both sides are then taken from the ASM cache when
// available, or synthesized otherwise, and handed together with inLP,
// joinPreds, the cacheable flag and the combined JBBSubset to
// formJoinExprWithEstLogProps().
Join * AppliedStatMan::formJoinExprWithCANodeSets(
					const CANodeIdSet & leftNodeSet,
					const CANodeIdSet & rightNodeSet,
					EstLogPropSharedPtr& inLP,
					const ValueIdSet * joinPreds,
					const NABoolean cacheable)
{
  // Node set of the input properties; remains NULL if inLP is not
  // cacheable or has no node set attached.
  CANodeIdSet * inputNodeSet = NULL;
  if (inLP->isCacheable())
  {
    inputNodeSet = inLP->getNodeSet();

    // Cacheable inLP should have a nodeSet attached. If for some reason
    // they do not, assert in debug mode. In release mode do not look for
    // properties in the ASM cache, get them from the group attr cache
    // instead.
    if (inputNodeSet == NULL)
    {
      CCMPASSERT(inputNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  // Remove the CANodeIds common to both children from one of them:
  //   - left estLogProps are cached            -> remove from right
  //   - else right estLogProps are cached      -> remove from left
  //   - else left has more entries than right  -> remove from left
  //   - else                                   -> remove from right
  CANodeIdSet tempLeftNodeSet = leftNodeSet;
  CANodeIdSet tempRightNodeSet = rightNodeSet;

  CANodeIdSet commonNodeSet = leftNodeSet;
  commonNodeSet.intersectSet(rightNodeSet);

  if (commonNodeSet.entries() > 0)
  {
    if (lookup(leftNodeSet))
      tempRightNodeSet.subtractSet(commonNodeSet);
    else if (lookup(rightNodeSet))
      tempLeftNodeSet.subtractSet(commonNodeSet);
    else if (leftNodeSet.entries() > rightNodeSet.entries())
      tempLeftNodeSet.subtractSet(commonNodeSet);
    else
      tempRightNodeSet.subtractSet(commonNodeSet);
  }

  // Get the estLogProps of the left and the right child: first from the
  // cache (only possible with an input node set), then by synthesis.
  EstLogPropSharedPtr leftEstLogProp = NULL;
  EstLogPropSharedPtr rightEstLogProp = NULL;

  if (inputNodeSet)
  {
    CANodeIdSet leftWithInput = tempLeftNodeSet;
    leftWithInput.insert(*inputNodeSet);
    leftEstLogProp = getCachedStatistics(&leftWithInput);

    CANodeIdSet rightWithInput = tempRightNodeSet;
    rightWithInput.insert(*inputNodeSet);
    rightEstLogProp = getCachedStatistics(&rightWithInput);
  }

  if (leftEstLogProp == NULL)
    leftEstLogProp = synthesizeLogProp(&tempLeftNodeSet, inLP);

  // estLogProps computed for non-cacheable inLP do not contain a nodeSet,
  // but the nodeSet is needed to compute the potential output values, so
  // attach a copy now
  if (!leftEstLogProp->getNodeSet())
    leftEstLogProp->setNodeSet(new (STMTHEAP) CANodeIdSet (tempLeftNodeSet));

  if (rightEstLogProp == NULL)
    rightEstLogProp = synthesizeLogProp(&tempRightNodeSet, inLP);

  if (!rightEstLogProp->getNodeSet())
    rightEstLogProp->setNodeSet(new (STMTHEAP) CANodeIdSet (tempRightNodeSet));

  // The join expression consists of the left and the right JBBSubsets,
  // so its JBBSubset is the superset of the two.
  JBBSubset * combinedSet = leftNodeSet.jbbcsToJBBSubset();
  combinedSet->addSubset(*(rightNodeSet.jbbcsToJBBSubset()));

  // Form the join expression with these estLogProps. inLP and joinPreds
  // are the ones for which the estLogProps are to be synthesized.
  return formJoinExprWithEstLogProps(leftEstLogProp, rightEstLogProp,
			    inLP, joinPreds, cacheable, combinedSet);

} // AppliedStatMan::formJoinExprWithCANodeSets
// LCOV_EXCL_START :cnu
// Returns the estimated logical properties of joining leftEstLogProp with
// rightEstLogProp for the input properties inLP.
//
// If all three properties are cacheable and have node sets, the ASM cache
// is probed first. Otherwise (or on a miss) a join expression is formed
// from the estLogProps and its output properties are synthesized.
EstLogPropSharedPtr AppliedStatMan::joinEstLogProps (
              const EstLogPropSharedPtr& leftEstLogProp,
              const EstLogPropSharedPtr& rightEstLogProp,
              const EstLogPropSharedPtr& inLP)
{
  CANodeIdSet * inputNodeSet = inLP->getNodeSet();

  // These node sets can be NULL when the estLogProps they belong to are
  // not cacheable
  CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet();
  CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet();

  // cacheable only if every participant is cacheable and has a node set
  NABoolean cacheable = FALSE;
  if (leftEstLogProp->isCacheable() &&
      rightEstLogProp->isCacheable() &&
      inLP->isCacheable())
  {
    CCMPASSERT(leftNodeSet != NULL);
    CCMPASSERT(rightNodeSet != NULL);
    CCMPASSERT(inputNodeSet != NULL);
    if (leftNodeSet && rightNodeSet && inputNodeSet)
      cacheable = TRUE;
  }

  EstLogPropSharedPtr outputEstLogProp;

  if (cacheable)
  {
    // see whether the ASM cache already has the outputEstLogProp for
    // left + right + input node sets
    CANodeIdSet combinedWithInputNodeSet = *leftNodeSet;
    combinedWithInputNodeSet.insert(*rightNodeSet);
    combinedWithInputNodeSet.insert(*inputNodeSet);

    outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet);
    if (outputEstLogProp != NULL)
      return outputEstLogProp;
  }

  JBBSubset * newJBBSubset = NULL;
  ValueIdSet setOfPredicates;

  if (leftNodeSet && rightNodeSet)
  {
    // Join predicates can be obtained from the estLogProps only if these
    // correspond to a complete set of predicates - all local or complete
    // join. A combined JBBSubset is also needed for the fake join
    // expression created below.
    newJBBSubset = leftNodeSet->computeJBBSubset();
    JBBSubset rightJBBSubset = *(rightNodeSet->computeJBBSubset());
    setOfPredicates = newJBBSubset->joinPredsWithOther(rightJBBSubset);

    // the group attributes of the new join expression should contain the
    // combined JBBSubset of the left and the right children
    newJBBSubset->addSubset(rightJBBSubset);
  }

  // inLP is either the empty input estLogProp or comes from the outer
  // child. If cacheable is TRUE, newJBBSubset should contain the combined
  // left and right JBBSubset; if cacheable is FALSE it should be NULL.
  Join * joinExpr = formJoinExprWithEstLogProps(leftEstLogProp,
                                                rightEstLogProp,
                                                inLP,
                                                &setOfPredicates,
                                                cacheable,
                                                newJBBSubset);

  // do the actual synthesis and cache the statistics
  outputEstLogProp = joinExpr->getGroupAttr()->outputLogProp(inLP);
  return outputEstLogProp;
}
// This method forms the join expression for join on JBBC specified by jbbcId
// inputEstLogProp should not be cacheable
//
// jbbSubset       - nodes forming the left child of the join
// jbbcId          - JBBC forming the right child of the join
// jbbcLocalPreds  - local predicates passed on when the expression for
//                   jbbcId is obtained
// joinPreds       - predicates set (together with any left join filter
//                   predicates) as selection predicates of the join
// inputEstLogProp - input logical properties; its cacheable flag is
//                   cleared for the duration of this call and restored
//                   before returning
// cacheable       - if TRUE the combined JBBSubset is set in the group
//                   analysis of the join
//
// Returns the new join expression with synthesized logical properties,
// or NULL if no expression could be found for jbbSubset.
Join * AppliedStatMan::formJoinExprForJoinOnJBBC(
          CANodeIdSet jbbSubset,
          CANodeId    jbbcId,
          const ValueIdSet * jbbcLocalPreds,
          const ValueIdSet * joinPreds,
          const EstLogPropSharedPtr& inputEstLogProp,
          const NABoolean cacheable)
{

  NABoolean origInputIsCacheable = inputEstLogProp->isCacheable();
  if(origInputIsCacheable)
  {
    // The assertion used to be given the bare message string. A string
    // literal is a non-NULL pointer and therefore always true, so the
    // check could never fire. Negating the literal keeps the message and
    // makes the assertion fail as intended.
    CCMPASSERT(!"Expecting Non Cacheable Input");
    inputEstLogProp->setCacheableFlag(FALSE);
  }

  RelExpr * jbbcExpr = getExprForCANodeId(jbbcId, inputEstLogProp, jbbcLocalPreds);
  jbbcExpr->getGroupAttr()->outputLogProp(inputEstLogProp);
  RelExpr * jbbSubsetExpr = jbbSubset.jbbcsToJBBSubset()->getPreferredJoin();

  // no preferred join: a single node can still provide the expression
  if(!jbbSubsetExpr)
  {
    if(jbbSubset.entries()==1)
    {
      if(!inputEstLogProp->isCacheable())
      {
        // the expression of the single node is obtained with the input
        // temporarily marked as cacheable
        inputEstLogProp->setCacheableFlag(TRUE);
        jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp);
        inputEstLogProp->setCacheableFlag(FALSE);
      }
      else
        jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp);
    }
    else
    {
      // same always-true string literal problem as above
      CCMPASSERT(!"No Subset expression, need at least one entry in set");
    }
  }

  // Without a left child expression the code below would dereference a
  // NULL pointer. Restore the caller's cacheable flag and give up.
  if(!jbbSubsetExpr)
  {
    inputEstLogProp->setCacheableFlag(origInputIsCacheable);
    return NULL;
  }

  RelExpr * leftChildExpr = jbbSubsetExpr;
  RelExpr * rightChildExpr = jbbcExpr;

  GroupAttributes * galeft = jbbSubsetExpr->getGroupAttr();
  GroupAttributes * garight = jbbcExpr->getGroupAttr();

  // xxx

  JBBC * jbbc = jbbcId.getNodeAnalysis()->getJBBC();
  Join * jbbcParentJoin = jbbc->getOriginalParentJoin();
  ValueIdSet leftOuterJoinFilterPreds;


  // The join type follows the original parent join of the JBBC: semi,
  // anti-semi or left join. Anything else becomes a plain REL_JOIN below.
  Join * joinExpr = NULL;

  if(jbbcParentJoin)
  {
      if(jbbcParentJoin->isSemiJoin())
        joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL);

      if(jbbcParentJoin->isAntiSemiJoin())
        joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL);

      if(jbbcParentJoin->isLeftJoin())
      {
        joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL);
        leftOuterJoinFilterPreds += jbbc->getLeftJoinFilterPreds();
      }

      if(joinExpr)
      {
        joinExpr->setJoinPred(jbbc->getPredsWithPredecessors());

        joinExpr->nullInstantiatedOutput().insert(jbbc->nullInstantiatedOutput());
      }
  }

  if(!joinExpr)
  {
    // now form a JoinExpr with these left and right children.
    joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_JOIN, NULL);
  }

  ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds;
  selPredsAndLOJFilter += (*joinPreds);
  joinExpr->setSelectionPredicates(selPredsAndLOJFilter);

  // set groupAttr of this Join expression
  GroupAttributes * gaJoin = new STMTHEAP GroupAttributes();

  // set required outputs of Join as sum of characteristic
  // outputs of the left and the right children
  ValueIdSet requiredOutputs;

  requiredOutputs.addSet(getPotentialOutputs(jbbSubset));

  requiredOutputs.addSet(getPotentialOutputs(jbbcId));

  gaJoin->setCharacteristicOutputs(requiredOutputs);

  // set JBBSubset for this group, if all estLogProps are cacheable.
  // Else JBBSubset is NULL

  CANodeIdSet combinedSet = jbbSubset;
  combinedSet += jbbcId;

  if (cacheable)
    gaJoin->getGroupAnalysis()->setLocalJBBView(combinedSet.jbbcsToJBBSubset());

  gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) );

  // if there are some probes coming into the join
  // then join type = tsj.
  if ((inputEstLogProp->getResultCardinality() > 1) ||
      (inputEstLogProp->getColStats().entries() > 1))
  {
    if (cacheable)
    {
      // the input properties may have no node set attached; only derive
      // the characteristic inputs when there is one
      CANodeIdSet * inputNodeSet = inputEstLogProp->getNodeSet();
      if (inputNodeSet)
        gaJoin->setCharacteristicInputs(getPotentialOutputs(*inputNodeSet));
    }
  }

  joinExpr->setGroupAttr(gaJoin);
  gaJoin->setLogExprForSynthesis(joinExpr);
  joinExpr->synthLogProp();

  // give the input properties their original cacheable flag back
  inputEstLogProp->setCacheableFlag(origInputIsCacheable);
  return joinExpr;
} // AppliedStatMan::formJoinExprForJoinOnJBBC
// Returns the hash value of the given CANodeIdSet key.
ULng32 AppliedStatMan::hashASM(const CANodeIdSet &key)
{
  const ULng32 hashValue = key.hash();
  return hashValue;
} // AppliedStatMan::hashASM
// This method forms the join expression with the estLogProps.
//
// Both children are represented by Scan expressions whose group
// attributes carry inputEstLogProp as input and the given left / right
// estLogProps as output logical properties.
//
// leftEstLogProp    - output logical properties of the left child
// rightEstLogProp   - output logical properties of the right child
// inputEstLogProp   - input logical properties of both children
// setOfPredicates   - predicates set (together with any left join filter
//                     predicates) as selection predicates of the join
// cacheable         - if TRUE, combinedJBBSubset is set in the group
//                     analysis of the join
// combinedJBBSubset - JBBSubset of the join (used only if cacheable)
//
// Returns the new join expression. Its logical properties are not
// synthesized here.
Join * AppliedStatMan::formJoinExprWithEstLogProps(
					const EstLogPropSharedPtr& leftEstLogProp,
					const EstLogPropSharedPtr& rightEstLogProp,
					const EstLogPropSharedPtr& inputEstLogProp,
					const ValueIdSet * setOfPredicates,
					const NABoolean cacheable,
					JBBSubset * combinedJBBSubset)
{
  // Form a join expression with these estLogProps.

  // form the left child. Since the estLogProps of the left and the
  // right children exist, these can be treated as Scan expressions

  Scan * leftChildExpr = new STMTHEAP Scan();
  GroupAttributes * galeft = new STMTHEAP GroupAttributes();

  // set GroupAttr of the leftChild
  galeft->inputLogPropList().insert(inputEstLogProp);
  galeft->outputLogPropList().insert(leftEstLogProp);
  CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet();

  CANodeId nodeId;

  if (leftNodeSet)
  {
    // a single node that is a table provides the table attributes
    if (leftNodeSet->entries() == 1)
    {
      nodeId = leftNodeSet->getFirst();
      if(nodeId.getNodeAnalysis()->getTableAnalysis())
        leftChildExpr->setTableAttributes(nodeId);
    }
    CostScalar minEstCard = leftNodeSet->getMinChildEstRowCount();

    galeft->setMinChildEstRowCount(minEstCard);
  }

  leftChildExpr->setGroupAttr(galeft);
  galeft->setLogExprForSynthesis(leftChildExpr);

  // form the right child and set its groupAttr
  Scan * rightChildExpr = new STMTHEAP Scan();
  GroupAttributes * garight = new STMTHEAP GroupAttributes();
  garight->inputLogPropList().insert(inputEstLogProp);
  garight->outputLogPropList().insert(rightEstLogProp);
  CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet();

  // xxx

  // set only when the right side consists of exactly one node
  JBBC * singleRightChild = NULL;
  Join * singleRightChildParentJoin = NULL;
  ValueIdSet leftOuterJoinFilterPreds;


  if (rightNodeSet)
  {
    if (rightNodeSet->entries() == 1)
    {
      nodeId = rightNodeSet->getFirst();
      if(nodeId.getNodeAnalysis()->getTableAnalysis())
        rightChildExpr->setTableAttributes(nodeId);

      // remember the JBBC and its original parent join; they decide the
      // join type below. This lookup does not depend on the table
      // analysis check above.
      singleRightChild = nodeId.getNodeAnalysis()->getJBBC();
      if(singleRightChild)
        singleRightChildParentJoin = singleRightChild->getOriginalParentJoin();
    }
    CostScalar minEstCard = rightNodeSet->getMinChildEstRowCount();

    garight->setMinChildEstRowCount(minEstCard);
  }

  rightChildExpr->setGroupAttr(garight);
  garight->setLogExprForSynthesis(rightChildExpr);

  // The join type follows the original parent join of a single right
  // child: semi, anti-semi or left join. Anything else becomes a plain
  // REL_JOIN below.
  Join * joinExpr = NULL;
  if(singleRightChild &&
     singleRightChildParentJoin)
  {
      if(singleRightChildParentJoin->isSemiJoin())
        joinExpr = new STMTHEAP Join(leftChildExpr,
                                     rightChildExpr,
                                     REL_SEMIJOIN,
                                     NULL);

      if(singleRightChildParentJoin->isAntiSemiJoin())
        joinExpr = new STMTHEAP Join(leftChildExpr,
                                     rightChildExpr,
                                     REL_ANTI_SEMIJOIN,
                                     NULL);

      if(singleRightChildParentJoin->isLeftJoin())
      {
        joinExpr = new STMTHEAP Join(leftChildExpr,
                                     rightChildExpr,
                                     REL_LEFT_JOIN,
                                     NULL);
        leftOuterJoinFilterPreds += singleRightChild->getLeftJoinFilterPreds();
      }

      if(joinExpr)
      {
        joinExpr->setJoinPred(singleRightChild->getPredsWithPredecessors());

        joinExpr->nullInstantiatedOutput().insert(singleRightChild->
                                                    nullInstantiatedOutput());
      }
  }

  if(!joinExpr)
  {
    // now form a JoinExpr with these left and right children.
    joinExpr = new STMTHEAP Join(leftChildExpr,  // left child
                                 rightChildExpr, // right child
                                 REL_JOIN,       // join type
                                 NULL);          // join predicates
  }

  ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds;
  selPredsAndLOJFilter += (*setOfPredicates);
  joinExpr->setSelectionPredicates(selPredsAndLOJFilter);

  // set groupAttr of this Join expression
  GroupAttributes * gaJoin = new STMTHEAP GroupAttributes();

  // set required outputs of Join as sum of characteristic
  // outputs of the left and the right children
  ValueIdSet requiredOutputs;

  if (leftNodeSet)
    requiredOutputs.addSet(getPotentialOutputs(*(leftNodeSet)));

  if (rightNodeSet)
    requiredOutputs.addSet(getPotentialOutputs(*(rightNodeSet)));

  gaJoin->setCharacteristicOutputs(requiredOutputs);

  // set JBBSubset for this group, if all estLogProps are cacheable.
  // Else JBBSubset is NULL

  if (cacheable)
    gaJoin->getGroupAnalysis()->setLocalJBBView(combinedJBBSubset);

  gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) );

  // if there are some probes coming into the join
  // then join type = tsj.
  if ((inputEstLogProp->getResultCardinality() > 1) ||
      (inputEstLogProp->getColStats().entries() > 1))
  {
    if (cacheable)
    {
      // the input properties may have no node set attached; only derive
      // the characteristic inputs when there is one
      CANodeIdSet * inputNodeSet = inputEstLogProp->getNodeSet();
      if (inputNodeSet)
        gaJoin->setCharacteristicInputs(getPotentialOutputs(*inputNodeSet));
    }
  }

  // attach the finished group attributes to the join (done once, after
  // the attributes are complete)
  joinExpr->setGroupAttr(gaJoin);
  gaJoin->setLogExprForSynthesis(joinExpr);
  return joinExpr;

} // AppliedStatMan::formJoinExprWithEstLogProps
// Example #15
// 0
// Builds a left linear join tree out of this MultiJoin.
//
// leftDeepJoinSequence - the join order: entry i becomes the right child
//                        of the i-th join counted from the top; the last
//                        entry is the left most child
// joinDirectives       - entry i is applied (setupJoin) to the i-th join
//
// Each join is created by splitting the current multi-join into the
// right child and everything that remains on the left.
//
// Returns the top join of the tree, or NULL if the sequence has fewer
// than two entries (no join can be formed).
Join* MultiJoin::createLeftLinearJoinTree
                   (const NAList<CANodeIdSet> * const leftDeepJoinSequence,
                    NAList<MJJoinDirective *> * joinDirectives) const
{
  Join* result = NULL;

  Join* currentJoin=NULL;

  NABoolean reUseMultiJoins = FALSE;

  //Set of all JBBCs in this multi-join.
  //This set will be broken up to make the join tree
  //representing the substitue.
  //The loop below will construct the join tree,
  //starting from the top join.
  CANodeIdSet childSet = getJBBSubset().getJBBCs();

  // variables used in loop below
  MultiJoin * currentMJoin = (MultiJoin *) this;

  // in an iteration this is the parent join
  // e.g. when we are create JOIN3, this will
  // be JOIN2
  Join * parentJoin = NULL;

#ifdef _DEBUG
  if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON  &&
       CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
  {
// LCOV_EXCL_START - dpm
    CURRCONTEXT_OPTDEBUG->stream() << "Following is left deep join sequence: " << endl;
    CURRCONTEXT_OPTDEBUG->stream() << endl;
// LCOV_EXCL_STOP
  }
#endif

  UInt32 numJoinChildren = leftDeepJoinSequence->entries();

  // A join needs at least two children. With an empty sequence the
  // unsigned expression (numJoinChildren-1) below would wrap around, and
  // with a single entry no join is created and result stays NULL, which
  // would be dereferenced after the loop.
  if (numJoinChildren < 2)
    return NULL;

  for (UInt32 i = 0; i < (numJoinChildren-1); i++)
  {
    //create JBBSubset representing a comprising component of the
    //leftDeepJoinSequence.
    JBBSubset * joinRightChild = ((*leftDeepJoinSequence)[i]).computeJBBSubset();

    MJJoinDirective * joinDirective = (*joinDirectives)[i];

    //remove all tables that will become right side of join
    childSet.remove((*leftDeepJoinSequence)[i]);

#ifdef _DEBUG
    //print the right child of the current join
    if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON  &&
       CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
    {
      CURRCONTEXT_OPTDEBUG->stream() << ((*leftDeepJoinSequence)[i]).getText() << endl; // LCOV_EXCL_LINE - dpm
    }
#endif
    //Get JBBSubset for left side of join
    JBBSubset * joinLeftChild = childSet.computeJBBSubset();

    //create the join by doing a split of the multi-join
    currentJoin = currentMJoin->splitSubset(*joinLeftChild, *joinRightChild, reUseMultiJoins);

    joinDirective->setupJoin(currentJoin);

    if ( CmpCommon::getDefault(COMP_BOOL_120) == DF_OFF)
      currentJoin->updatePotential(-3);

    // if this is the first iteration
    // set the result to be the top join
    if (i == 0)
      result = currentJoin;

    //set the current multi-join to the left child of the
    //join just created
    //change getChild to child().getPointer
    currentMJoin = (MultiJoin*) currentJoin->getChild(0);

    //if there was a parent join, set the left child to
    //point to the new join we just created i.e. currentJoin.
    if (parentJoin)
      parentJoin->setChild(0,currentJoin);

    //set currentJoin to be the parent for the next iteration
    parentJoin = currentJoin;

  }

#ifdef _DEBUG
  //print the left most child
  if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON  &&
       CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
  {
// LCOV_EXCL_START  - dpm
    CURRCONTEXT_OPTDEBUG->stream() << ((*leftDeepJoinSequence)[(numJoinChildren-1)]).getText() << endl;
    CURRCONTEXT_OPTDEBUG->stream() << endl;
// LCOV_EXCL_STOP
  }
#endif

  // end - construct the join tree

  // synth the join
  result->synthLogProp();

  //if the right child of the top-most join is a multi-Join,
  //synthesize_it
  if(result->child(1))
    if(result->child(1)->getOperatorType()==REL_MULTI_JOIN)
      result->child(1)->synthLogProp();

  // synth the left child too
  result->child(0)->synthLogProp();

  return result;

} // MultiJoin::createLeftLinearJoinTree()
// AppliedStatMan::setupASMCacheForJBB
//
// Called from Query::Analyze once connectivity analysis has been done and
// empty logical properties have been set.
//
// Walks the JBBCs of the main JBBSubset of the given JBB twice:
//   1. For each JBBC that has a TableAnalysis and whose original
//      expression is a REL_SCAN, asks getStatsForCANodeId() for the
//      statistics of that single node, unless the scan has no selection
//      predicates and lookup() already reports the node.
//   2. For each JBBC, asks joinJBBChildren() for the two-way join with
//      every JBBC it is joined to, unless getCachedStatistics() already
//      returns an entry for that pair.
// The estimated logical properties returned by those calls are not used
// by this method itself.
void AppliedStatMan::setupASMCacheForJBB(JBB & jbb)
{
  // Receives the result of every statistics call made below.
  EstLogPropSharedPtr latestLogProp;

  // All JBBCs of the JBB.
  const CANodeIdSet jbbcNodeIdSet = jbb.getMainJBBSubset().getJBBCs();

  // ---- Pass 1: single table scans ----
  for (CANodeId scanId = jbbcNodeIdSet.init();
       jbbcNodeIdSet.next(scanId);
       jbbcNodeIdSet.advance(scanId))
  {
    NodeAnalysis * scanNode = scanId.getNodeAnalysis();
    if (!scanNode)
      continue;

    RelExpr * originalExpr = scanNode->getOriginalExpr();

    // Local predicates are evaluated only when the JBBC is a table.
    if ((scanNode->getTableAnalysis() == NULL) ||
        (originalExpr->getOperatorType() != REL_SCAN))
      continue;

    Scan * scanExpr = static_cast<Scan *>(originalExpr);

    ValueIdSet localPreds = scanExpr->getSelectionPredicates();
    const bool hasLocalPreds = (localPreds.entries() > 0);

    // Skip when there are no local predicates and lookup() already
    // finds this node.
    if (!hasLocalPreds && lookup(scanId))
      continue;

    // If the group attributes have not yet been associated with a
    // logical expression for synthesis, synthesize the scan first and
    // only then apply the local predicates.
    if (NOT scanExpr->getGroupAttr()->existsLogExprForSynthesis())
      scanExpr->synthLogProp();

    latestLogProp = getStatsForCANodeId(scanId);
  }

  // ---- Pass 2: two-way joins (looking for join reducers) ----
  for (CANodeId leftId = jbbcNodeIdSet.init();
       jbbcNodeIdSet.next(leftId);
       jbbcNodeIdSet.advance(leftId))
  {
    NodeAnalysis * leftNode = leftId.getNodeAnalysis();
    if (!leftNode)
      continue;

    // Every JBBC joined to this one is a candidate partner.
    CANodeIdSet joinedJBBCs = leftNode->getJBBC()->getJoinedJBBCs();

    for (CANodeId rightId = joinedJBBCs.init();
         joinedJBBCs.next(rightId);
         joinedJBBCs.advance(rightId))
    {
      if (!rightId.getNodeAnalysis())
        continue;

      // ASM does not care about the order of the tables, so the pair
      // may already have been joined when the partner was visited.
      CANodeIdSet pairSet(leftId);
      pairSet.insert(rightId);

      latestLogProp = getCachedStatistics(&pairSet);
      if (latestLogProp == NULL)
      {
        CANodeIdSet leftSet(leftId);
        CANodeIdSet rightSet(rightId);
        latestLogProp = joinJBBChildren(leftSet, rightSet);
      }
    }
  }
} // AppliedStatMan::setupASMCacheForJBB
// AppliedStatMan::getStatsForCANodeId
//
// Returns the estimated logical properties of one JBBC for the given
// input logical properties.
//
//   jbbc      - node whose statistics are requested
//   inLP      - input logical properties; when equal to
//               (*GLOBAL_EMPTY_INPUT_LOGPROP) the JBB input of the node
//               is used instead
//   predIdSet - predicates to apply to the table; NULL means the node's
//               own (modified or original) expression is used as is
//
// When the input is cacheable and predIdSet is NULL, the cache is
// consulted first, keyed by the node plus the node set of the input.
// Otherwise, or on a miss, the properties are synthesized.
EstLogPropSharedPtr AppliedStatMan::getStatsForCANodeId(
					CANodeId jbbc,
					const EstLogPropSharedPtr &inLP,
					const ValueIdSet * predIdSet)
{
  // Replace the global empty input by the JBB input of this node.
  EstLogPropSharedPtr effectiveInLP = inLP;
  if (effectiveInLP == (*GLOBAL_EMPTY_INPUT_LOGPROP))
    effectiveInLP = jbbc.getJBBInput();

  EstLogPropSharedPtr outputEstLogProp = NULL;

  // Step 1: cache lookup. Only attempted when the input properties are
  // cacheable and no explicit predicate set was supplied.
  if ((effectiveInLP->isCacheable()) && (predIdSet == NULL))
  {
    // The key is this jbbc combined with the node set of the input.
    CANodeIdSet cacheKey = jbbc;

    CANodeIdSet * inputNodeSet = effectiveInLP->getNodeSet();

    // Cacheable input properties are expected to carry a node set.
    CCMPASSERT(inputNodeSet != NULL);

    if (inputNodeSet)
    {
      cacheKey.insert(*inputNodeSet);
      outputEstLogProp = getCachedStatistics(&cacheKey);
    }
  }

  // Step 2: nothing came back from the cache, so synthesize.
  if (outputEstLogProp == NULL)
  {
    NodeAnalysis * jbbcNode = jbbc.getNodeAnalysis();
    TableAnalysis * tableAnalysis = jbbcNode->getTableAnalysis();

    if ((tableAnalysis == NULL) || (predIdSet == NULL))
    {
      // Not a table, or no caller-supplied predicates: use the modified
      // expression of the node, falling back to the original one.
      RelExpr * nodeExpr = jbbcNode->getModifiedExpr();
      if (nodeExpr == NULL)
        nodeExpr = jbbcNode->getOriginalExpr();

      // synthesize and cache estLogProp for the given input
      outputEstLogProp = nodeExpr->getGroupAttr()->outputLogProp(effectiveInLP);
    }
    else
    {
      // A table with caller-supplied predicates: build a separate Scan
      // over the same table and apply exactly those predicates to it.
      TableDesc * tableDesc = tableAnalysis->getTableDesc();
      const NATable * naTable = tableDesc->getNATable();

      const QualifiedName & tableName = naTable->getTableName();
      CorrName corrName(tableName, STMTHEAP);

      Scan * scanExpr =
        new STMTHEAP Scan(corrName, tableDesc, REL_SCAN, STMTHEAP);

      // Base cardinality: the estimated row count of the table, unless
      // overridden by a cardinality hint, or (without a hint, for an
      // HBase table) by the row count of the column statistics that
      // getSingleColumnColStats(0) returns.
      Cardinality baseRows = naTable->getEstRowCount();

      const CardinalityHint * cardHint = tableDesc->getCardinalityHint();
      if (cardHint)
      {
        baseRows = (cardHint->getScanCardinality()).getValue();
      }
      else if (naTable->isHbaseTable())
      {
        StatsList * statsList = const_cast<NATable *>(naTable)->getColStats();

        if (statsList && statsList->entries() > 0)
        {
          ColStatsSharedPtr firstColStats =
            statsList->getSingleColumnColStats(0);

          if (firstColStats)
            baseRows = (firstColStats->getRowcount()).getValue();
        }
      }

      scanExpr->setBaseCardinality(MIN_ONE (baseRows));

      // Fresh group attributes that expose the same characteristic
      // outputs as the original expression of the node.
      GroupAttributes * scanGA = new STMTHEAP GroupAttributes();

      scanExpr->setSelectionPredicates(*predIdSet);

      ValueIdSet requiredOutputs =
        jbbcNode->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();

      scanGA->setCharacteristicOutputs(requiredOutputs);

      scanExpr->setGroupAttr(scanGA);
      scanGA->setLogExprForSynthesis(scanExpr);

      // Work on a copy of the input properties flagged as not cacheable.
      // NOTE(review): presumably so that results for a custom predicate
      // set are kept out of the cache - confirm.
      EstLogPropSharedPtr nonCacheableInLP(
        new (HISTHEAP) EstLogProp (*effectiveInLP));
      nonCacheableInLP->setCacheableFlag(FALSE);

      scanExpr->synthLogProp();
      outputEstLogProp =
        scanExpr->getGroupAttr()->outputLogProp(nonCacheableInLP);
    }
  }

  return outputEstLogProp;
} // getStatsForCANodeId