예제 #1
0
// ---------------------------------------------------------------
// Compute the Group Analysis for this multi join based on its
// children. Verify its consistent with its jbbSubset_
// ---------------------------------------------------------------
void MultiJoin::primeGroupAnalysis()
{
  // Analysis must have passed now that we have a MultiJoin. Otherwise
  // we must have cleanup problem.
  CMPASSERT (QueryAnalysis::Instance()->isAnalysisON());

  // no GB in multi-joins for now
  CMPASSERT (jbbSubset_.getGB() == NULL_CA_ID);

  GroupAnalysis* groupAnalysis = getGroupAnalysis();

  // recursively call the children appending their subtreeTables
  // and parentJBBViews
  JBBSubset* localView =
    new (groupAnalysis->outHeap()) JBBSubset(groupAnalysis->outHeap());
  CANodeIdSet allSubtreeTables;
  Int32 arity = getArity();
  for (Lng32 i = 0; i < arity; i++)
  {
    GroupAnalysis* childAnalysis = child(i).getPtr()->getGroupAnalysis();
	// use children parentViews to build this join local view
    localView->addSubset(*(childAnalysis->getParentJBBView()));
    allSubtreeTables += childAnalysis->getAllSubtreeTables();
  }
  groupAnalysis->setLocalJBBView(localView);
  groupAnalysis->setSubtreeTables(allSubtreeTables);

  // The computed localJBBView should be the same as the MultiJoin subset.
  CMPASSERT (jbbSubset_ == *localView);

  return;
}
EstLogPropSharedPtr AppliedStatMan::getStatsForJBBSubset(
					const JBBSubset & jbbSubset)
{
  CANodeIdSet jbbNodeSet = jbbSubset.getJBBCs();

  // We don't want any group-bys at this stage. 
  CMPASSERT ( (jbbSubset.getGB() == NULL_CA_ID));

  return getStatsForCANodeIdSet(jbbNodeSet);
};
EstLogPropSharedPtr AppliedStatMan::synthesizeLogProp(
				const CANodeIdSet * nodeSet,
				EstLogPropSharedPtr &inLP)
{
  EstLogPropSharedPtr outputEstLogProp;
  CANodeIdSet combinedNodeSetWithInput = *nodeSet;

  if (inLP->isCacheable())
  {
    CANodeIdSet * inNodeSet = inLP->getNodeSet();

    // if inLP are cacheable these should have a nodeSet attached
    // if not, assert in debug mode. In release mode, set the properties
    // as not cacheable. These will then be looked into group attr cache
    if (inNodeSet == NULL)
    {
      CCMPASSERT(inNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
    else
    {
      // check ASM cache for the estLogProps of nodeSet for the given
      // inLP

      combinedNodeSetWithInput.insert(*inNodeSet);
      if ((outputEstLogProp =\
        getCachedStatistics(&combinedNodeSetWithInput)) != NULL)
      return outputEstLogProp;
    }
  }

	if(nodeSet->entries() == 1)
    return getStatsForCANodeId(nodeSet->getFirst(), inLP);

  JBBSubset * jbbSubset = nodeSet->jbbcsToJBBSubset();

  Join * preferredJoin = jbbSubset->getPreferredJoin();

  //CMPASSERT(preferredJoin->isJoinFromMJSynthLogProp());

  outputEstLogProp = preferredJoin->getGroupAttr()->outputLogProp(inLP);

	return outputEstLogProp;
} // AppliedStatMan::synthesizeLogProp
// LCOV_EXCL_START :cnu
EstLogPropSharedPtr AppliedStatMan::joinEstLogProps (
              const EstLogPropSharedPtr& leftEstLogProp,
              const EstLogPropSharedPtr& rightEstLogProp,
              const EstLogPropSharedPtr& inLP)
{
  EstLogPropSharedPtr outputEstLogProp;

  NABoolean cacheable = FALSE;

  CANodeIdSet * inputNodeSet = inLP->getNodeSet();

  // These nodesets could be NULL, if the estLogProps to which they
  // belong are not cacheable

  CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet();
  CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet();

  if ((leftEstLogProp->isCacheable()) &&
     (rightEstLogProp->isCacheable()) &&
     (inLP->isCacheable()) )
  {
    CCMPASSERT(leftNodeSet != NULL);
    CCMPASSERT(rightNodeSet != NULL);
    CCMPASSERT(inputNodeSet != NULL);
    if (leftNodeSet && rightNodeSet && inputNodeSet)
    {
      cacheable = TRUE;
    }
  }

  if (cacheable)
  {
    // check the ASM cache to see if outputEstLogProp for these
    // NodeSets appear for the given inputEstLogProp

    CANodeIdSet combineNodeSet = *leftNodeSet;
    combineNodeSet.insert(*rightNodeSet);

    CANodeIdSet combinedWithInputNodeSet = combineNodeSet;
    combinedWithInputNodeSet.insert(*inputNodeSet);

    outputEstLogProp = getCachedStatistics(&combinedWithInputNodeSet);
    if (outputEstLogProp != NULL)
      return outputEstLogProp;
  }

  JBBSubset * newJBBSubset = NULL;

  ValueIdSet setOfPredicates;

  if  (leftNodeSet && rightNodeSet)
  {
    // join predicates can be obtained from EstLogProp, only
    // if these corresponded to complete set of predicates -
    // all local or complete join. Also, we need a
    // combinedJBBSubset to set in the fake join expression
    // that we will be creating.

    newJBBSubset = leftNodeSet->computeJBBSubset();
    JBBSubset rightJBBSubset = *(rightNodeSet->computeJBBSubset());
    setOfPredicates = newJBBSubset->joinPredsWithOther(rightJBBSubset);

    // Since the properties from this group are cacheable, hence the
    // group attributes for the new join expression should contain
    // the combined JBBsubset of the left and the right children

    newJBBSubset->addSubset(rightJBBSubset);
  }

  // inputEstLogProp would be either empty input estLogProp or from the
  // outer child. If cacheable is TRUE, then newJBBsubset should
  // contain the combined left and the right JBB subset. But if
  // cacheable is FALSE, newJBBsubset should be NULL

  Join * joinExpr = formJoinExprWithEstLogProps(
				      leftEstLogProp,
				      rightEstLogProp,
				      inLP,
				      &setOfPredicates,
				      cacheable,
				      newJBBSubset);

  // Now do the actual synthesis and cache statistics in the cache

  outputEstLogProp = joinExpr->getGroupAttr()->outputLogProp(inLP);
  return outputEstLogProp;
}
Join * AppliedStatMan::formJoinExprWithCANodeSets(
					const CANodeIdSet & leftNodeSet,
					const CANodeIdSet & rightNodeSet,
					EstLogPropSharedPtr& inLP,
					const ValueIdSet * joinPreds,
					const NABoolean cacheable)
{
  EstLogPropSharedPtr leftEstLogProp = NULL;
  EstLogPropSharedPtr rightEstLogProp = NULL;

  CANodeIdSet * inputNodeSet = NULL;
  if (inLP->isCacheable())
  {
    inputNodeSet = inLP->getNodeSet();

    // if inLP are cacheable these should have a nodeSet attached
    // if it is not for some reason, assert in debug mode. In release
    // mode do not look for properties in ASM cache, instead get them
    // from group attr cache.
    if (inputNodeSet == NULL)
    {
      CCMPASSERT(inputNodeSet != NULL);
      inLP->setCacheableFlag(FALSE);
    }
  }

  CANodeIdSet commonNodeSet = leftNodeSet;
  commonNodeSet.intersectSet(rightNodeSet);

  // remove CANodeIds which are common to both left and the right children
  // from the child, whose estLogProps are not cached. If the estLogProps
  // of both children are not cached, then remove it from the child which
  // has a larger CANodeIdSet associated with it.

  CANodeIdSet tempLeftNodeSet = leftNodeSet;
  CANodeIdSet tempRightNodeSet = rightNodeSet;

  if (commonNodeSet.entries() > 0)
  {
    if (lookup(leftNodeSet))
      tempRightNodeSet.subtractSet(commonNodeSet);
    else
      if (lookup(rightNodeSet))
	tempLeftNodeSet.subtractSet(commonNodeSet);
      else
	if (leftNodeSet.entries() > rightNodeSet.entries())
	  tempLeftNodeSet.subtractSet(commonNodeSet);
	else
	  tempRightNodeSet.subtractSet(commonNodeSet);
  }

  // get the estLogProps for the left and the right child.
  // If these are not in the cache, then synthesize them incrementally
  // starting from the left most JBBC in the JBBSubset

  if (inputNodeSet)
  {
    // leftEstLogProp cached?

    CANodeIdSet combinedNodeSetWithInput = tempLeftNodeSet;
    combinedNodeSetWithInput.insert(*inputNodeSet);

    leftEstLogProp = getCachedStatistics(&combinedNodeSetWithInput);

    combinedNodeSetWithInput = tempRightNodeSet;
    combinedNodeSetWithInput.insert(*inputNodeSet);

    rightEstLogProp = getCachedStatistics(&combinedNodeSetWithInput);
  }

  if (leftEstLogProp == NULL)
      leftEstLogProp = synthesizeLogProp(&tempLeftNodeSet, inLP);

  // if the estimate logical properties have been computed for non-cacheable
  // inLP, then these would not contain nodeSet. But we do need the nodeSet
  // to compute potential output values. Hence we shall add this now

  if (!leftEstLogProp->getNodeSet())
  {
    CANodeIdSet * copyLeftNodeSet = new (STMTHEAP) CANodeIdSet (tempLeftNodeSet);
    leftEstLogProp->setNodeSet(copyLeftNodeSet);
  }

  if (rightEstLogProp == NULL)
      rightEstLogProp = synthesizeLogProp(&tempRightNodeSet, inLP);

  if (!rightEstLogProp->getNodeSet())
  {
    CANodeIdSet * copyRightNodeSet = new (STMTHEAP) CANodeIdSet (tempRightNodeSet);
    rightEstLogProp->setNodeSet(copyRightNodeSet);
  }

  // Now form the join expressions with these EstLogProp,
  // inLP and the joinPred will be same as those for which the
  // estLogProp are to be synthesized. Cacheable flag would depend
  // on whether left, right and the outer child are caheable, or
  // if the join is on all columns or not

  // Since the join expression consists of the left and the right
  // JBBSubsets, the JBBSubset for this Join expression would be
  // the superset of left and right JBBSubset

  JBBSubset * combinedSet = leftNodeSet.jbbcsToJBBSubset();
  combinedSet->addSubset(*(rightNodeSet.jbbcsToJBBSubset()));

  // Now form the join expressions with these EstLogProp,
  // inLP and the joinPred will be same as those for which the
  // estLogProp are to be synthesized. Cacheable flag would depend
  // on whether left, right and the outer child are ccaheable, or
  // if the join is on all columns or not

  return formJoinExprWithEstLogProps(leftEstLogProp, rightEstLogProp,
			    inLP, joinPreds, cacheable, combinedSet);



} // AppliedStatMan::formJoinExprWithCANodeSets