// This method forms the join expression for join on JBBC specified by jbbcId // inputEstLogProp should not be cacheable Join * AppliedStatMan::formJoinExprForJoinOnJBBC( CANodeIdSet jbbSubset, CANodeId jbbcId, const ValueIdSet * jbbcLocalPreds, const ValueIdSet * joinPreds, const EstLogPropSharedPtr& inputEstLogProp, const NABoolean cacheable) { NABoolean origInputIsCacheable = inputEstLogProp->isCacheable(); if(origInputIsCacheable) { inputEstLogProp->setCacheableFlag(FALSE); CCMPASSERT("Expecting Non Cacheable Input"); } RelExpr * jbbcExpr = getExprForCANodeId(jbbcId, inputEstLogProp, jbbcLocalPreds); jbbcExpr->getGroupAttr()->outputLogProp(inputEstLogProp); RelExpr * jbbSubsetExpr = jbbSubset.jbbcsToJBBSubset()->getPreferredJoin(); if(!jbbSubsetExpr) if(jbbSubset.entries()==1) if(!inputEstLogProp->isCacheable()) { inputEstLogProp->setCacheableFlag(TRUE); jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); inputEstLogProp->setCacheableFlag(FALSE); } else jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); else { CCMPASSERT("No Subset expression, need at least one entry in set"); } RelExpr * leftChildExpr = jbbSubsetExpr; RelExpr * rightChildExpr = jbbcExpr; GroupAttributes * galeft = jbbSubsetExpr->getGroupAttr(); GroupAttributes * garight = jbbcExpr->getGroupAttr(); // xxx JBBC * jbbc = jbbcId.getNodeAnalysis()->getJBBC(); Join * jbbcParentJoin = jbbc->getOriginalParentJoin(); ValueIdSet leftOuterJoinFilterPreds; Join * joinExpr = NULL; if(jbbcParentJoin) { if(jbbcParentJoin->isSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL); if(jbbcParentJoin->isAntiSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL); if(jbbcParentJoin->isLeftJoin()) { joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL); leftOuterJoinFilterPreds += jbbc->getLeftJoinFilterPreds(); } if(joinExpr) { joinExpr->setJoinPred(jbbc->getPredsWithPredecessors()); joinExpr->nullInstantiatedOutput().insert(jbbc->nullInstantiatedOutput()); } } if(!joinExpr) { // now form a JoinExpr with these left and right children. joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_JOIN, NULL); } ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds; selPredsAndLOJFilter += (*joinPreds); joinExpr->setSelectionPredicates(selPredsAndLOJFilter); // set groupAttr of this Join expression GroupAttributes * gaJoin = new STMTHEAP GroupAttributes(); // set required outputs of Join as sum of characteristic // outputs of the left and the right children ValueIdSet requiredOutputs; requiredOutputs.addSet(getPotentialOutputs(jbbSubset)); requiredOutputs.addSet(getPotentialOutputs(jbbcId)); gaJoin->setCharacteristicOutputs(requiredOutputs); // set JBBSubset for this group, if all estLogProps are cacheable. // Else JBBSubset is NULL CANodeIdSet combinedSet = jbbSubset; combinedSet += jbbcId; if (cacheable) gaJoin->getGroupAnalysis()->setLocalJBBView(combinedSet.jbbcsToJBBSubset()); gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) ); // if there are some probes coming into the join // then join type = tsj. if ((inputEstLogProp->getResultCardinality() > 1) || (inputEstLogProp->getColStats().entries() > 1)) { if (cacheable) { CANodeIdSet inputNodeSet = *(inputEstLogProp->getNodeSet()); gaJoin->setCharacteristicInputs(getPotentialOutputs(inputNodeSet)); } } joinExpr->setGroupAttr(gaJoin); gaJoin->setLogExprForSynthesis(joinExpr); joinExpr->synthLogProp(); inputEstLogProp->setCacheableFlag(origInputIsCacheable); return joinExpr; } // AppliedStatMan::formJoinExprForJoinOnJBBC
// --------------------------------------------------------------------- // Utility Routine: pickOutputs // // From the given ColStatDescList, populate columnStats_ with column // descriptors that are useful based on the characteristic outputs for // the group. // // Always include in the output the current histograms of the input data, // and, if the histogram is contained in the required output list, then // this is a useful histogram and will also be output. // // --------------------------------------------------------------------- void EstLogProp::pickOutputs( ColStatDescList & columnStats, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet specifiedOutputs, const ValueIdSet predSet) { const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats(); ValueIdSet colsRequiringHistograms = specifiedOutputs; // (i) see if the selection predicates contain any constant value or a // constant expression // (ii) check if there are any columns of this table being joined to some other // columns, which do not appear as characteristics outputs. There should be // histograms available for these columns, as these might be needed later. // This problem was seen for temporary tables created as normal_tables by the // triggers. colsRequiringHistograms.addSet(predSet.getColumnsForHistogram()); colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ; NABoolean colStatDescAdded = FALSE; for (CollIndex i=0; i < columnStats.entries(); i++) { // we probably don't need 'em all, but this is the easiest way to // grab all of the multi-column uec information we'll need later colStats().insertIntoUecList (columnStats.getUecList()) ; colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint()); NABoolean found = FALSE; // Note: The following inserts into a ColStatDescList should not // have to be deep copies. From this point on, ColStatDescs that // describe the output of the calling operator are read-only. ColStatDescSharedPtr colStatDesc = columnStats[i]; // the value-id we're looking for const ValueId columnId = colStatDesc->getVEGColumn() ; for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++) { if (columnId == outerColStatsList[j]->getVEGColumn() OR (CmpCommon::context()->showQueryStats())) { colStats().insert(colStatDesc) ; found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; break ; // jump to next ColStatDesc } } // OK, the valueid doesn't match directly -- but there are still a // couple of things to check in order to verify whether or not we're // interested in keeping the i'th ColStatDesc ... ValueId throwaway ; // used by the second clause below if ( NOT found AND (columnId != NULL_VALUE_ID) AND (colsRequiringHistograms.contains (columnId) OR colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR columnId.isInvolvedInJoinAndConst() OR CmpCommon::context()->showQueryStats() ) ) { colStats().insert(colStatDesc); found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; } if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting()) { // if the column is referenced for histogram, but is // not needed beyond this time , then we shall save its // max freq, which might be used later in costing if this // column is a part of the partitioning key ColStatsSharedPtr stat = colStatDesc->getColStats(); if (!(stat->isVirtualColForHist() ) && NOT found && !(stat->isOrigFakeHist() ) ) { const ValueId col = colStatDesc->getColumn(); ColAnalysis * colAnalysis = col.colAnalysis(); if (colAnalysis) { NAColumn * column = stat->getStatColumns()[0]; if (column->isReferencedForHistogram()) { CostScalar maxFreq = columnStats.getMaxFreq(columnId); colAnalysis->setMaxFreq(maxFreq); colAnalysis->setFinalUec(stat->getTotalUec()); colAnalysis->setFinalRC(stat->getRowcount()); } } } } } // for columnStats.entries() if(!colStatDescAdded && columnStats.entries() > 0) colStats().insert(columnStats[0]) ; } // pickOutputs
// This method forms the join expression with the estLogProps. Join * AppliedStatMan::formJoinExprWithEstLogProps( const EstLogPropSharedPtr& leftEstLogProp, const EstLogPropSharedPtr& rightEstLogProp, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet * setOfPredicates, const NABoolean cacheable, JBBSubset * combinedJBBSubset) { // Form a join expression with these estLogProps. // form the left child. Since the estLogProps of the left and the // right children exist, these can be treated as Scan expressions Scan * leftChildExpr = new STMTHEAP Scan(); GroupAttributes * galeft = new STMTHEAP GroupAttributes(); // set GroupAttr of the leftChild galeft->inputLogPropList().insert(inputEstLogProp); galeft->outputLogPropList().insert(leftEstLogProp); CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet(); CANodeId nodeId; if (leftNodeSet) { if (leftNodeSet->entries() == 1) { nodeId = leftNodeSet->getFirst(); if(nodeId.getNodeAnalysis()->getTableAnalysis()) leftChildExpr->setTableAttributes(nodeId); } CostScalar minEstCard = leftNodeSet->getMinChildEstRowCount(); galeft->setMinChildEstRowCount(minEstCard); } leftChildExpr->setGroupAttr(galeft); galeft->setLogExprForSynthesis(leftChildExpr); // form the right child and set its groupAttr Scan * rightChildExpr = new STMTHEAP Scan(); GroupAttributes * garight = new STMTHEAP GroupAttributes(); garight->inputLogPropList().insert(inputEstLogProp); garight->outputLogPropList().insert(rightEstLogProp); CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet(); // xxx JBBC * singleRightChild = NULL; Join * singleRightChildParentJoin = NULL; ValueIdSet leftOuterJoinFilterPreds; if (rightNodeSet) { if (rightNodeSet->entries() == 1) { nodeId = rightNodeSet->getFirst(); if(nodeId.getNodeAnalysis()->getTableAnalysis()) rightChildExpr->setTableAttributes(nodeId); if(nodeId.getNodeAnalysis()->getJBBC()) { singleRightChild = nodeId.getNodeAnalysis()->getJBBC(); if(singleRightChild) singleRightChildParentJoin = singleRightChild->getOriginalParentJoin(); } } CostScalar minEstCard = rightNodeSet->getMinChildEstRowCount(); garight->setMinChildEstRowCount(minEstCard); } rightChildExpr->setGroupAttr(garight); garight->setLogExprForSynthesis(rightChildExpr); Join * joinExpr = NULL; if(singleRightChild && singleRightChildParentJoin) { if(singleRightChildParentJoin->isSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL); if(singleRightChildParentJoin->isAntiSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL); if(singleRightChildParentJoin->isLeftJoin()) { joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL); leftOuterJoinFilterPreds += singleRightChild->getLeftJoinFilterPreds(); } if(joinExpr) { joinExpr->setJoinPred(singleRightChild->getPredsWithPredecessors()); joinExpr->nullInstantiatedOutput().insert(singleRightChild-> nullInstantiatedOutput()); } } if(!joinExpr) { // now form a JoinExpr with these left and right children. joinExpr = new STMTHEAP Join(leftChildExpr, // left child rightChildExpr, // right child REL_JOIN, // join type NULL); // join predicates } ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds; selPredsAndLOJFilter += (*setOfPredicates); joinExpr->setSelectionPredicates(selPredsAndLOJFilter); // set groupAttr of this Join expression GroupAttributes * gaJoin = new STMTHEAP GroupAttributes(); // set required outputs of Join as sum of characteristic // outputs of the left and the right children ValueIdSet requiredOutputs; if (leftNodeSet) requiredOutputs.addSet(getPotentialOutputs(*(leftNodeSet))); if (rightNodeSet) requiredOutputs.addSet(getPotentialOutputs(*(rightNodeSet))); gaJoin->setCharacteristicOutputs(requiredOutputs); // set JBBSubset for this group, if all estLogProps are cacheable. // Else JBBSubset is NULL if (cacheable) gaJoin->getGroupAnalysis()->setLocalJBBView(combinedJBBSubset); gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) ); joinExpr->setGroupAttr(gaJoin); // if there are some probes coming into the join // then join type = tsj. if ((inputEstLogProp->getResultCardinality() > 1) || (inputEstLogProp->getColStats().entries() > 1)) { if (cacheable) { CANodeIdSet inputNodeSet = *(inputEstLogProp->getNodeSet()); gaJoin->setCharacteristicInputs(getPotentialOutputs(inputNodeSet)); } } joinExpr->setGroupAttr(gaJoin); gaJoin->setLogExprForSynthesis(joinExpr); return joinExpr; } // AppliedStatMan::formJoinExprWithEstLogProps