// Collects the values that the given JBBCs can potentially produce.
//
// For every node in jbbcsNodeSet that has a NodeAnalysis:
//   - no original parent join, or an inner non-semi parent join:
//     the characteristic outputs of the JBBC's original expression;
//   - left join parent: the parent join's null-instantiated outputs;
//   - any other kind of parent join: nothing is contributed.
// Nodes without a NodeAnalysis are skipped.
ValueIdSet AppliedStatMan::getPotentialOutputs(
     const CANodeIdSet & jbbcsNodeSet)
{
  ValueIdSet result;

  for (CANodeId nodeId = jbbcsNodeSet.init();
       jbbcsNodeSet.next(nodeId);
       jbbcsNodeSet.advance(nodeId))
  {
    NodeAnalysis * nodeAnalysis = nodeId.getNodeAnalysis();
    if (!nodeAnalysis)
      continue;

    const Join * parentJoin =
      nodeAnalysis->getJBBC()->getOriginalParentJoin();

    // Remains empty when the parent join is neither an inner non-semi
    // join nor a left join.
    ValueIdSet nodeOutputs;

    if (!parentJoin || parentJoin->isInnerNonSemiJoin())
    {
      nodeOutputs = nodeAnalysis->getOriginalExpr()->
                      getGroupAttr()->getCharacteristicOutputs();
    }
    else if (parentJoin->isLeftJoin())
    {
      nodeOutputs = parentJoin->nullInstantiatedOutput();
    }

    result.insert(nodeOutputs);
  }

  return result;
} // AppliedStatMan::getPotentialOutputs
// LCOV_EXCL_START - cnu CostScalar TableDesc::getBaseRowCntIfUniqueJoinCol(const ValueIdSet &joinedCols) { // get the joining columns for this table ValueIdList userColumns; // get All user columns for this table; getUserColumnList(userColumns); ValueIdSet userColumnSet(userColumns); ValueIdSet joinedColsCopy(joinedCols); ValueIdSet thisTableJoinCols = joinedColsCopy.intersect(userColumnSet); if (thisTableJoinCols.isEmpty() ) return csMinusOne; CostScalar baseRowCount = csMinusOne; if (thisTableJoinCols.doColumnsConstituteUniqueIndex(this) ) baseRowCount = tableColStats()[0]->getColStats()->getRowcount(); return baseRowCount; } // TableDesc::getBaseRowCntIfUniqueJoinCol
// For every value id in 'marks' that has an entry according to
// Generator::getMapInfoAsIs(), calls codeGenerated() on that entry.
// Value ids without an entry are ignored.
void generateMarkedEntries(Generator *generator, ValueIdSet &marks)
{
  ValueId markedId = marks.init();
  while (marks.next(markedId))
  {
    MapInfo *entry = generator->getMapInfoAsIs(markedId);
    if (entry)
      entry->codeGenerated();

    marks.advance(markedId);
  }
}
// PhysSequence::computeHistoryAttributes // // Helper function to compute the attribute for the history buffer based // on the items projected from the child and the computed history items. // Also, adds the attribute information the the map table. // void PhysSequence::computeHistoryAttributes(Generator *generator, MapTable *localMapTable, Attributes **attrs, const ValueIdSet &historyIds) const { // Get a local handle on some of the generator objects. // CollHeap *wHeap = generator->wHeap(); // Populate the attribute vector with the flattened list of sequence // functions and/or sequence function arguments that must be in the // history row. Add convert nodes for the items that are not sequence // functions to force them to be moved into the history row. // if(NOT historyIds.isEmpty()) { Int32 i = 0; ValueId valId; for (valId = historyIds.init(); historyIds.next(valId); historyIds.advance(valId)) { // If this is not a sequence function, then insert a convert // node. // if(!valId.getItemExpr()->isASequenceFunction()) { // Get a handle on the original expression and erase // the value ID. // ItemExpr *origExpr = valId.getItemExpr(); origExpr->setValueId(NULL_VALUE_ID); origExpr->markAsUnBound(); // Construct the cast expression with the original expression // as the child -- must have undone the child value ID to // avoid recursion later. // ItemExpr *castExpr = new(wHeap) Cast(origExpr, &(valId.getType())); // Replace the expression for the original value ID and the // synthesize the types and value ID for the new expression. // valId.replaceItemExpr(castExpr); castExpr->synthTypeAndValueId(TRUE); } attrs[i++] = (generator->addMapInfoToThis(localMapTable, valId, 0))->getAttr(); } } } // PhysSequence::computeHistoryAttributes
// Returns TRUE if at least one key column of the clustering index maps to
// an NAColumn that is an identity column; FALSE otherwise.
NABoolean TableDesc::hasIdentityColumnInClusteringKey() const
{
  ValueIdSet clusteringKeyCols = clusteringIndex_->getIndexKey();

  ValueId keyColId = clusteringKeyCols.init();
  while (clusteringKeyCols.next(keyColId))
  {
    NAColumn *naCol = keyColId.getNAColumn();
    if (naCol && naCol->isIdentityColumn())
      return TRUE;

    clusteringKeyCols.advance(keyColId);
  }

  return FALSE;
}
// Returns the local predicates recorded in this table's TableAnalysis.
// If no TableAnalysis exists, an empty set is returned.
ValueIdSet TableDesc::getLocalPreds()
{
  ValueIdSet result;

  // The information comes from TableAnalysis, when there is one.
  if (const TableAnalysis * analysis = getTableAnalysis())
    result = analysis->getLocalPreds();

  return result;
}
void NormWA::locateVEGRegionAndMarkToBeMergedRecursively(const ValueId & vid) { VEGRegion* toBeMergedRegion = locateVEGRegionAndMarkToBeMerged(vid); if (toBeMergedRegion) { ValueIdSet nullInstValues; toBeMergedRegion->gatherInstantiateNullMembers(nullInstValues); for (ValueId exprId = nullInstValues.init(); nullInstValues.next(exprId); nullInstValues.advance(exprId)) { locateVEGRegionAndMarkToBeMergedRecursively(exprId); } } }
// Writes a textual dump of the query analysis to "<fileNamePrefix_>.analysis".
//
// The dump consists of the unparsed expression, the QueryAnalysis text, and
// a "Join Predicates" section that lists, for every JBB and each of its
// JBBCs, the text of the JBBC's join predicates followed by their value ids.
void MvQueryRewriteHandler::dumpAnalysisToFile(QueryAnalysis* qa, RelExpr* expr)
{
  NAString analysisFileName = fileNamePrefix_ + ".analysis";

  NAString text;
  expr->unparse(text, OPTIMIZER_PHASE, MVINFO_FORMAT);
  text += "\n";
  text += qa->getText();

  // Add in some stuff to look at join predicates for the JBBCs.
  text += "Join Predicates\n";
  text += "===============";

  char numBuf[20];   // scratch space for str_itoa()
  ARRAY(JBB*) allJbbs = qa->getJBBs();
  for (CollIndex jbbIdx = 0; jbbIdx < allJbbs.entries(); jbbIdx++)
  {
    JBB* currentJbb = allJbbs[jbbIdx];

    str_itoa(jbbIdx, numBuf);
    text += "\nJBB #";
    text += NAString(numBuf);
    text += ":\n";

    CANodeIdSet jbbcIds = currentJbb->getJBBCs();
    for (CANodeId jbbcId = jbbcIds.init();
         jbbcIds.next(jbbcId);
         jbbcIds.advance(jbbcId))
    {
      str_itoa(jbbcId, numBuf);
      text += "\nJBBC with CANodeId ";
      text += NAString(numBuf);
      text += ":\n";

      ValueIdSet joinPreds =
        jbbcId.getNodeAnalysis()->getJBBC()->getJoinPreds();
      text += valueIdSetGetText(joinPreds);

      if (joinPreds.entries() > 0)
      {
        // Comma-separated list of the predicates' value ids.
        text.append("\n(value ids of predicates are ");
        NABoolean needSeparator = FALSE;
        for (ValueId predId = joinPreds.init();
             joinPreds.next(predId);
             joinPreds.advance(predId))
        {
          if (needSeparator)
            text.append(", ");
          else
            needSeparator = TRUE;

          str_itoa(predId, numBuf);
          text.append(numBuf);
        }
        text.append(")\n");
      }
    }
    text += '\n';
  }

  dumpToFile(analysisFileName.data(), text.data());
}  // dumpAnalysisToFile()
// Fills nonKeyColumnSet with the index columns that are not key columns.
//
// Any previous content of nonKeyColumnSet is discarded. For a key column
// whose expression is an ITM_INDEXCOLUMN, the value id of its definition
// is removed from the result as well.
void IndexDesc::getNonKeyColumnSet(ValueIdSet& nonKeyColumnSet) const
{
  const ValueIdList &allCols = getIndexColumns();
  const ValueIdList &keyCols = getIndexKey();

  // Start from a clean slate.
  nonKeyColumnSet.clear();

  // First take every index column ...
  for (CollIndex c = 0; c < allCols.entries(); c++)
    nonKeyColumnSet.insert(allCols[c]);

  // ... then take the key columns out again.
  for (CollIndex k = 0; k < keyCols.entries(); k++)
  {
    nonKeyColumnSet.remove(keyCols[k]);

    // If this is a secondary index, the base column which is part of the
    // index may also be present; remove it, too.
    const ItemExpr *keyExpr = keyCols[k].getItemExpr();
    if (keyExpr->getOperatorType() != ITM_INDEXCOLUMN)
      continue;

    const ValueId & baseDef = ((IndexColumn *)(keyExpr))->getDefinition();
    nonKeyColumnSet.remove(baseDef);
  }
} // IndexDesc::getNonKeyColumnSet(ValueIdSet& nonKeyColumnSet) const
// Computes the values this MultiJoin can potentially produce.
//
// outputValues is cleared first. Each child then contributes either
//   - the null-instantiated outputs of its JBBC, when the JBBC's parent is
//     a left join, or
//   - the characteristic outputs of the child's group otherwise.
//
// Note: the previous version also copied jbbSubset_.getJBBCs() into a local
// set that was never read; that needless copy has been removed.
void MultiJoin::getPotentialOutputValues(ValueIdSet & outputValues) const
{
  outputValues.clear();

  const Int32 arity = getArity();
  for (Int32 i = 0; i < arity; i++)
  {
    JBBC * jbbci =
      child(i)->getGroupAnalysis()->getNodeAnalysis()->getJBBC();

    if (jbbci->parentIsLeftJoin())
      outputValues.insertList(jbbci->nullInstantiatedOutput());
    else
      // Default implementation is good enough for innerNonSemi multi join
      outputValues += child(i).getGroupAttr()->getCharacteristicOutputs();
  }
} // MultiJoin::getPotentialOutputValues()
void PhysSequence::seperateReadAndReturnItems( //ValueIdSet & readPhaseSet, //ValueIdSet & returnPhaseSet, CollHeap *wHeap) { ValueIdSet outputFromChild = child(0)->getGroupAttr()->getCharacteristicOutputs(); ValueIdSet seqFuncs = sequenceFunctions(); for(ValueId valId = seqFuncs.init(); seqFuncs.next(valId); seqFuncs.advance(valId)) { computeReadNReturnItems(valId, valId, //sequenceFunctions(), //returnSeqFunctions(), outputFromChild, wHeap); } }
void HbaseSearchSpec::addColumnNames(const ValueIdSet& vs) { // TEMP TEMP. Not all needed column names are being set up. // for now, return without populating result. // that will cause all columns to be retrieved. //return; for (ValueId vid = vs.init(); vs.next(vid); vs.advance(vid)) { ItemExpr* ie = vid.getItemExpr(); NAString colName; if ( ie->getOperatorType() == ITM_BASECOLUMN ) { colName = ((BaseColumn*)ie)->getColName(); } else if ( ie->getOperatorType() == ITM_INDEXCOLUMN ) { colName = ((IndexColumn*)ie)->getNAColumn()->getIndexColName(); } if (NOT colNames_.contains(colName)) colNames_.insert(colName); } }
// ----------------------------------------------------------------------- // MultJoin::recomputeOuterReferences() // ----------------------------------------------------------------------- void MultiJoin::recomputeOuterReferences() { // --------------------------------------------------------------------- // Delete all those input values that are no longer referenced on // this operator. // --------------------------------------------------------------------- if (NOT getGroupAttr()->getCharacteristicInputs().isEmpty()) { ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs(); // Weed out those expressions not needed by my selectionPred and joinPred // xxx instead of taking this from getLocalJoinPreds, should I take it // from MultiJoin selectionPred??? refer to getLocalJoinPreds definition // and consider preds that referencing inputs!!! ValueIdSet exprSet = jbbSubset_.getLocalJoinPreds(); // from JbbSubsetAnalysis // Need to include LocalDependentPreds later when supported. Ok now for inner MultiJoins exprSet.weedOutUnreferenced(outerRefs); // Add back those expressiones needed by my children Int32 arity = getArity(); // outputs produced by JBBCs in this MultiJoin ValueIdSet jbbcOutputs; for (Int32 i = 0; i < arity; i++) { outerRefs += child(i)->getGroupAttr()->getCharacteristicInputs(); jbbcOutputs += child(i)->getGroupAttr()->getCharacteristicOutputs(); // these inputs are provided by jbbcs in this MultiJoin } // account for TSJs i.e. values flowing from // one jbbc to another within this MultiJoin outerRefs -= jbbcOutputs; getGroupAttr()->setCharacteristicInputs(outerRefs); } return; } // MultiJoin::recomputeOuterReferences()
// Is there any column which has a local predicates and no stats NABoolean TableDesc::isAnyHistWithPredsFakeOrSmallSample(const ValueIdSet &localPreds) { // if there are no local predicates return FALSE; if (localPreds.isEmpty()) return FALSE; const ColStatDescList & colStatsList = getTableColStats(); // for each predicate, check to see if stats exist for (ValueId id = localPreds.init(); localPreds.next(id); localPreds.advance(id)) { ColStatsSharedPtr colStats = colStatsList.getColStatsPtrForPredicate(id); if (colStats == NULL) return FALSE; if (colStats->isOrigFakeHist() || colStats->isSmallSampleHistogram()) return TRUE; } return FALSE; }
// this method sets the primary key columns. It goes through all the columns // of the table, and collects the columns which are marked as primary keys void TableDesc::setPrimaryKeyColumns() { ValueIdSet primaryColumns; for ( CollIndex j = 0 ; j < colList_.entries() ; j++ ) { ValueId valId = colList_[j]; NAColumn *column = valId.getNAColumn(); if ( column->isPrimaryKey() ) { primaryColumns.insert(valId) ; // mark column as referenced for histogram, as we may need its histogram // during plan generation if ((column->isUserColumn() || column->isSaltColumn() ) && (column->getNATable()->getSpecialType() == ExtendedQualName::NORMAL_TABLE) ) column->setReferencedForMultiIntHist(); } } primaryKeyColumns_ = primaryColumns; }
// TableDesc::isKeyIndex() // Parameter is an secondary index on the table. Table checks to see // if the keys of the secondary index is built using the primary key // of the table. If it is return true otherwise false. NABoolean TableDesc::isKeyIndex(const IndexDesc * idesc) const { ValueIdSet pKeyColumns = clusteringIndex_->getIndexKey(); ValueIdSet indexColumns = idesc->getIndexKey(); ValueIdSet basePKeys=pKeyColumns.convertToBaseIds(); for(ValueId id = indexColumns.init(); indexColumns.next(id); indexColumns.advance(id)) { ValueId baseId = ((BaseColumn *)(((IndexColumn *)id.getItemExpr())-> getDefinition().getItemExpr()))->getValueId(); if(NOT basePKeys.contains(baseId)) { return FALSE; } } return TRUE; }
void PhysSequence::computeReadNReturnItems( ValueId topSeqVid, ValueId vid, const ValueIdSet &outputFromChild, CollHeap *wHeap) { ItemExpr * itmExpr = vid.getItemExpr(); if (outputFromChild.contains(vid)) { return; } //test if itm_minus and then if negative offset .... if ( itmExpr->getOperatorType() == ITM_OFFSET && ((ItmSeqOffset *)itmExpr)->getOffsetConstantValue() < 0) { readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += itmExpr->child(0)->castToItemExpr()->getValueId(); return; } if (itmExpr->getOperatorType() == ITM_MINUS) { ItemExpr * chld0 = itmExpr->child(0)->castToItemExpr(); if ( chld0->getOperatorType() == ITM_OFFSET && ((ItmSeqOffset *)chld0)->getOffsetConstantValue() <0) { readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += chld0->child(0)->castToItemExpr()->getValueId(); ItemExpr * chld1 = itmExpr->child(1)->castToItemExpr(); if (chld1->getOperatorType() == ITM_OFFSET && ((ItmSeqOffset *)chld1)->getOffsetConstantValue() < 0) { readSeqFunctions() += chld1->child(0)->castToItemExpr()->getValueId(); } else { readSeqFunctions() += chld1->getValueId(); } return; } } if (itmExpr->getOperatorType() == ITM_OLAP_MIN || itmExpr->getOperatorType() == ITM_OLAP_MAX) { ItmSeqOlapFunction * olap = (ItmSeqOlapFunction *)itmExpr; if (olap->getframeEnd()>0) { readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; ItemExpr *newChild = new(wHeap) Convert (itmExpr->child(0)->castToItemExpr()); newChild->synthTypeAndValueId(TRUE); itmExpr->child(0) = newChild; readSeqFunctions() += newChild->getValueId(); return; } } if (itmExpr->getOperatorType() == ITM_SCALAR_MIN || itmExpr->getOperatorType() == ITM_SCALAR_MAX) { ItemExpr * chld0 = itmExpr->child(0)->castToItemExpr(); ItemExpr * chld1 = itmExpr->child(1)->castToItemExpr(); if ((chld0->getOperatorType() == ITM_OLAP_MIN && chld1->getOperatorType() == ITM_OLAP_MIN )|| (chld0->getOperatorType() == ITM_OLAP_MAX && 
chld1->getOperatorType() == ITM_OLAP_MAX )) { ItmSeqOlapFunction * olap0 = (ItmSeqOlapFunction *)chld0; ItmSeqOlapFunction * olap1 = (ItmSeqOlapFunction *)chld1; if ( olap1->getframeEnd()>0) { CMPASSERT(olap0->getframeEnd()==0); readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += olap0->getValueId(); ItemExpr *newChild = new(wHeap) Convert (olap1->child(0)->castToItemExpr()); newChild->synthTypeAndValueId(TRUE); olap1->child(0) = newChild; readSeqFunctions() += newChild->getValueId(); } else { CMPASSERT(olap1->getframeEnd()==0); readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += olap1->getValueId(); ItemExpr *newChild = new(wHeap) Convert (olap0->child(0)->castToItemExpr()); newChild->synthTypeAndValueId(TRUE); olap0->child(0) = newChild; readSeqFunctions() += newChild->getValueId(); } return; } } for (Int32 i= 0 ; i < itmExpr->getArity(); i++) { ItemExpr * chld= itmExpr->child(i); computeReadNReturnItems(topSeqVid, chld->getValueId(), outputFromChild, wHeap); } }//void PhysSequence::computeReadNReturnItems(ItemExpr * other)
/********************************************************************
* Input: Selection predicates for the scan node, boolean indicating if
* it is an indexOnlyIndex, reference parameter that will indicate if
* IndexJoin is viable or not, GroupAttributes for the group and
* characteristic inputs.
* Output: MdamFlag indicating if the index key access is good enough for
* MDAM access (if an index does not have good MDAM access we have to
* scan the whole index because single subset also will not have any
* keys to apply).
* selectivityEnum (out) is set to INDEX_ONLY_INDEX for an index-only
* index; otherwise to INDEX_JOIN_VIABLE, or to EXCEEDS_BT_SCAN when the
* estimated index join cost exceeds the base table access.
********************************************************************/
MdamFlags IndexDesc::pruneMdam(const ValueIdSet& preds,
                               NABoolean indexOnlyIndex,
                               IndexJoinSelectivityEnum& selectivityEnum /* out*/ ,
                               const GroupAttributes * groupAttr,
                               const ValueIdSet * inputValues) const
{
  CollIndex numEmptyColumns = 0;
  CostScalar numSkips = csOne;
  ValueIdSet emptyColumns;
  ValueId vid;

  if (indexOnlyIndex)
    selectivityEnum = INDEX_ONLY_INDEX;
  else
    selectivityEnum = INDEX_JOIN_VIABLE;

  if (preds.isEmpty())
    return MDAM_OFF;

  // Count the leading key columns that are not referenced by any predicate.
  for (CollIndex k = 0; k < indexKey_.entries(); k++)
  {
    if (preds.referencesTheGivenValue(indexKey_[k], vid))
      break;
    numEmptyColumns++;
  }

  // If we don't have any empty columns and we don't have to evaluate
  // whether an index join is promising, there is nothing to estimate.
  if (numEmptyColumns == 0 AND indexOnlyIndex)
    return MDAM_ON;

  IndexDescHistograms ixHistogram(*this,
       (indexOnlyIndex ? numEmptyColumns : indexKey_.entries()));

  NABoolean multiColUecAvail = ixHistogram.isMultiColUecInfoAvail();
  ColumnOrderList keyPredsByCol(indexKey_);

  // Estimate the number of MDAM skips caused by the empty leading columns.
  for (CollIndex j = 0; j < numEmptyColumns; j++)
  {
    emptyColumns.insert(indexKey_[j]);

    if (j == 0 OR multiColUecAvail == FALSE)
    {
      // no MCUec so just multiply the empty columns UEC count to
      // calculate MDAM skips
      numSkips *= (ixHistogram.getColStatsForColumn(indexKey_[j])).
                    getTotalUec().getCeiling();
      continue;
    }

    // Otherwise try to use MCUec. First see if there is a multi-column
    // UEC count for the columns skipped so far; if there is, that is the
    // number of skips.
    CostScalar combinedUECCount =
      ixHistogram.getUecCountForColumns(emptyColumns);

    if (combinedUECCount > 0)
    {
      numSkips = combinedUECCount;
      continue;
    }

    // If there isn't, get the best estimate of the UEC count for the
    // current column using MCUec if possible, otherwise just use the
    // single column histogram.
    CostScalar correctUec = csOne;
    NABoolean uecFound =
      ixHistogram.estimateUecUsingMultiColUec(keyPredsByCol, j, correctUec);

    if (uecFound == TRUE)
    {
      numSkips *= correctUec;
    }
    else
    {
      numSkips *= (ixHistogram.getColStatsForColumn(indexKey_[j])).
                    getTotalUec().getCeiling();
    }
  }

  CostScalar rowCount = ixHistogram.getRowCount();
  CostScalar numIndexBlocks = rowCount / getEstimatedRecordsPerBlock();
  CostScalar numProbes = csOne;
  CostScalar numBaseTableBlocks = csOne;
  CostScalar inputProbes = csOne;

  // Pass any selectivity hint provided by the user
  const SelectivityHint * selHint = tableDesc_->getSelectivityHint();
  const CardinalityHint * cardHint = tableDesc_->getCardinalityHint();

  // If it is an index join then compute the number of probes into the base
  // table. If the alternate index is not selective enough, we will have
  // lots of them, making the index quite expensive.
  if (NOT indexOnlyIndex)
  {
    if ((groupAttr->getInputLogPropList()).entries() > 0)
    {
      // There are incoming probes to the index, i.e. the index join is
      // under another nested join or TSJ, so compute the result for all
      // probes. We are using the initial inputEstLogProp to compute the
      // resulting cardinality. It is possible that for the same group a
      // different inputEstLogProp would provide less row count per probe.
      // So in FileScanRule::nextSubstitute() we make sure that the context
      // inputEstLogProp is in the error range of this inputEstLogProp.
      // Ex. select * from lineitem, customer, nation
      //     where l_custkey < c_custkey and c_custkey = n_nationkey;
      // Now if we were evaluating lineitem indexes where the outer was
      // customer we would want to exclude the alternate index on custkey,
      // whereas if nation got pushed below customer then the range of
      // values would be fewer and the max value being less would make the
      // alternate index on custkey quite attractive.
      ixHistogram.
        applyPredicatesWhenMultipleProbes(preds,
                                          *((groupAttr->getInputLogPropList())[0]),
                                          *inputValues,
                                          TRUE,
                                          selHint,
                                          cardHint,
                                          NULL,
                                          REL_SCAN);
      inputProbes =
        MIN_ONE((groupAttr->getInputLogPropList())[0]->getResultCardinality());
    }
    else
    {
      RelExpr * dummyExpr = new (STMTHEAP) RelExpr(ITM_FIRST_ITEM_OP,
                                                   NULL,
                                                   NULL,
                                                   STMTHEAP);
      ixHistogram.applyPredicates(preds, *dummyExpr, selHint, cardHint, REL_SCAN);
    }

    numProbes = ixHistogram.getRowCount();
    numBaseTableBlocks = rowCount / tableDesc_->getClusteringIndex()->
                                      getEstimatedRecordsPerBlock();
    double readAhead = CURRSTMT_OPTDEFAULTS->readAheadMaxBlocks();

    // although we compute cardinality from the index for all probes we
    // do the comparison per probe. The assumption is that per probe
    // the upper bound of cost is scanning the whole base table.
    if (numProbes/inputProbes + MINOF((numIndexBlocks / readAhead),numSkips)
        > (numBaseTableBlocks/readAhead))
    {
      selectivityEnum = EXCEEDS_BT_SCAN;
    }
  }

  // Does the number of skips exceed the cost of scanning the index
  // (index-only case) or the base table (index join case)?
  if (indexOnlyIndex)
  {
    if (numSkips <=
        (numIndexBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
      return MDAM_ON;
  }
  else
  {
    if (numSkips + numProbes/inputProbes <=
        (numBaseTableBlocks * CURRSTMT_OPTDEFAULTS->mdamSelectionDefault()))
      return MDAM_ON;
  }

  return MDAM_OFF;
}
// getHistoryAttributes // // Helper function that traverses the set of root sequence functions // supplied by the compiler and constructs the set of all of the // attributes that must be materialized in the history row. // void PhysSequence::getHistoryAttributes(const ValueIdSet &sequenceFunctions, const ValueIdSet &outputFromChild, ValueIdSet &historyAttributes, NABoolean addConvNodes, CollHeap *wHeap, ValueIdMap *origAttributes) const { if(addConvNodes && !origAttributes) { origAttributes = new (wHeap) ValueIdMap(); } ValueIdSet children; for(ValueId valId = sequenceFunctions.init(); sequenceFunctions.next(valId); sequenceFunctions.advance(valId)) { if(valId.getItemExpr()->isASequenceFunction()) { ItemExpr *itmExpr = valId.getItemExpr(); switch(itmExpr->getOperatorType()) { // The child needs to be in the history row. // case ITM_OFFSET: case ITM_ROWS_SINCE: case ITM_THIS: case ITM_NOT_THIS: // If the child needs to be in the history buffer, then // add a Convert node to force the value to be moved to the // history buffer. if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); break; // The sequence function needs to be in the history row. // case ITM_RUNNING_SUM: case ITM_RUNNING_COUNT: case ITM_RUNNING_MIN: case ITM_RUNNING_MAX: case ITM_LAST_NOT_NULL: historyAttributes += itmExpr->getValueId(); break; /* // after PhysSequence precode gen OLAP sum and count are already transform,ed into running // this is used during optimization phase-- case ITM_OLAP_SUM: case ITM_OLAP_COUNT: case ITM_OLAP_RANK: case ITM_OLAP_DRANK: if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); //historyAttributes += itmExpr->getValueId(); break; */ // The child and sequence function need to be in the history row. 
// case ITM_OLAP_MIN: case ITM_OLAP_MAX: case ITM_MOVING_MIN: case ITM_MOVING_MAX: // If the child needs to be in the history buffer, then // add a Convert node to force the value to be moved to the // history buffer. if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); historyAttributes += itmExpr->getValueId(); break; case ITM_RUNNING_CHANGE: if (itmExpr->child(0)->getOperatorType() == ITM_ITEM_LIST) { // child is a multi-valued expression // ExprValueId treePtr = itmExpr->child(0); ItemExprTreeAsList changeValues(&treePtr, ITM_ITEM_LIST, RIGHT_LINEAR_TREE); CollIndex nc = changeValues.entries(); ItemExpr *newChild = NULL; if(addConvNodes) { newChild = addConvNode(changeValues[nc-1], origAttributes, wHeap); historyAttributes += newChild->getValueId(); } else { historyAttributes += changeValues[nc-1]->getValueId(); } // add each item in the list // for (CollIndex i = nc; i > 0; i--) { if(addConvNodes) { ItemExpr *conv = addConvNode(changeValues[i-1], origAttributes, wHeap); newChild = new(wHeap) ItemList(conv, newChild); newChild->synthTypeAndValueId(TRUE); historyAttributes += conv->getValueId(); } else { historyAttributes += changeValues[i-1]->getValueId(); } } if(addConvNodes) { itmExpr->child(0) = newChild; } } else { // If the child needs to be in the history buffer, then // add a Convert node to force the value to be moved to the // history buffer. if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); } historyAttributes += itmExpr->getValueId(); break; default: CMPASSERT(0); } } // Gather all the children, and if not empty, recurse down to the // next level of the tree. 
// for(Lng32 i = 0; i < valId.getItemExpr()->getArity(); i++) { if (!outputFromChild.contains(valId.getItemExpr()->child(i)->getValueId())) //!valId.getItemExpr()->child(i)->nodeIsPreCodeGenned()) { children += valId.getItemExpr()->child(i)->getValueId(); } } } if (NOT children.isEmpty()) { getHistoryAttributes( children, outputFromChild, historyAttributes, addConvNodes, wHeap, origAttributes); } } // PhysSequence::getHistoryAttributes
// ------------------------------------------------------------------------------ // create my colStats based on my child's output, by converting the columns to // that of mine // ------------------------------------------------------------------------------ void EstLogProp::mapOutputsForUpdate(const GenericUpdate & updateExpr, const ValueIdMap & updateSelectValueIdMap) { TableDesc * updateTable = updateExpr.getTableDesc(); for ( CollIndex i = 0; i < colStats().entries(); i++ ) { ColStatDescSharedPtr colStatPtr = (colStats())[i]; const ValueId columnId = colStatPtr->getVEGColumn(); ValueId updateColVEGOutputId; updateSelectValueIdMap.mapValueIdUp(updateColVEGOutputId, columnId); ValueId updateBaseColumnId; if (updateColVEGOutputId != columnId) { updateBaseColumnId = updateColVEGOutputId; ValueIdSet baseColumns; updateColVEGOutputId.getItemExpr()->findAll( ITM_BASECOLUMN, baseColumns, TRUE, TRUE ); // from all the columns extracted, get the one for Insert table TableDesc * thisTable = NULL; for (ValueId column = baseColumns.init(); baseColumns.next(column); baseColumns.advance(column) ) { ItemExpr * columnExpr = column.getItemExpr(); thisTable = ((BaseColumn *)columnExpr)->getTableDesc(); if (thisTable == updateTable) { // set my column as the base column updateBaseColumnId = column; break; } } ColStatsSharedPtr inColStats = colStatPtr->getColStats(); ColStatsSharedPtr colStatsForUpdate(new (STMTHEAP) ColStats (*inColStats,STMTHEAP)); colStatsForUpdate->setStatColumn(updateBaseColumnId.getNAColumn()); // use this ColStat to generate new ColStat corresponding to the char output // of the Update expression ColStatDescSharedPtr colStatDescForUpdate(new (STMTHEAP) ColStatDesc(colStatsForUpdate, updateBaseColumnId, // ValueId of the column that will be used // as a column name, VEG and mergeStats STMTHEAP), STMTHEAP); colStatDescForUpdate->VEGColumn() = updateColVEGOutputId; colStatDescForUpdate->mergeState().clear() ; 
colStatDescForUpdate->mergeState().insert(updateBaseColumnId); // Remove the old colStat and insert this colStat into the result colStatDescList colStats().removeAt( i ); colStats().insertDeepCopyAt(i, colStatDescForUpdate, // colStats to be copied 1, // scale FALSE); } } }
// --------------------------------------------------------------------- // Utility Routine: pickOutputs // // From the given ColStatDescList, populate columnStats_ with column // descriptors that are useful based on the characteristic outputs for // the group. // // Always include in the output the current histograms of the input data, // and, if the histogram is contained in the required output list, then // this is a useful histogram and will also be output. // // --------------------------------------------------------------------- void EstLogProp::pickOutputs( ColStatDescList & columnStats, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet specifiedOutputs, const ValueIdSet predSet) { const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats(); ValueIdSet colsRequiringHistograms = specifiedOutputs; // (i) see if the selection predicates contain any constant value or a // constant expression // (ii) check if there are any columns of this table being joined to some other // columns, which do not appear as characteristics outputs. There should be // histograms available for these columns, as these might be needed later. // This problem was seen for temporary tables created as normal_tables by the // triggers. colsRequiringHistograms.addSet(predSet.getColumnsForHistogram()); colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ; NABoolean colStatDescAdded = FALSE; for (CollIndex i=0; i < columnStats.entries(); i++) { // we probably don't need 'em all, but this is the easiest way to // grab all of the multi-column uec information we'll need later colStats().insertIntoUecList (columnStats.getUecList()) ; colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint()); NABoolean found = FALSE; // Note: The following inserts into a ColStatDescList should not // have to be deep copies. From this point on, ColStatDescs that // describe the output of the calling operator are read-only. 
ColStatDescSharedPtr colStatDesc = columnStats[i]; // the value-id we're looking for const ValueId columnId = colStatDesc->getVEGColumn() ; for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++) { if (columnId == outerColStatsList[j]->getVEGColumn() OR (CmpCommon::context()->showQueryStats())) { colStats().insert(colStatDesc) ; found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; break ; // jump to next ColStatDesc } } // OK, the valueid doesn't match directly -- but there are still a // couple of things to check in order to verify whether or not we're // interested in keeping the i'th ColStatDesc ... ValueId throwaway ; // used by the second clause below if ( NOT found AND (columnId != NULL_VALUE_ID) AND (colsRequiringHistograms.contains (columnId) OR colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR columnId.isInvolvedInJoinAndConst() OR CmpCommon::context()->showQueryStats() ) ) { colStats().insert(colStatDesc); found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; } if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting()) { // if the column is referenced for histogram, but is // not needed beyond this time , then we shall save its // max freq, which might be used later in costing if this // column is a part of the partitioning key ColStatsSharedPtr stat = colStatDesc->getColStats(); if (!(stat->isVirtualColForHist() ) && NOT found && !(stat->isOrigFakeHist() ) ) { const ValueId col = colStatDesc->getColumn(); ColAnalysis * colAnalysis = col.colAnalysis(); if (colAnalysis) { NAColumn * column = stat->getStatColumns()[0]; if (column->isReferencedForHistogram()) { CostScalar maxFreq = columnStats.getMaxFreq(columnId); colAnalysis->setMaxFreq(maxFreq); colAnalysis->setFinalUec(stat->getTotalUec()); colAnalysis->setFinalRC(stat->getRowcount()); } } } } } // for columnStats.entries() if(!colStatDescAdded && columnStats.entries() > 0) colStats().insert(columnStats[0]) ; } // pickOutputs
// MultiJoin::splitSubset
//
// Builds a Join whose left child represents leftSet and whose right child
// represents rightSet. Returns NULL when leftSet is not legal, or when
// rightSet holds more than one JBBC and is not legal.
//
// Join type selection:
//  * A right side made of a single JBBC takes the type of that JBBC's
//    original parent join (semi, anti-semi, left or routine join) when it
//    has one.
//  * In every other case a plain REL_JOIN is produced.
//
// A single right JBBC is deliberately not checked for legality: a JBBC
// attached through a special join is not a legal set by itself, and its
// predecessors are assumed to be present in leftSet.
//
// The resulting Join shares this MultiJoin's group attributes.
Join* MultiJoin::splitSubset(const JBBSubset & leftSet,
                             const JBBSubset & rightSet,
                             NABoolean reUseMJ) const
{
  // None of the subsets involved may carry a group-by member.
  CMPASSERT ( (jbbSubset_.getGB() == NULL_CA_ID) &&
              (leftSet.getGB() == NULL_CA_ID) &&
              (rightSet.getGB() == NULL_CA_ID) );

#ifndef NDEBUG
  // Debug builds only: left and right must partition this MultiJoin's JBBCs,
  // i.e. left + right == jbbSubset_ and left intersect right == empty.
  CANodeIdSet unionSet(leftSet.getJBBCs());
  CANodeIdSet intersectSet(leftSet.getJBBCs());
  unionSet += rightSet.getJBBCs();
  intersectSet.intersectSet(rightSet.getJBBCs());
  CMPASSERT ( (unionSet == jbbSubset_.getJBBCs()) &&
              (intersectSet.entries() == 0 ));
#endif

  // Shape of each side; neither subset is modified below, so these are
  // evaluated once.
  const NABoolean leftIsSingleJBBC  = (leftSet.getJBBCs().entries() == 1);
  const NABoolean leftIsMultiJBBC   = (leftSet.getJBBCs().entries() > 1);
  const NABoolean rightIsSingleJBBC = (rightSet.getJBBCs().entries() == 1);
  const NABoolean rightIsMultiJBBC  = (rightSet.getJBBCs().entries() > 1);

  // Reject splits that would produce an illegal multijoin.
  if (!leftSet.legal() || (rightIsMultiJBBC && !rightSet.legal()))
    return NULL;

  // Everything allocated here lives on the statement heap.
  CollHeap* stmtHeap = CmpCommon::statementHeap();

  RelExpr* leftExpr  = generateSubsetExpr(leftSet, reUseMJ);
  RelExpr* rightExpr = generateSubsetExpr(rightSet, reUseMJ);

  // Remembers whether the derivedFromRoutineJoin flag must be propagated.
  NABoolean carryRoutineJoinFlag = FALSE;

  Join * newJoin = NULL;

  // A lone right JBBC may have been attached through a special join; if so
  // the matching join operator has to be created.
  if (rightIsSingleJBBC)
  {
    JBBC * rightJBBC =
      rightSet.getJBBCs().getFirst().getNodeAnalysis()->getJBBC();
    Join * origParentJoin = rightJBBC->getOriginalParentJoin();

    // A NULL original parent join means the JBBC was the left child of the
    // left-most join; it is then treated as an InnerNonSemi join child.
    if (origParentJoin)
    {
      if (origParentJoin->derivedFromRoutineJoin())
        carryRoutineJoinFlag = TRUE;

      if (origParentJoin->isSemiJoin())
        newJoin = new (stmtHeap) Join(leftExpr, rightExpr, REL_SEMIJOIN, NULL);

      if (origParentJoin->isAntiSemiJoin())
        newJoin = new (stmtHeap) Join(leftExpr, rightExpr, REL_ANTI_SEMIJOIN, NULL);

      if (origParentJoin->isLeftJoin())
      {
        // Left joins may carry filter predicates, applied after the join
        // predicate; install them as selection predicates.
        newJoin = new (stmtHeap) Join(leftExpr, rightExpr, REL_LEFT_JOIN, NULL);
        newJoin->setSelectionPredicates(rightJBBC->getLeftJoinFilterPreds());
      }

      if (origParentJoin->isRoutineJoin())
      {
        carryRoutineJoinFlag = TRUE;
        newJoin = new (stmtHeap) Join(leftExpr, rightExpr, REL_ROUTINE_JOIN, NULL);

        // Keep only the routine-join filter predicates that this
        // MultiJoin's subset covers.
        ValueIdSet allFilterPreds = rightJBBC->getRoutineJoinFilterPreds();
        ValueIdSet coveredFilterPreds;

        for (ValueId pred = allFilterPreds.init();
             allFilterPreds.next(pred);
             allFilterPreds.advance(pred))
        {
          if (jbbSubset_.coversExpr(pred))
            coveredFilterPreds += pred;
        }

        newJoin->setSelectionPredicates(coveredFilterPreds);
      }

      if (newJoin)
      {
        // Special joins get an explicit join predicate; for regular
        // InnerNonSemi joins the predicates are selection predicates.
        newJoin->setJoinPred(rightJBBC->getPredsWithPredecessors());

        newJoin->nullInstantiatedOutput().insert(
          rightJBBC->nullInstantiatedOutput());
      }
    }
  }

  // No special join was required: create a regular InnerNonSemi join.
  if (!newJoin)
    newJoin = new (stmtHeap) Join(leftExpr, rightExpr, REL_JOIN, NULL);

  if (carryRoutineJoinFlag)
    newJoin->setDerivedFromRoutineJoin();

  // The new join shares this MultiJoin's group attributes.
  newJoin->setGroupAttr(getGroupAttr());

  // Inner join predicates between the two sides, plus any filter
  // predicates already installed above.
  ValueIdSet combinedPreds = rightSet.joinPredsWithOther(leftSet);
  combinedPreds += newJoin->getSelectionPredicates();
  newJoin->setSelectionPredicates(combinedPreds);

  newJoin->findEquiJoinPredicates();

  // Possible saving noted by the original author: push down only to
  // children that are not already JBBCs (i.e. multijoins).
  newJoin->pushdownCoveredExpr
    (newJoin->getGroupAttr()->getCharacteristicOutputs(),
     newJoin->getGroupAttr()->getCharacteristicInputs(),
     newJoin->selectionPred());

  // CutOp children were used above so that predicates are not pushed into
  // JBBCs; put the actual expression back for each single-JBBC side.
  if (leftIsSingleJBBC)
    newJoin->setChild(0, getJBBCRelExpr(leftSet.getJBBCs().getFirst()));

  if (rightIsSingleJBBC)
    newJoin->setChild(1, getJBBCRelExpr(rightSet.getJBBCs().getFirst()));

  // Temporary fixup kept from the original: clear the selection predicates
  // of multi-JBBC children so the pushed expressions do not remain there.
  if (leftIsMultiJBBC)
    newJoin->child(0)->selectionPred().clear();

  if (rightIsMultiJBBC)
    newJoin->child(1)->selectionPred().clear();

  return newJoin;
}
// computeHistoryBuffer // // Helper function that traverses the set of root sequence functions // supplied by the compiler and dynamically determines the size // of the history buffer. // void PhysSequence::computeHistoryRows(const ValueIdSet &sequenceFunctions,//historyIds Lng32 &computedHistoryRows, Lng32 &unableToCalculate, NABoolean &unboundedFollowing, Lng32 &minFollowingRows, const ValueIdSet &outputFromChild) { ValueIdSet children; ValueIdSet historyAttributes; Lng32 value = 0; for(ValueId valId = sequenceFunctions.init(); sequenceFunctions.next(valId); sequenceFunctions.advance(valId)) { if(valId.getItemExpr()->isASequenceFunction()) { ItemExpr *itmExpr = valId.getItemExpr(); switch(itmExpr->getOperatorType()) { // THIS and NOT THIS are not dynamically computed // case ITM_THIS: case ITM_NOT_THIS: break; // The RUNNING functions and LastNotNull all need to go back just one row. // case ITM_RUNNING_SUM: case ITM_RUNNING_COUNT: case ITM_RUNNING_MIN: case ITM_RUNNING_MAX: case ITM_RUNNING_CHANGE: case ITM_LAST_NOT_NULL: computedHistoryRows = MAXOF(computedHistoryRows, 2); break; ///set to unable to compute for now-- will change later to compte values from frameStart_ and frameEnd_ case ITM_OLAP_SUM: case ITM_OLAP_COUNT: case ITM_OLAP_MIN: case ITM_OLAP_MAX: case ITM_OLAP_RANK: case ITM_OLAP_DRANK: { if ( !outputFromChild.contains(itmExpr->getValueId())) { ItmSeqOlapFunction * olap = (ItmSeqOlapFunction*)itmExpr; if (olap->isFrameStartUnboundedPreceding()) //(olap->getframeStart() == - INT_MAX) { computedHistoryRows = MAXOF(computedHistoryRows, 2); } else { computedHistoryRows = MAXOF(computedHistoryRows, ABS(olap->getframeStart()) + 2); } if (!olap->isFrameEndUnboundedFollowing()) //(olap->getframeEnd() != INT_MAX) { computedHistoryRows = MAXOF(computedHistoryRows, ABS(olap->getframeEnd()) + 1); } if (olap->isFrameEndUnboundedFollowing()) //(olap->getframeEnd() == INT_MAX) { unboundedFollowing = TRUE; if (olap->getframeStart() > 0) { minFollowingRows = 
((minFollowingRows > olap->getframeStart()) ? minFollowingRows : olap->getframeStart()); } } else if (olap->getframeEnd() > 0) { minFollowingRows = ((minFollowingRows > olap->getframeEnd()) ? minFollowingRows : olap->getframeEnd()); } } } break; // If 'rows since', we cannot determine how much history is needed. case ITM_ROWS_SINCE: unableToCalculate = 1; break; // The MOVING and OFFSET functions need to go back as far as the value // of their second child. // // The second argument can be: // Constant: for these, we can use the constant value to set the upper bound // for the history buffer. // ItmScalarMinMax(child0, child1) (with operType = ITM_SCALAR_MIN) // - if child0 or child1 is a constant, then we can use either one // to set the upper bound. case ITM_MOVING_MIN: case ITM_MOVING_MAX: case ITM_OFFSET: for(Lng32 i = 1; i < itmExpr->getArity(); i++) { if (itmExpr->child(i)->getOperatorType() != ITM_NOTCOVERED) { ItemExpr * exprPtr = itmExpr->child(i); NABoolean negate; ConstValue *cv = exprPtr->castToConstValue(negate); if (cv AND cv->canGetExactNumericValue()) { Lng32 scale; Int64 value64 = cv->getExactNumericValue(scale); if(scale == 0 && value64 >= 0 && value64 < INT_MAX) { value64 = (negate ? -value64 : value64); value = MAXOF((Lng32)value64, value); } } else { if (exprPtr->getOperatorType() == ITM_SCALAR_MIN) { for(Lng32 j = 0; j < exprPtr->getArity(); j++) { if (exprPtr->child(j)->getOperatorType() != ITM_NOTCOVERED) { ItemExpr * exprPtr1 = exprPtr->child(j); NABoolean negate1; ConstValue *cv1 = exprPtr1->castToConstValue(negate1); if (cv1 AND cv1->canGetExactNumericValue()) { Lng32 scale1; Int64 value64_1 = cv1->getExactNumericValue(scale1); if(scale1 == 0 && value64_1 >= 0 && value64_1 < INT_MAX) { value64_1 = (negate1 ? -value64_1 : value64_1); value = MAXOF((Lng32)value64_1, value); } } } } } } // end of inner else }// end of if }// end of for // Check if the value is greater than zero. 
// If it is, then save the value, but first // increment the returned ConstValue by one. // Otherwise, the offset or moving value was unable // to be calculated. if (value > 0) { value++; computedHistoryRows = MAXOF(computedHistoryRows, value); value = 0; } else unableToCalculate = 1; break; default: CMPASSERT(0); } } // Gather all the children, and if not empty, recurse down to the // next level of the tree. // for(Lng32 i = 0; i < valId.getItemExpr()->getArity(); i++) { if (//valId.getItemExpr()->child(i)->getOperatorType() != ITM_NOTCOVERED //old stuff !outputFromChild.contains(valId.getItemExpr()->child(i)->getValueId())) { children += valId.getItemExpr()->child(i)->getValueId(); } } } if (NOT children.isEmpty()) { computeHistoryRows(children, computedHistoryRows, unableToCalculate, unboundedFollowing, minFollowingRows, outputFromChild); } } // PhysSequence::computeHistoryRows
// This method forms the join expression for join on JBBC specified by jbbcId // inputEstLogProp should not be cacheable Join * AppliedStatMan::formJoinExprForJoinOnJBBC( CANodeIdSet jbbSubset, CANodeId jbbcId, const ValueIdSet * jbbcLocalPreds, const ValueIdSet * joinPreds, const EstLogPropSharedPtr& inputEstLogProp, const NABoolean cacheable) { NABoolean origInputIsCacheable = inputEstLogProp->isCacheable(); if(origInputIsCacheable) { inputEstLogProp->setCacheableFlag(FALSE); CCMPASSERT("Expecting Non Cacheable Input"); } RelExpr * jbbcExpr = getExprForCANodeId(jbbcId, inputEstLogProp, jbbcLocalPreds); jbbcExpr->getGroupAttr()->outputLogProp(inputEstLogProp); RelExpr * jbbSubsetExpr = jbbSubset.jbbcsToJBBSubset()->getPreferredJoin(); if(!jbbSubsetExpr) if(jbbSubset.entries()==1) if(!inputEstLogProp->isCacheable()) { inputEstLogProp->setCacheableFlag(TRUE); jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); inputEstLogProp->setCacheableFlag(FALSE); } else jbbSubsetExpr = getExprForCANodeId(jbbSubset.getFirst(), inputEstLogProp); else { CCMPASSERT("No Subset expression, need at least one entry in set"); } RelExpr * leftChildExpr = jbbSubsetExpr; RelExpr * rightChildExpr = jbbcExpr; GroupAttributes * galeft = jbbSubsetExpr->getGroupAttr(); GroupAttributes * garight = jbbcExpr->getGroupAttr(); // xxx JBBC * jbbc = jbbcId.getNodeAnalysis()->getJBBC(); Join * jbbcParentJoin = jbbc->getOriginalParentJoin(); ValueIdSet leftOuterJoinFilterPreds; Join * joinExpr = NULL; if(jbbcParentJoin) { if(jbbcParentJoin->isSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL); if(jbbcParentJoin->isAntiSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL); if(jbbcParentJoin->isLeftJoin()) { joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL); leftOuterJoinFilterPreds += jbbc->getLeftJoinFilterPreds(); } if(joinExpr) { 
joinExpr->setJoinPred(jbbc->getPredsWithPredecessors()); joinExpr->nullInstantiatedOutput().insert(jbbc->nullInstantiatedOutput()); } } if(!joinExpr) { // now form a JoinExpr with these left and right children. joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_JOIN, NULL); } ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds; selPredsAndLOJFilter += (*joinPreds); joinExpr->setSelectionPredicates(selPredsAndLOJFilter); // set groupAttr of this Join expression GroupAttributes * gaJoin = new STMTHEAP GroupAttributes(); // set required outputs of Join as sum of characteristic // outputs of the left and the right children ValueIdSet requiredOutputs; requiredOutputs.addSet(getPotentialOutputs(jbbSubset)); requiredOutputs.addSet(getPotentialOutputs(jbbcId)); gaJoin->setCharacteristicOutputs(requiredOutputs); // set JBBSubset for this group, if all estLogProps are cacheable. // Else JBBSubset is NULL CANodeIdSet combinedSet = jbbSubset; combinedSet += jbbcId; if (cacheable) gaJoin->getGroupAnalysis()->setLocalJBBView(combinedSet.jbbcsToJBBSubset()); gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) ); // if there are some probes coming into the join // then join type = tsj. if ((inputEstLogProp->getResultCardinality() > 1) || (inputEstLogProp->getColStats().entries() > 1)) { if (cacheable) { CANodeIdSet inputNodeSet = *(inputEstLogProp->getNodeSet()); gaJoin->setCharacteristicInputs(getPotentialOutputs(inputNodeSet)); } } joinExpr->setGroupAttr(gaJoin); gaJoin->setLogExprForSynthesis(joinExpr); joinExpr->synthLogProp(); inputEstLogProp->setCacheableFlag(origInputIsCacheable); return joinExpr; } // AppliedStatMan::formJoinExprForJoinOnJBBC
// PhysSequence::codeGen
//
// Generates the ComTdbSequence for this node: generates the child subtree,
// lays out the history buffer row, generates the read/return sequence
// expressions and the partition-change, selection and cancel predicates,
// builds the TDB with the overflow and memory settings, and registers the
// result (CRI descriptors, explain tuple, generated object) with the
// generator. Always returns 0.
//
// The statement order in this function matters: map-table ATP numbers are
// switched to 1 before the expressions are generated and back to 0 before
// the cancel expression is generated.
//
short PhysSequence::codeGen(Generator *generator)
{
  // Get a local handle on some of the generator objects.
  // (mapTable is not referenced again in this function.)
  //
  CollHeap *wHeap = generator->wHeap();
  Space *space = generator->getSpace();
  ExpGenerator *expGen = generator->getExpGenerator();
  MapTable *mapTable = generator->getMapTable();

  // Allocate a new map table for this node. This must be done
  // before generating the code for my child so that this local
  // map table will be sandwiched between the map tables already
  // generated and the map tables generated by my offspring.
  //
  // Only the items available as output from this node will
  // be put in the local map table. Before exiting this function, all of
  // my offsprings map tables will be removed. Thus, none of the outputs
  // from nodes below this node will be visible to nodes above it except
  // those placed in the local map table and those that already exist in
  // my ancestors map tables. This is the standard mechanism used in the
  // generator for managing the access to item expressions.
  //
  MapTable *localMapTable = generator->appendAtEnd();

  // Since this operation doesn't modify the row on the way down the tree,
  // go ahead and generate the child subtree. Capture the given composite row
  // descriptor and the child's returned TDB and composite row descriptor.
  // (childCriDesc is captured but not referenced again in this function.)
  //
  ex_cri_desc * givenCriDesc = generator->getCriDesc(Generator::DOWN);
  child(0)->codeGen(generator);
  ComTdb *childTdb = (ComTdb*)generator->getGenObj();
  ex_cri_desc * childCriDesc = generator->getCriDesc(Generator::UP);
  ExplainTuple *childExplainTuple = generator->getExplainTuple();

  // Make all of my child's outputs map to ATP 1. The child row is only
  // accessed in the project expression and it will be the second ATP
  // (ATP 1) passed to this expression.
  //
  localMapTable->setAllAtp(1);

  // My returned composite row has an additional tupp.
  //
  Int32 numberTuples = givenCriDesc->noTuples() + 1;
  ex_cri_desc * returnCriDesc
#pragma nowarn(1506)   // warning elimination
    = new (space) ex_cri_desc(numberTuples, space);
#pragma warn(1506)  // warning elimination

  // For now, the history buffer row looks just like the return row. Later,
  // it may be useful to add an additional tupp for sequence function
  // intermediates that are not needed above this node -- thus, this
  // ATP is kept separate from the returned ATP.
  // The history tuple occupies the last (newly added) tupp index.
  //
  const Int32 historyAtp = 0;
  const Int32 historyAtpIndex = numberTuples-1;
#pragma nowarn(1506)   // warning elimination
  ex_cri_desc *historyCriDesc = new (space) ex_cri_desc(numberTuples, space);
#pragma warn(1506)  // warning elimination
  ExpTupleDesc *historyDesc = 0;

  // Separate the read and return expressions.
  seperateReadAndReturnItems(wHeap);

  // The history buffer consists of items projected directly from the
  // child, the root sequence functions, the value arguments of the
  // offset functions, and running sequence functions. These elements must
  // be materialized in the history buffer in order to be able to compute
  // the outputs of this node -- the items projected directly from the child
  // (projectValues) and the root sequence functions (sequenceFunctions).
  //
  // Compute the set of sequence function items that must be materialized
  // in the history buffer. -- sequenceItems
  //
  // Compute the set of items in the history buffer: the union of the
  // projected values and the value arguments. -- historyIds
  //
  // Compute the set of items in the history buffer that are computed:
  // the difference between all the elements in the history buffer
  // and the projected items. -- computedHistoryIds
  //

  // KB---will need to return atp with 3 tups only 0,1 and 2
  // 2 --> values from the history buffer after they are moved to it

  addCheckPartitionChangeExpr(generator, TRUE);

  // Build historyIds in this order: partition ids, sequenced columns,
  // then the read and the return sequence functions together with
  // whatever getHistoryAttributes adds for each of them.
  ValueIdSet historyIds;

  historyIds += movePartIdsExpr();
  historyIds += sequencedColumns();

  ValueIdSet outputFromChild = child(0)->getGroupAttr()->getCharacteristicOutputs();

  getHistoryAttributes(readSeqFunctions(),outputFromChild, historyIds, TRUE, wHeap);

  // Add in the top level sequence functions.
  historyIds += readSeqFunctions();

  getHistoryAttributes(returnSeqFunctions(),outputFromChild, historyIds, TRUE, wHeap);
  // Add in the top level functions.
  historyIds += returnSeqFunctions();

  // Layout the work tuple format which consists of the projected
  // columns and the computed sequence functions. First, compute
  // the number of attributes in the tuple.
  //
  ULng32 numberAttributes
    = ((NOT historyIds.isEmpty()) ? historyIds.entries() : 0);

  // Allocate an attribute pointer vector from the working heap.
  // It is released with NADELETEBASIC once processAttributes has run.
  //
  Attributes **attrs = new(wHeap) Attributes*[numberAttributes];

  // Fill in the attributes vector for the history buffer including
  // adding the entries to the map table. Also, compute the value ID
  // set for the elements to project from the child row.
  //
  //??????????re-visit this function??
  computeHistoryAttributes(generator,
                           localMapTable,
                           attrs,
                           historyIds);

  // Create the tuple descriptor for the history buffer row and
  // assign the offsets to the attributes. For now, this layout is
  // identical to the returned row. Set the tuple descriptors for
  // the return and history rows.
  // historyRecLen is passed to processAttributes and read afterwards.
  //
  ULng32 historyRecLen;
  expGen->processAttributes(numberAttributes,
                            attrs,
                            ExpTupleDesc::SQLARK_EXPLODED_FORMAT,
                            historyRecLen,
                            historyAtp,
                            historyAtpIndex,
                            &historyDesc,
                            ExpTupleDesc::SHORT_FORMAT);
  NADELETEBASIC(attrs, wHeap);
#pragma nowarn(1506)   // warning elimination
  returnCriDesc->setTupleDescriptor(historyAtpIndex, historyDesc);
#pragma warn(1506)  // warning elimination
#pragma nowarn(1506)   // warning elimination
  historyCriDesc->setTupleDescriptor(historyAtpIndex, historyDesc);
#pragma warn(1506)  // warning elimination

  // If there are any sequence function items, generate the sequence
  // function expressions. The read expression also covers the sequenced
  // columns and the partition ids.
  //
  ex_expr * readSeqExpr = NULL;
  if(NOT readSeqFunctions().isEmpty())
    {
      ValueIdSet seqVals = readSeqFunctions();
      seqVals += sequencedColumns();
      seqVals += movePartIdsExpr();
      expGen->generateSequenceExpression(seqVals,
                                         readSeqExpr);
    }

  // Partition-change check: the predicates are ANDed into one tree and
  // generated as a scan predicate.
  ex_expr *checkPartChangeExpr = NULL;
  if (!checkPartitionChangeExpr().isEmpty()) {
    ItemExpr * newCheckPartitionChangeTree=
      checkPartitionChangeExpr().rebuildExprTree(ITM_AND,TRUE,TRUE);

    expGen->generateExpr(newCheckPartitionChangeTree->getValueId(),
                         ex_expr::exp_SCAN_PRED,
                         &checkPartChangeExpr);
  }

  //unsigned long rowLength;
  ex_expr * returnExpr = NULL;
  if(NOT returnSeqFunctions().isEmpty())
    {
      expGen->generateSequenceExpression(returnSeqFunctions(),
                                         returnExpr);
    }

  // Generate expression to evaluate predicate on the output
  //
  ex_expr *postPred = 0;

  if (! selectionPred().isEmpty()) {
    ItemExpr * newPredTree =
      selectionPred().rebuildExprTree(ITM_AND,TRUE,TRUE);

    expGen->generateExpr(newPredTree->getValueId(), ex_expr::exp_SCAN_PRED,
                         &postPred);
  }

  // Reset ATP's to zero for parent.
  //
  localMapTable->setAllAtp(0);

  // Generate expression to evaluate the cancel expression
  //
  ex_expr *cancelExpression = 0;

  if (! cancelExpr().isEmpty()) {
    ItemExpr * newCancelExprTree =
      cancelExpr().rebuildExprTree(ITM_AND,TRUE,TRUE);

    expGen->generateExpr(newCancelExprTree->getValueId(), ex_expr::exp_SCAN_PRED,
                         &cancelExpression);
  }

  //
  //  For overflow
  //
  // ( The following are meaningless if ! unlimitedHistoryRows() )
  NABoolean noOverflow =
    CmpCommon::getDefault(EXE_BMO_DISABLE_OVERFLOW) == DF_ON ;
  NABoolean logDiagnostics =
    CmpCommon::getDefault(EXE_DIAGNOSTIC_EVENTS) == DF_ON ;
  NABoolean possibleMultipleCalls = generator->getRightSideOfFlow() ;
  short scratchTresholdPct =
    (short) CmpCommon::getDefaultLong(SCRATCH_FREESPACE_THRESHOLD_PERCENT);
  // determine the memory usage (amount of memory as percentage from total
  // physical memory used to initialize data structures)
  unsigned short memUsagePercent =
    (unsigned short) getDefault(BMO_MEMORY_USAGE_PERCENT);
  short memPressurePct = (short)getDefault(GEN_MEM_PRESSURE_THRESHOLD);

  // The record length is rounded with ROUND8 before the OLAP buffer
  // parameters are derived from it.
  historyRecLen = ROUND8(historyRecLen);

  Lng32 maxNumberOfOLAPBuffers;
  Lng32 maxRowsInOLAPBuffer;
  Lng32 minNumberOfOLAPBuffers;
  Lng32 numberOfWinOLAPBuffers;
  Lng32 olapBufferSize;

  computeHistoryParams(historyRecLen,
                       maxRowsInOLAPBuffer,
                       minNumberOfOLAPBuffers,
                       numberOfWinOLAPBuffers,
                       maxNumberOfOLAPBuffers,
                       olapBufferSize);

  ComTdbSequence *sequenceTdb
    = new(space) ComTdbSequence(readSeqExpr,
                                returnExpr,
                                postPred,
                                cancelExpression,
                                getMinFollowingRows(),
#pragma nowarn(1506)   // warning elimination
                                historyRecLen,
                                historyAtpIndex,
                                childTdb,
                                givenCriDesc,
                                returnCriDesc,
                                (queue_index)getDefault(GEN_SEQFUNC_SIZE_DOWN),
                                (queue_index)getDefault(GEN_SEQFUNC_SIZE_UP),
                                getDefault(GEN_SEQFUNC_NUM_BUFFERS),
                                getDefault(GEN_SEQFUNC_BUFFER_SIZE),
                                olapBufferSize,
                                maxNumberOfOLAPBuffers,
                                numHistoryRows(),
                                getUnboundedFollowing(),
                                logDiagnostics,
                                possibleMultipleCalls,
                                scratchTresholdPct,
                                memUsagePercent,
                                memPressurePct,
                                maxRowsInOLAPBuffer,
                                minNumberOfOLAPBuffers,
                                numberOfWinOLAPBuffers,
                                noOverflow,
                                checkPartChangeExpr);
#pragma warn(1506)  // warning elimination
  generator->initTdbFields(sequenceTdb);

  // update the estimated value of HistoryRowLength with actual value
  //setEstHistoryRowLength(historyIds.getRowLength());

  // Account for this operator's estimated run-time memory.
  double sequenceMemEst = getEstimatedRunTimeMemoryUsage(sequenceTdb);
  generator->addToTotalEstimatedMemory(sequenceMemEst);

  if(!generator->explainDisabled()) {
    // The estimate is converted to KB and divided by the degree of
    // parallelism (at least 1); it is set on the generator only while
    // the explain tuple is being built.
    Lng32 seqMemEstInKBPerCPU = (Lng32)(sequenceMemEst / 1024) ;
    seqMemEstInKBPerCPU = seqMemEstInKBPerCPU/
      (MAXOF(generator->compilerStatsInfo().dop(),1));
    generator->setOperEstimatedMemory(seqMemEstInKBPerCPU);

    generator->
      setExplainTuple(addExplainInfo(sequenceTdb,
                                     childExplainTuple,
                                     0,
                                     generator));

    generator->setOperEstimatedMemory(0);
  }

  sequenceTdb->setScratchIOVectorSize((Int16)getDefault(SCRATCH_IO_VECTOR_SIZE_HASH));
  sequenceTdb->setOverflowMode(generator->getOverflowMode());

  sequenceTdb->setBmoMinMemBeforePressureCheck((Int16)getDefault(EXE_BMO_MIN_SIZE_BEFORE_PRESSURE_CHECK_IN_MB));

  // The BMO max-memory threshold comes from a different default
  // depending on whether the overflow mode is OFM_SSD.
  if(generator->getOverflowMode() == ComTdb::OFM_SSD )
    sequenceTdb->setBMOMaxMemThresholdMB((UInt16)(ActiveSchemaDB()->
                                         getDefaults()).
                                         getAsLong(SSD_BMO_MAX_MEM_THRESHOLD_IN_MB));
  else
    sequenceTdb->setBMOMaxMemThresholdMB((UInt16)(ActiveSchemaDB()->
                                         getDefaults()).
                                         getAsLong(EXE_MEMORY_AVAILABLE_IN_MB));

  // The CQD EXE_MEM_LIMIT_PER_BMO_IN_MB has precedence over the mem quota sys
  NADefaults &defs = ActiveSchemaDB()->getDefaults();
  UInt16 mmu = (UInt16)(defs.getAsDouble(EXE_MEM_LIMIT_PER_BMO_IN_MB));
  UInt16 numBMOsInFrag = (UInt16)generator->getFragmentDir()->getNumBMOs();
  if (mmu != 0)
    sequenceTdb->setMemoryQuotaMB(mmu);
  else {
    // Apply quota system if either one the following two is true:
    //   1. the memory limit feature is turned off and more than one BMOs
    //   2. the memory limit feature is turned on
    NABoolean mlimitPerCPU = defs.getAsDouble(EXE_MEMORY_LIMIT_PER_CPU) > 0;

    if ( mlimitPerCPU || numBMOsInFrag > 1 ) {

      double memQuota =
        computeMemoryQuota(generator->getEspLevel() == 0,
                           mlimitPerCPU,
                           generator->getBMOsMemoryLimitPerCPU().value(),
                           generator->getTotalNumBMOsPerCPU(),
                           generator->getTotalBMOsMemoryPerCPU().value(),
                           numBMOsInFrag,
                           generator->getFragmentDir()->getBMOsMemoryUsage()
                          );

      sequenceTdb->setMemoryQuotaMB( UInt16(memQuota) );
    }
  }

  // Hand the results back to the generator: the DOWN descriptor is the one
  // that was given, the UP descriptor is the return row built above.
  generator->setCriDesc(givenCriDesc, Generator::DOWN);
  generator->setCriDesc(returnCriDesc, Generator::UP);
  generator->setGenObj(this, sequenceTdb);

  return 0;
}
// AppliedStatMan::setupASMCacheForJBB method will be called from // Query::Analyze after connectivity analysis has been done and // empty logical properties have been set. void AppliedStatMan::setupASMCacheForJBB(JBB & jbb) { EstLogPropSharedPtr myEstLogProp; // get all JBBCs of JBB const CANodeIdSet jbbcNodeIdSet = jbb.getMainJBBSubset().getJBBCs(); CANodeId jbbcId; // for all jbbcs for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { if (NodeAnalysis * jbbcNode = jbbcId.getNodeAnalysis()) { // Evaluate local predicates only if it is a table. RelExpr * jbbcExpr = jbbcNode->getOriginalExpr(); if ((jbbcNode->getTableAnalysis() != NULL) && (jbbcExpr->getOperatorType() == REL_SCAN)) { // get the original expression of the jbbc Scan * scanExpr = (Scan *) jbbcExpr; ValueIdSet localPreds = scanExpr->getSelectionPredicates(); // if local predicates have already been computed, then skip if ((localPreds.entries() > 0) || !(lookup(jbbcId))) { // check to see this GA has already been associated with // a logExpr for synthesis. If not, then synthesize // log. 
expression, and then apply local predicates to it if (NOT scanExpr->getGroupAttr()->existsLogExprForSynthesis()) scanExpr->synthLogProp(); myEstLogProp = getStatsForCANodeId(jbbcId); } } } } // Now do a second traversal of the JBB looking for join reducers for (jbbcId = jbbcNodeIdSet.init(); jbbcNodeIdSet.next(jbbcId); jbbcNodeIdSet.advance(jbbcId)) { // now look for all two way joins for this child if (jbbcId.getNodeAnalysis()) { // get all JBBCs connected to this JBBC, and do a two-way // join with all of them CANodeIdSet connectedNodes = jbbcId.getNodeAnalysis()->\ getJBBC()->getJoinedJBBCs(); for (CANodeId connectedTable = connectedNodes.init(); connectedNodes.next(connectedTable); connectedNodes.advance(connectedTable)) { if (connectedTable.getNodeAnalysis()) { // ASM does not concern itself with the order of the tables, // hence it is possible that the join has already been computed CANodeIdSet tableSet = jbbcId; tableSet.insert(connectedTable); if ((myEstLogProp = getCachedStatistics(&tableSet)) == NULL) { CANodeIdSet setForjbbcId(jbbcId); CANodeIdSet setForConnectedTable(connectedTable); myEstLogProp = joinJBBChildren(setForjbbcId, setForConnectedTable); } } } } } } // AppliedStatMan::setupASMCacheForJBB
// compress the histograms based on query predicates on this table void TableDesc::compressHistogramsForCurrentQuery() { // if there are some column statistics if ((colStats_.entries() != 0) && (table_) && (table_->getExtendedQualName().getSpecialType() == ExtendedQualName::NORMAL_TABLE)) { // if 1 // check if query analysis info is available if(QueryAnalysis::Instance()->isAnalysisON()) { // if 2 // get a handle to the query analysis QueryAnalysis* queryAnalysis = QueryAnalysis::Instance(); // get a handle to the table analysis const TableAnalysis * tableAnalysis = getTableAnalysis(); if(!tableAnalysis) return; // iterate over statistics for each column for(CollIndex i = 0; i < colStats_.entries(); i++) { // for 1 // Get a handle to the column's statistics descriptor ColStatDescSharedPtr columnStatDesc = colStats_[i]; // get a handle to the ColStats ColStatsSharedPtr colStats = columnStatDesc->getColStats(); // if this is a single column, as opposed to a multicolumn if(colStats->getStatColumns().entries() == 1) { // if 3 // get column's value id const ValueId columnId = columnStatDesc->getColumn(); // get column analysis ColAnalysis* colAnalysis = queryAnalysis->getColAnalysis(columnId); if(!colAnalysis) continue; ValueIdSet predicatesOnColumn = colAnalysis->getReferencingPreds(); // we can compress this column's histogram if there // is a equality predicate against a constant ItemExpr *constant = NULL; NABoolean colHasEqualityAgainstConst = colAnalysis->getConstValue(constant); // if a equality predicate with a constant was found // i.e. predicate of the form col = 5 if (colHasEqualityAgainstConst) { // if 4 if (constant) // compress the histogram columnStatDesc->compressColStatsForQueryPreds(constant,constant); } // if 4 else { // else 4 // since there is no equality predicates we might still // be able to compress the column's histogram based on // range predicates against a constant. 
Following are // examples of such predicates // * col > 1 <-- predicate defines a lower bound // * col < 3 <-- predicate defines a upper bound // * col >1 and col < 30 <-- window predicate, define both bounds ItemExpr * lowerBound = NULL; ItemExpr * upperBound = NULL; // Extract predicates from range spec and add it to the // original predicate set otherwise isARangePredicate() will // return FALSE, so histgram compression won't happen. ValueIdSet rangeSpecPred(predicatesOnColumn); for (ValueId predId= rangeSpecPred.init(); rangeSpecPred.next(predId); rangeSpecPred.advance(predId)) { ItemExpr * pred = predId.getItemExpr(); if ( pred->getOperatorType() == ITM_RANGE_SPEC_FUNC ) { ValueIdSet vs; ((RangeSpecRef *)pred)->getValueIdSetForReconsItemExpr(vs); // remove rangespec vid from the original set predicatesOnColumn.remove(predId); // add preds extracted from rangespec to the original set predicatesOnColumn.insert(vs); } } // in the following loop we iterate over all the predicates // on this column. If there is a range predicate e.g. a > 2 // or a < 3, then we use that to define upper and lower bounds. // Given predicate a > 2, we get a lower bound of 2. // Given predicate a < 3, we get a upper bound of 3. // The bound are then passed down to the histogram // compression methods. // iterate over predicates to see if any of them is a range // predicate e.g. a > 2 for (ValueId predId= predicatesOnColumn.init(); predicatesOnColumn.next(predId); predicatesOnColumn.advance(predId)) { // for 2 // check if this predicate is a range predicate ItemExpr * predicateOnColumn = predId.getItemExpr(); if (predicateOnColumn->isARangePredicate()) { // if 5 // if a predicate is a range predicate we need to find out more // information regarding the predicate to see if it can be used // to compress the columns histogram. We look for the following: // * The predicate is against a constant e.g. a > 3 and not against // another column e.g. 
a > b // Also give a predicate we need to find out what side is the column // and what side is the constant. Normally people write a range predicate // as a > 3, but the same could be written as 3 < a. // Also either on of the operands of the range predicate might be // a VEG, if so then we need to dig into the VEG to see where is // the constant and where is the column. // check the right and left children of this predicate to // see if one of them is a constant ItemExpr * leftChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(0); ItemExpr * rightChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(1); // by default assume the literal is at right i.e. predicate of // the form a > 2 NABoolean columnAtRight = FALSE; // check if right child of predicate is a VEG if ( rightChildItemExpr->getOperatorType() == ITM_VEG_REFERENCE) { // if 6 // if child is a VEG VEGReference * rightChildVEG = (VEGReference *) rightChildItemExpr; // check if the VEG contains the current column // if it does contain the current column then // the predicate has the column on right and potentially // a constant on the left. if(rightChildVEG->getVEG()->getAllValues().contains(columnId)) { // if 7 // column is at right i.e. predicate is of the form // 2 < a columnAtRight = TRUE; } // if 7 } // if 6 else { // else 6 // child is not a VEG if ( columnId == rightChildItemExpr->getValueId() ) { // if 8 // literals are at left i.e. predicate is of the form // (1,2) < (a, b) columnAtRight = TRUE; } // if 8 } // else 6 ItemExpr * potentialConstantExpr = NULL; // check if the range predicate is against a constant if (columnAtRight) { // if 9 // the left child is potentially a constant potentialConstantExpr = leftChildItemExpr; } // if 9 else { // else 9 // the right child is potentially a constant potentialConstantExpr = rightChildItemExpr; } // else 9 // initialize constant to NULL before // looking for next constant constant = NULL; // check if potentialConstantExpr contains a constant. 
// we need to see if this range predicate is a predicate // against a constant e.g col > 1 and not a predicate // against another column e.g. col > anothercol // if the expression is a VEG if ( potentialConstantExpr->getOperatorType() == ITM_VEG_REFERENCE) { // if 10 // expression is a VEG, dig into the VEG to // get see if it contains a constant VEGReference * potentialConstantExprVEG = (VEGReference *) potentialConstantExpr; potentialConstantExprVEG->getVEG()->\ getAllValues().referencesAConstValue(&constant); } // if 10 else { // else 10 // express is not a VEG, it is a constant if ( potentialConstantExpr->getOperatorType() == ITM_CONSTANT ) constant = potentialConstantExpr; } // else 10 // if predicate involves a constant, does the constant imply // a upper bound or lower bound if (constant) { // if 11 // if range predicate has column at right e.g. 3 > a if (columnAtRight) { // if 12 if ( predicateOnColumn->getOperatorType() == ITM_GREATER || predicateOnColumn->getOperatorType() == ITM_GREATER_EQ) { // if 13 if (!upperBound) upperBound = constant; } // if 13 else { // else 13 if (!lowerBound) lowerBound = constant; } // else 13 } // if 12 else { // else 12 // range predicate has column at left e.g. a < 3 if ( predicateOnColumn->getOperatorType() == ITM_LESS || predicateOnColumn->getOperatorType() == ITM_LESS_EQ) { // if 14 if (!upperBound) upperBound = constant; } // if 14 else { // else 14 if (!lowerBound) lowerBound = constant; } // else 14 } // else 12 } // if 11 } // if 5 } // for 2 // if we found a upper bound or a lower bound if (lowerBound || upperBound) { // compress the histogram based on range predicates columnStatDesc->compressColStatsForQueryPreds(lowerBound, upperBound); } } // else 4 } // if 3 } // for 1 } // if 2 } // if 1 // All histograms compressed. Set the histCompressed flag to TRUE histsCompressed(TRUE); }
// This method forms the join expression with the estLogProps. Join * AppliedStatMan::formJoinExprWithEstLogProps( const EstLogPropSharedPtr& leftEstLogProp, const EstLogPropSharedPtr& rightEstLogProp, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet * setOfPredicates, const NABoolean cacheable, JBBSubset * combinedJBBSubset) { // Form a join expression with these estLogProps. // form the left child. Since the estLogProps of the left and the // right children exist, these can be treated as Scan expressions Scan * leftChildExpr = new STMTHEAP Scan(); GroupAttributes * galeft = new STMTHEAP GroupAttributes(); // set GroupAttr of the leftChild galeft->inputLogPropList().insert(inputEstLogProp); galeft->outputLogPropList().insert(leftEstLogProp); CANodeIdSet * leftNodeSet = leftEstLogProp->getNodeSet(); CANodeId nodeId; if (leftNodeSet) { if (leftNodeSet->entries() == 1) { nodeId = leftNodeSet->getFirst(); if(nodeId.getNodeAnalysis()->getTableAnalysis()) leftChildExpr->setTableAttributes(nodeId); } CostScalar minEstCard = leftNodeSet->getMinChildEstRowCount(); galeft->setMinChildEstRowCount(minEstCard); } leftChildExpr->setGroupAttr(galeft); galeft->setLogExprForSynthesis(leftChildExpr); // form the right child and set its groupAttr Scan * rightChildExpr = new STMTHEAP Scan(); GroupAttributes * garight = new STMTHEAP GroupAttributes(); garight->inputLogPropList().insert(inputEstLogProp); garight->outputLogPropList().insert(rightEstLogProp); CANodeIdSet * rightNodeSet = rightEstLogProp->getNodeSet(); // xxx JBBC * singleRightChild = NULL; Join * singleRightChildParentJoin = NULL; ValueIdSet leftOuterJoinFilterPreds; if (rightNodeSet) { if (rightNodeSet->entries() == 1) { nodeId = rightNodeSet->getFirst(); if(nodeId.getNodeAnalysis()->getTableAnalysis()) rightChildExpr->setTableAttributes(nodeId); if(nodeId.getNodeAnalysis()->getJBBC()) { singleRightChild = nodeId.getNodeAnalysis()->getJBBC(); if(singleRightChild) singleRightChildParentJoin = 
singleRightChild->getOriginalParentJoin(); } } CostScalar minEstCard = rightNodeSet->getMinChildEstRowCount(); garight->setMinChildEstRowCount(minEstCard); } rightChildExpr->setGroupAttr(garight); garight->setLogExprForSynthesis(rightChildExpr); Join * joinExpr = NULL; if(singleRightChild && singleRightChildParentJoin) { if(singleRightChildParentJoin->isSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_SEMIJOIN, NULL); if(singleRightChildParentJoin->isAntiSemiJoin()) joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_ANTI_SEMIJOIN, NULL); if(singleRightChildParentJoin->isLeftJoin()) { joinExpr = new STMTHEAP Join(leftChildExpr, rightChildExpr, REL_LEFT_JOIN, NULL); leftOuterJoinFilterPreds += singleRightChild->getLeftJoinFilterPreds(); } if(joinExpr) { joinExpr->setJoinPred(singleRightChild->getPredsWithPredecessors()); joinExpr->nullInstantiatedOutput().insert(singleRightChild-> nullInstantiatedOutput()); } } if(!joinExpr) { // now form a JoinExpr with these left and right children. joinExpr = new STMTHEAP Join(leftChildExpr, // left child rightChildExpr, // right child REL_JOIN, // join type NULL); // join predicates } ValueIdSet selPredsAndLOJFilter = leftOuterJoinFilterPreds; selPredsAndLOJFilter += (*setOfPredicates); joinExpr->setSelectionPredicates(selPredsAndLOJFilter); // set groupAttr of this Join expression GroupAttributes * gaJoin = new STMTHEAP GroupAttributes(); // set required outputs of Join as sum of characteristic // outputs of the left and the right children ValueIdSet requiredOutputs; if (leftNodeSet) requiredOutputs.addSet(getPotentialOutputs(*(leftNodeSet))); if (rightNodeSet) requiredOutputs.addSet(getPotentialOutputs(*(rightNodeSet))); gaJoin->setCharacteristicOutputs(requiredOutputs); // set JBBSubset for this group, if all estLogProps are cacheable. 
// Else JBBSubset is NULL if (cacheable) gaJoin->getGroupAnalysis()->setLocalJBBView(combinedJBBSubset); gaJoin->setMinChildEstRowCount(MINOF(garight->getMinChildEstRowCount(), galeft->getMinChildEstRowCount() ) ); joinExpr->setGroupAttr(gaJoin); // if there are some probes coming into the join // then join type = tsj. if ((inputEstLogProp->getResultCardinality() > 1) || (inputEstLogProp->getColStats().entries() > 1)) { if (cacheable) { CANodeIdSet inputNodeSet = *(inputEstLogProp->getNodeSet()); gaJoin->setCharacteristicInputs(getPotentialOutputs(inputNodeSet)); } } joinExpr->setGroupAttr(gaJoin); gaJoin->setLogExprForSynthesis(joinExpr); return joinExpr; } // AppliedStatMan::formJoinExprWithEstLogProps