// getHistoryAttributes // // Helper function that traverses the set of root sequence functions // supplied by the compiler and constructs the set of all of the // attributes that must be materialized in the history row. // void PhysSequence::getHistoryAttributes(const ValueIdSet &sequenceFunctions, const ValueIdSet &outputFromChild, ValueIdSet &historyAttributes, NABoolean addConvNodes, CollHeap *wHeap, ValueIdMap *origAttributes) const { if(addConvNodes && !origAttributes) { origAttributes = new (wHeap) ValueIdMap(); } ValueIdSet children; for(ValueId valId = sequenceFunctions.init(); sequenceFunctions.next(valId); sequenceFunctions.advance(valId)) { if(valId.getItemExpr()->isASequenceFunction()) { ItemExpr *itmExpr = valId.getItemExpr(); switch(itmExpr->getOperatorType()) { // The child needs to be in the history row. // case ITM_OFFSET: case ITM_ROWS_SINCE: case ITM_THIS: case ITM_NOT_THIS: // If the child needs to be in the history buffer, then // add a Convert node to force the value to be moved to the // history buffer. if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); break; // The sequence function needs to be in the history row. // case ITM_RUNNING_SUM: case ITM_RUNNING_COUNT: case ITM_RUNNING_MIN: case ITM_RUNNING_MAX: case ITM_LAST_NOT_NULL: historyAttributes += itmExpr->getValueId(); break; /* // after PhysSequence precode gen OLAP sum and count are already transform,ed into running // this is used during optimization phase-- case ITM_OLAP_SUM: case ITM_OLAP_COUNT: case ITM_OLAP_RANK: case ITM_OLAP_DRANK: if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); //historyAttributes += itmExpr->getValueId(); break; */ // The child and sequence function need to be in the history row. // case ITM_OLAP_MIN: case ITM_OLAP_MAX: case ITM_MOVING_MIN: case ITM_MOVING_MAX: // If the child needs to be in the history buffer, then // add a Convert node to force the value to be moved to the // history buffer. if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); historyAttributes += itmExpr->getValueId(); break; case ITM_RUNNING_CHANGE: if (itmExpr->child(0)->getOperatorType() == ITM_ITEM_LIST) { // child is a multi-valued expression // ExprValueId treePtr = itmExpr->child(0); ItemExprTreeAsList changeValues(&treePtr, ITM_ITEM_LIST, RIGHT_LINEAR_TREE); CollIndex nc = changeValues.entries(); ItemExpr *newChild = NULL; if(addConvNodes) { newChild = addConvNode(changeValues[nc-1], origAttributes, wHeap); historyAttributes += newChild->getValueId(); } else { historyAttributes += changeValues[nc-1]->getValueId(); } // add each item in the list // for (CollIndex i = nc; i > 0; i--) { if(addConvNodes) { ItemExpr *conv = addConvNode(changeValues[i-1], origAttributes, wHeap); newChild = new(wHeap) ItemList(conv, newChild); newChild->synthTypeAndValueId(TRUE); historyAttributes += conv->getValueId(); } else { historyAttributes += changeValues[i-1]->getValueId(); } } if(addConvNodes) { itmExpr->child(0) = newChild; } } else { // If the child needs to be in the history buffer, then // add a Convert node to force the value to be moved to the // history buffer. if (addConvNodes) { itmExpr->child(0) = addConvNode(itmExpr->child(0), origAttributes, wHeap); } historyAttributes += itmExpr->child(0)->getValueId(); } historyAttributes += itmExpr->getValueId(); break; default: CMPASSERT(0); } } // Gather all the children, and if not empty, recurse down to the // next level of the tree. // for(Lng32 i = 0; i < valId.getItemExpr()->getArity(); i++) { if (!outputFromChild.contains(valId.getItemExpr()->child(i)->getValueId())) //!valId.getItemExpr()->child(i)->nodeIsPreCodeGenned()) { children += valId.getItemExpr()->child(i)->getValueId(); } } } if (NOT children.isEmpty()) { getHistoryAttributes( children, outputFromChild, historyAttributes, addConvNodes, wHeap, origAttributes); } } // PhysSequence::getHistoryAttributes
void PhysSequence::computeReadNReturnItems( ValueId topSeqVid, ValueId vid, const ValueIdSet &outputFromChild, CollHeap *wHeap) { ItemExpr * itmExpr = vid.getItemExpr(); if (outputFromChild.contains(vid)) { return; } //test if itm_minus and then if negative offset .... if ( itmExpr->getOperatorType() == ITM_OFFSET && ((ItmSeqOffset *)itmExpr)->getOffsetConstantValue() < 0) { readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += itmExpr->child(0)->castToItemExpr()->getValueId(); return; } if (itmExpr->getOperatorType() == ITM_MINUS) { ItemExpr * chld0 = itmExpr->child(0)->castToItemExpr(); if ( chld0->getOperatorType() == ITM_OFFSET && ((ItmSeqOffset *)chld0)->getOffsetConstantValue() <0) { readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += chld0->child(0)->castToItemExpr()->getValueId(); ItemExpr * chld1 = itmExpr->child(1)->castToItemExpr(); if (chld1->getOperatorType() == ITM_OFFSET && ((ItmSeqOffset *)chld1)->getOffsetConstantValue() < 0) { readSeqFunctions() += chld1->child(0)->castToItemExpr()->getValueId(); } else { readSeqFunctions() += chld1->getValueId(); } return; } } if (itmExpr->getOperatorType() == ITM_OLAP_MIN || itmExpr->getOperatorType() == ITM_OLAP_MAX) { ItmSeqOlapFunction * olap = (ItmSeqOlapFunction *)itmExpr; if (olap->getframeEnd()>0) { readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; ItemExpr *newChild = new(wHeap) Convert (itmExpr->child(0)->castToItemExpr()); newChild->synthTypeAndValueId(TRUE); itmExpr->child(0) = newChild; readSeqFunctions() += newChild->getValueId(); return; } } if (itmExpr->getOperatorType() == ITM_SCALAR_MIN || itmExpr->getOperatorType() == ITM_SCALAR_MAX) { ItemExpr * chld0 = itmExpr->child(0)->castToItemExpr(); ItemExpr * chld1 = itmExpr->child(1)->castToItemExpr(); if ((chld0->getOperatorType() == ITM_OLAP_MIN && chld1->getOperatorType() == ITM_OLAP_MIN )|| (chld0->getOperatorType() == ITM_OLAP_MAX && chld1->getOperatorType() == ITM_OLAP_MAX )) { ItmSeqOlapFunction * olap0 = (ItmSeqOlapFunction *)chld0; ItmSeqOlapFunction * olap1 = (ItmSeqOlapFunction *)chld1; if ( olap1->getframeEnd()>0) { CMPASSERT(olap0->getframeEnd()==0); readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += olap0->getValueId(); ItemExpr *newChild = new(wHeap) Convert (olap1->child(0)->castToItemExpr()); newChild->synthTypeAndValueId(TRUE); olap1->child(0) = newChild; readSeqFunctions() += newChild->getValueId(); } else { CMPASSERT(olap1->getframeEnd()==0); readSeqFunctions() -= topSeqVid; returnSeqFunctions() += topSeqVid; readSeqFunctions() += olap1->getValueId(); ItemExpr *newChild = new(wHeap) Convert (olap0->child(0)->castToItemExpr()); newChild->synthTypeAndValueId(TRUE); olap0->child(0) = newChild; readSeqFunctions() += newChild->getValueId(); } return; } } for (Int32 i= 0 ; i < itmExpr->getArity(); i++) { ItemExpr * chld= itmExpr->child(i); computeReadNReturnItems(topSeqVid, chld->getValueId(), outputFromChild, wHeap); } }//void PhysSequence::computeReadNReturnItems(ItemExpr * other)
// computeHistoryBuffer // // Helper function that traverses the set of root sequence functions // supplied by the compiler and dynamically determines the size // of the history buffer. // void PhysSequence::computeHistoryRows(const ValueIdSet &sequenceFunctions,//historyIds Lng32 &computedHistoryRows, Lng32 &unableToCalculate, NABoolean &unboundedFollowing, Lng32 &minFollowingRows, const ValueIdSet &outputFromChild) { ValueIdSet children; ValueIdSet historyAttributes; Lng32 value = 0; for(ValueId valId = sequenceFunctions.init(); sequenceFunctions.next(valId); sequenceFunctions.advance(valId)) { if(valId.getItemExpr()->isASequenceFunction()) { ItemExpr *itmExpr = valId.getItemExpr(); switch(itmExpr->getOperatorType()) { // THIS and NOT THIS are not dynamically computed // case ITM_THIS: case ITM_NOT_THIS: break; // The RUNNING functions and LastNotNull all need to go back just one row. // case ITM_RUNNING_SUM: case ITM_RUNNING_COUNT: case ITM_RUNNING_MIN: case ITM_RUNNING_MAX: case ITM_RUNNING_CHANGE: case ITM_LAST_NOT_NULL: computedHistoryRows = MAXOF(computedHistoryRows, 2); break; ///set to unable to compute for now-- will change later to compte values from frameStart_ and frameEnd_ case ITM_OLAP_SUM: case ITM_OLAP_COUNT: case ITM_OLAP_MIN: case ITM_OLAP_MAX: case ITM_OLAP_RANK: case ITM_OLAP_DRANK: { if ( !outputFromChild.contains(itmExpr->getValueId())) { ItmSeqOlapFunction * olap = (ItmSeqOlapFunction*)itmExpr; if (olap->isFrameStartUnboundedPreceding()) //(olap->getframeStart() == - INT_MAX) { computedHistoryRows = MAXOF(computedHistoryRows, 2); } else { computedHistoryRows = MAXOF(computedHistoryRows, ABS(olap->getframeStart()) + 2); } if (!olap->isFrameEndUnboundedFollowing()) //(olap->getframeEnd() != INT_MAX) { computedHistoryRows = MAXOF(computedHistoryRows, ABS(olap->getframeEnd()) + 1); } if (olap->isFrameEndUnboundedFollowing()) //(olap->getframeEnd() == INT_MAX) { unboundedFollowing = TRUE; if (olap->getframeStart() > 0) { minFollowingRows = ((minFollowingRows > olap->getframeStart()) ? minFollowingRows : olap->getframeStart()); } } else if (olap->getframeEnd() > 0) { minFollowingRows = ((minFollowingRows > olap->getframeEnd()) ? minFollowingRows : olap->getframeEnd()); } } } break; // If 'rows since', we cannot determine how much history is needed. case ITM_ROWS_SINCE: unableToCalculate = 1; break; // The MOVING and OFFSET functions need to go back as far as the value // of their second child. // // The second argument can be: // Constant: for these, we can use the constant value to set the upper bound // for the history buffer. // ItmScalarMinMax(child0, child1) (with operType = ITM_SCALAR_MIN) // - if child0 or child1 is a constant, then we can use either one // to set the upper bound. case ITM_MOVING_MIN: case ITM_MOVING_MAX: case ITM_OFFSET: for(Lng32 i = 1; i < itmExpr->getArity(); i++) { if (itmExpr->child(i)->getOperatorType() != ITM_NOTCOVERED) { ItemExpr * exprPtr = itmExpr->child(i); NABoolean negate; ConstValue *cv = exprPtr->castToConstValue(negate); if (cv AND cv->canGetExactNumericValue()) { Lng32 scale; Int64 value64 = cv->getExactNumericValue(scale); if(scale == 0 && value64 >= 0 && value64 < INT_MAX) { value64 = (negate ? -value64 : value64); value = MAXOF((Lng32)value64, value); } } else { if (exprPtr->getOperatorType() == ITM_SCALAR_MIN) { for(Lng32 j = 0; j < exprPtr->getArity(); j++) { if (exprPtr->child(j)->getOperatorType() != ITM_NOTCOVERED) { ItemExpr * exprPtr1 = exprPtr->child(j); NABoolean negate1; ConstValue *cv1 = exprPtr1->castToConstValue(negate1); if (cv1 AND cv1->canGetExactNumericValue()) { Lng32 scale1; Int64 value64_1 = cv1->getExactNumericValue(scale1); if(scale1 == 0 && value64_1 >= 0 && value64_1 < INT_MAX) { value64_1 = (negate1 ? -value64_1 : value64_1); value = MAXOF((Lng32)value64_1, value); } } } } } } // end of inner else }// end of if }// end of for // Check if the value is greater than zero. // If it is, then save the value, but first // increment the returned ConstValue by one. // Otherwise, the offset or moving value was unable // to be calculated. if (value > 0) { value++; computedHistoryRows = MAXOF(computedHistoryRows, value); value = 0; } else unableToCalculate = 1; break; default: CMPASSERT(0); } } // Gather all the children, and if not empty, recurse down to the // next level of the tree. // for(Lng32 i = 0; i < valId.getItemExpr()->getArity(); i++) { if (//valId.getItemExpr()->child(i)->getOperatorType() != ITM_NOTCOVERED //old stuff !outputFromChild.contains(valId.getItemExpr()->child(i)->getValueId())) { children += valId.getItemExpr()->child(i)->getValueId(); } } } if (NOT children.isEmpty()) { computeHistoryRows(children, computedHistoryRows, unableToCalculate, unboundedFollowing, minFollowingRows, outputFromChild); } } // PhysSequence::computeHistoryRows
// --------------------------------------------------------------------- // Utility Routine: pickOutputs // // From the given ColStatDescList, populate columnStats_ with column // descriptors that are useful based on the characteristic outputs for // the group. // // Always include in the output the current histograms of the input data, // and, if the histogram is contained in the required output list, then // this is a useful histogram and will also be output. // // --------------------------------------------------------------------- void EstLogProp::pickOutputs( ColStatDescList & columnStats, const EstLogPropSharedPtr& inputEstLogProp, const ValueIdSet specifiedOutputs, const ValueIdSet predSet) { const ColStatDescList & outerColStatsList = inputEstLogProp->getColStats(); ValueIdSet colsRequiringHistograms = specifiedOutputs; // (i) see if the selection predicates contain any constant value or a // constant expression // (ii) check if there are any columns of this table being joined to some other // columns, which do not appear as characteristics outputs. There should be // histograms available for these columns, as these might be needed later. // This problem was seen for temporary tables created as normal_tables by the // triggers. colsRequiringHistograms.addSet(predSet.getColumnsForHistogram()); colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists()) ; NABoolean colStatDescAdded = FALSE; for (CollIndex i=0; i < columnStats.entries(); i++) { // we probably don't need 'em all, but this is the easiest way to // grab all of the multi-column uec information we'll need later colStats().insertIntoUecList (columnStats.getUecList()) ; colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint()); NABoolean found = FALSE; // Note: The following inserts into a ColStatDescList should not // have to be deep copies. From this point on, ColStatDescs that // describe the output of the calling operator are read-only. ColStatDescSharedPtr colStatDesc = columnStats[i]; // the value-id we're looking for const ValueId columnId = colStatDesc->getVEGColumn() ; for (CollIndex j=0 ; j < outerColStatsList.entries() ; j++) { if (columnId == outerColStatsList[j]->getVEGColumn() OR (CmpCommon::context()->showQueryStats())) { colStats().insert(colStatDesc) ; found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; break ; // jump to next ColStatDesc } } // OK, the valueid doesn't match directly -- but there are still a // couple of things to check in order to verify whether or not we're // interested in keeping the i'th ColStatDesc ... ValueId throwaway ; // used by the second clause below if ( NOT found AND (columnId != NULL_VALUE_ID) AND (colsRequiringHistograms.contains (columnId) OR colsRequiringHistograms.referencesTheGivenValue (columnId, throwaway) OR columnId.isInvolvedInJoinAndConst() OR CmpCommon::context()->showQueryStats() ) ) { colStats().insert(colStatDesc); found = TRUE; if(!colStatDescAdded) colStatDescAdded = TRUE; } if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting()) { // if the column is referenced for histogram, but is // not needed beyond this time , then we shall save its // max freq, which might be used later in costing if this // column is a part of the partitioning key ColStatsSharedPtr stat = colStatDesc->getColStats(); if (!(stat->isVirtualColForHist() ) && NOT found && !(stat->isOrigFakeHist() ) ) { const ValueId col = colStatDesc->getColumn(); ColAnalysis * colAnalysis = col.colAnalysis(); if (colAnalysis) { NAColumn * column = stat->getStatColumns()[0]; if (column->isReferencedForHistogram()) { CostScalar maxFreq = columnStats.getMaxFreq(columnId); colAnalysis->setMaxFreq(maxFreq); colAnalysis->setFinalUec(stat->getTotalUec()); colAnalysis->setFinalRC(stat->getRowcount()); } } } } } // for columnStats.entries() if(!colStatDescAdded && columnStats.entries() > 0) colStats().insert(columnStats[0]) ; } // pickOutputs