// static void IndexBoundsBuilder::translateEquality(const BSONElement& data, bool isHashed, OrderedIntervalList* oil, bool* exact) { // We have to copy the data out of the parse tree and stuff it into the index // bounds. BSONValue will be useful here. BSONObj dataObj; if (isHashed) { dataObj = ExpressionMapping::hash(data); } else { dataObj = objFromElement(data); } // UNITTEST 11738048 if (Array == dataObj.firstElement().type()) { // XXX: bad oil->intervals.push_back(allValues()); *exact = false; } else { verify(dataObj.isOwned()); oil->intervals.push_back(makePointInterval(dataObj)); // XXX: it's exact if the index isn't sparse? if (dataObj.firstElement().isNull() || isHashed) { *exact = false; } else { *exact = true; } } }
Foam::tmp<Foam::Field<Type> > Foam::fieldValue::combineFields
(
    const Field<Type>& field
) const
{
    // Gather each processor's contribution onto the master.
    List<Field<Type> > procFields(Pstream::nProcs());
    procFields[Pstream::myProcNo()] = field;
    Pstream::gatherList(procFields);

    if (!Pstream::master())
    {
        // Non-master processors hand back their own field unchanged.
        return field;
    }

    // Master concatenates every processor's field into a single one.
    return tmp<Field<Type> >
    (
        new Field<Type>
        (
            ListListOps::combine<Field<Type> >
            (
                procFields,
                accessOp<Field<Type> >()
            )
        )
    );
}
void Foam::fieldValue::combineFields(Field<Type>& field)
{
    // Collect every processor's field on the master.
    List<Field<Type> > procFields(Pstream::nProcs());
    procFields[Pstream::myProcNo()] = field;
    Pstream::gatherList(procFields);

    // Only the master replaces its field with the combined result;
    // the other processors keep their input untouched.
    if (Pstream::master())
    {
        field = ListListOps::combine<Field<Type> >
        (
            procFields,
            accessOp<Field<Type> >()
        );
    }
}
// static void IndexBoundsBuilder::translate(const MatchExpression* expr, const BSONElement& elt, OrderedIntervalList* oilOut, bool* exactOut) { int direction = (elt.numberInt() >= 0) ? 1 : -1; Interval interval; bool exact = false; oilOut->name = elt.fieldName(); bool isHashed = false; if (mongoutils::str::equals("hashed", elt.valuestrsafe())) { isHashed = true; } if (isHashed) { verify(MatchExpression::EQ == expr->matchType() || MatchExpression::MATCH_IN == expr->matchType()); } if (MatchExpression::EQ == expr->matchType()) { const EqualityMatchExpression* node = static_cast<const EqualityMatchExpression*>(expr); // We have to copy the data out of the parse tree and stuff it into the index // bounds. BSONValue will be useful here. BSONObj dataObj; if (isHashed) { dataObj = ExpressionMapping::hash(node->getData()); } else { dataObj = objFromElement(node->getData()); } // UNITTEST 11738048 if (Array == dataObj.firstElement().type()) { // XXX: build better bounds warning() << "building lazy bounds for " << expr->toString() << endl; interval = allValues(); exact = false; } else { verify(dataObj.isOwned()); interval = makePointInterval(dataObj); // XXX: it's exact if the index isn't sparse if (dataObj.firstElement().isNull()) { exact = false; } else if (isHashed) { exact = false; } else { exact = true; } } } else if (MatchExpression::LTE == expr->matchType()) { const LTEMatchExpression* node = static_cast<const LTEMatchExpression*>(expr); BSONElement dataElt = node->getData(); BSONObjBuilder bob; bob.appendMinForType("", dataElt.type()); bob.append(dataElt); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); interval = makeRangeInterval(dataObj, true, true); // XXX: only exact if not (null or array) exact = true; } else if (MatchExpression::LT == expr->matchType()) { const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr); BSONElement dataElt = node->getData(); BSONObjBuilder bob; bob.appendMinForType("", dataElt.type()); bob.append(dataElt); 
BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); interval = makeRangeInterval(dataObj, true, false); // XXX: only exact if not (null or array) exact = true; } else if (MatchExpression::GT == expr->matchType()) { const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr); BSONElement dataElt = node->getData(); BSONObjBuilder bob; bob.append(node->getData()); bob.appendMaxForType("", dataElt.type()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); interval = makeRangeInterval(dataObj, false, true); // XXX: only exact if not (null or array) exact = true; } else if (MatchExpression::GTE == expr->matchType()) { const GTEMatchExpression* node = static_cast<const GTEMatchExpression*>(expr); BSONElement dataElt = node->getData(); BSONObjBuilder bob; bob.append(dataElt); bob.appendMaxForType("", dataElt.type()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); interval = makeRangeInterval(dataObj, true, true); // XXX: only exact if not (null or array) exact = true; } else if (MatchExpression::REGEX == expr->matchType()) { warning() << "building lazy bounds for " << expr->toString() << endl; interval = allValues(); exact = false; } else if (MatchExpression::MOD == expr->matchType()) { BSONObjBuilder bob; bob.appendMinForType("", NumberDouble); bob.appendMaxForType("", NumberDouble); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); interval = makeRangeInterval(dataObj, true, true); exact = false; } else if (MatchExpression::MATCH_IN == expr->matchType()) { warning() << "building lazy bounds for " << expr->toString() << endl; interval = allValues(); exact = false; } else if (MatchExpression::TYPE_OPERATOR == expr->matchType()) { const TypeMatchExpression* tme = static_cast<const TypeMatchExpression*>(expr); BSONObjBuilder bob; bob.appendMinForType("", tme->getData()); bob.appendMaxForType("", tme->getData()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); interval = makeRangeInterval(dataObj, true, true); exact = false; } 
else if (MatchExpression::MATCH_IN == expr->matchType()) { warning() << "building lazy bounds for " << expr->toString() << endl; interval = allValues(); exact = false; } else if (MatchExpression::GEO == expr->matchType()) { const GeoMatchExpression* gme = static_cast<const GeoMatchExpression*>(expr); // Can only do this for 2dsphere. if (!mongoutils::str::equals("2dsphere", elt.valuestrsafe())) { warning() << "Planner error trying to build geo bounds for " << elt.toString() << " index element."; verify(0); } const S2Region& region = gme->getGeoQuery().getRegion(); ExpressionMapping::cover2dsphere(region, oilOut); *exactOut = false; // XXX: restructure this method return; } else { warning() << "Planner error, trying to build bounds for expr " << expr->toString() << endl; verify(0); } if (-1 == direction) { reverseInterval(&interval); } oilOut->intervals.push_back(interval); *exactOut = exact; }
// Builds a reference whose histogram ranges, normalisation constants and PCA
// transformation are all copied from an existing 'reference' object, so the
// metrics in 'input' are binned on axes directly comparable with the reference.
//
// Throws std::runtime_error when 'input' is empty, or when no sample is free
// of NaNs (previously the latter caused undefined behaviour via
// cleanData.front() on an empty vector).
TripMetricsReference::TripMetricsReference( const std::vector< TripMetrics >& input,
                                            long binsForHistograms,
                                            const TripMetricsReference& reference ):
    m_histograms(),
    m_binsForHistograms( binsForHistograms ),
    m_meanValues(),
    m_stdValues(),
    m_pca( 0 )
{
    if ( input.size() == 0 ) {
        throw std::runtime_error( "TripMetricsReference::TripMetricsReference : 0 size input given." );
    }

    const size_t numberOfHistograms = input.front().values().size();

    // One vector of (non-NaN) values per metric.
    std::vector< std::vector<double> > allValues( numberOfHistograms, std::vector<double>() );
    for ( size_t iMetric = 0; iMetric < input.size(); ++iMetric ) {
        const std::vector<double>& metricValues = input[iMetric].values();
        for ( size_t iValue = 0; iValue < numberOfHistograms; ++iValue ) {
            if ( std::isnan( metricValues[iValue] ) ) continue;
            allValues[iValue].push_back( metricValues[iValue] );
        }
    }

    // One histogram per metric, re-using the edges of the reference histograms.
    // (An unused local 'binSize' involving highEdge/1.01 was removed.)
    for ( size_t iValue = 0; iValue < numberOfHistograms; ++iValue ) {
        std::vector<double>& valuesForMetric = allValues[iValue];
        const double lowEdge = reference.m_histograms[iValue]->lowEdge();
        const double highEdge = reference.m_histograms[iValue]->highEdge();
        m_histograms.push_back( new Histogram( valuesForMetric, binsForHistograms, lowEdge, highEdge ) );
    }

    // Re-fill allValues with one row per sample (all metrics, NaNs included).
    allValues.clear();
    allValues.reserve( input.size() );
    for ( std::vector< TripMetrics >::const_iterator iSample = input.begin();
          iSample != input.end(); ++iSample )
        allValues.push_back( iSample->values() );

    const long nBinaryVariables = TripMetrics::numberOfBinaryMetrics();
    // size_t to avoid a signed/unsigned comparison in the feature loop below.
    const size_t numberOfFeatures = numberOfHistograms - static_cast<size_t>( nBinaryVariables );

    // Drop samples containing NaNs and normalise the continuous features using
    // the previously calculated mean/std values of the reference.
    std::vector< std::vector< double > > cleanData;
    cleanData.reserve( allValues.size() );
    m_meanValues = reference.m_meanValues;
    m_stdValues = reference.m_stdValues;
    for ( size_t iSample = 0; iSample < allValues.size(); ++iSample ) {
        const std::vector<double>& metricValues = allValues[iSample];
        bool nanFound = false;
        for ( size_t iMetric = 0; iMetric < metricValues.size(); ++iMetric ) {
            if ( std::isnan( metricValues[iMetric] ) ) {
                nanFound = true;
                break;
            }
        }
        if ( nanFound ) continue;
        // Skip the leading binary metrics; only continuous features feed the PCA.
        std::vector<double> sampleValues( metricValues.begin() + nBinaryVariables,
                                          metricValues.end() );
        for ( size_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature )
            sampleValues[iFeature] =
                ( sampleValues[iFeature] - m_meanValues[iFeature] ) / m_stdValues[iFeature];
        cleanData.push_back( sampleValues );
    }

    if ( cleanData.empty() ) {
        throw std::runtime_error( "TripMetricsReference::TripMetricsReference : no NaN-free samples in input." );
    }

    // Transform the clean data using the reference pca object and create the
    // corresponding histograms, re-using the reference PCA histogram edges.
    m_pca = new PCA( *( reference.m_pca ) );
    for ( std::vector< std::vector< double > >::iterator iData = cleanData.begin();
          iData != cleanData.end(); ++iData )
        *iData = m_pca->transform( *iData );

    const size_t nPrincipalComponents = cleanData.front().size();
    const size_t numberOfSamples = cleanData.size();
    m_histogramsPCA.reserve( nPrincipalComponents );
    for ( size_t iComponent = 0; iComponent < nPrincipalComponents; ++iComponent ) {
        // Gather the iComponent-th coordinate of every sample.
        std::vector< double > histogramData( numberOfSamples, 0.0 );
        for ( size_t iSample = 0; iSample < numberOfSamples; ++iSample ) {
            histogramData[iSample] = cleanData[iSample][iComponent];
        }
        m_histogramsPCA.push_back( new Histogram( histogramData,
                                                 m_binsForHistograms,
                                                 reference.m_histogramsPCA[iComponent]->lowEdge(),
                                                 reference.m_histogramsPCA[iComponent]->highEdge() ) );
    }
}
// Builds a fresh reference from 'input': one histogram per metric (with edges
// trimmed to the central 99.5% of the observed values) plus PCA histograms
// computed via performPCA().
//
// Throws std::runtime_error when 'input' is empty.
//
// FIX: the trimmed high-edge index floor(n*(100+p)/200)+1 could equal n
// (e.g. n == 200 gives index 200), reading past the end of the sorted value
// vector; it is now clamped to the last valid index.
TripMetricsReference::TripMetricsReference( const std::vector< TripMetrics >& input,
                                            long binsForHistograms ):
    m_histograms(),
    m_binsForHistograms( binsForHistograms ),
    m_meanValues(),
    m_stdValues(),
    m_pca( 0 )
{
    if ( input.size() == 0 ) {
        throw std::runtime_error( "TripMetricsReference::TripMetricsReference : 0 size input given." );
    }

    ProcessLogger log(3, "Building the trip reference : ");

    const size_t numberOfHistograms = input.front().values().size();

    // Track per-metric minimum/maximum, seeded with the first sample's values.
    std::vector<double> minValues = input.front().values();
    std::vector<double> maxValues = input.front().values();

    // One vector of (non-NaN) values per metric.
    std::vector< std::vector<double> > allValues( numberOfHistograms, std::vector<double>() );
    for ( size_t iMetric = 0; iMetric < input.size(); ++iMetric ) {
        const std::vector<double>& metricValues = input[iMetric].values();
        for ( size_t iValue = 0; iValue < numberOfHistograms; ++iValue ) {
            if ( std::isnan( metricValues[iValue] ) ) continue;
            double currentValue = metricValues[iValue];
            allValues[iValue].push_back( currentValue );
            if ( std::isnan( minValues[iValue] ) || currentValue < minValues[iValue] )
                minValues[iValue] = currentValue;
            if ( std::isnan( maxValues[iValue] ) || currentValue > maxValues[iValue] )
                maxValues[iValue] = currentValue;
        }
    }
    log.taskEnded();

    // For each metric create the corresponding histogram.
    for ( size_t iValue = 0; iValue < numberOfHistograms; ++iValue ) {
        double lowEdge = minValues[iValue];
        double highEdge = maxValues[iValue];
        std::vector<double>& valuesForMetric = allValues[iValue];

        if ( valuesForMetric.empty() ) {
            // No usable (non-NaN) values: keep the seeded edges and skip trimming.
            // NOTE(review): lowEdge/highEdge may be NaN here — confirm Histogram
            // tolerates that, as the original code had undefined behaviour instead.
            m_histograms.push_back( new Histogram( valuesForMetric, binsForHistograms,
                                                  lowEdge, highEdge ) );
            continue;
        }

        // Trim extremes: keep the central 99.5% of the sorted values.
        std::sort( valuesForMetric.begin(), valuesForMetric.end() );
        const double percentageToKeep = 99.5;
        size_t lowEdgeIndex = static_cast< size_t >(
            std::floor( valuesForMetric.size() * (100 - percentageToKeep) / 200 ) );
        lowEdge = valuesForMetric[lowEdgeIndex];
        size_t highEdgeIndex = static_cast< size_t >(
            std::floor( valuesForMetric.size() * (100 + percentageToKeep) / 200 ) ) + 1;
        // Clamp: the +1 above can push the index one past the end for some sizes.
        if ( highEdgeIndex >= valuesForMetric.size() )
            highEdgeIndex = valuesForMetric.size() - 1;
        highEdge = valuesForMetric[highEdgeIndex];

        // Push the upper edge 1% of a bin beyond the largest kept value.
        double binSize = ( highEdge - lowEdge ) / binsForHistograms;
        highEdge += 0.01 * binSize;

        m_histograms.push_back( new Histogram( valuesForMetric, binsForHistograms,
                                               lowEdge, highEdge ) );

        // Trim the values (so that the sorting is not repeated at the next step).
        valuesForMetric = std::vector<double>( valuesForMetric.begin() + lowEdgeIndex,
                                               valuesForMetric.begin() + highEdgeIndex );
    }
    log.taskEnded();

    // Create the PCA histograms: one row per sample, all metrics.
    allValues.clear();
    allValues.reserve( input.size() );
    for ( std::vector< TripMetrics >::const_iterator iSample = input.begin();
          iSample != input.end(); ++iSample )
        allValues.push_back( iSample->values() );
    this->performPCA( allValues );
    log.taskEnded();
}
// static void IndexBoundsBuilder::translate(const MatchExpression* expr, const BSONElement& elt, OrderedIntervalList* oilOut, bool* exactOut) { oilOut->name = elt.fieldName(); bool isHashed = false; if (mongoutils::str::equals("hashed", elt.valuestrsafe())) { isHashed = true; } if (isHashed) { verify(MatchExpression::EQ == expr->matchType() || MatchExpression::MATCH_IN == expr->matchType()); } if (MatchExpression::ELEM_MATCH_VALUE == expr->matchType()) { OrderedIntervalList acc; bool exact; translate(expr->getChild(0), elt, &acc, &exact); if (!exact) { *exactOut = false; } for (size_t i = 1; i < expr->numChildren(); ++i) { OrderedIntervalList next; translate(expr->getChild(i), elt, &next, &exact); if (!exact) { *exactOut = false; } intersectize(next, &acc); } for (size_t i = 0; i < acc.intervals.size(); ++i) { oilOut->intervals.push_back(acc.intervals[i]); } if (!oilOut->intervals.empty()) { std::sort(oilOut->intervals.begin(), oilOut->intervals.end(), IntervalComparison); } } else if (MatchExpression::EQ == expr->matchType()) { const EqualityMatchExpression* node = static_cast<const EqualityMatchExpression*>(expr); translateEquality(node->getData(), isHashed, oilOut, exactOut); } else if (MatchExpression::LTE == expr->matchType()) { const LTEMatchExpression* node = static_cast<const LTEMatchExpression*>(expr); BSONElement dataElt = node->getData(); // Everything is <= MaxKey. if (MaxKey == dataElt.type()) { oilOut->intervals.push_back(allValues()); *exactOut = true; return; } BSONObjBuilder bob; bob.appendMinForType("", dataElt.type()); bob.appendAs(dataElt, ""); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); // XXX: only exact if not (null or array) *exactOut = true; } else if (MatchExpression::LT == expr->matchType()) { const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr); BSONElement dataElt = node->getData(); // Everything is <= MaxKey. 
if (MaxKey == dataElt.type()) { oilOut->intervals.push_back(allValues()); *exactOut = true; return; } BSONObjBuilder bob; bob.appendMinForType("", dataElt.type()); bob.appendAs(dataElt, ""); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); QLOG() << "data obj is " << dataObj.toString() << endl; oilOut->intervals.push_back(makeRangeInterval(dataObj, true, false)); // XXX: only exact if not (null or array) *exactOut = true; } else if (MatchExpression::GT == expr->matchType()) { const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr); BSONElement dataElt = node->getData(); // Everything is > MinKey. if (MinKey == dataElt.type()) { oilOut->intervals.push_back(allValues()); *exactOut = true; return; } BSONObjBuilder bob; bob.appendAs(node->getData(), ""); bob.appendMaxForType("", dataElt.type()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); oilOut->intervals.push_back(makeRangeInterval(dataObj, false, true)); // XXX: only exact if not (null or array) *exactOut = true; } else if (MatchExpression::GTE == expr->matchType()) { const GTEMatchExpression* node = static_cast<const GTEMatchExpression*>(expr); BSONElement dataElt = node->getData(); // Everything is >= MinKey. 
if (MinKey == dataElt.type()) { oilOut->intervals.push_back(allValues()); *exactOut = true; return; } BSONObjBuilder bob; bob.appendAs(dataElt, ""); bob.appendMaxForType("", dataElt.type()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); // XXX: only exact if not (null or array) *exactOut = true; } else if (MatchExpression::REGEX == expr->matchType()) { const RegexMatchExpression* rme = static_cast<const RegexMatchExpression*>(expr); translateRegex(rme, oilOut, exactOut); } else if (MatchExpression::MOD == expr->matchType()) { BSONObjBuilder bob; bob.appendMinForType("", NumberDouble); bob.appendMaxForType("", NumberDouble); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); *exactOut = false; } else if (MatchExpression::TYPE_OPERATOR == expr->matchType()) { const TypeMatchExpression* tme = static_cast<const TypeMatchExpression*>(expr); BSONObjBuilder bob; bob.appendMinForType("", tme->getData()); bob.appendMaxForType("", tme->getData()); BSONObj dataObj = bob.obj(); verify(dataObj.isOwned()); oilOut->intervals.push_back(makeRangeInterval(dataObj, true, true)); *exactOut = false; } else if (MatchExpression::MATCH_IN == expr->matchType()) { const InMatchExpression* ime = static_cast<const InMatchExpression*>(expr); const ArrayFilterEntries& afr = ime->getData(); *exactOut = true; // Create our various intervals. bool thisBoundExact = false; for (BSONElementSet::iterator it = afr.equalities().begin(); it != afr.equalities().end(); ++it) { translateEquality(*it, isHashed, oilOut, &thisBoundExact); if (!thisBoundExact) { *exactOut = false; } } for (size_t i = 0; i < afr.numRegexes(); ++i) { translateRegex(afr.regex(i), oilOut, &thisBoundExact); if (!thisBoundExact) { *exactOut = false; } } // XXX: what happens here? if (afr.hasNull()) { } // XXX: what happens here as well? 
if (afr.hasEmptyArray()) { } unionize(oilOut); } else if (MatchExpression::GEO == expr->matchType()) { const GeoMatchExpression* gme = static_cast<const GeoMatchExpression*>(expr); // Can only do this for 2dsphere. if (!mongoutils::str::equals("2dsphere", elt.valuestrsafe())) { warning() << "Planner error trying to build geo bounds for " << elt.toString() << " index element."; verify(0); } const S2Region& region = gme->getGeoQuery().getRegion(); ExpressionMapping::cover2dsphere(region, oilOut); *exactOut = false; } else { warning() << "Planner error, trying to build bounds for expr " << expr->toString() << endl; verify(0); } }
// static
// Translates 'expr' into a single interval appended to 'oilOut', reversed when
// 'direction' is -1.  '*exactOut' reports whether the bounds are exact (no
// re-check of the predicate needed after the index scan).  Non-comparison and
// non-leaf expressions fall back to a full-range ("lazy") interval.
void IndexBoundsBuilder::translate(const MatchExpression* expr, int direction,
                                   OrderedIntervalList* oilOut, bool* exactOut) {
    Interval interval;
    bool exact = false;

    if (expr->isLeaf()) {
        if (MatchExpression::EQ == expr->matchType()) {
            const EqualityMatchExpression* node =
                static_cast<const EqualityMatchExpression*>(expr);
            // We have to copy the data out of the parse tree and stuff it into the index bounds.
            // BSONValue will be useful here.
            BSONObj dataObj = objFromElement(node->getData());
            if (dataObj.couldBeArray()) {
                // XXX: build better bounds
                // Arrays fall back to scanning everything; the match is re-checked.
                warning() << "building lazy bounds for " << expr->toString() << endl;
                interval = allValues();
                exact = false;
            }
            else {
                verify(dataObj.isOwned());
                // Point interval: exactly the requested value.
                interval = makePointInterval(dataObj);
                exact = true;
            }
        }
        else if (MatchExpression::LTE == expr->matchType()) {
            const LTEMatchExpression* node = static_cast<const LTEMatchExpression*>(expr);
            // [MinKey, data] inclusive on both ends.
            BSONObjBuilder bob;
            bob.appendMinKey("");
            bob.append(node->getData());
            BSONObj dataObj = bob.obj();
            verify(dataObj.isOwned());
            interval = makeRangeInterval(dataObj, true, true);
            exact = true;
        }
        else if (MatchExpression::LT == expr->matchType()) {
            const LTMatchExpression* node = static_cast<const LTMatchExpression*>(expr);
            // [MinKey, data) — upper bound excluded.
            BSONObjBuilder bob;
            bob.appendMinKey("");
            bob.append(node->getData());
            BSONObj dataObj = bob.obj();
            verify(dataObj.isOwned());
            interval = makeRangeInterval(dataObj, true, false);
            exact = true;
        }
        else if (MatchExpression::GT == expr->matchType()) {
            const GTMatchExpression* node = static_cast<const GTMatchExpression*>(expr);
            // (data, MaxKey] — lower bound excluded.
            BSONObjBuilder bob;
            bob.append(node->getData());
            bob.appendMaxKey("");
            BSONObj dataObj = bob.obj();
            verify(dataObj.isOwned());
            interval = makeRangeInterval(dataObj, false, true);
            exact = true;
        }
        else if (MatchExpression::GTE == expr->matchType()) {
            const GTEMatchExpression* node = static_cast<const GTEMatchExpression*>(expr);
            // [data, MaxKey] inclusive on both ends.
            BSONObjBuilder bob;
            bob.append(node->getData());
            bob.appendMaxKey("");
            BSONObj dataObj = bob.obj();
            verify(dataObj.isOwned());
            interval = makeRangeInterval(dataObj, true, true);
            exact = true;
        }
        else {
            // XXX: build better bounds
            // Unsupported leaf type: scan everything and re-check the predicate.
            warning() << "building lazy bounds for " << expr->toString() << endl;
            interval = allValues();
            exact = false;
        }
    }
    else {
        // XXX: build better bounds
        // Non-leaf (array) expressions get full-range bounds for now.
        verify(expr->isArray());
        warning() << "building lazy bounds for " << expr->toString() << endl;
        interval = allValues();
        exact = false;
    }

    if (-1 == direction) {
        reverseInterval(&interval);
    }

    oilOut->intervals.push_back(interval);
    *exactOut = exact;
}