void FTSSpec::scoreDocument(const BSONObj& obj, TermFrequencyMap* term_freqs) const { if (_textIndexVersion == TEXT_INDEX_VERSION_1) { return _scoreDocumentV1(obj, term_freqs); } FTSElementIterator it(*this, obj); while (it.more()) { FTSIteratorValue val = it.next(); std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer()); _scoreStringV2(tokenizer.get(), val._text, term_freqs, val._weight); } }
void FTSSpec::scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const { if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) { return _scoreDocumentV1( obj, term_freqs ); } FTSElementIterator it( *this, obj ); while ( it.more() ) { FTSIteratorValue val = it.next(); Stemmer stemmer( *val._language ); Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) ); _scoreStringV2( tools, val._text, term_freqs, val._weight ); } }
/**
 * Recursively scores 'obj' for text indexing (v2), accumulating per-term
 * frequencies into 'term_freqs'.
 *
 * Performs a depth-first traversal of 'obj', visiting only the fields that
 * this spec's weight map (or a wildcard spec) selects.  String values are
 * tokenized/stemmed via _scoreStringV2; sub-documents and arrays recurse
 * with an updated dotted path.
 *
 * @param parentLanguage  language inherited from the enclosing document;
 *                        may be overridden per-document via the language
 *                        override field (_getLanguageToUseV2).
 * @param parentPath      dotted path of the enclosing field ("" at top level).
 * @param isArray         true when 'obj' is an array element container; array
 *                        elements keep the parent's dotted path.
 * @param term_freqs      out-param: term -> frequency/weight accumulator.
 *
 * For v1 text indexes this delegates directly to _scoreDocumentV1, which only
 * supports a top-level, non-array invocation (see the dasserts).
 */
void FTSSpec::scoreDocument( const BSONObj& obj,
                             const FTSLanguage& parentLanguage,
                             const string& parentPath,
                             bool isArray,
                             TermFrequencyMap* term_freqs ) const {

    if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) {
        // v1 never recurses through this entry point, so the recursion
        // bookkeeping must be at its initial values.
        dassert( parentPath == "" );
        dassert( !isArray );
        return _scoreDocumentV1( obj, term_freqs );
    }

    // Resolve the language for this (sub-)document, honoring any per-document
    // override, then build the stemming/stopword tools for it once for all
    // fields at this level.
    const FTSLanguage& language = _getLanguageToUseV2( obj, parentLanguage );
    Stemmer stemmer( language );
    Tools tools( language, &stemmer, StopWords::getStopWords( language ) );

    // Perform a depth-first traversal of obj, skipping fields not touched by this spec.
    BSONObjIterator j( obj );
    while ( j.more() ) {
        BSONElement elem = j.next();
        string fieldName = elem.fieldName();

        // Skip "language" specifier fields if wildcard.
        if ( wildcard() && languageOverrideField() == fieldName ) {
            continue;
        }

        // Compose the dotted name of the current field:
        // 1. parent path empty (top level): use the current field name
        // 2. parent path non-empty and obj is an array: use the parent path
        // 3. parent path non-empty and obj is a sub-doc: append field name to parent path
        string dottedName = ( parentPath.empty() ? fieldName
                                                 : isArray ? parentPath
                                                           : parentPath + '.' + fieldName );

        // Find lower bound of dottedName in _weights.  lower_bound leaves us at the first
        // weight that could possibly match or be a prefix of dottedName.  And if this
        // element fails to match, then no subsequent weight can match, since the weights
        // are lexicographically ordered.  (For sub-documents we probe with a trailing
        // '.' so the lower bound lands on potential descendants of dottedName.)
        Weights::const_iterator i =
            _weights.lower_bound( elem.type() == Object ? dottedName + '.'
                                                        : dottedName );

        // possibleWeightMatch is set if the weight map contains either a match or some item
        // lexicographically larger than fieldName.  This boolean acts as a guard on
        // dereferences of iterator 'i'.
        bool possibleWeightMatch = ( i != _weights.end() );

        // Optimize away two cases, when not wildcard:
        // 1. lower_bound seeks to end(): no prefix match possible
        // 2. lower_bound seeks to a name which is not a prefix
        if ( !wildcard() ) {
            if ( !possibleWeightMatch ) {
                continue;
            }
            else if ( !_matchPrefix( dottedName, i->first ) ) {
                continue;
            }
        }

        // Is the current field an exact match on a weight?
        bool exactMatch = ( possibleWeightMatch && i->first == dottedName );
        double weight = ( possibleWeightMatch ? i->second : DEFAULT_WEIGHT );

        switch ( elem.type() ) {
        case String:
            // Only index strings on exact match or wildcard.
            if ( exactMatch || wildcard() ) {
                _scoreStringV2( tools, elem.valuestr(), term_freqs, weight );
            }
            break;
        case Object:
            // Only descend into a sub-document on proper prefix or wildcard.  Note that
            // !exactMatch is a sufficient test for proper prefix match, because of
            // matchPrefix() continue block above.
            if ( !exactMatch || wildcard() ) {
                scoreDocument( elem.Obj(), language, dottedName, false, term_freqs );
            }
            break;
        case Array:
            // Only descend into arrays from non-array parents or on wildcard.
            if ( !isArray || wildcard() ) {
                scoreDocument( elem.Obj(), language, dottedName, true, term_freqs );
            }
            break;
        default:
            // Skip over all other BSON types.
            break;
        }
    }
}