Example #1
0
void FTSSpec::scoreDocument(const BSONObj& obj, TermFrequencyMap* term_freqs) const {
    if (_textIndexVersion == TEXT_INDEX_VERSION_1) {
        return _scoreDocumentV1(obj, term_freqs);
    }

    FTSElementIterator it(*this, obj);

    while (it.more()) {
        FTSIteratorValue val = it.next();
        std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer());
        _scoreStringV2(tokenizer.get(), val._text, term_freqs, val._weight);
    }
}
Example #2
0
        void FTSSpec::scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const {
            if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) {
                return _scoreDocumentV1( obj, term_freqs );
            }

            FTSElementIterator it( *this, obj );

            while ( it.more() ) {
                FTSIteratorValue val = it.next();
                Stemmer stemmer( *val._language );
                Tools tools( *val._language, &stemmer, StopWords::getStopWords( *val._language ) );
                _scoreStringV2( tools, val._text, term_freqs, val._weight );
            }
        }
Example #3
0
/**
 * Recursively scores the string fields of 'obj' that this spec covers, accumulating the
 * results into 'term_freqs'.
 *
 * Version-1 text indexes are delegated to _scoreDocumentV1(); in that case the caller is
 * expected to pass an empty 'parentPath' and 'isArray' == false (checked with dassert).
 *
 * @param obj             document, sub-document or array contents to traverse
 * @param parentLanguage  language handed to _getLanguageToUseV2() to pick the language
 *                        used for 'obj'
 * @param parentPath      dotted path of 'obj' within the enclosing document; empty at the
 *                        top level
 * @param isArray         true when 'obj' holds the elements of an array, in which case
 *                        every element is addressed by 'parentPath' itself
 * @param term_freqs      output map passed through to _scoreStringV2()
 */
void FTSSpec::scoreDocument( const BSONObj& obj,
                             const FTSLanguage& parentLanguage,
                             const string& parentPath,
                             bool isArray,
                             TermFrequencyMap* term_freqs ) const {

    if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) {
        dassert( parentPath == "" );
        dassert( !isArray );
        return _scoreDocumentV1( obj, term_freqs );
    }

    const FTSLanguage& language = _getLanguageToUseV2( obj, parentLanguage );
    Stemmer stemmer( language );
    Tools tools( language, &stemmer, StopWords::getStopWords( language ) );

    // Perform a depth-first traversal of obj, skipping fields not touched by this spec.
    BSONObjIterator j( obj );
    while ( j.more() ) {

        BSONElement elem = j.next();
        string fieldName = elem.fieldName();

        // Skip "language" specifier fields if wildcard.
        if ( wildcard() && languageOverrideField() == fieldName ) {
            continue;
        }

        // Compose the dotted name of the current field:
        // 1. parent path empty (top level): use the current field name
        // 2. parent path non-empty and obj is an array: use the parent path
        // 3. parent path non-empty and obj is a sub-doc: append field name to parent path
        string dottedName = ( parentPath.empty() ? fieldName
                              : isArray ? parentPath
                              : parentPath + '.' + fieldName );

        // Find lower bound of dottedName in _weights.  lower_bound leaves us at the first
        // weight that could possibly match or be a prefix of dottedName.  And if this
        // element fails to match, then no subsequent weight can match, since the weights
        // are lexicographically ordered.
        Weights::const_iterator i = _weights.lower_bound( elem.type() == Object
                                    ? dottedName + '.'
                                    : dottedName );

        // possibleWeightMatch is set if the weight map contains either a match or some item
        // lexicographically larger than dottedName.  This boolean acts as a guard on
        // dereferences of iterator 'i'.
        bool possibleWeightMatch = ( i != _weights.end() );

        // Optimize away two cases, when not wildcard:
        // 1. lower_bound seeks to end(): no prefix match possible
        // 2. lower_bound seeks to a name which is not a prefix
        if ( !wildcard() ) {
            if ( !possibleWeightMatch ) {
                continue;
            }
            else if ( !_matchPrefix( dottedName, i->first ) ) {
                continue;
            }
        }

        // Is the current field an exact match on a weight?
        bool exactMatch = ( possibleWeightMatch && i->first == dottedName );

        // Only an exact match may contribute its configured weight.  When lower_bound merely
        // landed on a lexicographically larger (unrelated) entry -- which can reach the
        // String case below under wildcard -- the field has no weight of its own and must
        // fall back to the default rather than borrow its neighbour's weight.
        double weight = ( exactMatch ? i->second : DEFAULT_WEIGHT );

        switch ( elem.type() ) {
        case String:
            // Only index strings on exact match or wildcard.
            if ( exactMatch || wildcard() ) {
                _scoreStringV2( tools, elem.valuestr(), term_freqs, weight );
            }
            break;
        case Object:
            // Only descend into a sub-document on proper prefix or wildcard.  Note that
            // !exactMatch is a sufficient test for proper prefix match, because of
            // matchPrefix() continue block above.
            if ( !exactMatch || wildcard() ) {
                scoreDocument( elem.Obj(), language, dottedName, false, term_freqs );
            }
            break;
        case Array:
            // Only descend into arrays from non-array parents or on wildcard.
            if ( !isArray || wildcard() ) {
                scoreDocument( elem.Obj(), language, dottedName, true, term_freqs );
            }
            break;
        default:
            // Skip over all other BSON types.
            break;
        }
    }
}