void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet* keys) {
    int extraSize = 0;
    vector<BSONElement> extrasBefore;
    vector<BSONElement> extrasAfter;

    // compute the non FTS key elements
    for (unsigned i = 0; i < spec.numExtraBefore(); i++) {
        BSONElement e = obj.getFieldDotted(spec.extraBefore(i));
        if (e.eoo())
            e = nullElt;
        uassert(16675, "cannot have a multi-key as a prefix to a text index", e.type() != Array);
        extrasBefore.push_back(e);
        extraSize += e.size();
    }
    for (unsigned i = 0; i < spec.numExtraAfter(); i++) {
        BSONElement e = obj.getFieldDotted(spec.extraAfter(i));
        if (e.eoo())
            e = nullElt;
        extrasAfter.push_back(e);
        extraSize += e.size();
    }


    TermFrequencyMap term_freqs;
    spec.scoreDocument(obj, &term_freqs);

    // create index keys from raw scores
    // only 1 per string

    uassert(16732,
            mongolutils::str::stream() << "too many unique keys for a single document to"
                                      << " have a text index, max is " << term_freqs.size()
                                      << obj["_id"],
            term_freqs.size() <= 400000);

    long long keyBSONSize = 0;
    const int MaxKeyBSONSizeMB = 4;

    for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) {
        const string& term = i->first;
        double weight = i->second;

        // guess the total size of the btree entry based on the size of the weight, term tuple
        int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ +
            /* term size (could be truncated/hashed) */
            guessTermSize(term, spec.getTextIndexVersion()) + extraSize;

        BSONObjBuilder b(guess);  // builds a BSON object with guess length.
        for (unsigned k = 0; k < extrasBefore.size(); k++) {
            b.appendAs(extrasBefore[k], "");
        }
        _appendIndexKey(b, weight, term, spec.getTextIndexVersion());
        for (unsigned k = 0; k < extrasAfter.size(); k++) {
            b.appendAs(extrasAfter[k], "");
        }
        BSONObj res = b.obj();

        verify(guess >= res.objsize());

        keys->insert(res);
        keyBSONSize += res.objsize();

        uassert(16733,
                mongolutils::str::stream()
                    << "trying to index text where term list is too big, max is "
                    << MaxKeyBSONSizeMB << "mb " << obj["_id"],
                keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024));
    }
}
Beispiel #2
0
void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet* keys) {
    int extraSize = 0;
    vector<BSONElement> extrasBefore;
    vector<BSONElement> extrasAfter;

    // Compute the non FTS key elements for the prefix.
    for (unsigned i = 0; i < spec.numExtraBefore(); i++) {
        auto indexedElement = extractNonFTSKeyElement(obj, spec.extraBefore(i));
        extrasBefore.push_back(indexedElement);
        extraSize += indexedElement.size();
    }

    // Compute the non FTS key elements for the suffix.
    for (unsigned i = 0; i < spec.numExtraAfter(); i++) {
        auto indexedElement = extractNonFTSKeyElement(obj, spec.extraAfter(i));
        extrasAfter.push_back(indexedElement);
        extraSize += indexedElement.size();
    }

    TermFrequencyMap term_freqs;
    spec.scoreDocument(obj, &term_freqs);

    // create index keys from raw scores
    // only 1 per string

    // TODO SERVER-36440: Completely remove this limit in 4.3.
    if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
        serverGlobalParams.featureCompatibility.getVersion() ==
            ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) {
        uassert(16732,
                mongoutils::str::stream() << "too many unique keys for a single document to"
                                          << " have a text index, max is "
                                          << term_freqs.size()
                                          << obj["_id"],
                term_freqs.size() <= 400000);
    }

    long long keyBSONSize = 0;
    const int MaxKeyBSONSizeMB = 4;

    for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) {
        const string& term = i->first;
        double weight = i->second;

        // guess the total size of the btree entry based on the size of the weight, term tuple
        int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ +
            /* term size (could be truncated/hashed) */
            guessTermSize(term, spec.getTextIndexVersion()) + extraSize;

        BSONObjBuilder b(guess);  // builds a BSON object with guess length.
        for (unsigned k = 0; k < extrasBefore.size(); k++) {
            b.appendAs(extrasBefore[k], "");
        }
        _appendIndexKey(b, weight, term, spec.getTextIndexVersion());
        for (unsigned k = 0; k < extrasAfter.size(); k++) {
            b.appendAs(extrasAfter[k], "");
        }
        BSONObj res = b.obj();

        verify(guess >= res.objsize());

        keys->insert(res);
        keyBSONSize += res.objsize();

        // TODO SERVER-36440: Completely remove this limit in 4.3.
        if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
            serverGlobalParams.featureCompatibility.getVersion() ==
                ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) {
            uassert(16733,
                    mongoutils::str::stream()
                        << "trying to index text where term list is too big, max is "
                        << MaxKeyBSONSizeMB
                        << "mb "
                        << obj["_id"],
                    keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024));
        }
    }
}
FTSElementIterator::FTSElementIterator(const FTSSpec& spec, const BSONObj& obj)
    : _frame(obj, spec, &spec.defaultLanguage(), "", false),
      _spec(spec),
      _currentValue(advance()) {}