void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet* keys) { int extraSize = 0; vector<BSONElement> extrasBefore; vector<BSONElement> extrasAfter; // compute the non FTS key elements for (unsigned i = 0; i < spec.numExtraBefore(); i++) { BSONElement e = obj.getFieldDotted(spec.extraBefore(i)); if (e.eoo()) e = nullElt; uassert(16675, "cannot have a multi-key as a prefix to a text index", e.type() != Array); extrasBefore.push_back(e); extraSize += e.size(); } for (unsigned i = 0; i < spec.numExtraAfter(); i++) { BSONElement e = obj.getFieldDotted(spec.extraAfter(i)); if (e.eoo()) e = nullElt; extrasAfter.push_back(e); extraSize += e.size(); } TermFrequencyMap term_freqs; spec.scoreDocument(obj, &term_freqs); // create index keys from raw scores // only 1 per string uassert(16732, mongolutils::str::stream() << "too many unique keys for a single document to" << " have a text index, max is " << term_freqs.size() << obj["_id"], term_freqs.size() <= 400000); long long keyBSONSize = 0; const int MaxKeyBSONSizeMB = 4; for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) { const string& term = i->first; double weight = i->second; // guess the total size of the btree entry based on the size of the weight, term tuple int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ + /* term size (could be truncated/hashed) */ guessTermSize(term, spec.getTextIndexVersion()) + extraSize; BSONObjBuilder b(guess); // builds a BSON object with guess length. 
for (unsigned k = 0; k < extrasBefore.size(); k++) { b.appendAs(extrasBefore[k], ""); } _appendIndexKey(b, weight, term, spec.getTextIndexVersion()); for (unsigned k = 0; k < extrasAfter.size(); k++) { b.appendAs(extrasAfter[k], ""); } BSONObj res = b.obj(); verify(guess >= res.objsize()); keys->insert(res); keyBSONSize += res.objsize(); uassert(16733, mongolutils::str::stream() << "trying to index text where term list is too big, max is " << MaxKeyBSONSizeMB << "mb " << obj["_id"], keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024)); } }
void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet* keys) { int extraSize = 0; vector<BSONElement> extrasBefore; vector<BSONElement> extrasAfter; // Compute the non FTS key elements for the prefix. for (unsigned i = 0; i < spec.numExtraBefore(); i++) { auto indexedElement = extractNonFTSKeyElement(obj, spec.extraBefore(i)); extrasBefore.push_back(indexedElement); extraSize += indexedElement.size(); } // Compute the non FTS key elements for the suffix. for (unsigned i = 0; i < spec.numExtraAfter(); i++) { auto indexedElement = extractNonFTSKeyElement(obj, spec.extraAfter(i)); extrasAfter.push_back(indexedElement); extraSize += indexedElement.size(); } TermFrequencyMap term_freqs; spec.scoreDocument(obj, &term_freqs); // create index keys from raw scores // only 1 per string // TODO SERVER-36440: Completely remove this limit in 4.3. if (serverGlobalParams.featureCompatibility.isVersionInitialized() && serverGlobalParams.featureCompatibility.getVersion() == ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) { uassert(16732, mongoutils::str::stream() << "too many unique keys for a single document to" << " have a text index, max is " << term_freqs.size() << obj["_id"], term_freqs.size() <= 400000); } long long keyBSONSize = 0; const int MaxKeyBSONSizeMB = 4; for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) { const string& term = i->first; double weight = i->second; // guess the total size of the btree entry based on the size of the weight, term tuple int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ + /* term size (could be truncated/hashed) */ guessTermSize(term, spec.getTextIndexVersion()) + extraSize; BSONObjBuilder b(guess); // builds a BSON object with guess length. 
for (unsigned k = 0; k < extrasBefore.size(); k++) { b.appendAs(extrasBefore[k], ""); } _appendIndexKey(b, weight, term, spec.getTextIndexVersion()); for (unsigned k = 0; k < extrasAfter.size(); k++) { b.appendAs(extrasAfter[k], ""); } BSONObj res = b.obj(); verify(guess >= res.objsize()); keys->insert(res); keyBSONSize += res.objsize(); // TODO SERVER-36440: Completely remove this limit in 4.3. if (serverGlobalParams.featureCompatibility.isVersionInitialized() && serverGlobalParams.featureCompatibility.getVersion() == ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) { uassert(16733, mongoutils::str::stream() << "trying to index text where term list is too big, max is " << MaxKeyBSONSizeMB << "mb " << obj["_id"], keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024)); } } }
// Constructs an iterator over 'obj' driven by 'spec'.
//
// The initial frame (_frame) is built from the object, the spec, a pointer to the spec's default
// language, an empty string and 'false'.
// NOTE(review): the last two arguments are presumably the parent path and an "is array" flag;
// confirm against the frame type's constructor.
//
// _currentValue is seeded by calling advance() from the member-initializer list, so the iterator
// already holds its first value when construction finishes.
// NOTE(review): this relies on _frame and _spec being declared before _currentValue in the class
// (members are initialized in declaration order); the class definition is not visible here.
FTSElementIterator::FTSElementIterator(const FTSSpec& spec, const BSONObj& obj) : _frame(obj, spec, &spec.defaultLanguage(), "", false), _spec(spec), _currentValue(advance()) {}