/**
 * Renders this object as "{ elem, elem, ... }", or "{}" when isEmpty() is true.
 *
 * Each element is sanity-checked while walking the object: its size must be positive and
 * below 1 << 30, it must not extend past objsize(), and validate() is called on it.
 * An EOO element is only accepted when it ends exactly at objsize(). Violations are
 * reported through massert.
 */
string BSONObj::toString() const {
    if ( isEmpty() )
        return "{}";

    stringstream out;
    out << "{ ";

    // Empty before the first element, ", " before every later one.
    const char* separator = "";

    BSONObjIterator it( *this );
    for ( ;; ) {
        massert( "Object does not end with EOO", it.more() );
        BSONElement elem = it.next( true );

        massert( "Invalid element size", elem.size() > 0 );
        massert( "Element too large", elem.size() < ( 1 << 30 ) );

        // Byte offset of this element from the start of the object's data.
        int offset = elem.rawdata() - this->objdata();
        massert( "Element extends past end of object", elem.size() + offset <= this->objsize() );
        elem.validate();

        if ( elem.eoo() ) {
            bool endsAtObjectEnd = ( elem.size() + offset == this->objsize() );
            massert( "EOO Before end of object", endsAtObjectEnd );
            break;
        }

        out << separator;
        out << elem.toString();
        separator = ", ";
    }

    out << " }";
    return out.str();
}
bool debug( const BSONObj& o , int depth=0) { string prefix = ""; for ( int i=0; i<depth; i++ ) { prefix += "\t\t\t"; } int read = 4; try { cout << prefix << "--- new object ---\n"; cout << prefix << "\t size : " << o.objsize() << "\n"; BSONObjIterator i(o); while ( i.more() ) { BSONElement e = i.next(); cout << prefix << "\t\t " << e.fieldName() << "\n" << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() << endl; if ( ( read + e.size() ) > o.objsize() ) { cout << prefix << " SIZE DOES NOT WORK" << endl; return false; } read += e.size(); try { e.validate(); if ( e.isABSONObj() ) { if ( ! debug( e.Obj() , depth + 1 ) ) { //return false; cout << prefix << "\t\t\t BAD BAD BAD" << endl; if ( e.size() < 1000 ) { cout << "---\n" << e.Obj().hexDump() << "\n---" << endl; } } } else if ( e.type() == String && ! isValidUTF8( e.valuestr() ) ) { cout << prefix << "\t\t\t" << "bad utf8 String!" << endl; } else if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ) { cout << prefix << "\t\t\t" << e << endl; } } catch ( std::exception& e ) { cout << prefix << "\t\t\t bad value: " << e.what() << endl; } } } catch ( std::exception& e ) { cout << prefix << "\tbad\t" << e.what() << endl; cout << "----\n" << o.hexDump() << "\n---" << endl; } return true; }
void BtreeKeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { if (_isIdIndex) { // we special case for speed BSONElement e = obj["_id"]; if (e.eoo()) { keys->insert(_nullKey); } else { int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */; BSONObjBuilder b(size); b.appendAs(e, ""); keys->insert(b.obj()); invariant(keys->begin()->objsize() == size); } return; } // '_fieldNames' and '_fixed' are passed by value so that they can be mutated as part of the // getKeys call. :| getKeysImpl(_fieldNames, _fixed, obj, keys, multikeyPaths); if (keys->empty() && !_isSparse) { keys->insert(_nullKey); } }
/**
 * Returns true when this element and 'rhs' have the same size() and identical bytes.
 * Two zero-sized elements compare equal without touching their data.
 */
bool BSONElement::binaryEqual(const BSONElement& rhs) const {
    const int lhsBytes = size();
    const int rhsBytes = rhs.size();

    if (lhsBytes != rhsBytes) {
        return false;
    }
    if (lhsBytes == 0) {
        // Nothing to compare.
        return true;
    }
    return memcmp(data, rhs.rawdata(), lhsBytes) == 0;
}
void BtreeKeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { if (_isIdIndex) { // we special case for speed BSONElement e = obj["_id"]; if (e.eoo()) { keys->insert(_nullKey); } else if (_collator) { BSONObjBuilder b; CollationIndexKey::collationAwareIndexKeyAppend(e, _collator, &b); // Insert a copy so its buffer size fits the object size. keys->insert(b.obj().copy()); } else { int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */; BSONObjBuilder b(size); b.appendAs(e, ""); keys->insert(b.obj()); invariant(keys->begin()->objsize() == size); } // The {_id: 1} index can never be multikey because the _id field isn't allowed to be an // array value. We therefore always set 'multikeyPaths' as [ [ ] ]. if (multikeyPaths) { multikeyPaths->resize(1); } } else { if (multikeyPaths) { invariant(multikeyPaths->empty()); multikeyPaths->resize(_fieldNames.size()); } // '_fieldNames' and '_fixed' are passed by value so that their copies can be mutated as // part of the _getKeysWithArray method. _getKeysWithArray(_fieldNames, _fixed, obj, keys, 0, _emptyPositionalInfo, multikeyPaths); } if (keys->empty() && !_isSparse) { keys->insert(_nullKey); } }
void BtreeKeyGeneratorV1::getKeysImpl(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { if (_isIdIndex) { // we special case for speed BSONElement e = obj["_id"]; if (e.eoo()) { keys->insert(_nullKey); } else if (_collator) { BSONObjBuilder b; CollationIndexKey::collationAwareIndexKeyAppend(e, _collator, &b); // Insert a copy so its buffer size fits the object size. keys->insert(b.obj().copy()); } else { int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */; BSONObjBuilder b(size); b.appendAs(e, ""); keys->insert(b.obj()); invariant(keys->begin()->objsize() == size); } // The {_id: 1} index can never be multikey because the _id field isn't allowed to be an // array value. We therefore always set 'multikeyPaths' as [ [ ] ]. if (multikeyPaths) { multikeyPaths->resize(1); } return; } if (multikeyPaths) { invariant(multikeyPaths->empty()); multikeyPaths->resize(fieldNames.size()); } getKeysImplWithArray( std::move(fieldNames), std::move(fixed), obj, keys, 0, _emptyPositionalInfo, multikeyPaths); }
void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { if (_isIdIndex) { // we special case for speed BSONElement e = obj["_id"]; if (e.eoo()) { keys->insert(_nullKey); } else { int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */; BSONObjBuilder b(size); b.appendAs(e, ""); keys->insert(b.obj()); invariant(keys->begin()->objsize() == size); } return; } BSONElement arrElt; unsigned arrIdx = ~0; unsigned numNotFound = 0; for (unsigned i = 0; i < fieldNames.size(); ++i) { if (*fieldNames[i] == '\0') continue; BSONElement e = dps::extractElementAtPathOrArrayAlongPath(obj, fieldNames[i]); if (e.eoo()) { e = nullElt; // no matching field numNotFound++; } if (e.type() != Array) fieldNames[i] = ""; // no matching field or non-array match if (*fieldNames[i] == '\0') // no need for further object expansion (though array expansion still possible) fixed[i] = e; if (e.type() == Array && arrElt.eoo()) { // we only expand arrays on a single path -- track the path here arrIdx = i; arrElt = e; } // enforce single array path here if (e.type() == Array && e.rawdata() != arrElt.rawdata()) { assertParallelArrays(e.fieldName(), arrElt.fieldName()); } } bool allFound = true; // have we found elements for all field names in the key spec? 
for (std::vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i) { if (**i != '\0') { allFound = false; break; } } if (_isSparse && numNotFound == _fieldNames.size()) { // we didn't find any fields // so we're not going to index this document return; } bool insertArrayNull = false; if (allFound) { if (arrElt.eoo()) { // no terminal array element to expand BSONObjBuilder b(_sizeTracker); for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) b.appendAs(*i, ""); keys->insert(b.obj()); } else { // terminal array element to expand, so generate all keys BSONObjIterator i(arrElt.embeddedObject()); if (i.more()) { while (i.more()) { BSONObjBuilder b(_sizeTracker); for (unsigned j = 0; j < fixed.size(); ++j) { if (j == arrIdx) b.appendAs(i.next(), ""); else b.appendAs(fixed[j], ""); } keys->insert(b.obj()); } } else if (fixed.size() > 1) { insertArrayNull = true; } } } else { // nonterminal array element to expand, so recurse verify(!arrElt.eoo()); BSONObjIterator i(arrElt.embeddedObject()); if (i.more()) { while (i.more()) { BSONElement e = i.next(); if (e.type() == Object) { getKeysImpl(fieldNames, fixed, e.embeddedObject(), keys, multikeyPaths); } } } else { insertArrayNull = true; } } if (insertArrayNull) { // x : [] - need to insert undefined BSONObjBuilder b(_sizeTracker); for (unsigned j = 0; j < fixed.size(); ++j) { if (j == arrIdx) { b.appendUndefined(""); } else { BSONElement e = fixed[j]; if (e.eoo()) b.appendNull(""); else b.appendAs(e, ""); } } keys->insert(b.obj()); } }
void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet* keys) { int extraSize = 0; vector<BSONElement> extrasBefore; vector<BSONElement> extrasAfter; // compute the non FTS key elements for (unsigned i = 0; i < spec.numExtraBefore(); i++) { BSONElement e = obj.getFieldDotted(spec.extraBefore(i)); if (e.eoo()) e = nullElt; uassert(16675, "cannot have a multi-key as a prefix to a text index", e.type() != Array); extrasBefore.push_back(e); extraSize += e.size(); } for (unsigned i = 0; i < spec.numExtraAfter(); i++) { BSONElement e = obj.getFieldDotted(spec.extraAfter(i)); if (e.eoo()) e = nullElt; extrasAfter.push_back(e); extraSize += e.size(); } TermFrequencyMap term_freqs; spec.scoreDocument(obj, &term_freqs); // create index keys from raw scores // only 1 per string uassert(16732, mongolutils::str::stream() << "too many unique keys for a single document to" << " have a text index, max is " << term_freqs.size() << obj["_id"], term_freqs.size() <= 400000); long long keyBSONSize = 0; const int MaxKeyBSONSizeMB = 4; for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) { const string& term = i->first; double weight = i->second; // guess the total size of the btree entry based on the size of the weight, term tuple int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ + /* term size (could be truncated/hashed) */ guessTermSize(term, spec.getTextIndexVersion()) + extraSize; BSONObjBuilder b(guess); // builds a BSON object with guess length. 
for (unsigned k = 0; k < extrasBefore.size(); k++) { b.appendAs(extrasBefore[k], ""); } _appendIndexKey(b, weight, term, spec.getTextIndexVersion()); for (unsigned k = 0; k < extrasAfter.size(); k++) { b.appendAs(extrasAfter[k], ""); } BSONObj res = b.obj(); verify(guess >= res.objsize()); keys->insert(res); keyBSONSize += res.objsize(); uassert(16733, mongolutils::str::stream() << "trying to index text where term list is too big, max is " << MaxKeyBSONSizeMB << "mb " << obj["_id"], keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024)); } }
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb( bufSize ); char * start = bb.buf(); BSONArrayBuilder arr( bb ); BSONElementSet values; long long nscanned = 0; // locations looked at long long nscannedObjects = 0; // full objects looked at long long n = 0; // matches NamespaceDetails * d = nsdetails( ns ); string cursorName; if (!d) { result.appendArray( "values" , BSONObj() ); result.append("stats", BSON("n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0)); return true; } CanonicalQuery* cq; // XXX: project out just the field we're distinct-ing. May be covered... if (!CanonicalQuery::canonicalize(ns, query, &cq).isOK()) { uasserted(17215, "Can't canonicalize query " + query.toString()); return 0; } Runner* rawRunner; if (!getRunner(cq, &rawRunner).isOK()) { uasserted(17216, "Can't get runner for query " + query.toString()); return 0; } auto_ptr<Runner> runner(rawRunner); auto_ptr<DeregisterEvenIfUnderlyingCodeThrows> safety; ClientCursor::registerRunner(runner.get()); runner->setYieldPolicy(Runner::YIELD_AUTO); safety.reset(new DeregisterEvenIfUnderlyingCodeThrows(runner.get())); BSONObj obj; Runner::RunnerState state; while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) { BSONElementSet elts; obj.getFieldsDotted(key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } int currentBufPos = bb.len(); uassert(17217, "distinct too big, 16mb cap", (currentBufPos + elt.size() + 1024) < bufSize); arr.append(elt); BSONElement x(start + currentBufPos); values.insert(x); } } TypeExplain* bareExplain; Status res = runner->getExplainPlan(&bareExplain); if 
(res.isOK()) { auto_ptr<TypeExplain> explain(bareExplain); if (explain->isCursorSet()) { cursorName = explain->getCursor(); } n = explain->getN(); nscanned = explain->getNScanned(); nscannedObjects = explain->getNScannedObjects(); } verify( start == bb.buf() ); result.appendArray( "values" , arr.done() ); { BSONObjBuilder b; b.appendNumber( "n" , n ); b.appendNumber( "nscanned" , nscanned ); b.appendNumber( "nscannedObjects" , nscannedObjects ); b.appendNumber( "timems" , t.millis() ); b.append( "cursor" , cursorName ); result.append( "stats" , b.obj() ); } return true; }
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result) { Timer t; const string ns = parseNs(dbname, cmdObj); AutoGetCollectionForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); auto executor = getPlanExecutor(txn, collection, ns, cmdObj, false); if (!executor.isOK()) { return appendCommandStatus(result, executor.getStatus()); } string key = cmdObj[kKeyField].valuestrsafe(); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb(bufSize); char* start = bb.buf(); BSONArrayBuilder arr(bb); BSONElementSet values; BSONObj obj; PlanExecutor::ExecState state; while (PlanExecutor::ADVANCED == (state = executor.getValue()->getNext(&obj, NULL))) { // Distinct expands arrays. // // If our query is covered, each value of the key should be in the index key and // available to us without this. If a collection scan is providing the data, we may // have to expand an array. BSONElementSet elts; obj.getFieldsDotted(key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } int currentBufPos = bb.len(); uassert(17217, "distinct too big, 16mb cap", (currentBufPos + elt.size() + 1024) < bufSize); arr.append(elt); BSONElement x(start + currentBufPos); values.insert(x); } } // Return an error if execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { const std::unique_ptr<PlanStageStats> stats(executor.getValue()->getStats()); log() << "Plan executor error during distinct command: " << PlanExecutor::statestr(state) << ", stats: " << Explain::statsToBSON(*stats); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during distinct command: " << WorkingSetCommon::toStatusString(obj))); } // Get summary information about the plan. 
PlanSummaryStats stats; Explain::getSummaryStats(*executor.getValue(), &stats); collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed); CurOp::get(txn)->debug().fromMultiPlanner = stats.fromMultiPlanner; CurOp::get(txn)->debug().replanned = stats.replanned; verify(start == bb.buf()); result.appendArray("values", arr.done()); { BSONObjBuilder b; b.appendNumber("n", stats.nReturned); b.appendNumber("nscanned", stats.totalKeysExamined); b.appendNumber("nscannedObjects", stats.totalDocsExamined); b.appendNumber("timems", t.millis()); b.append("planSummary", Explain::getPlanSummary(executor.getValue().get())); result.append("stats", b.obj()); } return true; }
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result) { const string ns = parseNs(dbname, cmdObj); const NamespaceString nss(ns); const ExtensionsCallbackReal extensionsCallback(txn, &nss); auto parsedDistinct = ParsedDistinct::parse(txn, nss, cmdObj, extensionsCallback, false); if (!parsedDistinct.isOK()) { return appendCommandStatus(result, parsedDistinct.getStatus()); } if (!parsedDistinct.getValue().getQuery()->getQueryRequest().getCollation().isEmpty() && serverGlobalParams.featureCompatibility.version.load() == ServerGlobalParams::FeatureCompatibility::Version::k32) { return appendCommandStatus( result, Status(ErrorCodes::InvalidOptions, "The featureCompatibilityVersion must be 3.4 to use collation. See " "http://dochub.mongodb.org/core/3.4-feature-compatibility.")); } AutoGetCollectionOrViewForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); if (ctx.getView()) { ctx.releaseLocksForView(); auto viewAggregation = parsedDistinct.getValue().asAggregationCommand(); if (!viewAggregation.isOK()) { return appendCommandStatus(result, viewAggregation.getStatus()); } BSONObjBuilder aggResult; (void)Command::findCommand("aggregate") ->run(txn, dbname, viewAggregation.getValue(), options, errmsg, aggResult); if (ResolvedView::isResolvedViewErrorResponse(aggResult.asTempObj())) { result.appendElements(aggResult.obj()); return false; } ViewResponseFormatter formatter(aggResult.obj()); Status formatStatus = formatter.appendAsDistinctResponse(&result); if (!formatStatus.isOK()) { return appendCommandStatus(result, formatStatus); } return true; } auto executor = getExecutorDistinct( txn, collection, ns, &parsedDistinct.getValue(), PlanExecutor::YIELD_AUTO); if (!executor.isOK()) { return appendCommandStatus(result, executor.getStatus()); } { stdx::lock_guard<Client>(*txn->getClient()); CurOp::get(txn)->setPlanSummary_inlock( Explain::getPlanSummary(executor.getValue().get())); } string key = 
cmdObj[ParsedDistinct::kKeyField].valuestrsafe(); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb(bufSize); char* start = bb.buf(); BSONArrayBuilder arr(bb); BSONElementSet values(executor.getValue()->getCanonicalQuery()->getCollator()); BSONObj obj; PlanExecutor::ExecState state; while (PlanExecutor::ADVANCED == (state = executor.getValue()->getNext(&obj, NULL))) { // Distinct expands arrays. // // If our query is covered, each value of the key should be in the index key and // available to us without this. If a collection scan is providing the data, we may // have to expand an array. BSONElementSet elts; dps::extractAllElementsAlongPath(obj, key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } int currentBufPos = bb.len(); uassert(17217, "distinct too big, 16mb cap", (currentBufPos + elt.size() + 1024) < bufSize); arr.append(elt); BSONElement x(start + currentBufPos); values.insert(x); } } // Return an error if execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { log() << "Plan executor error during distinct command: " << redact(PlanExecutor::statestr(state)) << ", stats: " << redact(Explain::getWinningPlanStats(executor.getValue().get())); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during distinct command: " << WorkingSetCommon::toStatusString(obj))); } auto curOp = CurOp::get(txn); // Get summary information about the plan. 
PlanSummaryStats stats; Explain::getSummaryStats(*executor.getValue(), &stats); if (collection) { collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed); } curOp->debug().setPlanSummaryMetrics(stats); if (curOp->shouldDBProfile()) { BSONObjBuilder execStatsBob; Explain::getWinningPlanStats(executor.getValue().get(), &execStatsBob); curOp->debug().execStats = execStatsBob.obj(); } verify(start == bb.buf()); result.appendArray("values", arr.done()); return true; }
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb( bufSize ); char * start = bb.buf(); BSONArrayBuilder arr( bb ); BSONElementSet values; long long nscanned = 0; // locations looked at long long nscannedObjects = 0; // full objects looked at long long n = 0; // matches MatchDetails md; NamespaceDetails * d = nsdetails( ns ); if ( ! d ) { result.appendArray( "values" , BSONObj() ); result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) ); return true; } shared_ptr<Cursor> cursor; if ( ! query.isEmpty() ) { cursor = getOptimizedCursor( ns.c_str(), query, BSONObj() ); } else { // query is empty, so lets see if we can find an index // with the key so we don't have to hit the raw data NamespaceDetails::IndexIterator ii = d->ii(); while ( ii.more() ) { IndexDetails& idx = ii.next(); if ( d->isMultikey( ii.pos() - 1 ) ) continue; if ( idx.inKeyPattern( key ) ) { cursor = getBestGuessCursor( ns.c_str(), BSONObj(), idx.keyPattern() ); if( cursor.get() ) break; } } if ( ! 
cursor.get() ) cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() ); } verify( cursor ); string cursorName = cursor->toString(); auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); // map from indexed field to offset in key object map<string, int> indexedFields; if (!cursor->modifiedKeys()) { // store index information so we can decide if we can // get something out of the index key rather than full object int x = 0; BSONObjIterator i( cursor->indexKeyPattern() ); while ( i.more() ) { BSONElement e = i.next(); if ( e.isNumber() ) { // only want basic index fields, not "2d" etc indexedFields[e.fieldName()] = x; } x++; } } while ( cursor->ok() ) { nscanned++; bool loadedRecord = false; if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currLoc() ) ) { n++; BSONObj holder; BSONElementSet temp; // Try to get the record from the key fields. loadedRecord = !getFieldsDotted(indexedFields, cursor, key, temp, holder); for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) { BSONElement e = *i; if ( values.count( e ) ) continue; int now = bb.len(); uassert(10044, "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize ); arr.append( e ); BSONElement x( start + now ); values.insert( x ); } } if ( loadedRecord || md.hasLoadedRecord() ) nscannedObjects++; cursor->advance(); if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) { cc.release(); break; } RARELY killCurrentOp.checkForInterrupt(); } verify( start == bb.buf() ); result.appendArray( "values" , arr.done() ); { BSONObjBuilder b; b.appendNumber( "n" , n ); b.appendNumber( "nscanned" , nscanned ); b.appendNumber( "nscannedObjects" , nscannedObjects ); b.appendNumber( "timems" , t.millis() ); b.append( "cursor" , cursorName ); result.append( "stats" , b.obj() ); } return true; }
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb( bufSize ); char * start = bb.buf(); BSONArrayBuilder arr( bb ); BSONElementSet values; long long nscanned = 0; // locations looked at long long nscannedObjects = 0; // full objects looked at long long n = 0; // matches Collection* collection = cc().database()->getCollection( ns ); if (!collection) { result.appendArray( "values" , BSONObj() ); result.append("stats", BSON("n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0)); return true; } Runner* rawRunner; Status status = getRunnerDistinct(collection, query, key, &rawRunner); if (!status.isOK()) { uasserted(17216, mongoutils::str::stream() << "Can't get runner for query " << query << ": " << status.toString()); return 0; } auto_ptr<Runner> runner(rawRunner); const ScopedRunnerRegistration safety(runner.get()); runner->setYieldPolicy(Runner::YIELD_AUTO); string cursorName; BSONObj obj; Runner::RunnerState state; while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) { // Distinct expands arrays. // // If our query is covered, each value of the key should be in the index key and // available to us without this. If a collection scan is providing the data, we may // have to expand an array. 
BSONElementSet elts; obj.getFieldsDotted(key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } int currentBufPos = bb.len(); uassert(17217, "distinct too big, 16mb cap", (currentBufPos + elt.size() + 1024) < bufSize); arr.append(elt); BSONElement x(start + currentBufPos); values.insert(x); } } TypeExplain* bareExplain; Status res = runner->getInfo(&bareExplain, NULL); if (res.isOK()) { auto_ptr<TypeExplain> explain(bareExplain); if (explain->isCursorSet()) { cursorName = explain->getCursor(); } n = explain->getN(); nscanned = explain->getNScanned(); nscannedObjects = explain->getNScannedObjects(); } verify( start == bb.buf() ); result.appendArray( "values" , arr.done() ); { BSONObjBuilder b; b.appendNumber( "n" , n ); b.appendNumber( "nscanned" , nscanned ); b.appendNumber( "nscannedObjects" , nscannedObjects ); b.appendNumber( "timems" , t.millis() ); b.append( "cursor" , cursorName ); result.append( "stats" , b.obj() ); } return true; }
bool debug( const BSONObj& o , int depth=0) { string prefix = ""; for ( int i=0; i<depth; i++ ) { prefix += "\t\t\t"; } int read = 4; try { cout << prefix << "--- new object ---\n"; cout << prefix << "\t size : " << o.objsize() << "\n"; // Note: this will recursively check each level of the bson and will also be called by // this function at each level. While inefficient, it shouldn't effect correctness. const Status status = validateBSON(o.objdata(), o.objsize()); if (!status.isOK()) { cout << prefix << "\t OBJECT IS INVALID: " << status.reason() << '\n' << prefix << "\t attempting to print as much as possible" << endl; } BSONObjIterator i(o); while ( i.more() ) { // This call verifies it is safe to call size() and fieldName() but doesn't check // whether the element extends past the end of the object. That is done below. BSONElement e = i.next(/*checkEnd=*/true); cout << prefix << "\t\t " << e.fieldName() << "\n" << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() << endl; if ( ( read + e.size() ) > o.objsize() ) { cout << prefix << " SIZE DOES NOT WORK" << endl; return false; } read += e.size(); try { if ( e.isABSONObj() ) { if ( ! debug( e.Obj() , depth + 1 ) ) { //return false; cout << prefix << "\t\t\t BAD BAD BAD" << endl; if ( e.size() < 1000 ) { cout << "---\n" << e.Obj().hexDump() << "\n---" << endl; } } } else if ( e.type() == String && ! isValidUTF8( e.valuestr() ) ) { cout << prefix << "\t\t\t" << "bad utf8 String!" << endl; } else if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ) { cout << prefix << "\t\t\t" << e << endl; } } catch ( std::exception& e ) { cout << prefix << "\t\t\t bad value: " << e.what() << endl; } } } catch ( std::exception& e ) { cout << prefix << "\tbad\t" << e.what() << endl; cout << "----\n" << o.hexDump() << "\n---" << endl; } return true; }
bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb( bufSize ); char * start = bb.buf(); BSONArrayBuilder arr( bb ); BSONElementSet values; long long nscanned = 0; // locations looked at long long nscannedObjects = 0; // full objects looked at long long n = 0; // matches MatchDetails md; NamespaceDetails * d = nsdetails( ns.c_str() ); if ( ! d ) { result.appendArray( "values" , BSONObj() ); result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) ); return true; } shared_ptr<Cursor> cursor; if ( ! query.isEmpty() ) { cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query , BSONObj() ); } else { // query is empty, so lets see if we can find an index // with the key so we don't have to hit the raw data NamespaceDetails::IndexIterator ii = d->ii(); while ( ii.more() ) { IndexDetails& idx = ii.next(); if ( d->isMultikey( ii.pos() - 1 ) ) continue; if ( idx.inKeyPattern( key ) ) { cursor = bestGuessCursor( ns.c_str() , BSONObj() , idx.keyPattern() ); if( cursor.get() ) break; } } if ( ! cursor.get() ) cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query , BSONObj() ); } assert( cursor ); string cursorName = cursor->toString(); auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); while ( cursor->ok() ) { nscanned++; bool loadedObject = false; if ( ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() , &md ) ) && !cursor->getsetdup( cursor->currLoc() ) ) { n++; BSONElementSet temp; loadedObject = ! 
cc->getFieldsDotted( key , temp ); for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) { BSONElement e = *i; if ( values.count( e ) ) continue; int now = bb.len(); uassert(10044, "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize ); arr.append( e ); BSONElement x( start + now ); values.insert( x ); } } if ( loadedObject || md._loadedObject ) nscannedObjects++; cursor->advance(); if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) { cc.release(); break; } RARELY killCurrentOp.checkForInterrupt(); } assert( start == bb.buf() ); result.appendArray( "values" , arr.done() ); { BSONObjBuilder b; b.appendNumber( "n" , n ); b.appendNumber( "nscanned" , nscanned ); b.appendNumber( "nscannedObjects" , nscannedObjects ); b.appendNumber( "timems" , t.millis() ); b.append( "cursor" , cursorName ); result.append( "stats" , b.obj() ); } return true; }
/** * actually applies a reduce, to a list of tuples (key, value). * After the call, tuples will hold a single tuple {"0": key, "1": value} */ void JSReducer::_reduce( const BSONList& tuples , BSONObj& key , int& endSizeEstimate ) { uassert( 10074 , "need values" , tuples.size() ); int sizeEstimate = ( tuples.size() * tuples.begin()->getField( "value" ).size() ) + 128; // need to build the reduce args: ( key, [values] ) BSONObjBuilder reduceArgs( sizeEstimate ); boost::scoped_ptr<BSONArrayBuilder> valueBuilder; int sizeSoFar = 0; unsigned n = 0; for ( ; n<tuples.size(); n++ ) { BSONObjIterator j(tuples[n]); BSONElement keyE = j.next(); if ( n == 0 ) { reduceArgs.append( keyE ); key = keyE.wrap(); sizeSoFar = 5 + keyE.size(); valueBuilder.reset(new BSONArrayBuilder( reduceArgs.subarrayStart( "tuples" ) )); } BSONElement ee = j.next(); uassert( 13070 , "value too large to reduce" , ee.size() < ( BSONObjMaxUserSize / 2 ) ); if ( sizeSoFar + ee.size() > BSONObjMaxUserSize ) { assert( n > 1 ); // if not, inf. loop break; } valueBuilder->append( ee ); sizeSoFar += ee.size(); } assert(valueBuilder); valueBuilder->done(); BSONObj args = reduceArgs.obj(); Scope * s = _func.scope(); s->invokeSafe( _func.func() , args ); if ( s->type( "return" ) == Array ) { uasserted( 10075 , "reduce -> multiple not supported yet"); return; } endSizeEstimate = key.objsize() + ( args.objsize() / tuples.size() ); if ( n == tuples.size() ) return; // the input list was too large, add the rest of elmts to new tuples and reduce again // note: would be better to use loop instead of recursion to avoid stack overflow BSONList x; for ( ; n < tuples.size(); n++ ) { x.push_back( tuples[n] ); } BSONObjBuilder temp( endSizeEstimate ); temp.append( key.firstElement() ); s->append( temp , "1" , "return" ); x.push_back( temp.obj() ); _reduce( x , key , endSizeEstimate ); }
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb( bufSize ); char * start = bb.buf(); BSONArrayBuilder arr( bb ); BSONElementSet values; long long nscanned = 0; // locations looked at long long nscannedObjects = 0; // full objects looked at long long n = 0; // matches MatchDetails md; Collection *cl = getCollection( ns ); if ( ! cl ) { result.appendArray( "values" , BSONObj() ); result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) ); return true; } shared_ptr<Cursor> cursor; if ( ! query.isEmpty() ) { cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() ); } else { // query is empty, so lets see if we can find an index // with the key so we don't have to hit the raw data for (int i = 0; i < cl->nIndexes(); i++) { IndexDetails &idx = cl->idx(i); if (cl->isMultikey(i)) { continue; } if ( idx.inKeyPattern( key ) ) { cursor = getBestGuessCursor( ns.c_str() , BSONObj() , idx.keyPattern() ); if( cursor.get() ) break; } } if ( ! cursor.get() ) { cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() ); } } verify( cursor ); string cursorName = cursor->toString(); auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); for ( ; cursor->ok(); cursor->advance() ) { nscanned++; bool loadedRecord = false; if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currPK() ) ) { n++; BSONObj holder; BSONElementSet temp; loadedRecord = ! 
cc->getFieldsDotted( key , temp, holder ); for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) { BSONElement e = *i; if ( values.count( e ) ) continue; int now = bb.len(); uassert(10044, "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize ); arr.append( e ); BSONElement x( start + now ); values.insert( x ); } } if ( loadedRecord || md.hasLoadedRecord() ) nscannedObjects++; RARELY killCurrentOp.checkForInterrupt(); } verify( start == bb.buf() ); result.appendArray( "values" , arr.done() ); { BSONObjBuilder b; b.appendNumber( "n" , n ); b.appendNumber( "nscanned" , nscanned ); b.appendNumber( "nscannedObjects" , nscannedObjects ); b.appendNumber( "timems" , t.millis() ); b.append( "cursor" , cursorName ); result.append( "stats" , b.obj() ); } return true; }
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result) { Timer t; // ensure that the key is a string uassert(18510, mongoutils::str::stream() << "The first argument to the distinct command " << "must be a string but was a " << typeName(cmdObj["key"].type()), cmdObj["key"].type() == mongo::String); // ensure that the where clause is a document if (cmdObj["query"].isNull() == false && cmdObj["query"].eoo() == false) { uassert(18511, mongoutils::str::stream() << "The query for the distinct command must be a " << "document but was a " << typeName(cmdObj["query"].type()), cmdObj["query"].type() == mongo::Object); } string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON(key << 1); BSONObj query = getQuery(cmdObj); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb(bufSize); char* start = bb.buf(); BSONArrayBuilder arr(bb); BSONElementSet values; const string ns = parseNs(dbname, cmdObj); AutoGetCollectionForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); if (!collection) { result.appendArray("values", BSONObj()); result.append("stats", BSON("n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0)); return true; } auto statusWithPlanExecutor = getExecutorDistinct(txn, collection, query, key, PlanExecutor::YIELD_AUTO); if (!statusWithPlanExecutor.isOK()) { uasserted(17216, mongoutils::str::stream() << "Can't get executor for query " << query << ": " << statusWithPlanExecutor.getStatus().toString()); return 0; } unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); BSONObj obj; PlanExecutor::ExecState state; while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // Distinct expands arrays. // // If our query is covered, each value of the key should be in the index key and // available to us without this. If a collection scan is providing the data, we may // have to expand an array. 
BSONElementSet elts; obj.getFieldsDotted(key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } int currentBufPos = bb.len(); uassert(17217, "distinct too big, 16mb cap", (currentBufPos + elt.size() + 1024) < bufSize); arr.append(elt); BSONElement x(start + currentBufPos); values.insert(x); } } // Get summary information about the plan. PlanSummaryStats stats; Explain::getSummaryStats(*exec, &stats); verify(start == bb.buf()); result.appendArray("values", arr.done()); { BSONObjBuilder b; b.appendNumber("n", stats.nReturned); b.appendNumber("nscanned", stats.totalKeysExamined); b.appendNumber("nscannedObjects", stats.totalDocsExamined); b.appendNumber("timems", t.millis()); b.append("planSummary", Explain::getPlanSummary(exec.get())); result.append("stats", b.obj()); } return true; }