// Renames a collection and fixes up every index spec in system.indexes that
// referenced the old namespace, also moving each index's underlying namespace.
// The collection's own namespace is renamed first via _renameSingleNamespace.
Status MMAPV1DatabaseCatalogEntry::renameCollection(OperationContext* txn,
                                                    StringData fromNS,
                                                    StringData toNS,
                                                    bool stayTemp) {
    Status s = _renameSingleNamespace(txn, fromNS, toNS, stayTemp);
    if (!s.isOK())
        return s;

    NamespaceDetails* details = _namespaceIndex.details(toNS);
    invariant(details);

    RecordStoreV1Base* systemIndexRecordStore = _getIndexRecordStore();
    auto cursor = systemIndexRecordStore->getCursor(txn);
    while (auto record = cursor->next()) {
        BSONObj oldIndexSpec = record->data.releaseToBson();
        // Skip specs that belong to other collections.
        if (fromNS != oldIndexSpec["ns"].valuestrsafe())
            continue;

        // Build a copy of the spec with its "ns" field rewritten to the new
        // namespace; every other field is carried over unchanged.
        BSONObj newIndexSpec;
        {
            BSONObjBuilder b;
            BSONObjIterator i(oldIndexSpec);
            while (i.more()) {
                BSONElement e = i.next();
                if (strcmp(e.fieldName(), "ns") != 0)
                    b.append(e);
                else
                    b << "ns" << toNS;
            }
            newIndexSpec = b.obj();
        }

        // Insert the rewritten spec before deleting the old one, so a failure
        // here leaves the old record intact.
        StatusWith<RecordId> newIndexSpecLoc = systemIndexRecordStore->insertRecord(
            txn, newIndexSpec.objdata(), newIndexSpec.objsize(), false);
        if (!newIndexSpecLoc.isOK())
            return newIndexSpecLoc.getStatus();

        const std::string& indexName = oldIndexSpec.getStringField("name");

        {
            // Fix the IndexDetails pointer (via the recovery unit so the
            // write is journaled/rollback-aware).
            int indexI = getCollectionCatalogEntry(toNS)->_findIndexNumber(txn, indexName);

            IndexDetails& indexDetails = details->idx(indexI);
            *txn->recoveryUnit()->writing(&indexDetails.info) =
                DiskLoc::fromRecordId(newIndexSpecLoc.getValue());
        }

        {
            // Move the underlying namespace.
            std::string oldIndexNs = IndexDescriptor::makeIndexNamespace(fromNS, indexName);
            std::string newIndexNs = IndexDescriptor::makeIndexNamespace(toNS, indexName);

            Status s = _renameSingleNamespace(txn, oldIndexNs, newIndexNs, false);
            if (!s.isOK())
                return s;
        }

        // Now that the new spec is fully wired up, drop the stale record.
        systemIndexRecordStore->deleteRecord(txn, record->id);
    }

    return Status::OK();
}
// Gather static host information (address size, core count, page size,
// physical memory, and OS name/version) into member fields plus the
// _extraStats BSON document.  Windows-only implementation.
void ProcessInfo::SystemInfo::collectSystemInfo() {
    BSONObjBuilder bExtra;
    stringstream verstr;
    OSVERSIONINFOEX osvi;   // os version
    MEMORYSTATUSEX mse;     // memory stats
    SYSTEM_INFO ntsysinfo;  // system stats

    // get basic processor properties
    GetNativeSystemInfo(&ntsysinfo);
    addrSize = (ntsysinfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64 ? 64 : 32);
    numCores = ntsysinfo.dwNumberOfProcessors;
    pageSize = static_cast<unsigned long long>(ntsysinfo.dwPageSize);
    bExtra.append("pageSize", static_cast<long long>(pageSize));

    // get memory info
    mse.dwLength = sizeof(mse);
    if (GlobalMemoryStatusEx(&mse)) {
        memSize = mse.ullTotalPhys;
    }

    // get OS version info
    ZeroMemory(&osvi, sizeof(osvi));
    osvi.dwOSVersionInfoSize = sizeof(osvi);
    if (GetVersionEx((OSVERSIONINFO*)&osvi)) {
        verstr << osvi.dwMajorVersion << "." << osvi.dwMinorVersion;
        if (osvi.wServicePackMajor)
            verstr << " SP" << osvi.wServicePackMajor;
        verstr << " (build " << osvi.dwBuildNumber << ")";

        // Map (major, minor, product type) to a marketing name.
        osName = "Microsoft ";
        switch (osvi.dwMajorVersion) {
            case 6:
                switch (osvi.dwMinorVersion) {
                    case 3:
                        if (osvi.wProductType == VER_NT_WORKSTATION)
                            osName += "Windows 8.1";
                        else
                            osName += "Windows Server 2012 R2";
                        break;
                    case 2:
                        if (osvi.wProductType == VER_NT_WORKSTATION)
                            osName += "Windows 8";
                        else
                            osName += "Windows Server 2012";
                        break;
                    case 1:
                        if (osvi.wProductType == VER_NT_WORKSTATION)
                            osName += "Windows 7";
                        else {
                            osName += "Windows Server 2008 R2";

                            // Windows 6.1 is either Windows 7 or Windows 2008 R2. There is no SP2
                            // for either of these two operating systems, but the check will hold
                            // if one were released. This code assumes that SP2 will include fix
                            // for http://support.microsoft.com/kb/2731284.
                            // if ((osvi.wServicePackMajor >= 0) && (osvi.wServicePackMajor < 2)) {
                            if (isKB2731284OrLaterUpdateInstalled()) {
                                log() << "Hotfix KB2731284 or later update is installed, no need "
                                         "to zero-out data files";
                                fileZeroNeeded = false;
                            } else {
                                log() << "Hotfix KB2731284 or later update is not installed, will "
                                         "zero-out data files";
                                fileZeroNeeded = true;
                            }
                        }
                        break;
                    case 0:
                        if (osvi.wProductType == VER_NT_WORKSTATION)
                            osName += "Windows Vista";
                        else
                            osName += "Windows Server 2008";
                        break;
                    default:
                        osName += "Windows NT version ";
                        osName += verstr.str();
                        break;
                }
                break;
            case 5:
                switch (osvi.dwMinorVersion) {
                    case 2:
                        osName += "Windows Server 2003";
                        break;
                    case 1:
                        osName += "Windows XP";
                        break;
                    case 0:
                        if (osvi.wProductType == VER_NT_WORKSTATION)
                            osName += "Windows 2000 Professional";
                        else
                            osName += "Windows 2000 Server";
                        break;
                    default:
                        osName += "Windows NT version ";
                        osName += verstr.str();
                        break;
                }
                break;
        }
    } else {
        // unable to get any version data
        osName += "Windows NT";
    }

    if (ntsysinfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) {
        cpuArch = "x86_64";
    } else if (ntsysinfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_INTEL) {
        cpuArch = "x86";
    } else if (ntsysinfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_IA64) {
        cpuArch = "ia64";
    } else {
        cpuArch = "unknown";
    }

    osType = "Windows";
    osVersion = verstr.str();
    hasNuma = checkNumaEnabled();
    _extraStats = bExtra.obj();
    // Lazily initialize the process-wide PSAPI wrapper used by other queries.
    if (psapiGlobal == NULL) {
        psapiGlobal = new PsApiInit();
    }
}
// Render one operation's diagnostics as the single-line string used for
// slow-operation logging: op or command name, (redacted) query, per-op
// counters, exception info, lock stats, and total execution time.
string OpDebug::report(const CurOp& curop, const SingleThreadedLockStats& lockStats) const {
    StringBuilder s;
    if ( iscommand )
        s << "command ";
    else
        s << opToString( op ) << ' ';
    s << ns;

    if ( ! query.isEmpty() ) {
        if ( iscommand ) {
            s << " command: ";

            Command* curCommand = curop.getCommand();
            if (curCommand) {
                // Give the command a chance to scrub sensitive fields before
                // the text is written to the log.
                mutablebson::Document cmdToLog(query, mutablebson::Document::kInPlaceDisabled);
                curCommand->redactForLogging(&cmdToLog);
                s << curCommand->name << " ";
                s << cmdToLog.toString();
            }
            else {
                // Should not happen but we need to handle curCommand == NULL gracefully
                s << query.toString();
            }
        }
        else {
            s << " query: ";
            s << query.toString();
        }
    }

    if (!planSummary.empty()) {
        s << " planSummary: " << planSummary.toString();
    }

    if ( ! updateobj.isEmpty() ) {
        s << " update: ";
        updateobj.toString( s );
    }

    // Per-operation counters, appended via helper macros (macro definitions
    // are not visible here).
    OPDEBUG_TOSTRING_HELP( cursorid );
    OPDEBUG_TOSTRING_HELP( ntoreturn );
    OPDEBUG_TOSTRING_HELP( ntoskip );
    OPDEBUG_TOSTRING_HELP_BOOL( exhaust );

    OPDEBUG_TOSTRING_HELP( nscanned );
    OPDEBUG_TOSTRING_HELP( nscannedObjects );
    OPDEBUG_TOSTRING_HELP_BOOL( idhack );
    OPDEBUG_TOSTRING_HELP_BOOL( scanAndOrder );
    OPDEBUG_TOSTRING_HELP( nmoved );
    OPDEBUG_TOSTRING_HELP( nMatched );
    OPDEBUG_TOSTRING_HELP( nModified );
    OPDEBUG_TOSTRING_HELP( ninserted );
    OPDEBUG_TOSTRING_HELP( ndeleted );
    OPDEBUG_TOSTRING_HELP_BOOL( fastmod );
    OPDEBUG_TOSTRING_HELP_BOOL( fastmodinsert );
    OPDEBUG_TOSTRING_HELP_BOOL( upsert );
    OPDEBUG_TOSTRING_HELP_BOOL( cursorExhausted );
    OPDEBUG_TOSTRING_HELP( keyUpdates );
    OPDEBUG_TOSTRING_HELP( writeConflicts );

    if ( extra.len() )
        s << " " << extra.str();

    if ( ! exceptionInfo.empty() ) {
        s << " exception: " << exceptionInfo.msg;
        if ( exceptionInfo.code )
            s << " code:" << exceptionInfo.code;
    }

    s << " numYields:" << curop.numYields();

    OPDEBUG_TOSTRING_HELP( nreturned );
    if (responseLength > 0) {
        s << " reslen:" << responseLength;
    }

    {
        BSONObjBuilder locks;
        lockStats.report(&locks);
        s << " locks:" << locks.obj().toString();
    }

    s << " " << executionTime << "ms";

    return s.str();
}
// Report the sort order(s) this $group guarantees on its output.  Only a
// streaming group (input already sorted by the _id fields) or a group that
// spilled to disk (read back in _id order) can promise anything; a purely
// in-memory blocking group returns an empty set.
BSONObjSet DocumentSourceGroup::getOutputSorts() {
    if (!_initialized) {
        initialize();
    }

    if (!(_streaming || _spilled)) {
        return BSONObjSet();
    }

    BSONObjBuilder sortOrder;

    if (_idFieldNames.empty()) {
        if (_spilled) {
            // Spilled output is produced in ascending _id order.
            sortOrder.append("_id", 1);
        } else {
            // We have an expression like {_id: "$a"}. Check if this is a FieldPath, and if it is,
            // get the sort order out of it.
            if (auto obj = dynamic_cast<ExpressionFieldPath*>(_idExpressions[0].get())) {
                FieldPath _idSort = obj->getFieldPath();
                sortOrder.append(
                    "_id",
                    _inputSort.getIntField(_idSort.getFieldName(_idSort.getPathLength() - 1)));
            }
        }
    } else if (_streaming) {
        // At this point, we know that _streaming is true, so _id must have only contained
        // ExpressionObjects, ExpressionConstants or ExpressionFieldPaths. We now process each
        // '_idExpression'.

        // We populate 'fieldMap' such that each key is a field the input is sorted by, and the
        // value is where that input field is located within the _id document. For example, if our
        // _id object is {_id: {x: {y: "$a.b"}}}, 'fieldMap' would be: {'a.b': '_id.x.y'}.
        StringMap<std::string> fieldMap;
        for (size_t i = 0; i < _idFieldNames.size(); i++) {
            intrusive_ptr<Expression> exp = _idExpressions[i];
            if (auto obj = dynamic_cast<ExpressionObject*>(exp.get())) {
                // _id is an object containing a nested document, such as: {_id: {x: {y: "$b"}}}.
                getFieldPathMap(obj, "_id." + _idFieldNames[i], &fieldMap);
            } else if (auto fieldPath = dynamic_cast<ExpressionFieldPath*>(exp.get())) {
                FieldPath _idSort = fieldPath->getFieldPath();
                fieldMap[_idSort.getFieldName(_idSort.getPathLength() - 1)] =
                    "_id." + _idFieldNames[i];
            }
        }

        // Because the order of '_inputSort' is important, we go through each field we are sorted on
        // and append it to the BSONObjBuilder in order.
        for (BSONElement sortField : _inputSort) {
            std::string sortString = sortField.fieldNameStringData().toString();

            auto itr = fieldMap.find(sortString);

            // If our sort order is (a, b, c), we could not have converted to a streaming $group if
            // our _id was predicated on (a, c) but not 'b'. Verify that this is true.
            invariant(itr != fieldMap.end());

            sortOrder.append(itr->second, _inputSort.getIntField(sortString));
        }
    } else {
        // We are blocking and have spilled to disk.
        std::vector<std::string> outputSort;
        for (size_t i = 0; i < _idFieldNames.size(); i++) {
            intrusive_ptr<Expression> exp = _idExpressions[i];
            if (auto obj = dynamic_cast<ExpressionObject*>(exp.get())) {
                // _id is an object containing a nested document, such as: {_id: {x: {y: "$b"}}}.
                getFieldPathListForSpilled(obj, "_id." + _idFieldNames[i], &outputSort);
            } else {
                outputSort.push_back("_id." + _idFieldNames[i]);
            }
        }
        for (auto&& field : outputSort) {
            sortOrder.append(field, 1);
        }
    }

    // Every prefix of a guaranteed sort pattern is itself guaranteed.
    return allPrefixes(sortOrder.obj());
}
// reIndex command body: collects the collection's index specs from
// system.indexes, drops all indexes, then rebuilds each one in the
// foreground under an exclusive database write lock.
bool run(OperationContext* txn, const string& dbname , BSONObj& jsobj, int, string& errmsg,
         BSONObjBuilder& result, bool /*fromRepl*/) {
    DBDirectClient db;

    BSONElement e = jsobj.firstElement();
    string toDeleteNs = dbname + '.' + e.valuestr();

    LOG(0) << "CMD: reIndex " << toDeleteNs << endl;

    Lock::DBWrite dbXLock(txn->lockState(), dbname);
    Client::Context ctx(toDeleteNs);

    Collection* collection = ctx.db()->getCollection( txn, toDeleteNs );

    if ( !collection ) {
        errmsg = "ns not found";
        return false;
    }

    BackgroundOperation::assertNoBgOpInProgForNs( toDeleteNs );

    // Halt any in-flight background builds; they are restored at the end.
    std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, ctx.db(), jsobj);

    list<BSONObj> all;
    auto_ptr<DBClientCursor> i = db.query( dbname + ".system.indexes" ,
                                           BSON( "ns" << toDeleteNs ) , 0 , 0 , 0 ,
                                           QueryOption_SlaveOk );
    BSONObjBuilder b;
    while ( i->more() ) {
        // Strip the "v" (index version) field so each index is rebuilt with
        // the current default version.
        const BSONObj spec = i->next().removeField("v").getOwned();
        const BSONObj key = spec.getObjectField("key");
        // Validate every key pattern before dropping anything, so a bad spec
        // cannot leave the collection without its indexes.
        const Status keyStatus = validateKeyPattern(key);
        if (!keyStatus.isOK()) {
            errmsg = str::stream()
                << "Cannot rebuild index " << spec << ": " << keyStatus.reason()
                << " For more info see http://dochub.mongodb.org/core/index-validation";
            return false;
        }

        b.append( BSONObjBuilder::numStr( all.size() ) , spec );
        all.push_back( spec );
    }

    result.appendNumber( "nIndexesWas", collection->getIndexCatalog()->numIndexesTotal() );

    Status s = collection->getIndexCatalog()->dropAllIndexes(txn, true);
    if ( !s.isOK() ) {
        errmsg = "dropIndexes failed";
        return appendCommandStatus( result, s );
    }

    for ( list<BSONObj>::iterator i=all.begin(); i!=all.end(); i++ ) {
        BSONObj o = *i;
        LOG(1) << "reIndex ns: " << toDeleteNs << " index: " << o << endl;
        Status s = collection->getIndexCatalog()->createIndex(txn, o, false);
        if ( !s.isOK() )
            return appendCommandStatus( result, s );
    }

    result.append( "nIndexes" , (int)all.size() );
    result.appendArray( "indexes" , b.obj() );

    IndexBuilder::restoreIndexes(indexesInProg);
    return true;
}
// geoSearch (haystack) implementation: scans the (2*scale+1)^2 grid of
// buckets around the query point, where scale = ceil(maxDistance/_bucketSize),
// collecting up to 'limit' documents within maxDistance into 'result', along
// with a "stats" sub-document (time, btreeMatches, n).
void searchCommand(NamespaceDetails* nsd,
                   const BSONObj& n /*near*/, double maxDistance, const BSONObj& search,
                   BSONObjBuilder& result, unsigned limit) {
    Timer t;

    LOG(1) << "SEARCH near:" << n << " maxDistance:" << maxDistance
           << " search: " << search << endl;
    int x, y;
    {
        // Hash the query point's two coordinates into bucket coordinates.
        BSONObjIterator i(n);
        x = hash(i.next());
        y = hash(i.next());
    }
    int scale = static_cast<int>(ceil(maxDistance / _bucketSize));

    GeoHaystackSearchHopper hopper(n, maxDistance, limit, _geoField);

    long long btreeMatches = 0;

    // TODO(hk): Consider starting with a (or b)=0, then going to a=+-1, then a=+-2, etc.
    // Would want a HaystackKeyIterator or similar for this, but it'd be a nice
    // encapsulation allowing us to S2-ify this trivially/abstract the key details.
    for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) {
        for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) {
            // Build the index key for this bucket: the hashed cell string
            // followed by the values of the non-geo indexed fields.
            BSONObjBuilder bb;
            bb.append("", makeString(x + a, y + b));

            for (unsigned i = 0; i < _otherFields.size(); i++) {
                // See if the non-geo field we're indexing on is in the provided search term.
                BSONElement e = search.getFieldDotted(_otherFields[i]);
                if (e.eoo())
                    bb.appendNull("");
                else
                    bb.appendAs(e, "");
            }

            BSONObj key = bb.obj();

            GEOQUADDEBUG("KEY: " << key);

            // TODO(hk): this keeps a set of all DiskLoc seen in this pass so that we don't
            // consider the element twice.  Do we want to instead store a hash of the set?
            // Is this often big?
            set<DiskLoc> thisPass;

            // Lookup from key to key, inclusive.
            scoped_ptr<BtreeCursor> cursor(BtreeCursor::make(nsd, *getDetails(), key, key, true, 1));
            while (cursor->ok() && !hopper.limitReached()) {
                pair<set<DiskLoc>::iterator, bool> p = thisPass.insert(cursor->currLoc());
                // If a new element was inserted (haven't seen the DiskLoc before), p.second
                // is true.
                if (p.second) {
                    hopper.consider(cursor->currLoc());
                    GEOQUADDEBUG("\t" << cursor->current());
                    btreeMatches++;
                }
                cursor->advance();
            }
        }
    }

    BSONArrayBuilder arr(result.subarrayStart("results"));
    int num = hopper.appendResultsTo(&arr);
    arr.done();

    {
        BSONObjBuilder b(result.subobjStart("stats"));
        b.append("time", t.millis());
        b.appendNumber("btreeMatches", btreeMatches);
        b.append("n", num);
        b.done();
    }
}
// Batch callback for the cloner: inserts each document from the remote
// cursor into the local collection, queuing index specs for later creation
// and logging progress roughly once a minute.
void operator()(DBClientCursorBatchIterator &i) {
    const string to_dbname = nsToDatabase(to_collection);
    while (i.moreInCurrentBatch()) {
        // Every 128 documents, maybe log progress and honor interruption.
        if (n % 128 == 127) {
            time_t now = time(0);
            if (now - lastLog >= 60) {
                // report progress
                if (lastLog) {
                    log() << "clone " << to_collection << ' ' << n << endl;
                }
                lastLog = now;
            }
            mayInterrupt(_mayBeInterrupted);
        }

        BSONObj js = i.nextSafe();
        ++n;

        if (isindex) {
            // Index specs are rewritten for the target db and stored for
            // later creation instead of being inserted now.
            verify(nsToCollectionSubstring(from_collection) == "system.indexes");
            storedForLater->push_back(fixindex(js, to_dbname).getOwned());
        }
        else {
            try {
                LOCK_REASON(lockReason, "cloner: copying documents into local collection");
                Client::ReadContext ctx(to_collection, lockReason);
                if (_isCapped) {
                    // Capped collections carry their primary key in the "$_"
                    // field; strip it from the row and insert using that PK.
                    Collection *cl = getCollection(to_collection);
                    verify(cl->isCapped());
                    BSONObj pk = js["$_"].Obj();
                    BSONObjBuilder rowBuilder;
                    BSONObjIterator it(js);
                    while (it.moreWithEOO()) {
                        BSONElement e = it.next();
                        if (e.eoo()) {
                            break;
                        }
                        if (!mongoutils::str::equals(e.fieldName(), "$_")) {
                            rowBuilder.append(e);
                        }
                    }
                    BSONObj row = rowBuilder.obj();
                    CappedCollection *cappedCl = cl->as<CappedCollection>();
                    bool indexBitChanged = false;
                    cappedCl->insertObjectWithPK(pk, row, Collection::NO_LOCKTREE, &indexBitChanged);
                    // Hack copied from Collection::insertObject. TODO: find a better way to do this
                    if (indexBitChanged) {
                        cl->noteMultiKeyChanged();
                    }
                }
                else {
                    insertObject(to_collection, js, 0, logForRepl);
                }
            }
            catch (UserException& e) {
                // Log the offending document, then rethrow to abort the clone.
                error() << "error: exception cloning object in " << from_collection << ' '
                        << e.what() << " obj:" << js.toString() << '\n';
                throw;
            }
            RARELY if ( time( 0 ) - saveLast > 60 ) {
                log() << n << " objects cloned so far from collection " << from_collection << endl;
                saveLast = time( 0 );
            }
        }
    }
}
// Serialize this model into a freshly built BSON object.
BSONObj Model::toObject() {
    BSONObjBuilder builder;
    serialize( builder );
    return builder.obj();
}
// Persist this model: insert with a freshly generated OID when no _id is
// known yet, otherwise upsert by _id.  Throws 13121 if serialize() produced
// an _id that disagrees with the cached one, and 9003 (when 'safe') if the
// server reported a write error.
void Model::save( bool safe ){
    ScopedDbConnection conn( modelServer() );

    BSONObjBuilder b;
    serialize( b );

    // Look for an _id emitted by serialize().
    BSONElement myId;
    {
        BSONObjIterator i = b.iterator();
        while ( i.more() ){
            BSONElement e = i.next();
            if ( strcmp( e.fieldName() , "_id" ) == 0 ){
                myId = e;
                break;
            }
        }
    }

    if ( myId.type() ){
        if ( _id.isEmpty() ){
            _id = myId.wrap();
        }
        else if ( myId.woCompare( _id.firstElement() ) ){
            // serialize() and the cached _id disagree -- refuse to save.
            stringstream ss;
            ss << "_id from serialize and stored differ: ";
            ss << "[" << myId << "] != ";
            ss << "[" << _id.firstElement() << "]";
            throw UserException( 13121 , ss.str() );
        }
    }

    if ( _id.isEmpty() ){
        // First save: generate an OID, insert, and remember the new _id.
        OID oid;
        oid.init();
        b.appendOID( "_id" , &oid );

        BSONObj o = b.obj();
        conn->insert( getNS() , o );
        _id = o["_id"].wrap().getOwned();

        log(4) << "inserted new model " << getNS() << " " << o << endl;
    }
    else {
        if ( myId.eoo() ){
            // serialize() omitted _id; add the cached one so the update
            // document is complete.
            myId = _id["_id"];
            b.append( myId );
        }

        assert( ! myId.eoo() );

        // Upsert by _id.
        BSONObjBuilder qb;
        qb.append( myId );

        BSONObj q = qb.obj();
        BSONObj o = b.obj();

        log(4) << "updated model" << getNS() << " " << q << " " << o << endl;

        conn->update( getNS() , q , o , true );
    }

    string errmsg = "";
    if ( safe )
        errmsg = conn->getLastError();

    conn.done();

    if ( safe && errmsg.size() )
        throw UserException( 9003 , (string)"error on Model::save: " + errmsg );
}
// renameCollection command body.  Same-database renames are delegated to
// Database::renameCollection; cross-database renames copy all documents and
// indexes into a newly created target collection and then drop the source.
// Any failure after index builds were stopped restores them on the source.
virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg,
                 BSONObjBuilder& result, bool fromRepl) {
    string source = cmdObj.getStringField( name.c_str() );
    string target = cmdObj.getStringField( "to" );

    if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
        errmsg = "invalid collection name: " + target;
        return false;
    }
    if ( source.empty() || target.empty() ) {
        errmsg = "invalid command syntax";
        return false;
    }

    if (!fromRepl) { // If it got through on the master, need to allow it here too
        Status sourceStatus = userAllowedWriteNS(source);
        if (!sourceStatus.isOK()) {
            errmsg = "error with source namespace: " + sourceStatus.reason();
            return false;
        }

        Status targetStatus = userAllowedWriteNS(target);
        if (!targetStatus.isOK()) {
            errmsg = "error with target namespace: " + targetStatus.reason();
            return false;
        }
    }

    string sourceDB = nsToDatabase(source);
    string targetDB = nsToDatabase(target);

    bool capped = false;
    long long size = 0;
    std::vector<BSONObj> indexesInProg;

    {
        Client::Context srcCtx( source );
        Collection* sourceColl = srcCtx.db()->getCollection( source );

        if ( !sourceColl ) {
            errmsg = "source namespace does not exist";
            return false;
        }

        // Ensure that collection name does not exceed maximum length.
        // Ensure that index names do not push the length over the max.
        // Iterator includes unfinished indexes.
        IndexCatalog::IndexIterator sourceIndIt =
            sourceColl->getIndexCatalog()->getIndexIterator( true );
        int longestIndexNameLength = 0;
        while ( sourceIndIt.more() ) {
            int thisLength = sourceIndIt.next()->indexName().length();
            if ( thisLength > longestIndexNameLength )
                longestIndexNameLength = thisLength;
        }

        unsigned int longestAllowed = min(int(Namespace::MaxNsColletionLen),
                                          int(Namespace::MaxNsLen) - 2/*strlen(".$")*/ -
                                              longestIndexNameLength);
        if (target.size() > longestAllowed) {
            StringBuilder sb;
            sb << "collection name length of " << target.size()
               << " exceeds maximum length of " << longestAllowed
               << ", allowing for index names";
            errmsg = sb.str();
            return false;
        }

        {
            const NamespaceDetails *nsd = nsdetails( source );
            indexesInProg = stopIndexBuilds( dbname, cmdObj );
            capped = nsd->isCapped();
            if ( capped )
                // Total the extent sizes so a capped target of equal
                // capacity can be created.
                for( DiskLoc i = nsd->firstExtent(); !i.isNull(); i = i.ext()->xnext )
                    size += i.ext()->length;
        }
    }

    {
        Client::Context ctx( target );

        // Check if the target namespace exists and if dropTarget is true.
        // If target exists and dropTarget is not true, return false.
        if ( ctx.db()->getCollection( target ) ) {
            if ( !cmdObj["dropTarget"].trueValue() ) {
                errmsg = "target namespace exists";
                return false;
            }

            Status s = cc().database()->dropCollection( target );
            if ( !s.isOK() ) {
                errmsg = s.toString();
                restoreIndexBuildsOnSource( indexesInProg, source );
                return false;
            }
        }

        // If we are renaming in the same database, just
        // rename the namespace and we're done.
        if ( sourceDB == targetDB ) {
            Status s = ctx.db()->renameCollection( source, target,
                                                   cmdObj["stayTemp"].trueValue() );
            if ( !s.isOK() ) {
                errmsg = s.toString();
                restoreIndexBuildsOnSource( indexesInProg, source );
                return false;
            }
            return true;
        }

        // Otherwise, we are renaming across databases, so we must copy all
        // the data and then remove the source collection.

        // Create the target collection.
        Collection* targetColl = NULL;
        if ( capped ) {
            BSONObjBuilder spec;
            spec.appendBool( "capped", true );
            spec.append( "size", double( size ) );
            spec.appendBool( "autoIndexId", false );
            userCreateNS( target.c_str(), spec.obj(), errmsg, false );
            targetColl = ctx.db()->getCollection( target );
        }
        else {
            CollectionOptions options;
            options.setNoIdIndex();
            // No logOp necessary because the entire renameCollection command is one logOp.
            targetColl = ctx.db()->createCollection( target, options );
        }
        if ( !targetColl ) {
            errmsg = "Failed to create target collection.";
            restoreIndexBuildsOnSource( indexesInProg, source );
            return false;
        }
    }

    // Copy over all the data from source collection to target collection.
    bool insertSuccessful = true;
    boost::scoped_ptr<CollectionIterator> sourceIt;

    {
        Client::Context srcCtx( source );
        Collection* sourceColl = srcCtx.db()->getCollection( source );
        sourceIt.reset( sourceColl->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) );
    }

    Collection* targetColl = NULL;
    while ( !sourceIt->isEOF() ) {
        BSONObj o;
        {
            // Read each document under the source database's context...
            Client::Context srcCtx( source );
            o = sourceIt->getNext().obj();
        }
        // Insert and check return status of insert.
        {
            // ...and insert it under the target database's context.
            Client::Context ctx( target );
            if ( !targetColl )
                targetColl = ctx.db()->getCollection( target );
            // No logOp necessary because the entire renameCollection command is one logOp.
            Status s = targetColl->insertDocument( o, true ).getStatus();
            if ( !s.isOK() ) {
                insertSuccessful = false;
                errmsg = s.toString();
                break;
            }
        }
    }

    // If inserts were unsuccessful, drop the target collection and return false.
    if ( !insertSuccessful ) {
        Client::Context ctx( target );
        Status s = ctx.db()->dropCollection( target );
        if ( !s.isOK() )
            errmsg = s.toString();
        restoreIndexBuildsOnSource( indexesInProg, source );
        return false;
    }

    // Copy over the indexes to temp storage and then to the target..
    vector<BSONObj> copiedIndexes;
    bool indexSuccessful = true;
    {
        Client::Context srcCtx( source );
        Collection* sourceColl = srcCtx.db()->getCollection( source );
        IndexCatalog::IndexIterator sourceIndIt =
            sourceColl->getIndexCatalog()->getIndexIterator( true );

        while ( sourceIndIt.more() ) {
            BSONObj currIndex = sourceIndIt.next()->infoObj();

            // Process the source index: rewrite its "ns" field to the target.
            BSONObjBuilder b;
            BSONObjIterator i( currIndex );
            while( i.moreWithEOO() ) {
                BSONElement e = i.next();
                if ( e.eoo() )
                    break;
                else if ( strcmp( e.fieldName(), "ns" ) == 0 )
                    b.append( "ns", target );
                else
                    b.append( e );
            }

            BSONObj newIndex = b.obj();
            copiedIndexes.push_back( newIndex );
        }
    }

    {
        Client::Context ctx( target );
        if ( !targetColl )
            targetColl = ctx.db()->getCollection( target );

        for ( vector<BSONObj>::iterator it = copiedIndexes.begin();
              it != copiedIndexes.end(); ++it ) {
            Status s = targetColl->getIndexCatalog()->createIndex( *it, true );
            if ( !s.isOK() ) {
                indexSuccessful = false;
                errmsg = s.toString();
                break;
            }
        }

        // If indexes were unsuccessful, drop the target collection and return false.
        if ( !indexSuccessful ) {
            Status s = ctx.db()->dropCollection( target );
            if ( !s.isOK() )
                errmsg = s.toString();
            restoreIndexBuildsOnSource( indexesInProg, source );
            return false;
        }
    }

    // Drop the source collection.
    {
        Client::Context srcCtx( source );
        Status s = srcCtx.db()->dropCollection( source );
        if ( !s.isOK() ) {
            errmsg = s.toString();
            restoreIndexBuildsOnSource( indexesInProg, source );
            return false;
        }
    }

    return true;
}
// Seed 'doc' with the equality-match fields of 'query' so that an upserted
// document contains the values the predicate matched on.  Only a top-level
// EQ, or an AND of EQ children, contributes fields; any other predicate
// shape is ignored.  For replacement-style updates only _id is copied.
Status UpdateDriver::populateDocumentWithQueryFields(const CanonicalQuery* query,
                                                     mutablebson::Document& doc) const {
    MatchExpression* root = query->root();

    MatchExpression::MatchType rootType = root->matchType();

    // These copies are needed until we apply the modifiers at the end.
    std::vector<BSONObj> copies;

    // We only care about equality and "and"ed equality fields, everything else is ignored
    if (rootType != MatchExpression::EQ && rootType != MatchExpression::AND)
        return Status::OK();

    if (isDocReplacement()) {
        BSONElement idElem = query->getQueryObj().getField("_id");

        // Replacement mods need the _id field copied explicitly.
        if (idElem.ok()) {
            mb::Element elem = doc.makeElement(idElem);
            return doc.root().pushFront(elem);
        }

        return Status::OK();
    }

    // Create a new UpdateDriver to create the base doc from the query
    Options opts;
    opts.logOp = false;
    opts.modOptions = modOptions();

    UpdateDriver insertDriver(opts);
    insertDriver.setContext(ModifierInterface::ExecInfo::INSERT_CONTEXT);

    // If we are a single equality match query
    if (root->matchType() == MatchExpression::EQ) {
        EqualityMatchExpression* eqMatch = static_cast<EqualityMatchExpression*>(root);

        const BSONElement matchData = eqMatch->getData();
        BSONElement childElem = matchData;

        // Make copy to new path if not the same field name (for cases like $all)
        if (!root->path().empty() && matchData.fieldNameStringData() != root->path()) {
            BSONObjBuilder copyBuilder;
            copyBuilder.appendAs(eqMatch->getData(), root->path());
            const BSONObj copy = copyBuilder.obj();
            // Keep the copy alive ('childElem' points into it) until the
            // drivers are applied below.
            copies.push_back(copy);
            childElem = copy[root->path()];
        }

        // Add this element as a $set modifier
        Status s = insertDriver.addAndParse(modifiertable::MOD_SET, childElem);
        if (!s.isOK())
            return s;

    } else {
        // parse query $set mods, including only equality stuff
        for (size_t i = 0; i < root->numChildren(); ++i) {
            MatchExpression* child = root->getChild(i);
            if (child->matchType() == MatchExpression::EQ) {
                EqualityMatchExpression* eqMatch = static_cast<EqualityMatchExpression*>(child);

                const BSONElement matchData = eqMatch->getData();
                BSONElement childElem = matchData;

                // Make copy to new path if not the same field name (for cases like $all)
                if (!child->path().empty() && matchData.fieldNameStringData() != child->path()) {
                    BSONObjBuilder copyBuilder;
                    copyBuilder.appendAs(eqMatch->getData(), child->path());
                    const BSONObj copy = copyBuilder.obj();
                    copies.push_back(copy);
                    childElem = copy[child->path()];
                }

                // Add this element as a $set modifier
                Status s = insertDriver.addAndParse(modifiertable::MOD_SET, childElem);
                if (!s.isOK())
                    return s;
            }
        }
    }

    // update the document with base field
    Status s = insertDriver.update(StringData(), &doc);
    copies.clear();
    if (!s.isOK()) {
        return Status(ErrorCodes::UnsupportedFormat,
                      str::stream() << "Cannot create base during"
                                       " insert of update. Caused by :" << s.toString());
    }

    return Status::OK();
}
// removeShard command body.  Three-phase state machine driven by the config
// servers: (1) mark the shard as draining if it is not already, (2) once no
// chunks or databases remain on it, delete it from config.shards, or
// (3) otherwise report draining progress.
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
    string target = cmdObj.firstElement().valuestrsafe();
    Shard s = Shard::make( target );
    if ( ! grid.knowAboutShard( s.getConnString() ) ) {
        errmsg = "unknown shard";
        return false;
    }

    ScopedDbConnection conn( configServer.getPrimary() );

    // If the server is not yet draining chunks, put it in draining mode.
    BSONObj searchDoc = BSON( "_id" << s.getName() );
    BSONObj drainingDoc = BSON( "_id" << s.getName() << ShardFields::draining(true) );
    BSONObj shardDoc = conn->findOne( "config.shards", drainingDoc );
    if ( shardDoc.isEmpty() ) {

        // TODO prevent move chunks to this shard.

        log() << "going to start draining shard: " << s.getName() << endl;
        BSONObj newStatus = BSON( "$set" << BSON( ShardFields::draining(true) ) );
        conn->update( "config.shards" , searchDoc , newStatus, false /* do no upsert */);

        errmsg = conn->getLastError();
        if ( errmsg.size() ) {
            log() << "error starting remove shard: " << s.getName() << " err: " << errmsg << endl;
            return false;
        }

        Shard::reloadShardInfo();

        result.append( "msg" , "draining started successfully" );
        result.append( "state" , "started" );
        result.append( "shard" , s.getName() );
        conn.done();
        return true;
    }

    // If the server has been completely drained, remove it from the ConfigDB.
    // Check not only for chunks but also databases.
    BSONObj shardIDDoc = BSON( "shard" << shardDoc[ "_id" ].str() );
    long long chunkCount = conn->count( "config.chunks" , shardIDDoc );
    BSONObj primaryDoc = BSON( "primary" << shardDoc[ "_id" ].str() );
    long long dbCount = conn->count( "config.databases" , primaryDoc );
    if ( ( chunkCount == 0 ) && ( dbCount == 0 ) ) {
        log() << "going to remove shard: " << s.getName() << endl;
        conn->remove( "config.shards" , searchDoc );

        errmsg = conn->getLastError();
        if ( errmsg.size() ) {
            log() << "error concluding remove shard: " << s.getName() << " err: " << errmsg << endl;
            return false;
        }

        // Drop the shard from the local cache and refresh topology.
        Shard::removeShard( shardDoc[ "_id" ].str() );
        Shard::reloadShardInfo();

        result.append( "msg" , "removeshard completed successfully" );
        result.append( "state" , "completed" );
        result.append( "shard" , s.getName() );
        conn.done();
        return true;
    }

    // If the server is already in draining mode, just report on its progress.
    // Report on databases (not just chunks) that are left too.
    result.append( "msg" , "draining ongoing" );
    result.append( "state" , "ongoing" );
    BSONObjBuilder inner;
    inner.append( "chunks" , chunkCount );
    inner.append( "dbs" , dbCount );
    result.append( "remaining" , inner.obj() );
    conn.done();
    return true;
}
bool handleSpecialNamespaces( Request& r , QueryMessage& q ) { const char * ns = r.getns(); ns = strstr( r.getns() , ".$cmd.sys." ); if ( ! ns ) return false; ns += 10; BSONObjBuilder b; vector<Shard> shards; if ( strcmp( ns , "inprog" ) == 0 ) { Shard::getAllShards( shards ); BSONArrayBuilder arr( b.subarrayStart( "inprog" ) ); for ( unsigned i=0; i<shards.size(); i++ ) { Shard shard = shards[i]; ScopedDbConnection conn( shard ); BSONObj temp = conn->findOne( r.getns() , BSONObj() ); if ( temp["inprog"].isABSONObj() ) { BSONObjIterator i( temp["inprog"].Obj() ); while ( i.more() ) { BSONObjBuilder x; BSONObjIterator j( i.next().Obj() ); while( j.more() ) { BSONElement e = j.next(); if ( str::equals( e.fieldName() , "opid" ) ) { stringstream ss; ss << shard.getName() << ':' << e.numberInt(); x.append( "opid" , ss.str() ); } else if ( str::equals( e.fieldName() , "client" ) ) { x.appendAs( e , "client_s" ); } else { x.append( e ); } } arr.append( x.obj() ); } } conn.done(); } arr.done(); } else if ( strcmp( ns , "killop" ) == 0 ) { BSONElement e = q.query["op"]; if ( strstr( r.getns() , "admin." ) != 0 ) { b.append( "err" , "unauthorized" ); } else if ( e.type() != String ) { b.append( "err" , "bad op" ); b.append( e ); } else { b.append( e ); string s = e.String(); string::size_type i = s.find( ':' ); if ( i == string::npos ) { b.append( "err" , "bad opid" ); } else { string shard = s.substr( 0 , i ); int opid = atoi( s.substr( i + 1 ).c_str() ); b.append( "shard" , shard ); b.append( "shardid" , opid ); log() << "want to kill op: " << e << endl; Shard s(shard); ScopedDbConnection conn( s ); conn->findOne( r.getns() , BSON( "op" << opid ) ); conn.done(); } } } else if ( strcmp( ns , "unlock" ) == 0 ) { b.append( "err" , "can't do unlock through mongos" ); } else { log( LL_WARNING ) << "unknown sys command [" << ns << "]" << endl; return false; } BSONObj x = b.done(); replyToQuery(0, r.p(), r.m(), x); return true; }
// Core of a rename for one namespace: moves the NamespaceDetails entry,
// rewrites the system.namespaces record (updating "create", optionally
// dropping "temp"), and swaps the in-memory collection cache entry.
// Index metadata is NOT touched here -- renameCollection handles that.
Status MMAPV1DatabaseCatalogEntry::_renameSingleNamespace(OperationContext* txn,
                                                          StringData fromNS,
                                                          StringData toNS,
                                                          bool stayTemp) {
    // some sanity checking
    NamespaceDetails* fromDetails = _namespaceIndex.details(fromNS);
    if (!fromDetails)
        return Status(ErrorCodes::BadValue, "from namespace doesn't exist");

    if (_namespaceIndex.details(toNS))
        return Status(ErrorCodes::BadValue, "to namespace already exists");

    // at this point, we haven't done anything destructive yet

    // ----
    // actually start moving
    // ----

    // this could throw, but if it does we're ok
    _namespaceIndex.add_ns(txn, toNS, fromDetails);
    NamespaceDetails* toDetails = _namespaceIndex.details(toNS);

    try {
        toDetails->copyingFrom(txn, toNS, _namespaceIndex, fromDetails);  // fixes extraOffset
    } catch (DBException&) {
        // could end up here if .ns is full - if so try to clean up / roll back a little
        _namespaceIndex.kill_ns(txn, toNS);
        throw;
    }

    // at this point, code .ns stuff moved
    _namespaceIndex.kill_ns(txn, fromNS);
    fromDetails = NULL;

    // fix system.namespaces
    BSONObj newSpec;
    RecordId oldSpecLocation = getCollectionCatalogEntry(fromNS)->getNamespacesRecordId();
    invariant(!oldSpecLocation.isNull());
    {
        BSONObj oldSpec = _getNamespaceRecordStore()->dataFor(txn, oldSpecLocation).releaseToBson();
        invariant(!oldSpec.isEmpty());

        // Rebuild the options sub-document: point "create" at the new name,
        // and drop "temp" unless the caller asked to keep it.
        BSONObjBuilder b;
        BSONObjIterator i(oldSpec.getObjectField("options"));
        while (i.more()) {
            BSONElement e = i.next();
            if (strcmp(e.fieldName(), "create") != 0) {
                if (stayTemp || (strcmp(e.fieldName(), "temp") != 0))
                    b.append(e);
            } else {
                b << "create" << toNS;
            }
        }
        newSpec = b.obj();
    }

    RecordId rid =
        _addNamespaceToNamespaceCollection(txn, toNS, newSpec.isEmpty() ? 0 : &newSpec);

    // Only remove the old record after the new one is safely in place.
    _getNamespaceRecordStore()->deleteRecord(txn, oldSpecLocation);

    // Swap the in-memory cache entry, registering the insertion with the
    // recovery unit so it can be rolled back.
    Entry*& entry = _collections[toNS.toString()];
    invariant(entry == NULL);
    txn->recoveryUnit()->registerChange(new EntryInsertion(toNS, this));
    entry = new Entry();
    _removeFromCache(txn->recoveryUnit(), fromNS);
    _insertInCache(txn, toNS, rid, entry);

    return Status::OK();
}
// Builds the body of a replSetGetStatus-style reply into 'b': one document per
// member (self constructed first, others from heartbeat state, then the whole
// vector sorted), plus top-level fields: set name, date, myState, optional
// syncingTo, the members array, and a testing-only "blind" flag.
void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const {
    vector<BSONObj> v;

    const Member *_self = this->_self;
    assert( _self );

    MemberState myState = box.getState();

    // add self
    {
        BSONObjBuilder bb;
        bb.append("_id", (int) _self->id());
        bb.append("name", _self->fullName());
        bb.append("health", 1.0);  // we are the reporting node, so healthy by construction
        bb.append("state", (int)myState.s);
        bb.append("stateStr", myState.toString());
        bb.append("uptime", (unsigned)(time(0) - cmdLine.started));
        if (!_self->config().arbiterOnly) {
            // arbiters carry no data, so no optime is reported for them
            bb.appendTimestamp("optime", lastOpTimeWritten.asDate());
            bb.appendDate("optimeDate", lastOpTimeWritten.getSecs() * 1000LL);
        }

        int maintenance = _maintenanceMode;
        if (maintenance) {
            bb.append("maintenanceMode", maintenance);
        }

        if (theReplSet) {
            string s = theReplSet->hbmsg();
            if( !s.empty() )
                bb.append("errmsg", s);
        }
        bb.append("self", true);
        v.push_back(bb.obj());
    }

    // add every other member, using the last heartbeat's view of it
    Member *m =_members.head();
    while( m ) {
        BSONObjBuilder bb;
        bb.append("_id", (int) m->id());
        bb.append("name", m->fullName());
        double h = m->hbinfo().health;
        bb.append("health", h);
        bb.append("state", (int) m->state().s);
        if( h == 0 ) {
            // if we can't connect the state info is from the past and could be confusing to show
            bb.append("stateStr", "(not reachable/healthy)");
        }
        else {
            bb.append("stateStr", m->state().toString());
        }
        bb.append("uptime", (unsigned) (m->hbinfo().upSince ? (time(0)-m->hbinfo().upSince) : 0));
        if (!m->config().arbiterOnly) {
            bb.appendTimestamp("optime", m->hbinfo().opTime.asDate());
            bb.appendDate("optimeDate", m->hbinfo().opTime.getSecs() * 1000LL);
        }
        bb.appendTimeT("lastHeartbeat", m->hbinfo().lastHeartbeat);
        bb.append("pingMs", m->hbinfo().ping);
        string s = m->lhb();
        if( !s.empty() )
            bb.append("errmsg", s);

        if (m->hbinfo().authIssue) {
            bb.append("authenticated", false);
        }

        v.push_back(bb.obj());
        m = m->next();
    }

    // sort member documents via BSONObj comparison — presumably orders by the
    // first field (_id); NOTE(review): confirm BSONObj::operator< semantics here
    sort(v.begin(), v.end());

    b.append("set", name());
    b.appendTimeT("date", time(0));
    b.append("myState", myState.s);
    const Member *syncTarget = _currentSyncTarget;
    if (syncTarget && myState != MemberState::RS_PRIMARY) {
        b.append("syncingTo", syncTarget->fullName());
    }

    b.append("members", v);
    if( replSetBlind )
        b.append("blind",true); // to avoid confusion if set...normally never set except for testing.
}
BSONObj ReplSetHeartbeatArgsV1::toBSON() const { invariant(isInitialized()); BSONObjBuilder builder; addToBSON(&builder); return builder.obj(); }
// Finishes construction of the cluster cursor: records the server count,
// pulls skip/sort/projection out of the query spec, rewrites text-score meta
// sort entries to plain descending keys, and reconciles the sort key with the
// projection so every field needed by the client-side merge sort is fetched.
void ParallelSortClusteredCursor::_finishCons() {
    _numServers = _servers.size();
    _lastFrom = 0;
    _cursors = 0;

    if (!_qSpec.isEmpty()) {
        _needToSkip = _qSpec.ntoskip();
        _cursors = 0;
        _sortKey = _qSpec.sort();
        _fields = _qSpec.fields();
    }

    // Partition sort key fields into (a) text meta fields and (b) all other fields.
    set<string> textMetaSortKeyFields;
    set<string> normalSortKeyFields;

    // Transform _sortKey fields {a:{$meta:"textScore"}} into {a:-1}, in order to apply the
    // merge sort for text metadata in the correct direction.
    BSONObjBuilder transformedSortKeyBuilder;

    BSONObjIterator sortKeyIt(_sortKey);
    while (sortKeyIt.more()) {
        BSONElement e = sortKeyIt.next();
        if (QueryRequest::isTextScoreMeta(e)) {
            textMetaSortKeyFields.insert(e.fieldName());
            transformedSortKeyBuilder.append(e.fieldName(), -1);
        } else {
            normalSortKeyFields.insert(e.fieldName());
            transformedSortKeyBuilder.append(e);
        }
    }
    _sortKey = transformedSortKeyBuilder.obj();

    // Verify that that all text metadata sort fields are in the projection. For all other sort
    // fields, copy them into the projection if they are missing (and if projection is
    // negative).
    if (!_sortKey.isEmpty() && !_fields.isEmpty()) {
        BSONObjBuilder b;
        bool isNegative = false;
        {
            BSONObjIterator i(_fields);
            while (i.more()) {
                BSONElement e = i.next();
                b.append(e);

                string fieldName = e.fieldName();

                if (QueryRequest::isTextScoreMeta(e)) {
                    textMetaSortKeyFields.erase(fieldName);
                } else {
                    // exact field
                    bool found = normalSortKeyFields.erase(fieldName);

                    // subfields: drop every remaining sort field strictly under
                    // "<fieldName>.". Note ".\x00" collapses to just "." when
                    // concatenated (embedded NUL terminates the C literal), and
                    // ".\xFF" is an upper bound above any printable subfield.
                    set<string>::const_iterator begin =
                        normalSortKeyFields.lower_bound(fieldName + ".\x00");
                    set<string>::const_iterator end =
                        normalSortKeyFields.lower_bound(fieldName + ".\xFF");
                    normalSortKeyFields.erase(begin, end);

                    if (!e.trueValue()) {
                        // projecting away a field we must merge-sort on is an error
                        uassert(13431,
                                "have to have sort key in projection and removing it",
                                !found && begin == end);
                    } else if (!e.isABSONObj()) {
                        // plain inclusion spec ({a:1}); remaining sort fields get added below
                        isNegative = true;
                    }
                }
            }
        }

        if (isNegative) {
            // add any sort fields the projection did not already cover
            for (set<string>::const_iterator it(normalSortKeyFields.begin()),
                     end(normalSortKeyFields.end());
                 it != end;
                 ++it) {
                b.append(*it, 1);
            }
        }

        _fields = b.obj();
    }

    if (!_qSpec.isEmpty()) {
        _qSpec.setFields(_fields);
    }

    uassert(17306,
            "have to have all text meta sort keys in projection",
            textMetaSortKeyFields.empty());
}
// Applies the projection to 'member' in place: on success the member becomes
// an OWNED_OBJ holding only the projected document, with its index key data
// and record location cleared. Handles three shapes of projection: returnKey
// (emit the index key only), simple field-by-field inclusion (covered path),
// and full-document projections (positional/$elemMatch etc.), plus $meta
// fields (text score, disk location) appended at the end.
Status ProjectionExec::transform(WorkingSetMember* member) const {
    if (_hasReturnKey) {
        BSONObj keyObj;

        if (member->hasComputed(WSM_INDEX_KEY)) {
            const IndexKeyComputedData* key =
                static_cast<const IndexKeyComputedData*>(member->getComputed(WSM_INDEX_KEY));
            keyObj = key->getKey();
        }

        // replace the member's payload with just the key (empty if none computed)
        member->state = WorkingSetMember::OWNED_OBJ;
        member->obj = keyObj;
        member->keyData.clear();
        member->loc = DiskLoc();
        return Status::OK();
    }

    BSONObjBuilder bob;
    if (!requiresDocument()) {
        // Go field by field.
        if (_includeID) {
            BSONElement elt;
            // Sometimes the _id field doesn't exist...
            if (member->getFieldDotted("_id", &elt) && !elt.eoo()) {
                bob.appendAs(elt, "_id");
            }
        }

        BSONObjIterator it(_source);
        while (it.more()) {
            BSONElement specElt = it.next();
            if (mongoutils::str::equals("_id", specElt.fieldName())) {
                // _id was handled above
                continue;
            }

            BSONElement keyElt;
            // We can project a field that doesn't exist. We just ignore it.
            if (member->getFieldDotted(specElt.fieldName(), &keyElt) && !keyElt.eoo()) {
                bob.appendAs(keyElt, specElt.fieldName());
            }
        }
    } else {
        // Planner should have done this.
        verify(member->hasObj());

        MatchDetails matchDetails;

        // If it's a positional projection we need a MatchDetails.
        if (transformRequiresDetails()) {
            matchDetails.requestElemMatchKey();
            verify(NULL != _queryExpression);
            verify(_queryExpression->matchesBSON(member->obj, &matchDetails));
        }

        // delegate to the BSONObj-level overload for the heavy lifting
        Status projStatus = transform(member->obj, &bob, &matchDetails);
        if (!projStatus.isOK()) {
            return projStatus;
        }
    }

    // append any requested $meta fields after the projected fields
    for (MetaMap::const_iterator it = _meta.begin(); it != _meta.end(); ++it) {
        if (META_TEXT == it->second) {
            if (member->hasComputed(WSM_COMPUTED_TEXT_SCORE)) {
                const TextScoreComputedData* score = static_cast<const TextScoreComputedData*>(
                    member->getComputed(WSM_COMPUTED_TEXT_SCORE));
                bob.append(it->first, score->getScore());
            } else {
                // no score computed for this member; emit a neutral 0.0
                bob.append(it->first, 0.0);
            }
        } else if (META_DISKLOC == it->second) {
            bob.append(it->first, member->loc.toBSONObj());
        }
    }

    BSONObj newObj = bob.obj();
    member->state = WorkingSetMember::OWNED_OBJ;
    member->obj = newObj;
    member->keyData.clear();
    member->loc = DiskLoc();
    return Status::OK();
}
int main() { // As of legacy-1.0-rc1-pre, you must initialize the driver in order to use // the BSON library OID class. mongo::client::GlobalInstance instance; if (!instance.initialized()) { std::cout << "failed to initialize the client driver: " << instance.status() << std::endl; return EXIT_FAILURE; } std::cout << "build bits: " << 8 * sizeof(char*) << '\n' << std::endl; /* a bson object defaults on construction to { } */ BSONObj empty; std::cout << "empty: " << empty << std::endl; /* make a simple { _id : <generated>, when : <now>, name : 'joe', age : 33.7 } object */ { const mongo::OID generated = mongo::OID::gen(); std::cout << "Generated an OID: " << generated << std::endl; BSONObjBuilder b; b.append("_id", generated); b.append("when", mongo::jsTime()); b.append("name", "joe"); b.append("age", 33.7); BSONObj result = b.obj(); std::cout << "json for object with _id: " << result << std::endl; } /* make { name : 'joe', age : 33.7 } with a more compact notation. */ BSONObj x = BSONObjBuilder().append("name", "joe").append("age", 33.7).obj(); /* convert from bson to json */ std::string json = x.toString(); std::cout << "json for x:" << json << std::endl; /* access some fields of bson object x */ std::cout << "Some x things: " << x["name"] << ' ' << x["age"].Number() << ' ' << x.isEmpty() << std::endl; /* make a bit more complex object with some nesting { x : 'asdf', y : true, subobj : { z : 3, q : 4 } } */ BSONObj y = BSON("x" << "asdf" << "y" << true << "subobj" << BSON("z" << 3 << "q" << 4)); /* print it */ std::cout << "y: " << y << std::endl; /* reach in and get subobj.z */ std::cout << "subobj.z: " << y.getFieldDotted("subobj.z").Number() << std::endl; /* alternate syntax: */ std::cout << "subobj.z: " << y["subobj"]["z"].Number() << std::endl; /* fetch all *top level* elements from object y into a vector */ std::vector<BSONElement> v; y.elems(v); std::cout << v[0] << std::endl; /* into an array */ std::list<BSONElement> L; y.elems(L); BSONObj sub = 
y["subobj"].Obj(); iter(y); return 0; }
BSONObj UUID::toBSON() const { BSONObjBuilder builder; appendToBuilder(&builder, "uuid"); return builder.obj(); }
/* Prepare to build an index. Does not actually build it (except for a special _id case).
    - We validate that the params are good
    - That the index does not already exist
    - Creates the source collection if it DNE

   example of 'io':
     { ns : 'test.foo', name : 'z', key : { z : 1 } }

   throws DBException

   @param io - the raw index spec from the client
   @param god - bypass the special _id handling when set
   @param sourceNS - source NS we are indexing (out)
   @param sourceCollection - its details ptr (out)
   @param fixedIndexObject - normalized index spec to actually store (out)
   @return true if ok to continue. when false we stop/fail silently (index already exists)
*/
bool prepareToBuildIndex(const BSONObj& io,
                         bool god,
                         string& sourceNS,
                         NamespaceDetails *&sourceCollection,
                         BSONObj& fixedIndexObject ) {
    sourceCollection = 0;

    // logical name of the index. todo: get rid of the name, we don't need it!
    const char *name = io.getStringField("name");
    uassert(12523, "no index name specified", *name);

    // the collection for which we are building an index
    sourceNS = io.getStringField("ns");
    uassert(10096, "invalid ns to index", sourceNS.find( '.' ) != string::npos);
    // the ns must belong to the database this connection currently has open
    uassert(10097,
            "bad table to index name on add index attempt",
            cc().database()->name == nsToDatabase(sourceNS.c_str()));

    BSONObj key = io.getObjectField("key");
    uassert(12524, "index key pattern too large", key.objsize() <= 2048);
    if( !validKeyPattern(key) ) {
        string s = string("bad index key pattern ") + key.toString();
        uasserted(10098 , s.c_str());
    }

    if ( sourceNS.empty() || key.isEmpty() ) {
        log(2) << "bad add index attempt name:" << (name?name:"") << "\n ns:" << sourceNS
               << "\n idxobj:" << io.toString() << endl;
        string s = "bad add index attempt " + sourceNS + " key:" + key.toString();
        uasserted(12504, s);
    }

    sourceCollection = nsdetails(sourceNS.c_str());
    if( sourceCollection == 0 ) {
        // try to create it
        string err;
        if ( !userCreateNS(sourceNS.c_str(), BSONObj(), err, false) ) {
            problem() << "ERROR: failed to create collection while adding its index. " << sourceNS << endl;
            return false;
        }
        sourceCollection = nsdetails(sourceNS.c_str());
        tlog() << "info: creating collection " << sourceNS << " on add index" << endl;
        assert( sourceCollection );
    }

    if ( sourceCollection->findIndexByName(name) >= 0 ) {
        // index already exists.
        return false;
    }
    if( sourceCollection->findIndexByKeyPattern(key) >= 0 ) {
        log(2) << "index already exists with diff name " << name << ' ' << key.toString() << endl;
        return false;
    }

    if ( sourceCollection->nIndexes >= NamespaceDetails::NIndexesMax ) {
        stringstream ss;
        ss << "add index fails, too many indexes for " << sourceNS << " key:" << key.toString();
        string s = ss.str();
        log() << s << '\n';
        uasserted(12505,s);
    }

    /* we can't build a new index for the ns if a build is already in progress in the background -
       EVEN IF this is a foreground build.
    */
    uassert(12588,
            "cannot add index with a background operation in progress",
            !BackgroundOperation::inProgForNs(sourceNS.c_str()));

    /* this is because we want key patterns like { _id : 1 } and { _id : <someobjid> } to
       all be treated as the same pattern.
    */
    if ( IndexDetails::isIdIndexPattern(key) ) {
        if( !god ) {
            // the special _id case: build it via the dedicated helper, then stop
            ensureHaveIdIndex( sourceNS.c_str() );
            return false;
        }
    }
    else {
        /* is buildIndexes:false set for this replica set member?
           if so we don't build any indexes except _id
        */
        if( theReplSet && !theReplSet->buildIndexes() )
            return false;
    }

    string pluginName = IndexPlugin::findPluginName( key );
    IndexPlugin * plugin = pluginName.size() ? IndexPlugin::get( pluginName ) : 0;

    // normalize the spec: let the plugin adjust it, force a version field,
    // canonicalize "unique", and keep frequently read fields near the front
    {
        BSONObj o = io;
        if ( plugin ) {
            o = plugin->adjustIndexSpec(o);
        }
        BSONObjBuilder b;
        int v = DefaultIndexVersionNumber;
        if( o.hasElement("_id") )
            b.append( o["_id"] );
        if( !o["v"].eoo() ) {
            double vv = o["v"].Number();
            // note (one day) we may be able to fresh build less versions than we can use
            // isASupportedIndexVersionNumber() is what we can use
            uassert(14803,
                    str::stream() << "this version of mongod cannot build new indexes of version number " << vv,
                    vv == 0 || vv == 1);
            v = (int) vv;
        }
        // idea is to put things we use a lot earlier
        b.append("v", v);
        b.append(o["key"]);
        if( o["unique"].trueValue() )
            b.appendBool("unique", true); // normalize to bool true in case was int 1 or something...
        b.append(o["ns"]);
        b.appendElementsUnique(o);
        fixedIndexObject = b.obj();
    }

    return true;
}
PlanStage::StageState IndexScan::work(WorkingSetID* out) { ++_commonStats.works; // Adds the amount of time taken by work() to executionTimeMillis. ScopedTimer timer(&_commonStats.executionTimeMillis); if (NULL == _indexCursor.get()) { // First call to work(). Perform possibly heavy init. initIndexScan(); checkEnd(); } else if (_yieldMovedCursor) { _yieldMovedCursor = false; // Note that we're not calling next() here. We got the next thing when we recovered // from yielding. } if (isEOF()) { return PlanStage::IS_EOF; } // Grab the next (key, value) from the index. BSONObj keyObj = _indexCursor->getKey(); DiskLoc loc = _indexCursor->getValue(); // Move to the next result. // The underlying IndexCursor points at the *next* thing we want to return. We do this so // that if we're scanning an index looking for docs to delete we don't continually clobber // the thing we're pointing at. _indexCursor->next(); checkEnd(); if (_shouldDedup) { ++_specificStats.dupsTested; if (_returned.end() != _returned.find(loc)) { ++_specificStats.dupsDropped; ++_commonStats.needTime; return PlanStage::NEED_TIME; } else { _returned.insert(loc); } } if (Filter::passes(keyObj, _keyPattern, _filter)) { if (NULL != _filter) { ++_specificStats.matchTested; } // We must make a copy of the on-disk data since it can mutate during the execution of // this query. BSONObj ownedKeyObj = keyObj.getOwned(); // Fill out the WSM. WorkingSetID id = _workingSet->allocate(); WorkingSetMember* member = _workingSet->get(id); member->loc = loc; member->keyData.push_back(IndexKeyDatum(_keyPattern, ownedKeyObj)); member->state = WorkingSetMember::LOC_AND_IDX; if (_params.addKeyMetadata) { BSONObjBuilder bob; bob.appendKeys(_keyPattern, ownedKeyObj); member->addComputed(new IndexKeyComputedData(bob.obj())); } *out = id; ++_commonStats.advanced; return PlanStage::ADVANCED; } ++_commonStats.needTime; return PlanStage::NEED_TIME; }
virtual bool run(OperationContext* txn, const string& db, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result) { const std::string ns = parseNs(db, cmdObj); if (nsToCollectionSubstring(ns).empty()) { errmsg = "missing collection name"; return false; } NamespaceString nss(ns); intrusive_ptr<ExpressionContext> pCtx = new ExpressionContext(txn, nss); pCtx->tempDir = storageGlobalParams.dbpath + "/_tmp"; /* try to parse the command; if this fails, then we didn't run */ intrusive_ptr<Pipeline> pPipeline = Pipeline::parseCommand(errmsg, cmdObj, pCtx); if (!pPipeline.get()) return false; // This is outside of the if block to keep the object alive until the pipeline is finished. BSONObj parsed; if (kDebugBuild && !pPipeline->isExplain() && !pCtx->inShard) { // Make sure all operations round-trip through Pipeline::toBson() correctly by // reparsing every command in debug builds. This is important because sharded // aggregations rely on this ability. Skipping when inShard because this has // already been through the transformation (and this unsets pCtx->inShard). parsed = pPipeline->serialize().toBson(); pPipeline = Pipeline::parseCommand(errmsg, parsed, pCtx); verify(pPipeline); } unique_ptr<ClientCursorPin> pin; // either this OR the exec will be non-null unique_ptr<PlanExecutor> exec; { // This will throw if the sharding version for this connection is out of date. The // lock must be held continuously from now until we have we created both the output // ClientCursor and the input executor. This ensures that both are using the same // sharding version that we synchronize on here. This is also why we always need to // create a ClientCursor even when we aren't outputting to a cursor. See the comment // on ShardFilterStage for more details. AutoGetCollectionForRead ctx(txn, nss.ns()); Collection* collection = ctx.getCollection(); // This does mongod-specific stuff like creating the input PlanExecutor and adding // it to the front of the pipeline if needed. 
std::shared_ptr<PlanExecutor> input = PipelineD::prepareCursorSource(txn, collection, nss, pPipeline, pCtx); pPipeline->stitch(); if (collection && input) { // Record the indexes used by the input executor. Retrieval of summary stats for a // PlanExecutor is normally done post execution. DocumentSourceCursor however will // destroy the input PlanExecutor once the result set has been exhausted. For // that reason we need to collect the indexes used prior to plan execution. PlanSummaryStats stats; Explain::getSummaryStats(*input, &stats); collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed); auto curOp = CurOp::get(txn); { stdx::lock_guard<Client>(*txn->getClient()); curOp->setPlanSummary_inlock(Explain::getPlanSummary(input.get())); } // TODO SERVER-23265: Confirm whether this is the correct place to gather all // metrics. There is no harm adding here for the time being. curOp->debug().setPlanSummaryMetrics(stats); if (curOp->shouldDBProfile(curOp->elapsedMillis())) { BSONObjBuilder execStatsBob; Explain::getWinningPlanStats(input.get(), &execStatsBob); curOp->debug().execStats.set(execStatsBob.obj()); } } // Create the PlanExecutor which returns results from the pipeline. The WorkingSet // ('ws') and the PipelineProxyStage ('proxy') will be owned by the created // PlanExecutor. auto ws = make_unique<WorkingSet>(); auto proxy = make_unique<PipelineProxyStage>(txn, pPipeline, input, ws.get()); auto statusWithPlanExecutor = (NULL == collection) ? PlanExecutor::make( txn, std::move(ws), std::move(proxy), nss.ns(), PlanExecutor::YIELD_MANUAL) : PlanExecutor::make( txn, std::move(ws), std::move(proxy), collection, PlanExecutor::YIELD_MANUAL); invariant(statusWithPlanExecutor.isOK()); exec = std::move(statusWithPlanExecutor.getValue()); if (!collection && input) { // If we don't have a collection, we won't be able to register any executors, so // make sure that the input PlanExecutor (likely wrapping an EOFStage) doesn't // need to be registered. 
invariant(!input->collection()); } if (collection) { const bool isAggCursor = true; // enable special locking behavior ClientCursor* cursor = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(), 0, cmdObj.getOwned(), isAggCursor); pin.reset(new ClientCursorPin(collection->getCursorManager(), cursor->cursorid())); // Don't add any code between here and the start of the try block. } // At this point, it is safe to release the collection lock. // - In the case where we have a collection: we will need to reacquire the // collection lock later when cleaning up our ClientCursorPin. // - In the case where we don't have a collection: our PlanExecutor won't be // registered, so it will be safe to clean it up outside the lock. invariant(NULL == exec.get() || NULL == exec->collection()); } try { // Unless set to true, the ClientCursor created above will be deleted on block exit. bool keepCursor = false; const bool isCursorCommand = !cmdObj["cursor"].eoo(); // If both explain and cursor are specified, explain wins. if (pPipeline->isExplain()) { result << "stages" << Value(pPipeline->writeExplainOps()); } else if (isCursorCommand) { keepCursor = handleCursorCommand(txn, nss.ns(), pin.get(), pin ? pin->c()->getExecutor() : exec.get(), cmdObj, result); } else { pPipeline->run(result); } // Clean up our ClientCursorPin, if needed. We must reacquire the collection lock // in order to do so. if (pin) { // We acquire locks here with DBLock and CollectionLock instead of using // AutoGetCollectionForRead. AutoGetCollectionForRead will throw if the // sharding version is out of date, and we don't care if the sharding version // has changed. Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS); Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS); if (keepCursor) { pin->release(); } else { pin->deleteUnderlying(); } } } catch (...) 
{ // On our way out of scope, we clean up our ClientCursorPin if needed. if (pin) { Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS); Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS); pin->deleteUnderlying(); } throw; } // Any code that needs the cursor pinned must be inside the try block, above. return true; }
void HaystackAccessMethod::searchCommand(OperationContext* txn, Collection* collection, const BSONObj& nearObj, double maxDistance, const BSONObj& search, BSONObjBuilder* result, unsigned limit) { Timer t; LOG(1) << "SEARCH near:" << nearObj << " maxDistance:" << maxDistance << " search: " << search << endl; int x, y; { BSONObjIterator i(nearObj); x = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); y = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); } int scale = static_cast<int>(ceil(maxDistance / _bucketSize)); GeoHaystackSearchHopper hopper(nearObj, maxDistance, limit, _geoField, collection); long long btreeMatches = 0; for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { BSONObjBuilder bb; bb.append("", ExpressionKeysPrivate::makeHaystackString(x + a, y + b)); for (unsigned i = 0; i < _otherFields.size(); i++) { // See if the non-geo field we're indexing on is in the provided search term. BSONElement e = search.getFieldDotted(_otherFields[i]); if (e.eoo()) bb.appendNull(""); else bb.appendAs(e, ""); } BSONObj key = bb.obj(); unordered_set<DiskLoc, DiskLoc::Hasher> thisPass; scoped_ptr<Runner> runner(InternalPlanner::indexScan(txn, collection, _descriptor, key, key, true)); Runner::RunnerState state; DiskLoc loc; while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &loc))) { if (hopper.limitReached()) { break; } pair<unordered_set<DiskLoc, DiskLoc::Hasher>::iterator, bool> p = thisPass.insert(loc); // If a new element was inserted (haven't seen the DiskLoc before), p.second // is true. if (p.second) { hopper.consider(loc); btreeMatches++; } } } } BSONArrayBuilder arr(result->subarrayStart("results")); int num = hopper.appendResultsTo(&arr); arr.done(); { BSONObjBuilder b(result->subobjStart("stats")); b.append("time", t.millis()); b.appendNumber("btreeMatches", btreeMatches); b.append("n", num); b.done(); } }
BSONObj LockerImpl::reportState() { BSONObjBuilder b; reportState(&b); return b.obj(); }
// Shards the collection 'ns': takes the distributed lock, verifies the
// collection is not already sharded, logs start/end changelog entries, creates
// the initial chunks on the database's primary shard, persists the collection
// metadata, and tells the primary shard to refresh its version. Returns the
// first failing Status; the SSV refresh failure is only warned about.
Status CatalogManagerReplicaSet::shardCollection(OperationContext* txn,
                                                 const string& ns,
                                                 const ShardKeyPattern& fieldsAndOrder,
                                                 bool unique,
                                                 const vector<BSONObj>& initPoints,
                                                 const set<ShardId>& initShardIds) {
    // Lock the collection globally so that no other mongos can try to shard or drop the collection
    // at the same time.
    auto scopedDistLock = getDistLockManager()->lock(ns, "shardCollection");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    auto status = getDatabase(txn, nsToDatabase(ns));
    if (!status.isOK()) {
        return status.getStatus();
    }

    ShardId dbPrimaryShardId = status.getValue().value.getPrimary();
    const auto primaryShard = grid.shardRegistry()->getShard(txn, dbPrimaryShardId);

    {
        // In 3.0 and prior we include this extra safety check that the collection is not getting
        // sharded concurrently by two different mongos instances. It is not 100%-proof, but it
        // reduces the chance that two invocations of shard collection will step on each other's
        // toes.  Now we take the distributed lock so going forward this check won't be necessary
        // but we leave it around for compatibility with other mongoses from 3.0.
        // TODO(spencer): Remove this after 3.2 ships.
        const auto configShard = grid.shardRegistry()->getShard(txn, "config");
        const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
        if (!readHost.isOK()) {
            return readHost.getStatus();
        }

        // any existing chunk documents for this ns mean it is already sharded
        auto countStatus = _runCountCommandOnConfig(
            readHost.getValue(), NamespaceString(ChunkType::ConfigNS), BSON(ChunkType::ns(ns)));
        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }
        if (countStatus.getValue() > 0) {
            return Status(ErrorCodes::AlreadyInitialized,
                          str::stream() << "collection " << ns << " already sharded with "
                                        << countStatus.getValue() << " chunks.");
        }
    }

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", ns);
        collectionDetail.append("primary", primaryShard->toString());

        {
            BSONArrayBuilder initialShards(collectionDetail.subarrayStart("initShards"));
            for (const ShardId& shardId : initShardIds) {
                initialShards.append(shardId);
            }
        }

        collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1));

        logChange(txn,
                  txn->getClient()->clientAddress(true),
                  "shardCollection.start",
                  ns,
                  collectionDetail.obj());
    }

    // create and persist the initial chunk layout
    shared_ptr<ChunkManager> manager(new ChunkManager(ns, fieldsAndOrder, unique));
    manager->createFirstChunks(txn, dbPrimaryShardId, &initPoints, &initShardIds);
    manager->loadExistingRanges(txn, nullptr);

    CollectionInfo collInfo;
    collInfo.useChunkManager(manager);
    collInfo.save(txn, ns);
    manager->reload(txn, true);

    // Tell the primary mongod to refresh its data
    // TODO:  Think the real fix here is for mongos to just
    //        assume that all collections are sharded, when we get there
    SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
        grid.shardRegistry()->getConfigServerConnectionString(),
        dbPrimaryShardId,
        primaryShard->getConnString(),
        NamespaceString(ns),
        ChunkVersionAndOpTime(manager->getVersion(), manager->getConfigOpTime()),
        true);

    auto ssvStatus = grid.shardRegistry()->runCommandWithNotMasterRetries(
        txn, dbPrimaryShardId, "admin", ssv.toBSON());
    if (!ssvStatus.isOK()) {
        // refresh failure is non-fatal: the shard will pick the version up later
        warning() << "could not update initial version of " << ns << " on shard primary "
                  << dbPrimaryShardId << ssvStatus.getStatus();
    }

    logChange(txn,
              txn->getClient()->clientAddress(true),
              "shardCollection",
              ns,
              BSON("version" << manager->getVersion().toString()));

    return Status::OK();
}
// Generates the set of index keys for one document under a 2dsphere index:
// for each field in the key pattern, computes either geo keys or literal
// keys, then takes the Cartesian product across fields. For V2 indexes a
// document with no geo field produces no keys at all (early return).
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys) {
    BSONObjSet keysToAdd;

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    // We output keys in the same order as the fields we index.
    BSONObjIterator i(keyPattern);
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds. Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means Don't expand the last array, duh.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            if (S2_INDEX_VERSION_2 == params.indexVersion) {
                // For V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field. So we look for these cases and
                // throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // V2 2dsphere indices require that at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            getS2LiteralKeys(fieldElements, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end();
                 ++newIt) {
                // each new key is an existing partial key with this field's key appended
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    // Make sure that if we're V2 there's at least one geo field present in the doc.
    if (S2_INDEX_VERSION_2 == params.indexVersion) {
        if (!haveGeoField) {
            return;
        }
    }

    // NOTE(review): size() is unsigned; maxKeysPerInsert's type isn't visible
    // here — confirm the comparison doesn't mix signedness.
    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                  << ") consider creating larger buckets. obj=" << obj;
    }

    *keys = keysToAdd;
}
// Gathers getLastError results from every shard touched by the previous
// operation and merges them into 'result'. Has a fast path for a single
// shard and a fan-out path for multiple shards; in both cases it also pings
// the shards seen since the last GLE ("just to block") and processes any
// writeback info collected along the way. Returns false only when a shard
// could not be contacted at all.
bool ClientInfo::getLastError( const BSONObj& options , BSONObjBuilder& result , bool fromWriteBackListener ) {
    set<string> * shards = getPrev();

    if ( shards->size() == 0 ) {
        // nothing was written anywhere: report a null error
        result.appendNull( "err" );
        return true;
    }

    vector<WBInfo> writebacks;

    // handle single server
    if ( shards->size() == 1 ) {
        string theShard = *(shards->begin() );

        ShardConnection conn( theShard , "", true );

        BSONObj res;
        bool ok = false;
        try{
            ok = conn->runCommand( "admin" , options , res );
        }
        catch( std::exception &e ){

            warning() << "could not get last error from shard " << theShard << causedBy( e ) << endl;

            // Catch everything that happens here, since we need to ensure we return our connection when we're
            // finished.
            conn.done();

            return false;
        }

        res = res.getOwned();
        conn.done();

        _addWriteBack( writebacks , res );

        // hit other machines just to block
        for ( set<string>::const_iterator i=sinceLastGetError().begin(); i!=sinceLastGetError().end(); ++i ) {
            string temp = *i;
            if ( temp == theShard )
                continue;

            ShardConnection conn( temp , "" );

            try {
                _addWriteBack( writebacks , conn->getLastErrorDetailed() );
            }
            catch( std::exception &e ){
                warning() << "could not clear last error from shard " << temp << causedBy( e ) << endl;
            }

            conn.done();
        }
        clearSinceLastGetError();

        if ( writebacks.size() ){
            vector<BSONObj> v = _handleWriteBacks( writebacks , fromWriteBackListener );
            if ( v.size() == 0 && fromWriteBackListener ) {
                // ok
            }
            else {
                // this will usually be 1
                // it can be greater than 1 if a write to a different shard
                // than the last write op had a writeback
                // all we're going to report is the first
                // since that's the current write
                // but we block for all
                assert( v.size() >= 1 );
                result.appendElements( v[0] );
                result.appendElementsUnique( res );
                result.append( "writebackGLE" , v[0] );
                result.append( "initialGLEHost" , theShard );
            }
        }
        else {
            result.append( "singleShard" , theShard );
            result.appendElements( res );
        }

        return ok;
    }

    BSONArrayBuilder bbb( result.subarrayStart( "shards" ) );
    BSONObjBuilder shardRawGLE;

    long long n = 0;

    int updatedExistingStat = 0; // 0 is none, -1 has but false, 1 has true

    // hit each shard
    vector<string> errors;
    vector<BSONObj> errorObjects;
    for ( set<string>::iterator i = shards->begin(); i != shards->end(); i++ ) {
        string theShard = *i;
        bbb.append( theShard );

        ShardConnection conn( theShard , "", true );

        BSONObj res;
        bool ok = false;
        try {
            ok = conn->runCommand( "admin" , options , res );
            shardRawGLE.append( theShard , res );
        }
        catch( std::exception &e ){

            // Safe to return here, since we haven't started any extra processing yet, just collecting
            // responses.
            warning() << "could not get last error from a shard " << theShard << causedBy( e ) << endl;
            conn.done();

            return false;
        }

        _addWriteBack( writebacks, res );

        string temp = DBClientWithCommands::getLastErrorString( res );
        // SYNC (config server) connections are excluded from error collection
        if ( conn->type() != ConnectionString::SYNC && ( ok == false || temp.size() ) ) {
            errors.push_back( temp );
            errorObjects.push_back( res );
        }

        n += res["n"].numberLong();

        if ( res["updatedExisting"].type() ) {
            if ( res["updatedExisting"].trueValue() )
                updatedExistingStat = 1;
            else if ( updatedExistingStat == 0 )
                updatedExistingStat = -1;
        }

        conn.done();
    }

    bbb.done();
    result.append( "shardRawGLE" , shardRawGLE.obj() );

    result.appendNumber( "n" , n );
    if ( updatedExistingStat )
        result.appendBool( "updatedExisting" , updatedExistingStat > 0 );

    // hit other machines just to block
    for ( set<string>::const_iterator i=sinceLastGetError().begin(); i!=sinceLastGetError().end(); ++i ) {
        string temp = *i;
        if ( shards->count( temp ) )
            continue;

        ShardConnection conn( temp , "" );

        try {
            _addWriteBack( writebacks, conn->getLastErrorDetailed() );
        }
        catch( std::exception &e ){
            warning() << "could not clear last error from a shard " << temp << causedBy( e ) << endl;
        }
        // NOTE(review): unlike the analogous loop in the single-shard path
        // above, this loop never calls conn.done() — the connection is not
        // returned to the pool. Confirm whether that is intentional.
    }
    clearSinceLastGetError();

    if ( errors.size() == 0 ) {
        result.appendNull( "err" );
        _handleWriteBacks( writebacks , fromWriteBackListener );
        return true;
    }

    result.append( "err" , errors[0].c_str() );

    { // errs
        BSONArrayBuilder all( result.subarrayStart( "errs" ) );
        for ( unsigned i=0; i<errors.size(); i++ ) {
            all.append( errors[i].c_str() );
        }
        all.done();
    }

    { // errObjects
        BSONArrayBuilder all( result.subarrayStart( "errObjects" ) );
        for ( unsigned i=0; i<errorObjects.size(); i++ ) {
            all.append( errorObjects[i] );
        }
        all.done();
    }

    _handleWriteBacks( writebacks , fromWriteBackListener );
    return true;
}
void handleRESTQuery( string ns , string action , BSONObj & params , int & responseCode , stringstream & out ) { Timer t; int skip = _getOption( params["skip"] , 0 ); int num = _getOption( params["limit"] , _getOption( params["count" ] , 1000 ) ); // count is old, limit is new int one = 0; if ( params["one"].type() == String && tolower( params["one"].valuestr()[0] ) == 't' ) { num = 1; one = 1; } BSONObjBuilder queryBuilder; BSONObjIterator i(params); while ( i.more() ){ BSONElement e = i.next(); string name = e.fieldName(); if ( ! name.find( "filter_" ) == 0 ) continue; const char * field = name.substr( 7 ).c_str(); const char * val = e.valuestr(); char * temp; // TODO: this is how i guess if something is a number. pretty lame right now double number = strtod( val , &temp ); if ( temp != val ) queryBuilder.append( field , number ); else queryBuilder.append( field , val ); } BSONObj query = queryBuilder.obj(); auto_ptr<DBClientCursor> cursor = db.query( ns.c_str() , query, num , skip ); if ( one ) { if ( cursor->more() ) { BSONObj obj = cursor->next(); out << obj.jsonString() << "\n"; } else { responseCode = 404; } return; } out << "{\n"; out << " \"offset\" : " << skip << ",\n"; out << " \"rows\": [\n"; int howMany = 0; while ( cursor->more() ) { if ( howMany++ ) out << " ,\n"; BSONObj obj = cursor->next(); out << " " << obj.jsonString(); } out << "\n ],\n\n"; out << " \"total_rows\" : " << howMany << " ,\n"; out << " \"query\" : " << query.jsonString() << " ,\n"; out << " \"millis\" : " << t.millis() << "\n"; out << "}\n"; }
// Runs a legacy OP_QUERY-style find (described by `q`) against namespace
// `nss`, filling `result` with the wire-protocol reply. Returns the
// namespace string when the reply leaves an exhaust cursor open (so the
// caller keeps streaming), otherwise the empty string.
std::string runQuery(OperationContext* txn, QueryMessage& q, const NamespaceString& nss, Message& result) {
    CurOp& curop = *CurOp::get(txn);

    // Validate the namespace.
    uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid());
    invariant(!nss.isCommand());

    // Set curop information.
    beginQueryOp(txn, nss, q.query, q.ntoreturn, q.ntoskip);

    // Parse the qm into a CanonicalQuery.
    auto statusWithCQ = CanonicalQuery::canonicalize(q, WhereCallbackReal(txn, nss.db()));
    if (!statusWithCQ.isOK()) {
        uasserted(
            17287,
            str::stream() << "Can't canonicalize query: " << statusWithCQ.getStatus().toString());
    }
    unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());
    invariant(cq.get());

    LOG(5) << "Running query:\n" << cq->toString();
    LOG(2) << "Running query: " << cq->toStringShort();

    // Parse, canonicalize, plan, transcribe, and get a plan executor.
    // AutoGetCollectionForRead also holds the read lock for the remainder
    // of this function.
    AutoGetCollectionForRead ctx(txn, nss);
    Collection* collection = ctx.getCollection();

    const int dbProfilingLevel =
        ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile;

    // We have a parsed query. Time to get the execution plan for it.
    std::unique_ptr<PlanExecutor> exec = uassertStatusOK(
        getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO));

    const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

    // If it's actually an explain, do the explain and return rather than falling through
    // to the normal query execution loop.
    if (pq.isExplain()) {
        BufBuilder bb;
        // Reserve room for the reply header, which is filled in below.
        bb.skip(sizeof(QueryResult::Value));

        BSONObjBuilder explainBob;
        Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

        // Add the resulting object to the return buffer.
        BSONObj explainObj = explainBob.obj();
        bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

        // TODO: Does this get overwritten/do we really need to set this twice?
        curop.debug().query = q.query;

        // Set query result fields. The explain reply always reports exactly
        // one returned document and no cursor.
        QueryResult::View qr = bb.buf();
        bb.decouple();  // `result` now owns the buffer (freed via setData below).
        qr.setResultFlagsToOk();
        qr.msgdata().setLen(bb.len());
        curop.debug().responseLength = bb.len();
        qr.msgdata().setOperation(opReply);
        qr.setCursorId(0);
        qr.setStartingFrom(0);
        qr.setNReturned(1);
        result.setData(qr.view2ptr(), true);
        return "";
    }

    ShardingState* const shardingState = ShardingState::get(txn);

    // We freak out later if this changes before we're done with the query.
    const ChunkVersion shardingVersionAtStart = shardingState->getVersion(nss.ns());

    // Handle query option $maxTimeMS (not used with commands).
    curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
    txn->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

    // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
    bool slaveOK = pq.isSlaveOk() || pq.hasReadPref();
    Status serveReadsStatus =
        repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(txn, nss, slaveOK);
    uassertStatusOK(serveReadsStatus);

    // Run the query.
    // bb is used to hold query results
    // this buffer should contain either requested documents per query or
    // explain information, but not both
    BufBuilder bb(32768);
    bb.skip(sizeof(QueryResult::Value));

    // How many results have we obtained from the executor?
    int numResults = 0;

    // If we're replaying the oplog, we save the last time that we read.
    Timestamp slaveReadTill;

    BSONObj obj;
    PlanExecutor::ExecState state;
    // uint64_t numMisplacedDocs = 0;

    // Get summary info about which plan the executor is using.
    curop.debug().planSummary = Explain::getPlanSummary(exec.get());

    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // Add result to output buffer.
        bb.appendBuf((void*)obj.objdata(), obj.objsize());

        // Count the result.
        ++numResults;

        // Possibly note slave's position in the oplog: track the latest "ts"
        // field seen so the cursor can be marked with it below.
        if (pq.isOplogReplay()) {
            BSONElement e = obj["ts"];
            if (Date == e.type() || bsonTimestamp == e.type()) {
                slaveReadTill = e.timestamp();
            }
        }

        // Stop once the first batch is full (by count or byte size).
        if (FindCommon::enoughForFirstBatch(pq, numResults, bb.len())) {
            LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore()
                   << " ntoreturn=" << pq.getNToReturn().value_or(0)
                   << " numResults=" << numResults << endl;
            break;
        }
    }

    // If we cache the executor later, we want to deregister it as it receives notifications
    // anyway by virtue of being cached.
    //
    // If we don't cache the executor later, we are deleting it, so it must be deregistered.
    //
    // So, no matter what, deregister the executor.
    exec->deregisterExec();

    // Caller expects exceptions thrown in certain cases.
    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        const unique_ptr<PlanStageStats> stats(exec->getStats());
        error() << "Plan executor error during find: " << PlanExecutor::statestr(state)
                << ", stats: " << Explain::statsToBSON(*stats);
        uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
    }

    // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the
    // chunk belonged on this node are gone. Separating chunk lifetime management from
    // ClientCursor should allow this check to go away.
    if (!shardingState->getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
        // if the version changed during the query we might be missing some data and its safe to
        // send this as mongos can resend at this point
        throw SendStaleConfigException(nss.ns(),
                                       "version changed during initial query",
                                       shardingVersionAtStart,
                                       shardingState->getVersion(nss.ns()));
    }

    // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
    // this cursorid later.
    long long ccId = 0;

    if (shouldSaveCursor(txn, collection, state, exec.get())) {
        // We won't use the executor until it's getMore'd.
        exec->saveState();
        exec->detachFromOperationContext();

        // Allocate a new ClientCursor. We don't have to worry about leaking it as it's
        // inserted into a global map by its ctor.
        ClientCursor* cc =
            new ClientCursor(collection->getCursorManager(),
                             exec.release(),
                             nss.ns(),
                             txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                             pq.getOptions(),
                             pq.getFilter());
        ccId = cc->cursorid();

        LOG(5) << "caching executor with cursorid " << ccId << " after returning " << numResults
               << " results" << endl;

        // TODO document
        if (pq.isOplogReplay() && !slaveReadTill.isNull()) {
            cc->slaveReadTill(slaveReadTill);
        }

        // TODO document
        if (pq.isExhaust()) {
            curop.debug().exhaust = true;
        }

        cc->setPos(numResults);

        // If the query had a time limit, remaining time is "rolled over" to the cursor (for
        // use by future getmore ops).
        cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());

        endQueryOp(txn, collection, *cc->getExecutor(), dbProfilingLevel, numResults, ccId);
    } else {
        LOG(5) << "Not caching executor but returning " << numResults << " results.\n";
        endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, ccId);
    }

    // Add the results from the query into the output buffer.
    result.appendData(bb.buf(), bb.len());
    bb.decouple();

    // Fill out the output buffer's header.
    QueryResult::View qr = result.header().view2ptr();
    qr.setCursorId(ccId);
    qr.setResultFlagsToOk();
    qr.msgdata().setOperation(opReply);
    qr.setStartingFrom(0);
    qr.setNReturned(numResults);

    // curop.debug().exhaust is set above.
    return curop.debug().exhaust ? nss.ns() : "";
}