virtual bool run(OperationContext* txn,
                 const string& db,
                 BSONObj& cmdObj,
                 int options,
                 string& errmsg,
                 BSONObjBuilder& result) {
    const std::string ns = parseNs(db, cmdObj);
    if (nsToCollectionSubstring(ns).empty()) {
        errmsg = "missing collection name";
        return false;
    }
    NamespaceString nss(ns);

    // Parse the options for this request.
    auto request = AggregationRequest::parseFromBSON(nss, cmdObj);
    if (!request.isOK()) {
        return appendCommandStatus(result, request.getStatus());
    }

    // Set up the ExpressionContext.
    intrusive_ptr<ExpressionContext> expCtx = new ExpressionContext(txn, request.getValue());
    expCtx->tempDir = storageGlobalParams.dbpath + "/_tmp";

    // Parse the pipeline.
    auto statusWithPipeline = Pipeline::parse(request.getValue().getPipeline(), expCtx);
    if (!statusWithPipeline.isOK()) {
        return appendCommandStatus(result, statusWithPipeline.getStatus());
    }
    auto pipeline = std::move(statusWithPipeline.getValue());

    auto resolvedNamespaces = resolveInvolvedNamespaces(txn, pipeline, expCtx);
    if (!resolvedNamespaces.isOK()) {
        return appendCommandStatus(result, resolvedNamespaces.getStatus());
    }
    expCtx->resolvedNamespaces = std::move(resolvedNamespaces.getValue());

    unique_ptr<ClientCursorPin> pin;  // either this OR the exec will be non-null
    unique_ptr<PlanExecutor> exec;
    auto curOp = CurOp::get(txn);
    {
        // This will throw if the sharding version for this connection is out of date. If the
        // namespace is a view, the lock will be released before re-running the aggregation.
        // Otherwise, the lock must be held continuously from now until we have created both
        // the output ClientCursor and the input executor. This ensures that both are using
        // the same sharding version that we synchronize on here. This is also why we always
        // need to create a ClientCursor even when we aren't outputting to a cursor. See the
        // comment on ShardFilterStage for more details.
        AutoGetCollectionOrViewForRead ctx(txn, nss);
        Collection* collection = ctx.getCollection();

        // If running $collStats on a view, we do not resolve the view since we want stats
        // on this view namespace.
        auto startsWithCollStats = [&pipeline]() {
            const Pipeline::SourceContainer& sources = pipeline->getSources();
            return !sources.empty() &&
                dynamic_cast<DocumentSourceCollStats*>(sources.front().get());
        };

        // If this is a view, resolve it by finding the underlying collection and stitching
        // view pipelines and this request's pipeline together. We then release our locks
        // before recursively calling run, which will re-acquire locks on the underlying
        // collection. (The lock must be released because recursively acquiring locks on the
        // database will prohibit yielding.)
        auto view = ctx.getView();
        if (view && !startsWithCollStats()) {
            auto viewDefinition =
                ViewShardingCheck::getResolvedViewIfSharded(txn, ctx.getDb(), view);
            if (!viewDefinition.isOK()) {
                return appendCommandStatus(result, viewDefinition.getStatus());
            }

            if (!viewDefinition.getValue().isEmpty()) {
                ViewShardingCheck::appendShardedViewStatus(viewDefinition.getValue(), &result);
                return false;
            }

            auto resolvedView = ctx.getDb()->getViewCatalog()->resolveView(txn, nss);
            if (!resolvedView.isOK()) {
                return appendCommandStatus(result, resolvedView.getStatus());
            }

            // With the view resolved, we can relinquish locks.
            ctx.releaseLocksForView();

            // Parse the resolved view into a new aggregation request.
            auto viewCmd =
                resolvedView.getValue().asExpandedViewAggregation(request.getValue());
            if (!viewCmd.isOK()) {
                return appendCommandStatus(result, viewCmd.getStatus());
            }

            bool status = this->run(txn, db, viewCmd.getValue(), options, errmsg, result);
            {
                // Set the namespace of the curop back to the view namespace so ctx records
                // stats on this view namespace on destruction.
                stdx::lock_guard<Client> lk(*txn->getClient());
                curOp->setNS_inlock(nss.ns());
            }
            return status;
        }

        // If the pipeline does not have a user-specified collation, set it from the
        // collection default.
        if (request.getValue().getCollation().isEmpty() && collection &&
            collection->getDefaultCollator()) {
            invariant(!expCtx->getCollator());
            expCtx->setCollator(collection->getDefaultCollator()->clone());
        }

        // Propagate the ExpressionContext throughout all of the pipeline's stages and
        // expressions.
        pipeline->injectExpressionContext(expCtx);

        // The pipeline must be optimized after the correct collator has been set on it (by
        // injecting the ExpressionContext containing the collator). This is necessary
        // because optimization may make string comparisons, e.g. optimizing
        // {$eq: [<str1>, <str2>]} to a constant.
        pipeline->optimizePipeline();

        if (kDebugBuild && !expCtx->isExplain && !expCtx->inShard) {
            // Make sure all operations round-trip through Pipeline::serialize() correctly by
            // re-parsing every command in debug builds. This is important because sharded
            // aggregations rely on this ability. Skipping when inShard because this has
            // already been through the transformation (and this un-sets expCtx->inShard).
            pipeline = reparsePipeline(pipeline, request.getValue(), expCtx);
        }

        // This does mongod-specific stuff like creating the input PlanExecutor and adding
        // it to the front of the pipeline if needed.
        PipelineD::prepareCursorSource(collection, pipeline);

        // Create the PlanExecutor which returns results from the pipeline. The WorkingSet
        // ('ws') and the PipelineProxyStage ('proxy') will be owned by the created
        // PlanExecutor.
        auto ws = make_unique<WorkingSet>();
        auto proxy = make_unique<PipelineProxyStage>(txn, pipeline, ws.get());

        auto statusWithPlanExecutor = (NULL == collection)
            ? PlanExecutor::make(
                  txn, std::move(ws), std::move(proxy), nss.ns(), PlanExecutor::YIELD_MANUAL)
            : PlanExecutor::make(
                  txn, std::move(ws), std::move(proxy), collection, PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());
        exec = std::move(statusWithPlanExecutor.getValue());

        {
            auto planSummary = Explain::getPlanSummary(exec.get());
            stdx::lock_guard<Client> lk(*txn->getClient());
            curOp->setPlanSummary_inlock(std::move(planSummary));
        }

        if (collection) {
            PlanSummaryStats stats;
            Explain::getSummaryStats(*exec, &stats);
            collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed);
        }

        if (collection) {
            const bool isAggCursor = true;  // enable special locking behavior
            ClientCursor* cursor =
                new ClientCursor(collection->getCursorManager(),
                                 exec.release(),
                                 nss.ns(),
                                 txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                                 0,
                                 cmdObj.getOwned(),
                                 isAggCursor);
            pin.reset(new ClientCursorPin(collection->getCursorManager(), cursor->cursorid()));
            // Don't add any code between here and the start of the try block.
        }

        // At this point, it is safe to release the collection lock.
        // - In the case where we have a collection: we will need to reacquire the
        //   collection lock later when cleaning up our ClientCursorPin.
        // - In the case where we don't have a collection: our PlanExecutor won't be
        //   registered, so it will be safe to clean it up outside the lock.
        invariant(!exec || !collection);
    }

    try {
        // Unless set to true, the ClientCursor created above will be deleted on block exit.
        bool keepCursor = false;

        // Use of the aggregate command without specifying to use a cursor is deprecated.
        // Applications should migrate to using cursors. Cursors are strictly more useful
        // than outputting the results as a single document, since results that fit inside a
        // single BSONObj will also fit inside a single batch.
        //
        // We occasionally log a deprecation warning.
        if (!request.getValue().isCursorCommand()) {
            RARELY {
                warning()
                    << "Use of the aggregate command without the 'cursor' "
                       "option is deprecated. See "
                       "http://dochub.mongodb.org/core/aggregate-without-cursor-deprecation.";
            }
        }

        // If both explain and cursor are specified, explain wins.
        if (expCtx->isExplain) {
            result << "stages" << Value(pipeline->writeExplainOps());
        } else if (request.getValue().isCursorCommand()) {
            keepCursor = handleCursorCommand(txn,
                                             nss.ns(),
                                             pin.get(),
                                             pin ? pin->c()->getExecutor() : exec.get(),
                                             request.getValue(),
                                             result);
        } else {
            pipeline->run(result);
        }

        if (!expCtx->isExplain) {
            PlanSummaryStats stats;
            Explain::getSummaryStats(pin ? *pin->c()->getExecutor() : *exec.get(), &stats);
            curOp->debug().setPlanSummaryMetrics(stats);
            curOp->debug().nreturned = stats.nReturned;
        }

        // Clean up our ClientCursorPin, if needed. We must reacquire the collection lock
        // in order to do so.
        if (pin) {
            // We acquire locks here with DBLock and CollectionLock instead of using
            // AutoGetCollectionForRead. AutoGetCollectionForRead will throw if the sharding
            // version is out of date, and we don't care if the sharding version has changed.
            Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
            Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
            if (keepCursor) {
                pin->release();
            } else {
                pin->deleteUnderlying();
            }
        }
    } catch (...) {
virtual bool run(OperationContext* txn,
                 const string& dbname,
                 BSONObj& cmdObj,
                 int,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool fromRepl) {
    if (cmdObj.firstElement().type() != Array) {
        errmsg = "ops has to be an array";
        return false;
    }

    BSONObj ops = cmdObj.firstElement().Obj();

    {
        // check input
        BSONObjIterator i(ops);
        while (i.more()) {
            BSONElement e = i.next();
            if (!_checkOperation(e, errmsg)) {
                return false;
            }
        }
    }

    // SERVER-4328 todo: is global ok or does this take a long time? i believe multiple
    // ns used so locking individually requires more analysis
    ScopedTransaction scopedXact(txn, MODE_X);
    Lock::GlobalWrite globalWriteLock(txn->lockState());

    if (!fromRepl &&
        !repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) {
        return appendCommandStatus(
            result,
            Status(ErrorCodes::NotMaster,
                   str::stream() << "Not primary while applying ops to database " << dbname));
    }

    // Preconditions check reads the database state, so needs to be done locked
    if (cmdObj["preCondition"].type() == Array) {
        BSONObjIterator i(cmdObj["preCondition"].Obj());
        while (i.more()) {
            BSONObj f = i.next().Obj();

            DBDirectClient db(txn);
            BSONObj realres = db.findOne(f["ns"].String(), f["q"].Obj());

            // applyOps would never have a $where matcher, so use the default callback,
            // which will throw an error if $where is found.
            Matcher m(f["res"].Obj());
            if (!m.matches(realres)) {
                result.append("got", realres);
                result.append("whatFailed", f);
                errmsg = "pre-condition failed";
                return false;
            }
        }
    }

    // apply
    int num = 0;
    int errors = 0;

    BSONObjIterator i(ops);
    BSONArrayBuilder ab;
    const bool alwaysUpsert =
        cmdObj.hasField("alwaysUpsert") ? cmdObj["alwaysUpsert"].trueValue() : true;

    while (i.more()) {
        BSONElement e = i.next();
        const BSONObj& temp = e.Obj();

        // Ignore 'n' operations.
        const char* opType = temp["op"].valuestrsafe();
        if (*opType == 'n')
            continue;

        const string ns = temp["ns"].String();

        // Run operations under a nested lock as a hack to prevent yielding.
        //
        // The list of operations is supposed to be applied atomically; yielding would
        // break atomicity by allowing an interruption or a shutdown to occur after only
        // some operations are applied. We are already locked globally at this point, so
        // taking a DBLock on the namespace creates a nested lock, and yields are
        // disallowed for operations that hold a nested lock.
        //
        // We do not have a wrapping WriteUnitOfWork so it is possible for a journal
        // commit to happen with a subset of ops applied.
        // TODO figure out what to do about this.
        Lock::GlobalWrite globalWriteLockDisallowTempRelease(txn->lockState());

        // Ensures that yielding will not happen (see the comment above).
        DEV {
            Locker::LockSnapshot lockSnapshot;
            invariant(!txn->lockState()->saveLockStateAndUnlock(&lockSnapshot));
        };

        OldClientContext ctx(txn, ns);

        Status status(ErrorCodes::InternalError, "");
        while (true) {
            try {
                // We assume that in the WriteConflict retry case, either the op rolls back
                // any changes it makes or is otherwise safe to rerun.
                status = repl::applyOperation_inlock(txn, ctx.db(), temp, false, alwaysUpsert);
                break;
            } catch (const WriteConflictException& wce) {
                LOG(2) << "WriteConflictException in applyOps command, retrying.";
                txn->recoveryUnit()->commitAndRestart();
                continue;
            }
        }

        ab.append(status.isOK());
        if (!status.isOK()) {
            errors++;
        }

        num++;

        WriteUnitOfWork wuow(txn);
        logOpForDbHash(txn, ns.c_str());
        wuow.commit();
    }

    result.append("applied", num);
    result.append("results", ab.arr());

    if (!fromRepl) {
        // We want this applied atomically on slaves, so we re-wrap without the
        // pre-condition for speed.
        string tempNS = str::stream() << dbname << ".$cmd";

        // TODO: possibly use mutable BSON to remove preCondition field once it is available
        BSONObjIterator iter(cmdObj);
        BSONObjBuilder cmdBuilder;

        while (iter.more()) {
            BSONElement elem(iter.next());
            if (strcmp(elem.fieldName(), "preCondition") != 0) {
                cmdBuilder.append(elem);
            }
        }

        const BSONObj cmdRewritten = cmdBuilder.done();

        // We currently always logOp the command regardless of whether the individual ops
        // succeeded and rely on any failures to also happen on secondaries. This isn't
        // perfect, but it's what the command has always done and is part of its "correct"
        // behavior.
        while (true) {
            try {
                WriteUnitOfWork wunit(txn);
                getGlobalEnvironment()->getOpObserver()->onApplyOps(txn, tempNS, cmdRewritten);
                wunit.commit();
                break;
            } catch (const WriteConflictException& wce) {
                LOG(2) << "WriteConflictException while logging applyOps command, retrying.";
                txn->recoveryUnit()->commitAndRestart();
                continue;
            }
        }
    }

    if (errors != 0) {
        return false;
    }

    return true;
}
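// Both retry loops above hand-roll the same WriteConflictException pattern. A minimal sketch
// of a helper that factors it out is below; 'retryOnWriteConflict' and its log message are
// illustrative names, not part of this tree, and std::function is assumed available.
Status retryOnWriteConflict(OperationContext* txn,
                            const char* opName,
                            const std::function<Status()>& op) {
    while (true) {
        try {
            return op();  // Either completes or throws WriteConflictException.
        } catch (const WriteConflictException&) {
            LOG(2) << "WriteConflictException in " << opName << ", retrying.";
            // Abandon the current snapshot so the retry sees the latest committed state.
            txn->recoveryUnit()->commitAndRestart();
        }
    }
}
//
// Hypothetical usage inside the loop above:
//   status = retryOnWriteConflict(txn, "applyOps", [&] {
//       return repl::applyOperation_inlock(txn, ctx.db(), temp, false, alwaysUpsert);
//   });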
bool run(const string& dbname,
         BSONObj& jsobj,
         int,
         string& errmsg,
         BSONObjBuilder& result,
         bool /*fromRepl*/) {
    static DBDirectClient db;

    BSONElement e = jsobj.firstElement();
    string toDeleteNs = dbname + '.' + e.valuestr();

    MONGO_TLOG(0) << "CMD: reIndex " << toDeleteNs << endl;

    Lock::DBWrite dbXLock(dbname);
    Client::Context ctx(toDeleteNs);

    Collection* collection = cc().database()->getCollection(toDeleteNs);
    if (!collection) {
        errmsg = "ns not found";
        return false;
    }

    BackgroundOperation::assertNoBgOpInProgForNs(toDeleteNs);

    std::vector<BSONObj> indexesInProg = stopIndexBuilds(cc().database(), jsobj);

    list<BSONObj> all;
    auto_ptr<DBClientCursor> i = db.query(dbname + ".system.indexes",
                                          BSON("ns" << toDeleteNs),
                                          0,
                                          0,
                                          0,
                                          QueryOption_SlaveOk);
    BSONObjBuilder b;
    while (i->more()) {
        const BSONObj spec = i->next().removeField("v").getOwned();
        const BSONObj key = spec.getObjectField("key");
        const Status keyStatus = validateKeyPattern(key);
        if (!keyStatus.isOK()) {
            errmsg = str::stream()
                << "Cannot rebuild index " << spec << ": " << keyStatus.reason()
                << " For more info see http://dochub.mongodb.org/core/index-validation";
            return false;
        }

        b.append(BSONObjBuilder::numStr(all.size()), spec);
        all.push_back(spec);
    }

    result.appendNumber("nIndexesWas", collection->getIndexCatalog()->numIndexesTotal());

    Status s = collection->getIndexCatalog()->dropAllIndexes(true);
    if (!s.isOK()) {
        errmsg = "dropIndexes failed";
        return appendCommandStatus(result, s);
    }

    for (list<BSONObj>::iterator i = all.begin(); i != all.end(); i++) {
        BSONObj o = *i;
        LOG(1) << "reIndex ns: " << toDeleteNs << " index: " << o << endl;
        Status s = collection->getIndexCatalog()->createIndex(o, false);
        if (!s.isOK())
            return appendCommandStatus(result, s);
    }

    result.append("nIndexes", (int)all.size());
    result.appendArray("indexes", b.obj());

    IndexBuilder::restoreIndexes(indexesInProg);
    return true;
}
void Command::execCommandClientBasic(Command* c,
                                     ClientBasic& client,
                                     int queryOptions,
                                     const char* ns,
                                     BSONObj& cmdObj,
                                     BSONObjBuilder& result,
                                     bool fromRepl) {
    verify(c);

    std::string dbname = nsToDatabase(ns);

    // Access control checks
    if (!noauth) {
        std::vector<Privilege> privileges;
        c->addRequiredPrivileges(dbname, cmdObj, &privileges);
        AuthorizationManager* authManager = client.getAuthorizationManager();
        if (!authManager->checkAuthForPrivileges(privileges).isOK()) {
            result.append("note",
                          str::stream() << "not authorized for command: " << c->name
                                        << " on database " << dbname);
            appendCommandStatus(result, false, "unauthorized");
            return;
        }
    }
    if (c->adminOnly() && c->localHostOnlyIfNoAuth(cmdObj) && noauth &&
        !client.getIsLocalHostConnection()) {
        log() << "command denied: " << cmdObj.toString() << endl;
        appendCommandStatus(result,
                            false,
                            "unauthorized: this command must run from localhost when running "
                            "db without auth");
        return;
    }
    if (c->adminOnly() && !startsWith(ns, "admin.")) {
        log() << "command denied: " << cmdObj.toString() << endl;
        appendCommandStatus(result, false, "access denied - use admin db");
        return;
    }
    // End of access control checks

    if (cmdObj.getBoolField("help")) {
        stringstream help;
        help << "help for: " << c->name << " ";
        c->help(help);
        result.append("help", help.str());
        result.append("lockType", c->locktype());
        appendCommandStatus(result, true, "");
        return;
    }

    std::string errmsg;
    bool ok;
    try {
        ok = c->run(dbname, cmdObj, queryOptions, errmsg, result, false);
    } catch (DBException& e) {
        ok = false;
        int code = e.getCode();
        if (code == RecvStaleConfigCode) {  // code for StaleConfigException
            throw;
        }

        stringstream ss;
        ss << "exception: " << e.what();
        errmsg = ss.str();
        result.append("code", code);
    }

    appendCommandStatus(result, ok, errmsg);
}
virtual bool run(const string& dbname,
                 BSONObj& cmdObj,
                 int options,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool fromRepl = false) {
    // --- parse

    NamespaceString ns(dbname, cmdObj[name].String());
    Status status = userAllowedWriteNS(ns);
    if (!status.isOK())
        return appendCommandStatus(result, status);

    if (cmdObj["indexes"].type() != Array) {
        errmsg = "indexes has to be an array";
        result.append("cmdObj", cmdObj);
        return false;
    }

    std::vector<BSONObj> specs;
    {
        BSONObjIterator i(cmdObj["indexes"].Obj());
        while (i.more()) {
            BSONElement e = i.next();
            if (e.type() != Object) {
                errmsg = "everything in indexes has to be an Object";
                result.append("cmdObj", cmdObj);
                return false;
            }
            specs.push_back(e.Obj());
        }
    }

    if (specs.size() == 0) {
        errmsg = "no indexes to add";
        return false;
    }

    // check specs
    for (size_t i = 0; i < specs.size(); i++) {
        BSONObj spec = specs[i];
        if (spec["ns"].eoo()) {
            spec = _addNsToSpec(ns, spec);
            specs[i] = spec;
        }

        if (spec["ns"].type() != String) {
            errmsg = "spec has no ns";
            result.append("spec", spec);
            return false;
        }
        if (ns != spec["ns"].String()) {
            errmsg = "namespace mismatch";
            result.append("spec", spec);
            return false;
        }
    }

    {
        // We first take a read lock to see if we need to do anything at all; many calls are
        // ensureIndex (and hence no-ops), so it's good that a shared lock covers the common
        // case. We only take the write lock if needed.
        Client::ReadContext readContext(ns);
        const Collection* collection = readContext.ctx().db()->getCollection(ns.ns());
        if (collection) {
            for (size_t i = 0; i < specs.size(); i++) {
                BSONObj spec = specs[i];
                StatusWith<BSONObj> statusWithSpec =
                    collection->getIndexCatalog()->prepareSpecForCreate(spec);
                status = statusWithSpec.getStatus();
                if (status.code() == ErrorCodes::IndexAlreadyExists) {
                    specs.erase(specs.begin() + i);
                    i--;
                    continue;
                }
                if (!status.isOK())
                    return appendCommandStatus(result, status);
            }

            if (specs.size() == 0) {
                result.append("numIndexesBefore",
                              collection->getIndexCatalog()->numIndexesTotal());
                result.append("note", "all indexes already exist");
                return true;
            }

            // need to create index
        }
    }

    // now we know we have to create index(es)

    Client::WriteContext writeContext(ns.ns());
    Database* db = writeContext.ctx().db();

    Collection* collection = db->getCollection(ns.ns());
    result.appendBool("createdCollectionAutomatically", collection == NULL);
    if (!collection) {
        collection = db->createCollection(ns.ns());
        invariant(collection);
    }

    result.append("numIndexesBefore", collection->getIndexCatalog()->numIndexesTotal());

    for (size_t i = 0; i < specs.size(); i++) {
        BSONObj spec = specs[i];

        status = collection->getIndexCatalog()->createIndex(spec, true);
        if (status.code() == ErrorCodes::IndexAlreadyExists) {
            if (!result.hasField("note"))
                result.append("note", "index already exists");
            continue;
        }

        if (!status.isOK()) {
            appendCommandStatus(result, status);
            return false;
        }
    }

    result.append("numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal());

    if (!fromRepl) {
        string cmdNs = ns.getCommandNS();
        logOp("c", cmdNs.c_str(), cmdObj);
    }

    return true;
}
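// For reference, a minimal sketch of the command document this run() expects, built with the
// same BSON macros used elsewhere in the tree ("db.coll" and the "a_1" index spec are
// illustrative values, not taken from the source):
BSONObj makeCreateIndexesCmdExample() {
    return BSON("createIndexes" << "coll"
                                << "indexes"
                                << BSON_ARRAY(BSON("key" << BSON("a" << 1) << "name"
                                                         << "a_1"
                                                         << "ns"
                                                         << "db.coll")));
}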
bool run(OperationContext* txn,
         const string& dbname,
         BSONObj& jsobj,
         int,
         string& errmsg,
         BSONObjBuilder& result,
         bool /*fromRepl*/) {
    DBDirectClient db(txn);

    BSONElement e = jsobj.firstElement();
    string toDeleteNs = dbname + '.' + e.valuestr();

    LOG(0) << "CMD: reIndex " << toDeleteNs << endl;

    Lock::DBLock dbXLock(txn->lockState(), dbname, MODE_X);
    Client::Context ctx(txn, toDeleteNs);

    Collection* collection = ctx.db()->getCollection(txn, toDeleteNs);

    if (!collection) {
        errmsg = "ns not found";
        return false;
    }

    BackgroundOperation::assertNoBgOpInProgForNs(toDeleteNs);

    std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, ctx.db(), jsobj);

    vector<BSONObj> all;
    {
        vector<string> indexNames;
        collection->getCatalogEntry()->getAllIndexes(txn, &indexNames);
        for (size_t i = 0; i < indexNames.size(); i++) {
            const string& name = indexNames[i];
            BSONObj spec = collection->getCatalogEntry()->getIndexSpec(txn, name);
            all.push_back(spec.removeField("v").getOwned());

            const BSONObj key = spec.getObjectField("key");
            const Status keyStatus = validateKeyPattern(key);
            if (!keyStatus.isOK()) {
                errmsg = str::stream()
                    << "Cannot rebuild index " << spec << ": " << keyStatus.reason()
                    << " For more info see http://dochub.mongodb.org/core/index-validation";
                return false;
            }
        }
    }

    result.appendNumber("nIndexesWas", all.size());

    {
        WriteUnitOfWork wunit(txn);
        Status s = collection->getIndexCatalog()->dropAllIndexes(txn, true);
        if (!s.isOK()) {
            errmsg = "dropIndexes failed";
            return appendCommandStatus(result, s);
        }
        wunit.commit();
    }

    MultiIndexBlock indexer(txn, collection);
    // do not want interruption as that will leave us without indexes.

    Status status = indexer.init(all);
    if (!status.isOK())
        return appendCommandStatus(result, status);

    status = indexer.insertAllDocumentsInCollection();
    if (!status.isOK())
        return appendCommandStatus(result, status);

    {
        WriteUnitOfWork wunit(txn);
        indexer.commit();
        wunit.commit();
    }

    result.append("nIndexes", (int)all.size());
    result.append("indexes", all);

    IndexBuilder::restoreIndexes(indexesInProg);
    return true;
}
bool run(OperationContext* txn,
         const string& dbname,
         BSONObj& cmdObj,
         int,
         string& errmsg,
         BSONObjBuilder& result) {
    BSONElement first = cmdObj.firstElement();
    uassert(28528,
            str::stream() << "Argument to listIndexes must be of type String, not "
                          << typeName(first.type()),
            first.type() == String);
    StringData collectionName = first.valueStringData();
    uassert(28529,
            str::stream() << "Argument to listIndexes must be a collection name, "
                          << "not the empty string",
            !collectionName.empty());
    const NamespaceString ns(dbname, collectionName);

    const long long defaultBatchSize = std::numeric_limits<long long>::max();
    long long batchSize;
    Status parseCursorStatus = parseCommandCursorOptions(cmdObj, defaultBatchSize, &batchSize);
    if (!parseCursorStatus.isOK()) {
        return appendCommandStatus(result, parseCursorStatus);
    }

    AutoGetCollectionForRead autoColl(txn, ns);
    if (!autoColl.getDb()) {
        return appendCommandStatus(result,
                                   Status(ErrorCodes::NamespaceNotFound, "no database"));
    }

    const Collection* collection = autoColl.getCollection();
    if (!collection) {
        return appendCommandStatus(result,
                                   Status(ErrorCodes::NamespaceNotFound, "no collection"));
    }

    const CollectionCatalogEntry* cce = collection->getCatalogEntry();
    invariant(cce);

    vector<string> indexNames;
    MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
        indexNames.clear();
        cce->getAllIndexes(txn, &indexNames);
    }
    MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

    auto ws = make_unique<WorkingSet>();
    auto root = make_unique<QueuedDataStage>(txn, ws.get());

    for (size_t i = 0; i < indexNames.size(); i++) {
        BSONObj indexSpec;
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            indexSpec = cce->getIndexSpec(txn, indexNames[i]);
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

        WorkingSetID id = ws->allocate();
        WorkingSetMember* member = ws->get(id);
        member->keyData.clear();
        member->loc = RecordId();
        member->obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned());
        member->transitionToOwnedObj();
        root->pushBack(id);
    }

    std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name << "."
                                                << ns.coll();
    dassert(NamespaceString(cursorNamespace).isValid());
    dassert(NamespaceString(cursorNamespace).isListIndexesCursorNS());
    dassert(ns == NamespaceString(cursorNamespace).getTargetNSForListIndexes());

    auto statusWithPlanExecutor = PlanExecutor::make(
        txn, std::move(ws), std::move(root), cursorNamespace, PlanExecutor::YIELD_MANUAL);
    if (!statusWithPlanExecutor.isOK()) {
        return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
    }
    unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

    BSONArrayBuilder firstBatch;

    const int byteLimit = FindCommon::kMaxBytesToReturnToClientAtOnce;
    for (long long objCount = 0; objCount < batchSize && firstBatch.len() < byteLimit;
         objCount++) {
        BSONObj next;
        PlanExecutor::ExecState state = exec->getNext(&next, NULL);
        if (state == PlanExecutor::IS_EOF) {
            break;
        }
        invariant(state == PlanExecutor::ADVANCED);
        firstBatch.append(next);
    }

    CursorId cursorId = 0LL;
    if (!exec->isEOF()) {
        exec->saveState();
        exec->detachFromOperationContext();
        ClientCursor* cursor =
            new ClientCursor(CursorManager::getGlobalCursorManager(),
                             exec.release(),
                             cursorNamespace,
                             txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot());
        cursorId = cursor->cursorid();
    }

    appendCursorResponseObject(cursorId, cursorNamespace, firstBatch.arr(), &result);

    return true;
}
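// appendCursorResponseObject() above emits the standard cursor-style command reply. A sketch
// of the document shape it produces, following the documented wire format; the builder below
// is illustrative, not the real helper:
void appendCursorResponseSketch(long long cursorId,
                                const std::string& cursorNamespace,
                                const BSONArray& firstBatch,
                                BSONObjBuilder* result) {
    BSONObjBuilder cursorBob(result->subobjStart("cursor"));
    cursorBob.append("id", cursorId);         // 0 tells the client no server cursor remains.
    cursorBob.append("ns", cursorNamespace);  // namespace for subsequent getMore requests.
    cursorBob.appendArray("firstBatch", firstBatch);
    cursorBob.done();
}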
virtual bool run(OperationContext* txn,
                 const string& dbname,
                 BSONObj& cmdObj,
                 int,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool fromRepl) {
    Lock::GlobalWrite globalWriteLock(txn->lockState());
    string source = cmdObj.getStringField(name.c_str());
    string target = cmdObj.getStringField("to");

    // We stay in source context the whole time. This is mostly to set the CurOp namespace.
    Client::Context ctx(txn, source);

    if (!NamespaceString::validCollectionComponent(target.c_str())) {
        errmsg = "invalid collection name: " + target;
        return false;
    }
    if (source.empty() || target.empty()) {
        errmsg = "invalid command syntax";
        return false;
    }

    if (!fromRepl) {  // If it got through on the master, need to allow it here too
        Status sourceStatus = userAllowedWriteNS(source);
        if (!sourceStatus.isOK()) {
            errmsg = "error with source namespace: " + sourceStatus.reason();
            return false;
        }

        Status targetStatus = userAllowedWriteNS(target);
        if (!targetStatus.isOK()) {
            errmsg = "error with target namespace: " + targetStatus.reason();
            return false;
        }
    }

    if (NamespaceString(source).coll() == "system.indexes" ||
        NamespaceString(target).coll() == "system.indexes") {
        errmsg = "renaming system.indexes is not allowed";
        return false;
    }

    Database* const sourceDB = dbHolder().get(txn, nsToDatabase(source));
    Collection* const sourceColl = sourceDB ? sourceDB->getCollection(txn, source) : NULL;
    if (!sourceColl) {
        errmsg = "source namespace does not exist";
        return false;
    }

    {
        // Ensure that collection name does not exceed maximum length.
        // Ensure that index names do not push the length over the max.
        // Iterator includes unfinished indexes.
        IndexCatalog::IndexIterator sourceIndIt =
            sourceColl->getIndexCatalog()->getIndexIterator(txn, true);
        int longestIndexNameLength = 0;
        while (sourceIndIt.more()) {
            int thisLength = sourceIndIt.next()->indexName().length();
            if (thisLength > longestIndexNameLength)
                longestIndexNameLength = thisLength;
        }

        unsigned int longestAllowed =
            min(int(NamespaceString::MaxNsCollectionLen),
                int(NamespaceString::MaxNsLen) - 2 /*strlen(".$")*/ - longestIndexNameLength);
        if (target.size() > longestAllowed) {
            StringBuilder sb;
            sb << "collection name length of " << target.size()
               << " exceeds maximum length of " << longestAllowed
               << ", allowing for index names";
            errmsg = sb.str();
            return false;
        }
    }

    const std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, sourceDB, cmdObj);
    // Dismissed on success
    ScopeGuard indexBuildRestorer = MakeGuard(IndexBuilder::restoreIndexes, indexesInProg);

    Database* const targetDB = dbHolder().openDb(txn, nsToDatabase(target));

    {
        WriteUnitOfWork wunit(txn);

        // Check if the target namespace exists and if dropTarget is true.
        // If target exists and dropTarget is not true, return false.
        if (targetDB->getCollection(txn, target)) {
            if (!cmdObj["dropTarget"].trueValue()) {
                errmsg = "target namespace exists";
                return false;
            }

            Status s = targetDB->dropCollection(txn, target);
            if (!s.isOK()) {
                errmsg = s.toString();
                return false;
            }
        }

        // If we are renaming in the same database, just rename the namespace and we're done.
        if (sourceDB == targetDB) {
            Status s = targetDB->renameCollection(
                txn, source, target, cmdObj["stayTemp"].trueValue());
            if (!s.isOK()) {
                return appendCommandStatus(result, s);
            }

            if (!fromRepl) {
                repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
            }

            wunit.commit();
            indexBuildRestorer.Dismiss();
            return true;
        }

        wunit.commit();
    }

    // If we get here, we are renaming across databases, so we must copy all the data and
    // indexes, then remove the source collection.

    // Create the target collection. It will be removed if we fail to copy the collection.
    // TODO use a temp collection and unset the temp flag on success.
    Collection* targetColl = NULL;
    {
        CollectionOptions options;
        options.setNoIdIndex();

        if (sourceColl->isCapped()) {
            const CollectionOptions sourceOpts =
                sourceColl->getCatalogEntry()->getCollectionOptions(txn);

            options.capped = true;
            options.cappedSize = sourceOpts.cappedSize;
            options.cappedMaxDocs = sourceOpts.cappedMaxDocs;
        }

        WriteUnitOfWork wunit(txn);

        // No logOp necessary because the entire renameCollection command is one logOp.
        targetColl = targetDB->createCollection(txn, target, options);
        if (!targetColl) {
            errmsg = "Failed to create target collection.";
            return false;
        }

        wunit.commit();
    }

    // Dismissed on success
    ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target);

    MultiIndexBlock indexer(txn, targetColl);
    indexer.allowInterruption();

    // Copy the index descriptions from the source collection, adjusting the ns field.
    {
        std::vector<BSONObj> indexesToCopy;
        IndexCatalog::IndexIterator sourceIndIt =
            sourceColl->getIndexCatalog()->getIndexIterator(txn, true);
        while (sourceIndIt.more()) {
            const BSONObj currIndex = sourceIndIt.next()->infoObj();

            // Process the source index.
            BSONObjBuilder newIndex;
            newIndex.append("ns", target);
            newIndex.appendElementsUnique(currIndex);
            indexesToCopy.push_back(newIndex.obj());
        }
        indexer.init(indexesToCopy);
    }

    {
        // Copy over all the data from source collection to target collection.
        boost::scoped_ptr<RecordIterator> sourceIt(sourceColl->getIterator(txn));
        while (!sourceIt->isEOF()) {
            txn->checkForInterrupt(false);

            const BSONObj obj = sourceColl->docFor(txn, sourceIt->getNext());

            WriteUnitOfWork wunit(txn);
            // No logOp necessary because the entire renameCollection command is one logOp.
            Status status = targetColl->insertDocument(txn, obj, &indexer, true).getStatus();
            if (!status.isOK())
                return appendCommandStatus(result, status);
            wunit.commit();
        }
    }

    Status status = indexer.doneInserting();
    if (!status.isOK())
        return appendCommandStatus(result, status);

    {
        // Getting here means we successfully built the target copy. We now remove the
        // source collection and finalize the rename.
        WriteUnitOfWork wunit(txn);

        Status status = sourceDB->dropCollection(txn, source);
        if (!status.isOK())
            return appendCommandStatus(result, status);

        indexer.commit();

        if (!fromRepl) {
            repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
        }

        wunit.commit();
    }

    indexBuildRestorer.Dismiss();
    targetCollectionDropper.Dismiss();
    return true;
}
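// The MakeGuard/Dismiss pairs above implement a commit-or-cleanup idiom: each guard runs its
// cleanup (restore stopped index builds, drop the half-built target) on every exit path
// unless the success path dismisses it first. A minimal sketch of the pattern, assuming
// MakeGuard accepts an arbitrary callable:
void scopeGuardSketch() {
    ScopeGuard cleanup = MakeGuard([] { /* undo partial work here */ });
    // ... perform each fallible step; an early return or exception triggers the cleanup ...
    cleanup.Dismiss();  // Reached only on success, so the undo never runs.
}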
virtual bool run(OperationContext* txn,
                 const string&,
                 BSONObj& cmdObj,
                 int,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool fromRepl) {
    Status status = getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
    return appendCommandStatus(result, status);
}
virtual bool run(OperationContext* txn,
                 const string&,
                 BSONObj& cmdObj,
                 int,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool fromRepl) {
    int secs = (int)cmdObj.firstElement().numberInt();
    return appendCommandStatus(
        result, getGlobalReplicationCoordinator()->processReplSetFreeze(secs, &result));
}
bool WriteCmd::run(const string& dbName,
                   BSONObj& cmdObj,
                   int options,
                   string& errMsg,
                   BSONObjBuilder& result,
                   bool fromRepl) {
    // Can't be run on secondaries (logTheOp() == false, slaveOk() == false).
    dassert(!fromRepl);
    BatchedCommandRequest request(_writeType);
    BatchedCommandResponse response;

    if (!request.parseBSON(cmdObj, &errMsg) || !request.isValid(&errMsg)) {
        // Batch parse failure
        response.setOk(false);
        response.setN(0);
        response.setErrCode(ErrorCodes::FailedToParse);
        response.setErrMessage(errMsg);

        dassert(response.isValid(&errMsg));
        result.appendElements(response.toBSON());

        // TODO
        // There's a pending issue about how to report response here. If we use the
        // command infrastructure, we should reuse the 'errmsg' field. But we have
        // already filed that message inside the BatchCommandResponse.
        // return response.getOk();
        return true;
    }

    // Note that this is a runCommand, and therefore, the database and the collection name
    // are in different parts of the grammar for the command. But it's more convenient to
    // work with a NamespaceString. We build it here and replace it in the parsed command.
    // Internally, everything works with the namespace string as opposed to just the
    // collection name.
    NamespaceString nss(dbName, request.getNS());
    request.setNS(nss.ns());

    Status status = userAllowedWriteNS(nss);
    if (!status.isOK())
        return appendCommandStatus(result, status);

    if (cc().curop())
        cc().curop()->setNS(nss.ns());

    if (request.getBatchType() == BatchedCommandRequest::BatchType_Insert) {
        // check all docs
        BatchedInsertRequest* insertRequest = request.getInsertRequest();
        vector<BSONObj>& docsToInsert = insertRequest->getDocuments();
        for (size_t i = 0; i < docsToInsert.size(); i++) {
            StatusWith<BSONObj> fixed = fixDocumentForInsert(docsToInsert[i]);
            if (!fixed.isOK()) {
                // we don't return early since each doc can be handled independently
                continue;
            }
            if (fixed.getValue().isEmpty()) {
                continue;
            }
            docsToInsert[i] = fixed.getValue();
        }
    }

    BSONObj defaultWriteConcern;
    // This is really bad - it's only safe because we leak the defaults by overriding them
    // with new defaults and because we never reset to an empty default.
    // TODO: fix this for sane behavior where we query repl set object
    if (getLastErrorDefault)
        defaultWriteConcern = *getLastErrorDefault;
    if (defaultWriteConcern.isEmpty()) {
        BSONObjBuilder b;
        b.append("w", 1);
        defaultWriteConcern = b.obj();
    }

    WriteBatchExecutor writeBatchExecutor(
        defaultWriteConcern, &cc(), &globalOpCounters, lastError.get());

    writeBatchExecutor.executeBatch(request, &response);

    result.appendElements(response.toBSON());

    // TODO
    // There's a pending issue about how to report response here. If we use the command
    // infrastructure, we should reuse the 'errmsg' field. But we have already filed that
    // message inside the BatchCommandResponse.
    // return response.getOk();
    return true;
}
/**
 * Generates the next batch of results for a ClientCursor.
 *
 * TODO: Do we need to support some equivalent of OP_REPLY responseFlags?
 *
 * TODO: Is it possible to support awaitData?
 */
bool run(OperationContext* txn,
         const std::string& dbname,
         BSONObj& cmdObj,
         int options,
         std::string& errmsg,
         BSONObjBuilder& result) override {
    // Counted as a getMore, not as a command.
    globalOpCounters.gotGetMore();

    if (txn->getClient()->isInDirectClient()) {
        return appendCommandStatus(
            result,
            Status(ErrorCodes::IllegalOperation, "Cannot run getMore command from eval()"));
    }

    StatusWith<GetMoreRequest> parseStatus = GetMoreRequest::parseFromBSON(dbname, cmdObj);
    if (!parseStatus.isOK()) {
        return appendCommandStatus(result, parseStatus.getStatus());
    }
    const GetMoreRequest& request = parseStatus.getValue();

    // Depending on the type of cursor being operated on, we hold locks for the whole
    // getMore, or none of the getMore, or part of the getMore. The three cases in detail:
    //
    // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore.
    // 2) Cursor owned by global cursor manager: we don't lock anything. These cursors
    //    don't own any collection state.
    // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and
    //    "unpinCollLock". This is because agg cursors handle locking internally (hence the
    //    release), but the pin and unpin of the cursor must occur under the collection
    //    lock. We don't use our AutoGetCollectionForRead "ctx" to relock, because
    //    AutoGetCollectionForRead checks the sharding version (and we want the relock for
    //    the unpin to succeed even if the sharding version has changed).
    //
    // Note that we declare our locks before our ClientCursorPin, in order to ensure that
    // the pin's destructor is called before the lock destructors (so that the unpin occurs
    // under the lock).
    std::unique_ptr<AutoGetCollectionForRead> ctx;
    std::unique_ptr<Lock::DBLock> unpinDBLock;
    std::unique_ptr<Lock::CollectionLock> unpinCollLock;

    CursorManager* cursorManager;
    CursorManager* globalCursorManager = CursorManager::getGlobalCursorManager();
    if (globalCursorManager->ownsCursorId(request.cursorid)) {
        cursorManager = globalCursorManager;
    } else {
        ctx.reset(new AutoGetCollectionForRead(txn, request.nss));
        Collection* collection = ctx->getCollection();
        if (!collection) {
            return appendCommandStatus(result,
                                       Status(ErrorCodes::OperationFailed,
                                              "collection dropped between getMore calls"));
        }
        cursorManager = collection->getCursorManager();
    }

    ClientCursorPin ccPin(cursorManager, request.cursorid);
    ClientCursor* cursor = ccPin.c();
    if (!cursor) {
        // We didn't find the cursor.
        return appendCommandStatus(
            result,
            Status(ErrorCodes::CursorNotFound,
                   str::stream() << "Cursor not found, cursor id: " << request.cursorid));
    }

    if (request.nss.ns() != cursor->ns()) {
        return appendCommandStatus(
            result,
            Status(ErrorCodes::Unauthorized,
                   str::stream() << "Requested getMore on namespace '" << request.nss.ns()
                                 << "', but cursor belongs to a different namespace"));
    }

    // On early return, get rid of the cursor.
    ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, ccPin);

    if (!cursor->hasRecoveryUnit()) {
        // Start using a new RecoveryUnit.
        cursor->setOwnedRecoveryUnit(
            getGlobalServiceContext()->getGlobalStorageEngine()->newRecoveryUnit());
    }

    // Swap RecoveryUnit(s) between the ClientCursor and OperationContext.
    ScopedRecoveryUnitSwapper ruSwapper(cursor, txn);

    // Reset timeout timer on the cursor since the cursor is still in use.
    cursor->setIdleTime(0);

    // If the operation that spawned this cursor had a time limit set, apply leftover
    // time to this getmore.
    txn->getCurOp()->setMaxTimeMicros(cursor->getLeftoverMaxTimeMicros());
    txn->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

    if (cursor->isAggCursor()) {
        // Agg cursors handle their own locking internally.
        ctx.reset();  // unlocks
    }

    PlanExecutor* exec = cursor->getExecutor();
    exec->restoreState(txn);

    // TODO: Handle result sets larger than 16MB.
    BSONArrayBuilder nextBatch;
    BSONObj obj;
    PlanExecutor::ExecState state;
    int numResults = 0;
    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // Add result to output buffer.
        nextBatch.append(obj);
        numResults++;

        if (enoughForGetMore(request.batchSize, numResults, nextBatch.len())) {
            break;
        }
    }

    // If we are operating on an aggregation cursor, then we dropped our collection lock
    // earlier and need to reacquire it in order to clean up our ClientCursorPin.
    //
    // TODO: We need to ensure that this relock happens if we release the pin above in
    // response to PlanExecutor::getNext() throwing an exception.
    if (cursor->isAggCursor()) {
        invariant(NULL == ctx.get());
        unpinDBLock.reset(new Lock::DBLock(txn->lockState(), request.nss.db(), MODE_IS));
        unpinCollLock.reset(
            new Lock::CollectionLock(txn->lockState(), request.nss.ns(), MODE_IS));
    }

    // Fail the command if the PlanExecutor reports execution failure.
    if (PlanExecutor::FAILURE == state) {
        const std::unique_ptr<PlanStageStats> stats(exec->getStats());
        error() << "GetMore executor error, stats: " << Explain::statsToBSON(*stats);
        return appendCommandStatus(
            result,
            Status(ErrorCodes::OperationFailed,
                   str::stream() << "GetMore executor error: "
                                 << WorkingSetCommon::toStatusString(obj)));
    }

    CursorId respondWithId = 0;
    if (shouldSaveCursorGetMore(state, exec, isCursorTailable(cursor))) {
        respondWithId = request.cursorid;

        exec->saveState();

        cursor->setLeftoverMaxTimeMicros(txn->getCurOp()->getRemainingMaxTimeMicros());
        cursor->incPos(numResults);

        if (isCursorTailable(cursor) && state == PlanExecutor::IS_EOF) {
            // Rather than swapping their existing RU into the client cursor, tailable
            // cursors should get a new recovery unit.
            ruSwapper.dismiss();
        }
    } else {
        txn->getCurOp()->debug().cursorExhausted = true;
    }

    appendGetMoreResponseObject(respondWithId, request.nss.ns(), nextBatch.arr(), &result);

    if (respondWithId) {
        cursorFreer.Dismiss();
    }
    return true;
}
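// The batching loop above delegates its stopping rule to enoughForGetMore(). A plausible
// sketch of such a predicate, assuming an optional client batchSize and a soft cap on reply
// bytes (the 4MB constant and the exact rules are illustrative, not taken from the source):
bool enoughForGetMoreSketch(boost::optional<long long> batchSize,
                            int numResults,
                            int bytesBuffered) {
    // Stop once the client-requested document count is reached.
    if (batchSize && numResults >= *batchSize) {
        return true;
    }
    // Otherwise stop when the accumulated BSON approaches the reply size limit.
    const int kMaxBytes = 4 * 1024 * 1024;
    return bytesBuffered > kMaxBytes;
}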
/**
 * Runs a query using the following steps:
 *   --Parsing.
 *   --Acquire locks.
 *   --Plan query, obtaining an executor that can run it.
 *   --Generate the first batch.
 *   --Save state for getMore, transferring ownership of the executor to a ClientCursor.
 *   --Generate response to send to the client.
 */
bool run(OperationContext* txn,
         const std::string& dbname,
         BSONObj& cmdObj,
         int options,
         std::string& errmsg,
         BSONObjBuilder& result) override {
    const NamespaceString nss(parseNs(dbname, cmdObj));
    if (!nss.isValid() || nss.isCommand() || nss.isSpecialCommand()) {
        return appendCommandStatus(result,
                                   {ErrorCodes::InvalidNamespace,
                                    str::stream() << "Invalid collection name: " << nss.ns()});
    }

    // Although it is a command, a find command gets counted as a query.
    globalOpCounters.gotQuery();

    if (txn->getClient()->isInDirectClient()) {
        return appendCommandStatus(
            result,
            Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()"));
    }

    // Parse the command BSON to a QueryRequest.
    const bool isExplain = false;
    auto qrStatus = QueryRequest::makeFromFindCommand(nss, cmdObj, isExplain);
    if (!qrStatus.isOK()) {
        return appendCommandStatus(result, qrStatus.getStatus());
    }

    auto& qr = qrStatus.getValue();

    // Validate term before acquiring locks, if provided.
    if (auto term = qr->getReplicationTerm()) {
        auto replCoord = repl::ReplicationCoordinator::get(txn);
        Status status = replCoord->updateTerm(txn, *term);
        // Note: updateTerm returns ok if term stayed the same.
        if (!status.isOK()) {
            return appendCommandStatus(result, status);
        }
    }

    // Fill out curop information.
    //
    // We pass negative values for 'ntoreturn' and 'ntoskip' to indicate that these values
    // should be omitted from the log line. Limit and skip information is already present
    // in the find command parameters, so these fields are redundant.
    const int ntoreturn = -1;
    const int ntoskip = -1;
    beginQueryOp(txn, nss, cmdObj, ntoreturn, ntoskip);

    // Finish the parsing step by using the QueryRequest to create a CanonicalQuery.
    ExtensionsCallbackReal extensionsCallback(txn, &nss);
    auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback);
    if (!statusWithCQ.isOK()) {
        return appendCommandStatus(result, statusWithCQ.getStatus());
    }
    std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());

    // Acquire locks.
    AutoGetCollectionForRead ctx(txn, nss);
    Collection* collection = ctx.getCollection();

    // Get the execution plan for the query.
    auto statusWithPlanExecutor =
        getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO);
    if (!statusWithPlanExecutor.isOK()) {
        return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
    }

    std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

    {
        stdx::lock_guard<Client> lk(*txn->getClient());
        CurOp::get(txn)->setPlanSummary_inlock(Explain::getPlanSummary(exec.get()));
    }

    if (!collection) {
        // No collection. Just fill out curop indicating that there were zero results and
        // there is no ClientCursor id, and then return.
        const long long numResults = 0;
        const CursorId cursorId = 0;
        endQueryOp(txn, collection, *exec, numResults, cursorId);
        appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result);
        return true;
    }

    const QueryRequest& originalQR = exec->getCanonicalQuery()->getQueryRequest();

    // Stream query results, adding them to a BSONArray as we go.
    CursorResponseBuilder firstBatch(/*isInitialResponse*/ true, &result);
    BSONObj obj;
    PlanExecutor::ExecState state = PlanExecutor::ADVANCED;
    long long numResults = 0;
    while (!FindCommon::enoughForFirstBatch(originalQR, numResults) &&
           PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // If we can't fit this result inside the current batch, then we stash it for later.
        if (!FindCommon::haveSpaceForNext(obj, numResults, firstBatch.bytesUsed())) {
            exec->enqueue(obj);
            break;
        }

        // Add result to output buffer.
        firstBatch.append(obj);
        numResults++;
    }

    // Throw an assertion if query execution fails for any reason.
    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        firstBatch.abandon();
        error() << "Plan executor error during find command: "
                << PlanExecutor::statestr(state)
                << ", stats: " << Explain::getWinningPlanStats(exec.get());

        return appendCommandStatus(result,
                                   Status(ErrorCodes::OperationFailed,
                                          str::stream()
                                              << "Executor error during find command: "
                                              << WorkingSetCommon::toStatusString(obj)));
    }

    // Before saving the cursor, ensure that whatever plan we established happened with the
    // expected collection version.
    auto css = CollectionShardingState::get(txn, nss);
    css->checkShardVersionOrThrow(txn);

    // Set up the cursor for getMore.
    CursorId cursorId = 0;
    if (shouldSaveCursor(txn, collection, state, exec.get())) {
        // Register the execution plan inside a ClientCursor. Ownership of the PlanExecutor
        // is transferred to the ClientCursor.
        //
        // First unregister the PlanExecutor so it can be re-registered with ClientCursor.
        exec->deregisterExec();

        // Create a ClientCursor containing this plan executor. We don't have to worry
        // about leaking it as it's inserted into a global map by its ctor.
        ClientCursor* cursor =
            new ClientCursor(collection->getCursorManager(),
                             exec.release(),
                             nss.ns(),
                             txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                             originalQR.getOptions(),
                             cmdObj.getOwned());
        cursorId = cursor->cursorid();

        invariant(!exec);
        PlanExecutor* cursorExec = cursor->getExecutor();

        // State will be restored on getMore.
        cursorExec->saveState();
        cursorExec->detachFromOperationContext();

        cursor->setLeftoverMaxTimeMicros(txn->getRemainingMaxTimeMicros());
        cursor->setPos(numResults);

        // Fill out curop based on the results.
        endQueryOp(txn, collection, *cursorExec, numResults, cursorId);
    } else {
        endQueryOp(txn, collection, *exec, numResults, cursorId);
    }

    // Generate the response object to send to the client.
    firstBatch.done(cursorId, nss.ns());
    return true;
}
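// FindCommon::haveSpaceForNext() above guards against overflowing the reply document. The
// older find implementation later in this file spells out essentially the same check inline;
// a sketch of that logic, using the BSONObjMaxUserSize limit it references (the helper name
// is illustrative):
bool haveSpaceForNextSketch(const BSONObj& nextDoc, long long numDocs, int bytesBuffered) {
    // Always return at least one document, even if it alone approaches the limit.
    if (numDocs == 0) {
        return true;
    }
    return bytesBuffered + nextDoc.objsize() <= BSONObjMaxUserSize;
}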
bool run(OperationContext* txn,
         const string& dbname,
         BSONObj& cmdObj,
         int,
         string& errmsg,
         BSONObjBuilder& result) {
    BSONElement first = cmdObj.firstElement();
    uassert(28528,
            str::stream() << "Argument to listIndexes must be of type String, not "
                          << typeName(first.type()),
            first.type() == String);
    const NamespaceString ns(parseNs(dbname, cmdObj));
    uassert(28529,
            str::stream() << "Argument to listIndexes must be a collection name, "
                          << "not the empty string",
            !ns.coll().empty());

    const long long defaultBatchSize = std::numeric_limits<long long>::max();
    long long batchSize;
    Status parseCursorStatus = parseCommandCursorOptions(cmdObj, defaultBatchSize, &batchSize);
    if (!parseCursorStatus.isOK()) {
        return appendCommandStatus(result, parseCursorStatus);
    }

    AutoGetCollectionForRead autoColl(txn, ns);
    if (!autoColl.getDb()) {
        return appendCommandStatus(result,
                                   Status(ErrorCodes::NamespaceNotFound, "no database"));
    }

    const Collection* collection = autoColl.getCollection();
    if (!collection) {
        return appendCommandStatus(result,
                                   Status(ErrorCodes::NamespaceNotFound, "no collection"));
    }

    const CollectionCatalogEntry* cce = collection->getCatalogEntry();
    invariant(cce);

    vector<string> indexNames;
    MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
        indexNames.clear();
        cce->getAllIndexes(txn, &indexNames);
    }
    MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

    std::auto_ptr<WorkingSet> ws(new WorkingSet());
    std::auto_ptr<QueuedDataStage> root(new QueuedDataStage(ws.get()));

    for (size_t i = 0; i < indexNames.size(); i++) {
        BSONObj indexSpec;
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            indexSpec = cce->getIndexSpec(txn, indexNames[i]);
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

        WorkingSetMember member;
        member.state = WorkingSetMember::OWNED_OBJ;
        member.keyData.clear();
        member.loc = RecordId();
        member.obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned());
        root->pushBack(member);
    }

    std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name << "."
                                                << ns.coll();
    dassert(NamespaceString(cursorNamespace).isValid());
    dassert(NamespaceString(cursorNamespace).isListIndexesGetMore());
    dassert(ns == NamespaceString(cursorNamespace).getTargetNSForListIndexesGetMore());

    PlanExecutor* rawExec;
    Status makeStatus = PlanExecutor::make(txn,
                                           ws.release(),
                                           root.release(),
                                           cursorNamespace,
                                           PlanExecutor::YIELD_MANUAL,
                                           &rawExec);
    std::auto_ptr<PlanExecutor> exec(rawExec);
    if (!makeStatus.isOK()) {
        return appendCommandStatus(result, makeStatus);
    }

    BSONArrayBuilder firstBatch;

    const int byteLimit = MaxBytesToReturnToClientAtOnce;
    for (long long objCount = 0; objCount < batchSize && firstBatch.len() < byteLimit;
         objCount++) {
        BSONObj next;
        PlanExecutor::ExecState state = exec->getNext(&next, NULL);
        if (state == PlanExecutor::IS_EOF) {
            break;
        }
        invariant(state == PlanExecutor::ADVANCED);
        firstBatch.append(next);
    }

    CursorId cursorId = 0LL;
    if (!exec->isEOF()) {
        exec->saveState();
        ClientCursor* cursor = new ClientCursor(
            CursorManager::getGlobalCursorManager(), exec.release(), cursorNamespace);
        cursorId = cursor->cursorid();
    }

    appendCursorResponseObject(cursorId, cursorNamespace, firstBatch.arr(), &result);

    return true;
}
bool run(OperationContext* txn,
         const string& dbname,
         BSONObj& cmdObj,
         int,
         string& errmsg,
         BSONObjBuilder& result,
         bool /*fromRepl*/) {
    BSONElement first = cmdObj.firstElement();
    uassert(28528,
            str::stream() << "Argument to listIndexes must be of type String, not "
                          << typeName(first.type()),
            first.type() == String);
    const NamespaceString ns(parseNs(dbname, cmdObj));
    uassert(28529,
            str::stream() << "Argument to listIndexes must be a collection name, "
                          << "not the empty string",
            !ns.coll().empty());

    AutoGetCollectionForRead autoColl(txn, ns);
    if (!autoColl.getDb()) {
        return appendCommandStatus(result,
                                   Status(ErrorCodes::NamespaceNotFound, "no database"));
    }

    const Collection* collection = autoColl.getCollection();
    if (!collection) {
        return appendCommandStatus(result,
                                   Status(ErrorCodes::NamespaceNotFound, "no collection"));
    }

    const CollectionCatalogEntry* cce = collection->getCatalogEntry();
    invariant(cce);

    vector<string> indexNames;
    cce->getAllIndexes(txn, &indexNames);

    // TODO: Handle options specified in the command request object under the "cursor"
    // field.
    // TODO: If the full result set does not fit in one batch, allocate a cursor to store
    // the remainder of the results.

    BSONArrayBuilder arr;
    for (size_t i = 0; i < indexNames.size(); i++) {
        arr.append(cce->getIndexSpec(txn, indexNames[i]));
    }

    if (cmdObj["cursor"].type() == mongo::Object) {
        const long long cursorId = 0LL;
        std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name << "."
                                                    << ns.coll();
        Command::appendCursorResponseObject(cursorId, cursorNamespace, arr.arr(), &result);
    } else {
        result.append("indexes", arr.arr());
    }

    return true;
}
/**
 * Runs a query using the following steps:
 *   1) Parsing.
 *   2) Acquire locks.
 *   3) Plan query, obtaining an executor that can run it.
 *   4) Setup a cursor for the query, which may be used on subsequent getMores.
 *   5) Generate the first batch.
 *   6) Save state for getMore.
 *   7) Generate response to send to the client.
 *
 * TODO: Rather than using the sharding version available in thread-local storage (i.e. the
 * call to ShardingState::needCollectionMetadata() below), shard version information
 * should be passed as part of the command parameter.
 */
bool run(OperationContext* txn,
         const std::string& dbname,
         BSONObj& cmdObj,
         int options,
         std::string& errmsg,
         BSONObjBuilder& result) override {
    const std::string fullns = parseNs(dbname, cmdObj);
    const NamespaceString nss(fullns);
    if (!nss.isValid()) {
        return appendCommandStatus(result,
                                   {ErrorCodes::InvalidNamespace,
                                    str::stream() << "Invalid collection name: " << nss.ns()});
    }

    // Although it is a command, a find command gets counted as a query.
    globalOpCounters.gotQuery();

    if (txn->getClient()->isInDirectClient()) {
        return appendCommandStatus(
            result,
            Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()"));
    }

    // 1a) Parse the command BSON to a LiteParsedQuery.
    const bool isExplain = false;
    auto lpqStatus = LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain);
    if (!lpqStatus.isOK()) {
        return appendCommandStatus(result, lpqStatus.getStatus());
    }

    auto& lpq = lpqStatus.getValue();

    // Validate term, if provided.
    if (auto term = lpq->getReplicationTerm()) {
        auto replCoord = repl::ReplicationCoordinator::get(txn);
        Status status = replCoord->updateTerm(*term);
        // Note: updateTerm returns ok if term stayed the same.
        if (!status.isOK()) {
            return appendCommandStatus(result, status);
        }
    }

    // Fill out curop information.
    long long ntoreturn = lpq->getBatchSize().value_or(0);
    beginQueryOp(txn, nss, cmdObj, ntoreturn, lpq->getSkip());

    // 1b) Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery.
    WhereCallbackReal whereCallback(txn, nss.db());
    auto statusWithCQ = CanonicalQuery::canonicalize(lpq.release(), whereCallback);
    if (!statusWithCQ.isOK()) {
        return appendCommandStatus(result, statusWithCQ.getStatus());
    }
    std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());

    // 2) Acquire locks.
    AutoGetCollectionForRead ctx(txn, nss);
    Collection* collection = ctx.getCollection();

    const int dbProfilingLevel =
        ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile;

    ShardingState* const shardingState = ShardingState::get(txn);

    // It is possible that the sharding version will change during yield while we are
    // retrieving a plan executor. If this happens we will throw an error and mongos will
    // retry.
    const ChunkVersion shardingVersionAtStart = shardingState->getVersion(nss.ns());

    // 3) Get the execution plan for the query.
    auto statusWithPlanExecutor =
        getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO);
    if (!statusWithPlanExecutor.isOK()) {
        return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
    }

    std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

    // TODO: Currently, chunk ranges are kept around until all ClientCursors created while
    // the chunk belonged on this node are gone. Separating chunk lifetime management from
    // ClientCursor should allow this check to go away.
    if (!shardingState->getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
        // Version changed while retrieving a PlanExecutor. Terminate the operation,
        // signaling that mongos should retry.
        throw SendStaleConfigException(nss.ns(),
                                       "version changed during find command",
                                       shardingVersionAtStart,
                                       shardingState->getVersion(nss.ns()));
    }

    if (!collection) {
        // No collection. Just fill out curop indicating that there were zero results and
        // there is no ClientCursor id, and then return.
        const long long numResults = 0;
        const CursorId cursorId = 0;
        endQueryOp(txn, *exec, dbProfilingLevel, numResults, cursorId);
        appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result);
        return true;
    }

    const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

    // 4) If possible, register the execution plan inside a ClientCursor, and pin that
    // cursor. In this case, ownership of the PlanExecutor is transferred to the
    // ClientCursor, and 'exec' becomes null.
    //
    // First unregister the PlanExecutor so it can be re-registered with ClientCursor.
    exec->deregisterExec();

    // Create a ClientCursor containing this plan executor. We don't have to worry about
    // leaking it as it's inserted into a global map by its ctor.
    ClientCursor* cursor =
        new ClientCursor(collection->getCursorManager(),
                         exec.release(),
                         nss.ns(),
                         txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                         pq.getOptions(),
                         pq.getFilter());
    CursorId cursorId = cursor->cursorid();
    ClientCursorPin ccPin(collection->getCursorManager(), cursorId);

    // On early return, get rid of the cursor.
    ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, ccPin);

    invariant(!exec);
    PlanExecutor* cursorExec = cursor->getExecutor();

    // 5) Stream query results, adding them to a BSONArray as we go.
    BSONArrayBuilder firstBatch;
    BSONObj obj;
    PlanExecutor::ExecState state;
    long long numResults = 0;
    while (!enoughForFirstBatch(pq, numResults, firstBatch.len()) &&
           PlanExecutor::ADVANCED == (state = cursorExec->getNext(&obj, NULL))) {
        // If adding this object will cause us to exceed the BSON size limit, then we
        // stash it for later.
        if (firstBatch.len() + obj.objsize() > BSONObjMaxUserSize && numResults > 0) {
            cursorExec->enqueue(obj);
            break;
        }

        // Add result to output buffer.
        firstBatch.append(obj);
        numResults++;
    }

    // Throw an assertion if query execution fails for any reason.
    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        const std::unique_ptr<PlanStageStats> stats(cursorExec->getStats());
        error() << "Plan executor error during find command: "
                << PlanExecutor::statestr(state)
                << ", stats: " << Explain::statsToBSON(*stats);

        return appendCommandStatus(result,
                                   Status(ErrorCodes::OperationFailed,
                                          str::stream()
                                              << "Executor error during find command: "
                                              << WorkingSetCommon::toStatusString(obj)));
    }

    // 6) Set up the cursor for getMore.
    if (shouldSaveCursor(txn, collection, state, cursorExec)) {
        // State will be restored on getMore.
        cursorExec->saveState();
        cursorExec->detachFromOperationContext();

        cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros());
        cursor->setPos(numResults);
    } else {
        cursorId = 0;
    }

    // Fill out curop based on the results.
    endQueryOp(txn, *cursorExec, dbProfilingLevel, numResults, cursorId);

    // 7) Generate the response object to send to the client.
    appendCursorResponseObject(cursorId, nss.ns(), firstBatch.arr(), &result);
    if (cursorId) {
        cursorFreer.Dismiss();
    }
    return true;
}
bool run(OperationContext* txn, const std::string& dbname, BSONObj& cmdObj, int options, std::string& errmsg, BSONObjBuilder& result) override { // Counted as a getMore, not as a command. globalOpCounters.gotGetMore(); if (txn->getClient()->isInDirectClient()) { return appendCommandStatus(result, Status(ErrorCodes::IllegalOperation, "Cannot run getMore command from eval()")); } StatusWith<GetMoreRequest> parseStatus = GetMoreRequest::parseFromBSON(dbname, cmdObj); if (!parseStatus.isOK()) { return appendCommandStatus(result, parseStatus.getStatus()); } const GetMoreRequest& request = parseStatus.getValue(); // Depending on the type of cursor being operated on, we hold locks for the whole // getMore, or none of the getMore, or part of the getMore. The three cases in detail: // // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore. // 2) Cursor owned by global cursor manager: we don't lock anything. These cursors // don't own any collection state. // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and // "unpinCollLock". This is because agg cursors handle locking internally (hence the // release), but the pin and unpin of the cursor must occur under the collection // lock. We don't use our AutoGetCollectionForRead "ctx" to relock, because // AutoGetCollectionForRead checks the sharding version (and we want the relock for // the unpin to succeed even if the sharding version has changed). // // Note that we declare our locks before our ClientCursorPin, in order to ensure that // the pin's destructor is called before the lock destructors (so that the unpin occurs // under the lock). std::unique_ptr<AutoGetCollectionForRead> ctx; std::unique_ptr<Lock::DBLock> unpinDBLock; std::unique_ptr<Lock::CollectionLock> unpinCollLock; CursorManager* cursorManager; CursorManager* globalCursorManager = CursorManager::getGlobalCursorManager(); if (globalCursorManager->ownsCursorId(request.cursorid)) { cursorManager = globalCursorManager; } else { ctx.reset(new AutoGetCollectionForRead(txn, request.nss)); Collection* collection = ctx->getCollection(); if (!collection) { return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, "collection dropped between getMore calls")); } cursorManager = collection->getCursorManager(); } ClientCursorPin ccPin(cursorManager, request.cursorid); ClientCursor* cursor = ccPin.c(); if (!cursor) { // We didn't find the cursor. return appendCommandStatus(result, Status(ErrorCodes::CursorNotFound, str::stream() << "Cursor not found, cursor id: " << request.cursorid)); } if (request.nss.ns() != cursor->ns()) { return appendCommandStatus(result, Status(ErrorCodes::Unauthorized, str::stream() << "Requested getMore on namespace '" << request.nss.ns() << "', but cursor belongs to a different namespace")); } const bool hasOwnMaxTime = CurOp::get(txn)->isMaxTimeSet(); // Validation related to awaitData. if (isCursorAwaitData(cursor)) { invariant(isCursorTailable(cursor)); if (!hasOwnMaxTime) { Status status(ErrorCodes::BadValue, str::stream() << "Must set maxTimeMS on a getMore if the initial " << "query had 'awaitData' set: " << cmdObj); return appendCommandStatus(result, status); } if (cursor->isAggCursor()) { Status status(ErrorCodes::BadValue, "awaitData cannot be set on an aggregation cursor"); return appendCommandStatus(result, status); } } // On early return, get rid of the cursor. 
ScopeGuard cursorFreer = MakeGuard(&GetMoreCmd::cleanupCursor, txn, &ccPin, request); if (!cursor->hasRecoveryUnit()) { // Start using a new RecoveryUnit. cursor->setOwnedRecoveryUnit( getGlobalServiceContext()->getGlobalStorageEngine()->newRecoveryUnit()); } // Swap RecoveryUnit(s) between the ClientCursor and OperationContext. ScopedRecoveryUnitSwapper ruSwapper(cursor, txn); // Reset timeout timer on the cursor since the cursor is still in use. cursor->setIdleTime(0); // If there is no time limit set directly on this getMore command, but the operation // that spawned this cursor had a time limit set, then we have to apply any leftover // time to this getMore. if (!hasOwnMaxTime) { CurOp::get(txn)->setMaxTimeMicros(cursor->getLeftoverMaxTimeMicros()); } txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point. if (cursor->isAggCursor()) { // Agg cursors handle their own locking internally. ctx.reset(); // unlocks } PlanExecutor* exec = cursor->getExecutor(); exec->restoreState(txn); // If we're tailing a capped collection, retrieve a monotonically increasing insert // counter. uint64_t lastInsertCount = 0; if (isCursorAwaitData(cursor)) { invariant(ctx->getCollection()->isCapped()); lastInsertCount = ctx->getCollection()->getCappedInsertNotifier()->getCount(); } CursorId respondWithId = 0; BSONArrayBuilder nextBatch; BSONObj obj; PlanExecutor::ExecState state; int numResults = 0; Status batchStatus = generateBatch(cursor, request, &nextBatch, &state, &numResults); if (!batchStatus.isOK()) { return appendCommandStatus(result, batchStatus); } // If this is an await data cursor, and we hit EOF without generating any results, then // we block waiting for new oplog data to arrive. if (isCursorAwaitData(cursor) && state == PlanExecutor::IS_EOF && numResults == 0) { // Retrieve the notifier which we will wait on until new data arrives. We make sure // to do this in the lock because once we drop the lock it is possible for the // collection to become invalid. The notifier itself will outlive the collection if // the collection is dropped, as we keep a shared_ptr to it. auto notifier = ctx->getCollection()->getCappedInsertNotifier(); // Save the PlanExecutor and drop our locks. exec->saveState(); ctx.reset(); // Block waiting for data. Microseconds timeout(CurOp::get(txn)->getRemainingMaxTimeMicros()); notifier->waitForInsert(lastInsertCount, timeout); notifier.reset(); ctx.reset(new AutoGetCollectionForRead(txn, request.nss)); exec->restoreState(txn); // We woke up because either the timed_wait expired, or there was more data. Either // way, attempt to generate another batch of results. batchStatus = generateBatch(cursor, request, &nextBatch, &state, &numResults); if (!batchStatus.isOK()) { return appendCommandStatus(result, batchStatus); } } if (shouldSaveCursorGetMore(state, exec, isCursorTailable(cursor))) { respondWithId = request.cursorid; exec->saveState(); // If maxTimeMS was set directly on the getMore rather than being rolled over // from a previous find, then don't roll remaining micros over to the next // getMore. if (!hasOwnMaxTime) { cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros()); } cursor->incPos(numResults); if (isCursorTailable(cursor) && state == PlanExecutor::IS_EOF) { // Rather than swapping their existing RU into the client cursor, tailable // cursors should get a new recovery unit. 
ruSwapper.dismiss(); } } else { CurOp::get(txn)->debug().cursorExhausted = true; } appendGetMoreResponseObject(respondWithId, request.nss.ns(), nextBatch.arr(), &result); if (respondWithId) { cursorFreer.Dismiss(); // If we are operating on an aggregation cursor, then we dropped our collection lock // earlier and need to reacquire it in order to clean up our ClientCursorPin. if (cursor->isAggCursor()) { invariant(NULL == ctx.get()); unpinDBLock.reset( new Lock::DBLock(txn->lockState(), request.nss.db(), MODE_IS)); unpinCollLock.reset( new Lock::CollectionLock(txn->lockState(), request.nss.ns(), MODE_IS)); } } return true; }
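// The awaitData wait in the getMore above relies on a capped-insert notifier:
// the reader records the counter value it last saw, drops its locks, and
// blocks until either the counter advances (an insert happened) or the
// remaining maxTimeMS budget expires. A self-contained sketch of that
// synchronization shape using the standard library; this illustrates the
// idea, it is not the server's CappedInsertNotifier implementation.
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

class InsertNotifier {
public:
    // Called by writers after each insert into the capped collection.
    void notifyInsert() {
        std::lock_guard<std::mutex> lk(_mutex);
        ++_count;
        _cv.notify_all();
    }

    // Read under the collection lock, before the reader unlocks.
    std::uint64_t currentCount() {
        std::lock_guard<std::mutex> lk(_mutex);
        return _count;
    }

    // Returns when the count has moved past 'lastCountSeen' or on timeout;
    // either way the caller relocks and retries its batch.
    void waitForInsert(std::uint64_t lastCountSeen, std::chrono::microseconds timeout) {
        std::unique_lock<std::mutex> lk(_mutex);
        _cv.wait_for(lk, timeout, [&] { return _count != lastCountSeen; });
    }

private:
    std::mutex _mutex;
    std::condition_variable _cv;
    std::uint64_t _count = 0;
};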
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl) { const std::string ns = parseNsCollectionRequired(dbname, cmdObj); const BSONObj query = cmdObj.getObjectField("query"); const BSONObj fields = cmdObj.getObjectField("fields"); const BSONObj update = cmdObj.getObjectField("update"); const BSONObj sort = cmdObj.getObjectField("sort"); bool upsert = cmdObj["upsert"].trueValue(); bool returnNew = cmdObj["new"].trueValue(); bool remove = cmdObj["remove"].trueValue(); if ( remove ) { if ( upsert ) { errmsg = "remove and upsert can't co-exist"; return false; } if ( !update.isEmpty() ) { errmsg = "remove and update can't co-exist"; return false; } if ( returnNew ) { errmsg = "remove and returnNew can't co-exist"; return false; } } else if ( !cmdObj.hasField("update") ) { errmsg = "need remove or update"; return false; } bool ok = false; MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { errmsg = ""; // We can always retry because we only ever modify one document ok = runImpl(txn, dbname, ns, query, fields, update, sort, upsert, returnNew, remove, result, errmsg); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "findAndModify", ns); if ( !ok && errmsg == "no-collection" ) { // Take X lock so we can create collection, then re-run operation. ScopedTransaction transaction(txn, MODE_IX); Lock::DBLock lk(txn->lockState(), dbname, MODE_X); Client::Context ctx(txn, ns, false /* don't check version */); if (!fromRepl && !repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) { return appendCommandStatus(result, Status(ErrorCodes::NotMaster, str::stream() << "Not primary while creating collection " << ns << " during findAndModify")); } Database* db = ctx.db(); if ( db->getCollection( ns ) ) { // someone else beat us to it, that's ok // we might race while we unlock if someone drops // but that's ok, we'll just do nothing and error out } else { MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { WriteUnitOfWork wuow(txn); uassertStatusOK( userCreateNS( txn, db, ns, BSONObj(), !fromRepl ) ); wuow.commit(); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "findAndModify", ns); } errmsg = ""; ok = runImpl(txn, dbname, ns, query, fields, update, sort, upsert, returnNew, remove, result, errmsg); }
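// MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN/END above wraps its body in a loop
// that retries on write conflicts; the comment "We can always retry because
// we only ever modify one document" is what makes unbounded retry safe here.
// A self-contained sketch of that retry shape (the real macro also abandons
// the storage snapshot and backs off between attempts, which is elided):
#include <stdexcept>

struct WriteConflictException : std::runtime_error {
    WriteConflictException() : std::runtime_error("write conflict") {}
};

template <typename Op>
auto writeConflictRetry(Op&& op) -> decltype(op()) {
    while (true) {
        try {
            return op();  // the single-document write attempt
        } catch (const WriteConflictException&) {
            // Conflict with a concurrent writer: loop and try again from a
            // fresh read of the document.
        }
    }
}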
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result) { // --- parse NamespaceString ns(dbname, cmdObj[name].String()); Status status = userAllowedWriteNS(ns); if (!status.isOK()) return appendCommandStatus(result, status); if (cmdObj["indexes"].type() != Array) { errmsg = "indexes has to be an array"; result.append("cmdObj", cmdObj); return false; } std::vector<BSONObj> specs; { BSONObjIterator i(cmdObj["indexes"].Obj()); while (i.more()) { BSONElement e = i.next(); if (e.type() != Object) { errmsg = "everything in indexes has to be an Object"; result.append("cmdObj", cmdObj); return false; } specs.push_back(e.Obj()); } } if (specs.size() == 0) { errmsg = "no indexes to add"; return false; } // check specs for (size_t i = 0; i < specs.size(); i++) { BSONObj spec = specs[i]; if (spec["ns"].eoo()) { spec = _addNsToSpec(ns, spec); specs[i] = spec; } if (spec["ns"].type() != String) { errmsg = "ns field must be a string"; result.append("spec", spec); return false; } std::string nsFromUser = spec["ns"].String(); if (nsFromUser.empty()) { errmsg = "ns field cannot be an empty string"; result.append("spec", spec); return false; } if (ns != nsFromUser) { errmsg = str::stream() << "value of ns field '" << nsFromUser << "' doesn't match namespace " << ns.ns(); result.append("spec", spec); return false; } } // now we know we have to create index(es) // Note: createIndexes command does not currently respect shard versioning. ScopedTransaction transaction(txn, MODE_IX); Lock::DBLock dbLock(txn->lockState(), ns.db(), MODE_X); if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) { return appendCommandStatus( result, Status(ErrorCodes::NotMaster, str::stream() << "Not primary while creating indexes in " << ns.ns())); } Database* db = dbHolder().get(txn, ns.db()); if (!db) { db = dbHolder().openDb(txn, ns.db()); } Collection* collection = db->getCollection(ns.ns()); if (collection) { result.appendBool("createdCollectionAutomatically", false); } else { MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { WriteUnitOfWork wunit(txn); collection = db->createCollection(txn, ns.ns(), CollectionOptions()); invariant(collection); wunit.commit(); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns()); result.appendBool("createdCollectionAutomatically", true); } const int numIndexesBefore = collection->getIndexCatalog()->numIndexesTotal(txn); result.append("numIndexesBefore", numIndexesBefore); auto client = txn->getClient(); ScopeGuard lastOpSetterGuard = MakeObjGuard(repl::ReplClientInfo::forClient(client), &repl::ReplClientInfo::setLastOpToSystemLastOpTime, txn); MultiIndexBlock indexer(txn, collection); indexer.allowBackgroundBuilding(); indexer.allowInterruption(); const size_t origSpecsSize = specs.size(); indexer.removeExistingIndexes(&specs); if (specs.size() == 0) { result.append("numIndexesAfter", numIndexesBefore); result.append("note", "all indexes already exist"); return true; } if (specs.size() != origSpecsSize) { result.append("note", "index already exists"); } for (size_t i = 0; i < specs.size(); i++) { const BSONObj& spec = specs[i]; if (spec["unique"].trueValue()) { status = checkUniqueIndexConstraints(txn, ns.ns(), spec["key"].Obj()); if (!status.isOK()) { return appendCommandStatus(result, status); } } if (spec["v"].isNumber() && spec["v"].numberInt() == 0) { return appendCommandStatus( result, Status(ErrorCodes::CannotCreateIndex, str::stream() << "illegal index specification: " << spec << ". 
" << "The option v:0 cannot be passed explicitly")); } } MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { uassertStatusOK(indexer.init(specs)); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns()); // If we're a background index, replace exclusive db lock with an intent lock, so that // other readers and writers can proceed during this phase. if (indexer.getBuildInBackground()) { txn->recoveryUnit()->abandonSnapshot(); dbLock.relockWithMode(MODE_IX); if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) { return appendCommandStatus( result, Status(ErrorCodes::NotMaster, str::stream() << "Not primary while creating background indexes in " << ns.ns())); } } try { Lock::CollectionLock colLock(txn->lockState(), ns.ns(), MODE_IX); uassertStatusOK(indexer.insertAllDocumentsInCollection()); } catch (const DBException& e) { invariant(e.getCode() != ErrorCodes::WriteConflict); // Must have exclusive DB lock before we clean up the index build via the // destructor of 'indexer'. if (indexer.getBuildInBackground()) { try { // This function cannot throw today, but we will preemptively prepare for // that day, to avoid data corruption due to lack of index cleanup. txn->recoveryUnit()->abandonSnapshot(); dbLock.relockWithMode(MODE_X); if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) { return appendCommandStatus( result, Status(ErrorCodes::NotMaster, str::stream() << "Not primary while creating background indexes in " << ns.ns() << ": cleaning up index build failure due to " << e.toString())); } } catch (...) { std::terminate(); } } throw; } // Need to return db lock back to exclusive, to complete the index build. if (indexer.getBuildInBackground()) { txn->recoveryUnit()->abandonSnapshot(); dbLock.relockWithMode(MODE_X); uassert(ErrorCodes::NotMaster, str::stream() << "Not primary while completing index build in " << dbname, repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)); Database* db = dbHolder().get(txn, ns.db()); uassert(28551, "database dropped during index build", db); uassert(28552, "collection dropped during index build", db->getCollection(ns.ns())); } MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { WriteUnitOfWork wunit(txn); indexer.commit(); for (size_t i = 0; i < specs.size(); i++) { std::string systemIndexes = ns.getSystemIndexesCollection(); getGlobalServiceContext()->getOpObserver()->onCreateIndex( txn, systemIndexes, specs[i]); } wunit.commit(); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns()); result.append("numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal(txn)); lastOpSetterGuard.Dismiss(); return true; }
bool wrappedRun(OperationContext* txn, const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder) { const std::string coll = jsobj.firstElement().valuestrsafe(); if (coll.empty()) { errmsg = "no collection name specified"; return false; } const std::string toDeleteNs = dbname + '.' + coll; if (!serverGlobalParams.quiet) { LOG(0) << "CMD: dropIndexes " << toDeleteNs << endl; } Client::Context ctx(txn, toDeleteNs); Database* db = ctx.db(); Collection* collection = db->getCollection( txn, toDeleteNs ); if ( ! collection ) { errmsg = "ns not found"; return false; } stopIndexBuilds(txn, db, jsobj); IndexCatalog* indexCatalog = collection->getIndexCatalog(); anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal(txn) ); BSONElement f = jsobj.getField("index"); if ( f.type() == String ) { string indexToDelete = f.valuestr(); if ( indexToDelete == "*" ) { Status s = indexCatalog->dropAllIndexes(txn, false); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } anObjBuilder.append("msg", "non-_id indexes dropped for collection"); return true; } IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( txn, indexToDelete ); if ( desc == NULL ) { errmsg = str::stream() << "index not found with name [" << indexToDelete << "]"; return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex(txn, desc); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return true; } if ( f.type() == Object ) { IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByKeyPattern( txn, f.embeddedObject() ); if ( desc == NULL ) { errmsg = "can't find index with key:"; errmsg += f.embeddedObject().toString(); return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex(txn, desc); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return true; } errmsg = "invalid index name spec"; return false; }
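// The "index" argument above is overloaded by BSON type: the string "*" drops
// every non-_id index, any other string is an index name, and an object is a
// key pattern. A self-contained sketch of that three-way dispatch, using
// std::variant in place of BSONElement's type tags (names are illustrative):
#include <string>
#include <variant>

struct KeyPattern {
    std::string pattern;  // e.g. "{a: 1}"
};
using IndexArg = std::variant<std::string, KeyPattern>;

enum class DropAction { kDropAllNonIdIndexes, kDropByName, kDropByKeyPattern };

DropAction classifyDropArg(const IndexArg& arg) {
    if (const std::string* name = std::get_if<std::string>(&arg)) {
        return (*name == "*") ? DropAction::kDropAllNonIdIndexes
                              : DropAction::kDropByName;
    }
    return DropAction::kDropByKeyPattern;
}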
static bool runImpl(OperationContext* txn, const string& dbname, const string& ns, const BSONObj& query, const BSONObj& fields, const BSONObj& update, const BSONObj& sort, bool upsert, bool returnNew, bool remove, BSONObjBuilder& result, string& errmsg) { AutoGetOrCreateDb autoDb(txn, dbname, MODE_IX); Lock::CollectionLock collLock(txn->lockState(), ns, MODE_IX); Client::Context ctx(txn, ns, autoDb.getDb(), autoDb.justCreated()); if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) { return appendCommandStatus(result, Status(ErrorCodes::NotMaster, str::stream() << "Not primary while running findAndModify in " << ns)); } Collection* collection = ctx.db()->getCollection(ns); const WhereCallbackReal whereCallback(txn, StringData(ns)); if ( !collection ) { if ( !upsert ) { // no collection and no upsert, so we can't possibly do anything _appendHelper( result, BSONObj(), false, fields, whereCallback ); return true; } // no collection, but upsert, so we want to create it // problem is we only have IX on db and collection :( // so we tell our caller who can do it errmsg = "no-collection"; return false; } Snapshotted<BSONObj> snapshotDoc; RecordId loc; bool found = false; { CanonicalQuery* cq; const BSONObj projection; const long long skip = 0; const long long limit = -1; // 1 document requested; negative indicates hard limit. uassertStatusOK(CanonicalQuery::canonicalize(ns, query, sort, projection, skip, limit, &cq, whereCallback)); PlanExecutor* rawExec; uassertStatusOK(getExecutor(txn, collection, cq, PlanExecutor::YIELD_AUTO, &rawExec, QueryPlannerParams::DEFAULT)); scoped_ptr<PlanExecutor> exec(rawExec); PlanExecutor::ExecState state = exec->getNextSnapshotted(&snapshotDoc, &loc); if (PlanExecutor::ADVANCED == state) { found = true; } else if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { if (PlanExecutor::FAILURE == state && WorkingSetCommon::isValidStatusMemberObject(snapshotDoc.value())) { const Status errorStatus = WorkingSetCommon::getMemberObjectStatus(snapshotDoc.value()); invariant(!errorStatus.isOK()); uasserted(errorStatus.code(), errorStatus.reason()); } uasserted(ErrorCodes::OperationFailed, str::stream() << "executor returned " << PlanExecutor::statestr(state) << " while finding document to update"); } else { invariant(PlanExecutor::IS_EOF == state); } } WriteUnitOfWork wuow(txn); if (found) { // We found a doc, but it might not be associated with the active snapshot. // If the doc has changed or is no longer in the collection, we will throw a // write conflict exception and start again from the beginning. if (txn->recoveryUnit()->getSnapshotId() != snapshotDoc.snapshotId()) { BSONObj oldObj = snapshotDoc.value(); if (!collection->findDoc(txn, loc, &snapshotDoc)) { // Got deleted in the new snapshot. throw WriteConflictException(); } if (!oldObj.binaryEqual(snapshotDoc.value())) { // Got updated in the new snapshot. throw WriteConflictException(); } } // If we get here without throwing, then we should have the copy of the doc from // the latest snapshot.
invariant(txn->recoveryUnit()->getSnapshotId() == snapshotDoc.snapshotId()); } BSONObj doc = snapshotDoc.value(); BSONObj queryModified = query; if (found && !doc["_id"].eoo() && !CanonicalQuery::isSimpleIdQuery(query)) { // we're going to re-write the query to be more efficient // we have to be a little careful because of positional operators // maybe we can pass this all through eventually, but right now there isn't an easy way bool hasPositionalUpdate = false; { // if the update has a positional piece ($) // then we need to pull all query parts in // so here we check for $ // a little hacky BSONObjIterator i( update ); while ( i.more() ) { const BSONElement& elem = i.next(); if ( elem.fieldName()[0] != '$' || elem.type() != Object ) continue; BSONObjIterator j( elem.Obj() ); while ( j.more() ) { if ( str::contains( j.next().fieldName(), ".$" ) ) { hasPositionalUpdate = true; break; } } } } BSONObjBuilder b(query.objsize() + 10); b.append( doc["_id"] ); bool addedAtomic = false; BSONObjIterator i(query); while ( i.more() ) { const BSONElement& elem = i.next(); if ( str::equals( "_id" , elem.fieldName() ) ) { // we already do _id continue; } if ( ! hasPositionalUpdate ) { // if there is a dotted field, accept we may need more query parts continue; } if ( ! addedAtomic ) { b.appendBool( "$atomic" , true ); addedAtomic = true; } b.append( elem ); } queryModified = b.obj(); } if ( remove ) { _appendHelper(result, doc, found, fields, whereCallback); if ( found ) { deleteObjects(txn, ctx.db(), ns, queryModified, PlanExecutor::YIELD_MANUAL, true, true); BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) ); le.appendNumber( "n" , 1 ); le.done(); } } else { // update if ( ! found && ! upsert ) { // didn't have it, and am not upserting _appendHelper(result, doc, found, fields, whereCallback); } else { // we found it or we're updating if ( ! returnNew ) { _appendHelper(result, doc, found, fields, whereCallback); } const NamespaceString requestNs(ns); UpdateRequest request(requestNs); request.setQuery(queryModified); request.setUpdates(update); request.setUpsert(upsert); request.setUpdateOpLog(); request.setStoreResultDoc(returnNew); request.setYieldPolicy(PlanExecutor::YIELD_MANUAL); // TODO(greg) We need to send if we are ignoring // the shard version below, but for now no UpdateLifecycleImpl updateLifecycle(false, requestNs); request.setLifecycle(&updateLifecycle); UpdateResult res = mongo::update(txn, ctx.db(), request, &txn->getCurOp()->debug()); if (!found && res.existing) { // No match was found during the read part of this find and modify, which // means that we're here doing an upsert. But the update also told us that // we modified an *already existing* document. This probably means that // the query reported EOF based on an out-of-date snapshot. This should be // a rare event, so we handle it by throwing a write conflict. throw WriteConflictException(); } if ( !collection ) { // collection created by an upsert collection = ctx.db()->getCollection(ns); } LOG(3) << "update result: " << res; if (returnNew) { dassert(!res.newObj.isEmpty()); _appendHelper(result, res.newObj, true, fields, whereCallback); } BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) ); le.appendBool( "updatedExisting" , res.existing ); le.appendNumber( "n" , res.numMatched ); if ( !res.upserted.isEmpty() ) { le.append( res.upserted[kUpsertedFieldName] ); } le.done(); } } // Committing the WUOW can close the current snapshot. Until this happens, the // snapshot id should not have changed.
if (found) { invariant(txn->recoveryUnit()->getSnapshotId() == snapshotDoc.snapshotId()); } wuow.commit(); return true; }
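// The snapshot-id check above is the crux of this function's correctness: a
// document found under one storage snapshot may have changed by the time the
// write executes under the current snapshot, so it is re-fetched and compared
// byte-for-byte, and any difference escalates to a write conflict so the
// whole findAndModify restarts. A self-contained sketch of that
// revalidation, with std::string standing in for the BSON payload:
#include <cstdint>
#include <functional>
#include <string>

struct WriteConflict {};

struct SnapshottedDoc {
    std::uint64_t snapshotId;
    std::string bytes;
};

void revalidateUnderCurrentSnapshot(
    const SnapshottedDoc& seen,
    std::uint64_t currentSnapshotId,
    const std::function<bool(SnapshottedDoc*)>& findDoc) {
    if (seen.snapshotId == currentSnapshotId)
        return;  // read and write share a snapshot; nothing can have moved
    SnapshottedDoc latest;
    if (!findDoc(&latest))
        throw WriteConflict{};  // deleted in the newer snapshot
    if (latest.bytes != seen.bytes)
        throw WriteConflict{};  // updated in the newer snapshot
}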
virtual bool run( const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) { NamespaceString ns( dbname, cmdObj[name].String() ); Client::ReadContext ctx(ns.ns()); Database* db = ctx.ctx().db(); Collection* collection = db->getCollection( ns ); if ( !collection ) return appendCommandStatus( result, Status( ErrorCodes::NamespaceNotFound, str::stream() << "ns does not exist: " << ns.ns() ) ); size_t numCursors = static_cast<size_t>( cmdObj["numCursors"].numberInt() ); if ( numCursors == 0 || numCursors > 10000 ) return appendCommandStatus( result, Status( ErrorCodes::BadValue, str::stream() << "numCursors has to be between 1 and 10000" << " was: " << numCursors ) ); OwnedPointerVector<RecordIterator> iterators(collection->getManyIterators()); if (iterators.size() < numCursors) { numCursors = iterators.size(); } OwnedPointerVector<MultiIteratorRunner> runners; for ( size_t i = 0; i < numCursors; i++ ) { runners.push_back(new MultiIteratorRunner(ns.ns(), collection)); } // transfer iterators to runners using a round-robin distribution. // TODO consider using a common work queue once invalidation issues go away. for (size_t i = 0; i < iterators.size(); i++) { runners[i % runners.size()]->addIterator(iterators.releaseAt(i)); } { BSONArrayBuilder bucketsBuilder; for (size_t i = 0; i < runners.size(); i++) { // transfer ownership of a runner to the ClientCursor (which manages its own // lifetime). ClientCursor* cc = new ClientCursor( collection, runners.releaseAt(i) ); // we are mimicking the aggregation cursor output here // that is why there are ns, ok and empty firstBatch BSONObjBuilder threadResult; { BSONObjBuilder cursor; cursor.appendArray( "firstBatch", BSONObj() ); cursor.append( "ns", ns ); cursor.append( "id", cc->cursorid() ); threadResult.append( "cursor", cursor.obj() ); } threadResult.appendBool( "ok", 1 ); bucketsBuilder.append( threadResult.obj() ); } result.appendArray( "cursors", bucketsBuilder.obj() ); } return true; }
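// The iterator hand-off above is a plain round-robin deal: runner i receives
// iterators i, i + numRunners, i + 2 * numRunners, and so on. A
// self-contained sketch of that distribution (the real code also clamps
// numCursors down to the number of iterators, so no bucket is left empty):
#include <cstddef>
#include <utility>
#include <vector>

template <typename T>
std::vector<std::vector<T>> distributeRoundRobin(std::vector<T> items,
                                                 std::size_t numBuckets) {
    std::vector<std::vector<T>> buckets(numBuckets);
    if (numBuckets == 0) {
        return buckets;  // nothing to deal into
    }
    for (std::size_t i = 0; i < items.size(); ++i) {
        buckets[i % numBuckets].push_back(std::move(items[i]));
    }
    return buckets;
}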
/** * Runs a query using the following steps: * --Parsing. * --Acquire locks. * --Plan query, obtaining an executor that can run it. * --Generate the first batch. * --Save state for getMore, transferring ownership of the executor to a ClientCursor. * --Generate response to send to the client. */ bool run(OperationContext* txn, const std::string& dbname, BSONObj& cmdObj, int options, std::string& errmsg, BSONObjBuilder& result) override { const std::string fullns = parseNs(dbname, cmdObj); const NamespaceString nss(fullns); if (!nss.isValid() || nss.isCommand() || nss.isSpecialCommand()) { return appendCommandStatus(result, {ErrorCodes::InvalidNamespace, str::stream() << "Invalid collection name: " << nss.ns()}); } // Although it is a command, a find command gets counted as a query. globalOpCounters.gotQuery(); if (txn->getClient()->isInDirectClient()) { return appendCommandStatus( result, Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()")); } // Parse the command BSON to a LiteParsedQuery. const bool isExplain = false; auto lpqStatus = LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain); if (!lpqStatus.isOK()) { return appendCommandStatus(result, lpqStatus.getStatus()); } auto& lpq = lpqStatus.getValue(); // Validate term before acquiring locks, if provided. if (auto term = lpq->getReplicationTerm()) { auto replCoord = repl::ReplicationCoordinator::get(txn); Status status = replCoord->updateTerm(txn, *term); // Note: updateTerm returns ok if term stayed the same. if (!status.isOK()) { return appendCommandStatus(result, status); } } // Fill out curop information. // // We pass negative values for 'ntoreturn' and 'ntoskip' to indicate that these values // should be omitted from the log line. Limit and skip information is already present in the // find command parameters, so these fields are redundant. const int ntoreturn = -1; const int ntoskip = -1; beginQueryOp(txn, nss, cmdObj, ntoreturn, ntoskip); // Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery. ExtensionsCallbackReal extensionsCallback(txn, &nss); auto statusWithCQ = CanonicalQuery::canonicalize(lpq.release(), extensionsCallback); if (!statusWithCQ.isOK()) { return appendCommandStatus(result, statusWithCQ.getStatus()); } std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); ShardingState* const shardingState = ShardingState::get(txn); if (OperationShardVersion::get(txn).hasShardVersion() && shardingState->enabled()) { ChunkVersion receivedVersion = OperationShardVersion::get(txn).getShardVersion(nss); ChunkVersion latestVersion; // Wait for migration completion to get the correct chunk version. const int maxTimeoutSec = 30; int timeoutSec = cq->getParsed().getMaxTimeMS() / 1000; if (!timeoutSec || timeoutSec > maxTimeoutSec) { timeoutSec = maxTimeoutSec; } if (!shardingState->waitTillNotInCriticalSection(timeoutSec)) { uasserted(ErrorCodes::LockTimeout, "Timeout while waiting for migration commit"); } // If the received version is newer than the version cached in 'shardingState', then we // have to refresh 'shardingState' from the config servers. We do this before acquiring // locks so that we don't hold locks while waiting on the network. uassertStatusOK(shardingState->refreshMetadataIfNeeded( txn, nss.ns(), receivedVersion, &latestVersion)); } // Acquire locks. AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); const int dbProfilingLevel = ctx.getDb() ? 
ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile; // It is possible that the sharding version will change during yield while we are // retrieving a plan executor. If this happens we will throw an error and mongos will // retry. const ChunkVersion shardingVersionAtStart = shardingState->getVersion(nss.ns()); // Get the execution plan for the query. auto statusWithPlanExecutor = getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO); if (!statusWithPlanExecutor.isOK()) { return appendCommandStatus(result, statusWithPlanExecutor.getStatus()); } std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); if (!collection) { // No collection. Just fill out curop indicating that there were zero results and // there is no ClientCursor id, and then return. const long long numResults = 0; const CursorId cursorId = 0; endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, cursorId); appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result); return true; } const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed(); // Stream query results, adding them to a BSONArray as we go. BSONArrayBuilder firstBatch; BSONObj obj; PlanExecutor::ExecState state = PlanExecutor::ADVANCED; long long numResults = 0; while (!FindCommon::enoughForFirstBatch(pq, numResults, firstBatch.len()) && PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // If adding this object will cause us to exceed the BSON size limit, then we stash // it for later. if (firstBatch.len() + obj.objsize() > BSONObjMaxUserSize && numResults > 0) { exec->enqueue(obj); break; } // Add result to output buffer. firstBatch.append(obj); numResults++; } // Throw an assertion if query execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { const std::unique_ptr<PlanStageStats> stats(exec->getStats()); error() << "Plan executor error during find command: " << PlanExecutor::statestr(state) << ", stats: " << Explain::statsToBSON(*stats); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during find command: " << WorkingSetCommon::toStatusString(obj))); } // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the // chunk belonged on this node are gone. Separating chunk lifetime management from // ClientCursor should allow this check to go away. if (!shardingState->getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // Version changed while retrieving a PlanExecutor. Terminate the operation, // signaling that mongos should retry. throw SendStaleConfigException(nss.ns(), "version changed during find command", shardingVersionAtStart, shardingState->getVersion(nss.ns())); } // Set up the cursor for getMore. CursorId cursorId = 0; if (shouldSaveCursor(txn, collection, state, exec.get())) { // Register the execution plan inside a ClientCursor. Ownership of the PlanExecutor is // transferred to the ClientCursor. // // First unregister the PlanExecutor so it can be re-registered with ClientCursor. exec->deregisterExec(); // Create a ClientCursor containing this plan executor. We don't have to worry about // leaking it as it's inserted into a global map by its ctor. 
ClientCursor* cursor = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(), pq.getOptions(), pq.getFilter()); cursorId = cursor->cursorid(); invariant(!exec); PlanExecutor* cursorExec = cursor->getExecutor(); // State will be restored on getMore. cursorExec->saveState(); cursorExec->detachFromOperationContext(); cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros()); cursor->setPos(numResults); // Fill out curop based on the results. endQueryOp(txn, collection, *cursorExec, dbProfilingLevel, numResults, cursorId); } else { endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, cursorId); } // Generate the response object to send to the client. appendCursorResponseObject(cursorId, nss.ns(), firstBatch.arr(), &result); return true; }
bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { BSONElement e = jsobj.firstElement(); const string toDeleteNs = dbname + '.' + e.valuestr(); if (!serverGlobalParams.quiet) { MONGO_TLOG(0) << "CMD: dropIndexes " << toDeleteNs << endl; } Lock::DBWrite dbXLock(dbname); Client::Context ctx(toDeleteNs); Collection* collection = cc().database()->getCollection( toDeleteNs ); if ( ! collection ) { errmsg = "ns not found"; return false; } stopIndexBuilds(cc().database(), jsobj); IndexCatalog* indexCatalog = collection->getIndexCatalog(); anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal() ); BSONElement f = jsobj.getField("index"); if ( f.type() == String ) { string indexToDelete = f.valuestr(); if ( indexToDelete == "*" ) { Status s = indexCatalog->dropAllIndexes( false ); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } anObjBuilder.append("msg", "non-_id indexes dropped for collection"); return true; } IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( indexToDelete ); if ( desc == NULL ) { errmsg = str::stream() << "index not found with name [" << indexToDelete << "]"; return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex( desc ); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return true; } if ( f.type() == Object ) { IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByKeyPattern( f.embeddedObject() ); if ( desc == NULL ) { errmsg = "can't find index with key:"; errmsg += f.embeddedObject().toString(); return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex( desc ); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return true; } errmsg = "invalid index name spec"; return false; }
virtual bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string coll = cmdObj.firstElement().valuestr(); if( coll.empty() || db.empty() ) { errmsg = "no collection name specified"; return false; } if( isCurrentlyAReplSetPrimary() && !cmdObj["force"].trueValue() ) { errmsg = "will not run compact on an active replica set primary as this is a slow blocking operation. use force:true to force"; return false; } NamespaceString ns(db,coll); if ( !ns.isNormal() ) { errmsg = "bad namespace name"; return false; } if ( ns.isSystem() ) { // items in system.* cannot be moved as there might be pointers to them // i.e. system.indexes entries are pointed to from NamespaceDetails errmsg = "can't compact a system namespace"; return false; } CompactOptions compactOptions; if ( cmdObj["preservePadding"].trueValue() ) { compactOptions.paddingMode = CompactOptions::PRESERVE; if ( cmdObj.hasElement( "paddingFactor" ) || cmdObj.hasElement( "paddingBytes" ) ) { errmsg = "cannot mix preservePadding and paddingFactor|paddingBytes"; return false; } } else if ( cmdObj.hasElement( "paddingFactor" ) || cmdObj.hasElement( "paddingBytes" ) ) { compactOptions.paddingMode = CompactOptions::MANUAL; if ( cmdObj.hasElement("paddingFactor") ) { compactOptions.paddingFactor = cmdObj["paddingFactor"].Number(); if ( compactOptions.paddingFactor < 1 || compactOptions.paddingFactor > 4 ){ errmsg = "invalid padding factor"; return false; } } if ( cmdObj.hasElement("paddingBytes") ) { compactOptions.paddingBytes = cmdObj["paddingBytes"].numberInt(); if ( compactOptions.paddingBytes < 0 || compactOptions.paddingBytes > ( 1024 * 1024 ) ) { errmsg = "invalid padding bytes"; return false; } } } if ( cmdObj.hasElement("validate") ) compactOptions.validateDocuments = cmdObj["validate"].trueValue(); Lock::DBWrite lk(ns.ns()); BackgroundOperation::assertNoBgOpInProgForNs(ns.ns()); Client::Context ctx(ns); Collection* collection = ctx.db()->getCollection(ns.ns()); if( ! collection ) { errmsg = "namespace does not exist"; return false; } if ( collection->isCapped() ) { errmsg = "cannot compact a capped collection"; return false; } log() << "compact " << ns << " begin, options: " << compactOptions.toString(); std::vector<BSONObj> indexesInProg = stopIndexBuilds(db, cmdObj); StatusWith<CompactStats> status = collection->compact( &compactOptions ); if ( !status.isOK() ) return appendCommandStatus( result, status.getStatus() ); if ( status.getValue().corruptDocuments > 0 ) result.append("invalidObjects", status.getValue().corruptDocuments ); log() << "compact " << ns << " end"; IndexBuilder::restoreIndexes(indexesInProg); return true; }
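// The padding options above form a small decision table: preservePadding is
// mutually exclusive with the manual knobs, paddingFactor must lie in
// [1.0, 4.0], and paddingBytes in [0, 1MB]. A self-contained restatement of
// just that validation, with a plain struct standing in for CompactOptions:
#include <optional>
#include <string>

struct PaddingArgs {
    bool preservePadding = false;
    std::optional<double> paddingFactor;
    std::optional<int> paddingBytes;
};

// Returns an error message, or std::nullopt when the combination is valid.
std::optional<std::string> validatePadding(const PaddingArgs& args) {
    if (args.preservePadding && (args.paddingFactor || args.paddingBytes))
        return "cannot mix preservePadding and paddingFactor|paddingBytes";
    if (args.paddingFactor && (*args.paddingFactor < 1 || *args.paddingFactor > 4))
        return "invalid padding factor";
    if (args.paddingBytes && (*args.paddingBytes < 0 || *args.paddingBytes > 1024 * 1024))
        return "invalid padding bytes";
    return std::nullopt;
}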
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result) { // --- parse NamespaceString ns(dbname, cmdObj[name].String()); Status status = userAllowedWriteNS(ns); if (!status.isOK()) return appendCommandStatus(result, status); if (cmdObj["indexes"].type() != Array) { errmsg = "indexes has to be an array"; result.append("cmdObj", cmdObj); return false; } std::vector<BSONObj> specs; { BSONObjIterator i(cmdObj["indexes"].Obj()); while (i.more()) { BSONElement e = i.next(); if (e.type() != Object) { errmsg = "everything in indexes has to be an Object"; result.append("cmdObj", cmdObj); return false; } specs.push_back(e.Obj()); } } if (specs.size() == 0) { errmsg = "no indexes to add"; return false; } // check specs for (size_t i = 0; i < specs.size(); i++) { BSONObj spec = specs[i]; if (spec["ns"].eoo()) { spec = _addNsToSpec(ns, spec); specs[i] = spec; } if (spec["ns"].type() != String) { errmsg = "spec has no ns"; result.append("spec", spec); return false; } if (ns != spec["ns"].String()) { errmsg = "namespace mismatch"; result.append("spec", spec); return false; } } // now we know we have to create index(es) // Note: createIndexes command does not currently respect shard versioning. ScopedTransaction transaction(txn, MODE_IX); Lock::DBLock dbLock(txn->lockState(), ns.db(), MODE_X); if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) { return appendCommandStatus( result, Status(ErrorCodes::NotMaster, str::stream() << "Not primary while creating indexes in " << ns.ns())); } Database* db = dbHolder().get(txn, ns.db()); if (!db) { db = dbHolder().openDb(txn, ns.db()); } Collection* collection = db->getCollection(ns.ns()); result.appendBool("createdCollectionAutomatically", collection == NULL); if (!collection) { MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { WriteUnitOfWork wunit(txn); collection = db->createCollection(txn, ns.ns(), CollectionOptions()); invariant(collection); wunit.commit(); } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns()); }
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { // // Correct behavior here is very finicky. // // 1. The first step is to append the error that occurred on the previous operation. // This adds an "err" field to the command, which is *not* the command failing. // // 2. Next we parse and validate write concern options. If these options are invalid // the command fails no matter what, even if we actually had an error earlier. The // reason for checking here is to match legacy behavior on these kind of failures - // we'll still get an "err" field for the write error. // // 3. If we had an error on the previous operation, we then return immediately. // // 4. Finally, we actually enforce the write concern. All errors *except* timeout are // reported with ok : 0.0, to match legacy behavior. // // There is a special case when "wOpTime" and "wElectionId" are explicitly provided by // the client (mongos) - in this case we *only* enforce the write concern if it is // valid. // // We always need to either report "err" (if ok : 1) or "errmsg" (if ok : 0), even if // err is null. // LastError *le = lastError.disableForCommand(); // Always append lastOp and connectionId Client& c = cc(); c.appendLastOp( result ); // for sharding; also useful in general for debugging result.appendNumber( "connectionId" , c.getConnectionId() ); OpTime lastOpTime; BSONField<OpTime> wOpTimeField("wOpTime"); FieldParser::FieldState extracted = FieldParser::extract(cmdObj, wOpTimeField, &lastOpTime, &errmsg); if (!extracted) { result.append("badGLE", cmdObj); appendCommandStatus(result, false, errmsg); return false; } bool lastOpTimePresent = extracted != FieldParser::FIELD_NONE; if (!lastOpTimePresent) { // Use the client opTime if no wOpTime is specified lastOpTime = cc().getLastOp(); } OID electionId; BSONField<OID> wElectionIdField("wElectionId"); extracted = FieldParser::extract(cmdObj, wElectionIdField, &electionId, &errmsg); if (!extracted) { result.append("badGLE", cmdObj); appendCommandStatus(result, false, errmsg); return false; } bool electionIdPresent = extracted != FieldParser::FIELD_NONE; bool errorOccurred = false; // Errors aren't reported when wOpTime is used if ( !lastOpTimePresent ) { if ( le->nPrev != 1 ) { errorOccurred = LastError::noError.appendSelf( result, false ); le->appendSelfStatus( result ); } else { errorOccurred = le->appendSelf( result, false ); } } BSONObj writeConcernDoc = cmdObj; // Use the default options if we have no gle options aside from wOpTime/wElectionId const int nFields = cmdObj.nFields(); bool useDefaultGLEOptions = (nFields == 1) || (nFields == 2 && lastOpTimePresent) || (nFields == 3 && lastOpTimePresent && electionIdPresent); if ( useDefaultGLEOptions && getLastErrorDefault ) { writeConcernDoc = *getLastErrorDefault; } // // Validate write concern no matter what, this matches 2.4 behavior // WriteConcernOptions writeConcern; Status status = writeConcern.parse( writeConcernDoc ); if ( status.isOK() ) { // Ensure options are valid for this host status = validateWriteConcern( writeConcern ); } if ( !status.isOK() ) { result.append( "badGLE", writeConcernDoc ); return appendCommandStatus( result, status ); } // Don't wait for replication if there was an error reported - this matches 2.4 behavior if ( errorOccurred ) { dassert( !lastOpTimePresent ); return true; } // No error occurred, so we won't duplicate these fields with write concern errors dassert( result.asTempObj()["err"].eoo() ); dassert( 
result.asTempObj()["code"].eoo() ); // If we got an electionId, make sure it matches if (electionIdPresent) { if (!theReplSet) { // Ignore electionIds of 0 from mongos. if (electionId != OID()) { errmsg = "wElectionId passed but no replication active"; result.append("code", ErrorCodes::BadValue); return false; } } else { if (electionId != theReplSet->getElectionId()) { LOG(3) << "oid passed in is " << electionId << ", but our id is " << theReplSet->getElectionId(); errmsg = "election occurred after write"; result.append("code", ErrorCodes::WriteConcernFailed); return false; } } } cc().curop()->setMessage( "waiting for write concern" ); WriteConcernResult wcResult; status = waitForWriteConcern( txn, writeConcern, lastOpTime, &wcResult ); wcResult.appendTo( writeConcern, &result ); // For backward compatibility with 2.4, wtimeout returns ok : 1.0 if ( wcResult.wTimedOut ) { dassert( !wcResult.err.empty() ); // so we always report err dassert( !status.isOK() ); result.append( "errmsg", "timed out waiting for slaves" ); result.append( "code", status.code() ); return true; } return appendCommandStatus( result, status ); }
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) { // --- parse NamespaceString ns( dbname, cmdObj[name].String() ); Status status = userAllowedWriteNS( ns ); if ( !status.isOK() ) return appendCommandStatus( result, status ); if ( cmdObj["indexes"].type() != Array ) { errmsg = "indexes has to be an array"; result.append( "cmdObj", cmdObj ); return false; } std::vector<BSONObj> specs; { BSONObjIterator i( cmdObj["indexes"].Obj() ); while ( i.more() ) { BSONElement e = i.next(); if ( e.type() != Object ) { errmsg = "everything in indexes has to be an Object"; result.append( "cmdObj", cmdObj ); return false; } specs.push_back( e.Obj() ); } } if ( specs.size() == 0 ) { errmsg = "no indexes to add"; return false; } // check specs for ( size_t i = 0; i < specs.size(); i++ ) { BSONObj spec = specs[i]; if ( spec["ns"].eoo() ) { spec = _addNsToSpec( ns, spec ); specs[i] = spec; } if ( spec["ns"].type() != String ) { errmsg = "spec has no ns"; result.append( "spec", spec ); return false; } if ( ns != spec["ns"].String() ) { errmsg = "namespace mismatch"; result.append( "spec", spec ); return false; } } // now we know we have to create index(es) // Note: createIndexes command does not currently respect shard versioning. Lock::DBLock lk(txn->lockState(), ns.db(), MODE_X); Client::Context ctx(txn, ns.ns(), false /* doVersion */ ); Database* db = ctx.db(); Collection* collection = db->getCollection( txn, ns.ns() ); result.appendBool( "createdCollectionAutomatically", collection == NULL ); if ( !collection ) { WriteUnitOfWork wunit(txn); collection = db->createCollection( txn, ns.ns() ); invariant( collection ); if (!fromRepl) { repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), BSON("create" << ns.coll())); } wunit.commit(); } result.append( "numIndexesBefore", collection->getIndexCatalog()->numIndexesTotal(txn) ); MultiIndexBlock indexer(txn, collection); indexer.allowBackgroundBuilding(); indexer.allowInterruption(); const size_t origSpecsSize = specs.size(); indexer.removeExistingIndexes(&specs); if (specs.size() == 0) { result.append( "note", "all indexes already exist" ); return true; } if (specs.size() != origSpecsSize) { result.append( "note", "index already exists" ); } for ( size_t i = 0; i < specs.size(); i++ ) { const BSONObj& spec = specs[i]; if ( spec["unique"].trueValue() ) { status = checkUniqueIndexConstraints(txn, ns.ns(), spec["key"].Obj()); if ( !status.isOK() ) { appendCommandStatus( result, status ); return false; } } } uassertStatusOK(indexer.init(specs)); uassertStatusOK(indexer.insertAllDocumentsInCollection()); { WriteUnitOfWork wunit(txn); indexer.commit(); if ( !fromRepl ) { for ( size_t i = 0; i < specs.size(); i++ ) { std::string systemIndexes = ns.getSystemIndexesCollection(); repl::logOp(txn, "i", systemIndexes.c_str(), specs[i]); } } wunit.commit(); } result.append( "numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal(txn) ); return true; }
bool CmdExplain::run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl) { // Should never get explain commands issued from replication. if (fromRepl) { Status commandStat(ErrorCodes::IllegalOperation, "explain command should not be from repl"); appendCommandStatus(result, commandStat); return false; } // Get the verbosity. We use the executionStats verbosity by default. Explain::Verbosity verbosity = Explain::EXEC_STATS; if (!cmdObj["verbosity"].eoo()) { const char* verbStr = cmdObj["verbosity"].valuestrsafe(); if (mongoutils::str::equals(verbStr, "queryPlanner")) { verbosity = Explain::QUERY_PLANNER; } else if (mongoutils::str::equals(verbStr, "allPlansExecution")) { verbosity = Explain::EXEC_ALL_PLANS; } else if (mongoutils::str::equals(verbStr, "full")) { verbosity = Explain::FULL; } else if (!mongoutils::str::equals(verbStr, "executionStats")) { Status commandStat(ErrorCodes::BadValue, "verbosity string must be one of " "{'queryPlanner', 'executionStats', 'allPlansExecution'}"); appendCommandStatus(result, commandStat); return false; } } if (Object != cmdObj.firstElement().type()) { Status commandStat(ErrorCodes::BadValue, "explain command requires a nested object"); appendCommandStatus(result, commandStat); return false; } // This is the nested command which we are explaining. BSONObj explainObj = cmdObj.firstElement().Obj(); Command* commToExplain = Command::findCommand(explainObj.firstElementFieldName()); if (NULL == commToExplain) { mongoutils::str::stream ss; ss << "unknown command: " << explainObj.firstElementFieldName(); Status explainStatus(ErrorCodes::CommandNotFound, ss); return appendCommandStatus(result, explainStatus); } // Check whether the child command is allowed to run here. TODO: this logic is // copied from Command::execCommand and should be abstracted. Until then, make // sure to keep it up to date. repl::ReplicationCoordinator* replCoord = repl::getGlobalReplicationCoordinator(); bool canRunHere = replCoord->canAcceptWritesForDatabase(dbname) || commToExplain->slaveOk() || (commToExplain->slaveOverrideOk() && (options & QueryOption_SlaveOk)); if (!canRunHere) { mongoutils::str::stream ss; ss << "Explain's child command cannot run on this node. " << "Are you explaining a write command on a secondary?"; appendCommandStatus(result, false, ss); return false; } // Actually call the nested command's explain(...) method. Status explainStatus = commToExplain->explain(txn, dbname, explainObj, verbosity, &result); if (!explainStatus.isOK()) { return appendCommandStatus(result, explainStatus); } return true; }
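// The verbosity handling above accepts four strings but deliberately omits
// "full" from the error message, and a missing field defaults to
// executionStats before parsing begins. A self-contained sketch of that
// mapping with an invented enum (the server's Explain::Verbosity names
// differ):
#include <optional>
#include <string>

enum class Verbosity { kQueryPlanner, kExecStats, kExecAllPlans, kFull };

std::optional<Verbosity> parseVerbosity(const std::string& s) {
    if (s == "queryPlanner")
        return Verbosity::kQueryPlanner;
    if (s == "executionStats")
        return Verbosity::kExecStats;
    if (s == "allPlansExecution")
        return Verbosity::kExecAllPlans;
    if (s == "full")
        return Verbosity::kFull;  // accepted but left out of the error text
    return std::nullopt;          // caller reports BadValue
}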