Esempio n. 1
0
    virtual bool run(OperationContext* txn,
                     const string& db,
                     BSONObj& cmdObj,
                     int options,
                     string& errmsg,
                     BSONObjBuilder& result) {
        const std::string ns = parseNs(db, cmdObj);
        if (nsToCollectionSubstring(ns).empty()) {
            errmsg = "missing collection name";
            return false;
        }
        NamespaceString nss(ns);

        // Parse the options for this request.
        auto request = AggregationRequest::parseFromBSON(nss, cmdObj);
        if (!request.isOK()) {
            return appendCommandStatus(result, request.getStatus());
        }

        // Set up the ExpressionContext.
        intrusive_ptr<ExpressionContext> expCtx = new ExpressionContext(txn, request.getValue());
        expCtx->tempDir = storageGlobalParams.dbpath + "/_tmp";

        // Parse the pipeline.
        auto statusWithPipeline = Pipeline::parse(request.getValue().getPipeline(), expCtx);
        if (!statusWithPipeline.isOK()) {
            return appendCommandStatus(result, statusWithPipeline.getStatus());
        }
        auto pipeline = std::move(statusWithPipeline.getValue());

        auto resolvedNamespaces = resolveInvolvedNamespaces(txn, pipeline, expCtx);
        if (!resolvedNamespaces.isOK()) {
            return appendCommandStatus(result, resolvedNamespaces.getStatus());
        }
        expCtx->resolvedNamespaces = std::move(resolvedNamespaces.getValue());

        unique_ptr<ClientCursorPin> pin;  // either this OR the exec will be non-null
        unique_ptr<PlanExecutor> exec;
        auto curOp = CurOp::get(txn);
        {
            // This will throw if the sharding version for this connection is out of date. If the
            // namespace is a view, the lock will be released before re-running the aggregation.
            // Otherwise, the lock must be held continuously from now until we have we created both
            // the output ClientCursor and the input executor. This ensures that both are using the
            // same sharding version that we synchronize on here. This is also why we always need to
            // create a ClientCursor even when we aren't outputting to a cursor. See the comment on
            // ShardFilterStage for more details.
            AutoGetCollectionOrViewForRead ctx(txn, nss);
            Collection* collection = ctx.getCollection();

            // If running $collStats on a view, we do not resolve the view since we want stats
            // on this view namespace.
            auto startsWithCollStats = [&pipeline]() {
                const Pipeline::SourceContainer& sources = pipeline->getSources();
                return !sources.empty() &&
                    dynamic_cast<DocumentSourceCollStats*>(sources.front().get());
            };

            // If this is a view, resolve it by finding the underlying collection and stitching view
            // pipelines and this request's pipeline together. We then release our locks before
            // recursively calling run, which will re-acquire locks on the underlying collection.
            // (The lock must be released because recursively acquiring locks on the database will
            // prohibit yielding.)
            auto view = ctx.getView();
            if (view && !startsWithCollStats()) {
                auto viewDefinition =
                    ViewShardingCheck::getResolvedViewIfSharded(txn, ctx.getDb(), view);
                if (!viewDefinition.isOK()) {
                    return appendCommandStatus(result, viewDefinition.getStatus());
                }

                if (!viewDefinition.getValue().isEmpty()) {
                    ViewShardingCheck::appendShardedViewStatus(viewDefinition.getValue(), &result);
                    return false;
                }

                auto resolvedView = ctx.getDb()->getViewCatalog()->resolveView(txn, nss);
                if (!resolvedView.isOK()) {
                    return appendCommandStatus(result, resolvedView.getStatus());
                }

                // With the view resolved, we can relinquish locks.
                ctx.releaseLocksForView();

                // Parse the resolved view into a new aggregation request.
                auto viewCmd =
                    resolvedView.getValue().asExpandedViewAggregation(request.getValue());
                if (!viewCmd.isOK()) {
                    return appendCommandStatus(result, viewCmd.getStatus());
                }

                bool status = this->run(txn, db, viewCmd.getValue(), options, errmsg, result);
                {
                    // Set the namespace of the curop back to the view namespace so ctx records
                    // stats on this view namespace on destruction.
                    stdx::lock_guard<Client>(*txn->getClient());
                    curOp->setNS_inlock(nss.ns());
                }
                return status;
            }

            // If the pipeline does not have a user-specified collation, set it from the collection
            // default.
            if (request.getValue().getCollation().isEmpty() && collection &&
                collection->getDefaultCollator()) {
                invariant(!expCtx->getCollator());
                expCtx->setCollator(collection->getDefaultCollator()->clone());
            }

            // Propagate the ExpressionContext throughout all of the pipeline's stages and
            // expressions.
            pipeline->injectExpressionContext(expCtx);

            // The pipeline must be optimized after the correct collator has been set on it (by
            // injecting the ExpressionContext containing the collator). This is necessary because
            // optimization may make string comparisons, e.g. optimizing {$eq: [<str1>, <str2>]} to
            // a constant.
            pipeline->optimizePipeline();

            if (kDebugBuild && !expCtx->isExplain && !expCtx->inShard) {
                // Make sure all operations round-trip through Pipeline::serialize() correctly by
                // re-parsing every command in debug builds. This is important because sharded
                // aggregations rely on this ability.  Skipping when inShard because this has
                // already been through the transformation (and this un-sets expCtx->inShard).
                pipeline = reparsePipeline(pipeline, request.getValue(), expCtx);
            }

            // This does mongod-specific stuff like creating the input PlanExecutor and adding
            // it to the front of the pipeline if needed.
            PipelineD::prepareCursorSource(collection, pipeline);

            // Create the PlanExecutor which returns results from the pipeline. The WorkingSet
            // ('ws') and the PipelineProxyStage ('proxy') will be owned by the created
            // PlanExecutor.
            auto ws = make_unique<WorkingSet>();
            auto proxy = make_unique<PipelineProxyStage>(txn, pipeline, ws.get());

            auto statusWithPlanExecutor = (NULL == collection)
                ? PlanExecutor::make(
                      txn, std::move(ws), std::move(proxy), nss.ns(), PlanExecutor::YIELD_MANUAL)
                : PlanExecutor::make(
                      txn, std::move(ws), std::move(proxy), collection, PlanExecutor::YIELD_MANUAL);
            invariant(statusWithPlanExecutor.isOK());
            exec = std::move(statusWithPlanExecutor.getValue());

            {
                auto planSummary = Explain::getPlanSummary(exec.get());
                stdx::lock_guard<Client>(*txn->getClient());
                curOp->setPlanSummary_inlock(std::move(planSummary));
            }

            if (collection) {
                PlanSummaryStats stats;
                Explain::getSummaryStats(*exec, &stats);
                collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed);
            }

            if (collection) {
                const bool isAggCursor = true;  // enable special locking behavior
                ClientCursor* cursor =
                    new ClientCursor(collection->getCursorManager(),
                                     exec.release(),
                                     nss.ns(),
                                     txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                                     0,
                                     cmdObj.getOwned(),
                                     isAggCursor);
                pin.reset(new ClientCursorPin(collection->getCursorManager(), cursor->cursorid()));
                // Don't add any code between here and the start of the try block.
            }

            // At this point, it is safe to release the collection lock.
            // - In the case where we have a collection: we will need to reacquire the
            //   collection lock later when cleaning up our ClientCursorPin.
            // - In the case where we don't have a collection: our PlanExecutor won't be
            //   registered, so it will be safe to clean it up outside the lock.
            invariant(!exec || !collection);
        }

        try {
            // Unless set to true, the ClientCursor created above will be deleted on block exit.
            bool keepCursor = false;

            // Use of the aggregate command without specifying to use a cursor is deprecated.
            // Applications should migrate to using cursors. Cursors are strictly more useful than
            // outputting the results as a single document, since results that fit inside a single
            // BSONObj will also fit inside a single batch.
            //
            // We occasionally log a deprecation warning.
            if (!request.getValue().isCursorCommand()) {
                RARELY {
                    warning()
                        << "Use of the aggregate command without the 'cursor' "
                           "option is deprecated. See "
                           "http://dochub.mongodb.org/core/aggregate-without-cursor-deprecation.";
                }
            }

            // If both explain and cursor are specified, explain wins.
            if (expCtx->isExplain) {
                result << "stages" << Value(pipeline->writeExplainOps());
            } else if (request.getValue().isCursorCommand()) {
                keepCursor = handleCursorCommand(txn,
                                                 nss.ns(),
                                                 pin.get(),
                                                 pin ? pin->c()->getExecutor() : exec.get(),
                                                 request.getValue(),
                                                 result);
            } else {
                pipeline->run(result);
            }

            if (!expCtx->isExplain) {
                PlanSummaryStats stats;
                Explain::getSummaryStats(pin ? *pin->c()->getExecutor() : *exec.get(), &stats);
                curOp->debug().setPlanSummaryMetrics(stats);
                curOp->debug().nreturned = stats.nReturned;
            }

            // Clean up our ClientCursorPin, if needed.  We must reacquire the collection lock
            // in order to do so.
            if (pin) {
                // We acquire locks here with DBLock and CollectionLock instead of using
                // AutoGetCollectionForRead.  AutoGetCollectionForRead will throw if the
                // sharding version is out of date, and we don't care if the sharding version
                // has changed.
                Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
                Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
                if (keepCursor) {
                    pin->release();
                } else {
                    pin->deleteUnderlying();
                }
            }
        } catch (...) {
Esempio n. 2
0
        virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {

            if ( cmdObj.firstElement().type() != Array ) {
                errmsg = "ops has to be an array";
                return false;
            }

            BSONObj ops = cmdObj.firstElement().Obj();

            {
                // check input
                BSONObjIterator i( ops );
                while ( i.more() ) {
                    BSONElement e = i.next();
                    if (!_checkOperation(e, errmsg)) {
                        return false;
                    }
                }
            }

            // SERVER-4328 todo : is global ok or does this take a long time? i believe multiple 
            // ns used so locking individually requires more analysis
            ScopedTransaction scopedXact(txn, MODE_X);
            Lock::GlobalWrite globalWriteLock(txn->lockState());

            if (!fromRepl &&
                !repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) {
                return appendCommandStatus(result, Status(ErrorCodes::NotMaster, str::stream()
                    << "Not primary while applying ops to database " << dbname));
            }

            // Preconditions check reads the database state, so needs to be done locked
            if ( cmdObj["preCondition"].type() == Array ) {
                BSONObjIterator i( cmdObj["preCondition"].Obj() );
                while ( i.more() ) {
                    BSONObj f = i.next().Obj();

                    DBDirectClient db( txn );
                    BSONObj realres = db.findOne( f["ns"].String() , f["q"].Obj() );

                    // Apply-ops would never have a $where matcher, so use the default callback,
                    // which will throw an error if $where is found.
                    Matcher m(f["res"].Obj());
                    if ( ! m.matches( realres ) ) {
                        result.append( "got" , realres );
                        result.append( "whatFailed" , f );
                        errmsg = "pre-condition failed";
                        return false;
                    }
                }
            }

            // apply
            int num = 0;
            int errors = 0;
            
            BSONObjIterator i( ops );
            BSONArrayBuilder ab;
            const bool alwaysUpsert = cmdObj.hasField("alwaysUpsert") ?
                    cmdObj["alwaysUpsert"].trueValue() : true;
            
            while ( i.more() ) {
                BSONElement e = i.next();
                const BSONObj& temp = e.Obj();

                // Ignore 'n' operations.
                const char *opType = temp["op"].valuestrsafe();
                if (*opType == 'n') continue;

                const string ns = temp["ns"].String();

                // Run operations under a nested lock as a hack to prevent yielding.
                //
                // The list of operations is supposed to be applied atomically; yielding
                // would break atomicity by allowing an interruption or a shutdown to occur
                // after only some operations are applied.  We are already locked globally
                // at this point, so taking a DBLock on the namespace creates a nested lock,
                // and yields are disallowed for operations that hold a nested lock.
                //
                // We do not have a wrapping WriteUnitOfWork so it is possible for a journal
                // commit to happen with a subset of ops applied.
                // TODO figure out what to do about this.
                Lock::GlobalWrite globalWriteLockDisallowTempRelease(txn->lockState());

                // Ensures that yielding will not happen (see the comment above).
                DEV {
                    Locker::LockSnapshot lockSnapshot;
                    invariant(!txn->lockState()->saveLockStateAndUnlock(&lockSnapshot));
                };

                OldClientContext ctx(txn, ns);

                Status status(ErrorCodes::InternalError, "");
                while (true) {
                    try {
                        // We assume that in the WriteConflict retry case, either the op rolls back
                        // any changes it makes or is otherwise safe to rerun.
                        status =
                            repl::applyOperation_inlock(txn, ctx.db(), temp, false, alwaysUpsert);
                        break;
                    }
                    catch (const WriteConflictException& wce) {
                        LOG(2) << "WriteConflictException in applyOps command, retrying.";
                        txn->recoveryUnit()->commitAndRestart();
                        continue;
                    }
                }

                ab.append(status.isOK());
                if (!status.isOK()) {
                    errors++;
                }

                num++;

                WriteUnitOfWork wuow(txn);
                logOpForDbHash(txn, ns.c_str());
                wuow.commit();
            }

            result.append( "applied" , num );
            result.append( "results" , ab.arr() );

            if ( ! fromRepl ) {
                // We want this applied atomically on slaves
                // so we re-wrap without the pre-condition for speed

                string tempNS = str::stream() << dbname << ".$cmd";

                // TODO: possibly use mutable BSON to remove preCondition field
                // once it is available
                BSONObjIterator iter(cmdObj);
                BSONObjBuilder cmdBuilder;

                while (iter.more()) {
                    BSONElement elem(iter.next());
                    if (strcmp(elem.fieldName(), "preCondition") != 0) {
                        cmdBuilder.append(elem);
                    }
                }

                const BSONObj cmdRewritten = cmdBuilder.done();

                // We currently always logOp the command regardless of whether the individial ops
                // succeeded and rely on any failures to also happen on secondaries. This isn't
                // perfect, but it's what the command has always done and is part of its "correct"
                // behavior.
                while (true) {
                    try {
                        WriteUnitOfWork wunit(txn);
                        getGlobalEnvironment()->getOpObserver()->onApplyOps(txn,
                                                                            tempNS,
                                                                            cmdRewritten);
                        wunit.commit();
                        break;
                    }
                    catch (const WriteConflictException& wce) {
                        LOG(2) <<
                            "WriteConflictException while logging applyOps command, retrying.";
                        txn->recoveryUnit()->commitAndRestart();
                        continue;
                    }
                }
            }

            if (errors != 0) {
                return false;
            }

            return true;
        }
Esempio n. 3
0
        bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
            static DBDirectClient db;

            BSONElement e = jsobj.firstElement();
            string toDeleteNs = dbname + '.' + e.valuestr();

            MONGO_TLOG(0) << "CMD: reIndex " << toDeleteNs << endl;

            Lock::DBWrite dbXLock(dbname);
            Client::Context ctx(toDeleteNs);

            Collection* collection = cc().database()->getCollection( toDeleteNs );

            if ( !collection ) {
                errmsg = "ns not found";
                return false;
            }

            BackgroundOperation::assertNoBgOpInProgForNs( toDeleteNs );

            std::vector<BSONObj> indexesInProg = stopIndexBuilds(cc().database(), jsobj);

            list<BSONObj> all;
            auto_ptr<DBClientCursor> i = db.query( dbname + ".system.indexes" , BSON( "ns" << toDeleteNs ) , 0 , 0 , 0 , QueryOption_SlaveOk );
            BSONObjBuilder b;
            while ( i->more() ) {
                const BSONObj spec = i->next().removeField("v").getOwned();
                const BSONObj key = spec.getObjectField("key");
                const Status keyStatus = validateKeyPattern(key);
                if (!keyStatus.isOK()) {
                    errmsg = str::stream()
                        << "Cannot rebuild index " << spec << ": " << keyStatus.reason()
                        << " For more info see http://dochub.mongodb.org/core/index-validation";
                    return false;
                }

                b.append( BSONObjBuilder::numStr( all.size() ) , spec );
                all.push_back( spec );
            }
            result.appendNumber( "nIndexesWas", collection->getIndexCatalog()->numIndexesTotal() );

            Status s = collection->getIndexCatalog()->dropAllIndexes( true );
            if ( !s.isOK() ) {
                errmsg = "dropIndexes failed";
                return appendCommandStatus( result, s );
            }

            for ( list<BSONObj>::iterator i=all.begin(); i!=all.end(); i++ ) {
                BSONObj o = *i;
                LOG(1) << "reIndex ns: " << toDeleteNs << " index: " << o << endl;
                Status s = collection->getIndexCatalog()->createIndex( o, false );
                if ( !s.isOK() )
                    return appendCommandStatus( result, s );
            }

            result.append( "nIndexes" , (int)all.size() );
            result.appendArray( "indexes" , b.obj() );

            IndexBuilder::restoreIndexes(indexesInProg);
            return true;
        }
Esempio n. 4
0
    void Command::execCommandClientBasic(Command * c ,
                                         ClientBasic& client,
                                         int queryOptions,
                                         const char *ns,
                                         BSONObj& cmdObj,
                                         BSONObjBuilder& result,
                                         bool fromRepl ) {
        verify(c);

        std::string dbname = nsToDatabase(ns);

        // Access control checks
        if (!noauth) {
            std::vector<Privilege> privileges;
            c->addRequiredPrivileges(dbname, cmdObj, &privileges);
            AuthorizationManager* authManager = client.getAuthorizationManager();
            if (!authManager->checkAuthForPrivileges(privileges).isOK()) {
                result.append("note", str::stream() << "not authorized for command: " <<
                                    c->name << " on database " << dbname);
                appendCommandStatus(result, false, "unauthorized");
                return;
            }
        }
        if (c->adminOnly() && c->localHostOnlyIfNoAuth(cmdObj) && noauth &&
                !client.getIsLocalHostConnection()) {
            log() << "command denied: " << cmdObj.toString() << endl;
            appendCommandStatus(result,
                               false,
                               "unauthorized: this command must run from localhost when running db "
                               "without auth");
            return;
        }
        if (c->adminOnly() && !startsWith(ns, "admin.")) {
            log() << "command denied: " << cmdObj.toString() << endl;
            appendCommandStatus(result, false, "access denied - use admin db");
            return;
        }
        // End of access control checks

        if (cmdObj.getBoolField("help")) {
            stringstream help;
            help << "help for: " << c->name << " ";
            c->help( help );
            result.append( "help" , help.str() );
            result.append( "lockType" , c->locktype() );
            appendCommandStatus(result, true, "");
            return;
        }
        std::string errmsg;
        bool ok;
        try {
            ok = c->run( dbname , cmdObj, queryOptions, errmsg, result, false );
        }
        catch (DBException& e) {
            ok = false;
            int code = e.getCode();
            if (code == RecvStaleConfigCode) { // code for StaleConfigException
                throw;
            }

            stringstream ss;
            ss << "exception: " << e.what();
            errmsg = ss.str();
            result.append( "code" , code );
        }

        appendCommandStatus(result, ok, errmsg);
    }
Esempio n. 5
0
        virtual bool run( const string& dbname, BSONObj& cmdObj, int options,
                          string& errmsg, BSONObjBuilder& result,
                          bool fromRepl = false ) {

            // ---  parse

            NamespaceString ns( dbname, cmdObj[name].String() );
            Status status = userAllowedWriteNS( ns );
            if ( !status.isOK() )
                return appendCommandStatus( result, status );

            if ( cmdObj["indexes"].type() != Array ) {
                errmsg = "indexes has to be an array";
                result.append( "cmdObj", cmdObj );
                return false;
            }

            std::vector<BSONObj> specs;
            {
                BSONObjIterator i( cmdObj["indexes"].Obj() );
                while ( i.more() ) {
                    BSONElement e = i.next();
                    if ( e.type() != Object ) {
                        errmsg = "everything in indexes has to be an Object";
                        result.append( "cmdObj", cmdObj );
                        return false;
                    }
                    specs.push_back( e.Obj() );
                }
            }

            if ( specs.size() == 0 ) {
                errmsg = "no indexes to add";
                return false;
            }

            // check specs
            for ( size_t i = 0; i < specs.size(); i++ ) {
                BSONObj spec = specs[i];
                if ( spec["ns"].eoo() ) {
                    spec = _addNsToSpec( ns, spec );
                    specs[i] = spec;
                }

                if ( spec["ns"].type() != String ) {
                    errmsg = "spec has no ns";
                    result.append( "spec", spec );
                    return false;
                }
                if ( ns != spec["ns"].String() ) {
                    errmsg = "namespace mismatch";
                    result.append( "spec", spec );
                    return false;
                }
            }


            {
                // We first take a read lock to see if we need to do anything
                // as many calls are ensureIndex (and hence no-ops), this is good so its a shared
                // lock for common calls. We only take write lock if needed.
                Client::ReadContext readContext( ns );
                const Collection* collection = readContext.ctx().db()->getCollection( ns.ns() );
                if ( collection ) {
                    for ( size_t i = 0; i < specs.size(); i++ ) {
                        BSONObj spec = specs[i];
                        StatusWith<BSONObj> statusWithSpec =
                            collection->getIndexCatalog()->prepareSpecForCreate( spec );
                        status = statusWithSpec.getStatus();
                        if ( status.code() == ErrorCodes::IndexAlreadyExists ) {
                            specs.erase( specs.begin() + i );
                            i--;
                            continue;
                        }
                        if ( !status.isOK() )
                            return appendCommandStatus( result, status );
                    }

                    if ( specs.size() == 0 ) {
                        result.append( "numIndexesBefore",
                                       collection->getIndexCatalog()->numIndexesTotal() );
                        result.append( "note", "all indexes already exist" );
                        return true;
                    }

                    // need to create index
                }
            }

            // now we know we have to create index(es)
            Client::WriteContext writeContext( ns.ns() );
            Database* db = writeContext.ctx().db();

            Collection* collection = db->getCollection( ns.ns() );
            result.appendBool( "createdCollectionAutomatically", collection == NULL );
            if ( !collection ) {
                collection = db->createCollection( ns.ns() );
                invariant( collection );
            }

            result.append( "numIndexesBefore", collection->getIndexCatalog()->numIndexesTotal() );

            for ( size_t i = 0; i < specs.size(); i++ ) {
                BSONObj spec = specs[i];

                status = collection->getIndexCatalog()->createIndex( spec, true );
                if ( status.code() == ErrorCodes::IndexAlreadyExists ) {
                    if ( !result.hasField( "note" ) )
                        result.append( "note", "index already exists" );
                    continue;
                }

                if ( !status.isOK() ) {
                    appendCommandStatus( result, status );
                    return false;
                }
            }

            result.append( "numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal() );
            if ( !fromRepl ) {
                string cmdNs = ns.getCommandNS();
                logOp( "c", cmdNs.c_str(), cmdObj );
            }

            return true;
        }
Esempio n. 6
0
        bool run(OperationContext* txn, const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
            DBDirectClient db(txn);

            BSONElement e = jsobj.firstElement();
            string toDeleteNs = dbname + '.' + e.valuestr();

            LOG(0) << "CMD: reIndex " << toDeleteNs << endl;

            Lock::DBLock dbXLock(txn->lockState(), dbname, MODE_X);
            Client::Context ctx(txn, toDeleteNs);

            Collection* collection = ctx.db()->getCollection( txn, toDeleteNs );

            if ( !collection ) {
                errmsg = "ns not found";
                return false;
            }

            BackgroundOperation::assertNoBgOpInProgForNs( toDeleteNs );

            std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, ctx.db(), jsobj);

            vector<BSONObj> all;
            {
                vector<string> indexNames;
                collection->getCatalogEntry()->getAllIndexes( txn, &indexNames );
                for ( size_t i = 0; i < indexNames.size(); i++ ) {
                    const string& name = indexNames[i];
                    BSONObj spec = collection->getCatalogEntry()->getIndexSpec( txn, name );
                    all.push_back(spec.removeField("v").getOwned());

                    const BSONObj key = spec.getObjectField("key");
                    const Status keyStatus = validateKeyPattern(key);
                    if (!keyStatus.isOK()) {
                        errmsg = str::stream()
                            << "Cannot rebuild index " << spec << ": " << keyStatus.reason()
                            << " For more info see http://dochub.mongodb.org/core/index-validation";
                        return false;
                    }
                }
            }

            result.appendNumber( "nIndexesWas", all.size() );

            {
                WriteUnitOfWork wunit(txn);
                Status s = collection->getIndexCatalog()->dropAllIndexes(txn, true);
                if ( !s.isOK() ) {
                    errmsg = "dropIndexes failed";
                    return appendCommandStatus( result, s );
                }
                wunit.commit();
            }

            MultiIndexBlock indexer(txn, collection);
            // do not want interruption as that will leave us without indexes.

            Status status = indexer.init(all);
            if (!status.isOK())
                return appendCommandStatus( result, status );

            status = indexer.insertAllDocumentsInCollection();
            if (!status.isOK())
                return appendCommandStatus( result, status );

            {
                WriteUnitOfWork wunit(txn);
                indexer.commit();
                wunit.commit();
            }

            result.append( "nIndexes", (int)all.size() );
            result.append( "indexes", all );

            IndexBuilder::restoreIndexes(indexesInProg);
            return true;
        }
Esempio n. 7
0
    bool run(OperationContext* txn,
             const string& dbname,
             BSONObj& cmdObj,
             int,
             string& errmsg,
             BSONObjBuilder& result) {
        BSONElement first = cmdObj.firstElement();
        uassert(28528,
                str::stream() << "Argument to listIndexes must be of type String, not "
                              << typeName(first.type()),
                first.type() == String);
        StringData collectionName = first.valueStringData();
        uassert(28529,
                str::stream() << "Argument to listIndexes must be a collection name, "
                              << "not the empty string",
                !collectionName.empty());
        const NamespaceString ns(dbname, collectionName);

        const long long defaultBatchSize = std::numeric_limits<long long>::max();
        long long batchSize;
        Status parseCursorStatus = parseCommandCursorOptions(cmdObj, defaultBatchSize, &batchSize);
        if (!parseCursorStatus.isOK()) {
            return appendCommandStatus(result, parseCursorStatus);
        }

        AutoGetCollectionForRead autoColl(txn, ns);
        if (!autoColl.getDb()) {
            return appendCommandStatus(result,
                                       Status(ErrorCodes::NamespaceNotFound, "no database"));
        }

        const Collection* collection = autoColl.getCollection();
        if (!collection) {
            return appendCommandStatus(result,
                                       Status(ErrorCodes::NamespaceNotFound, "no collection"));
        }

        const CollectionCatalogEntry* cce = collection->getCatalogEntry();
        invariant(cce);

        vector<string> indexNames;
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            indexNames.clear();
            cce->getAllIndexes(txn, &indexNames);
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

        auto ws = make_unique<WorkingSet>();
        auto root = make_unique<QueuedDataStage>(txn, ws.get());

        for (size_t i = 0; i < indexNames.size(); i++) {
            BSONObj indexSpec;
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                indexSpec = cce->getIndexSpec(txn, indexNames[i]);
            }
            MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

            WorkingSetID id = ws->allocate();
            WorkingSetMember* member = ws->get(id);
            member->keyData.clear();
            member->loc = RecordId();
            member->obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned());
            member->transitionToOwnedObj();
            root->pushBack(id);
        }

        std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name << "."
                                                    << ns.coll();
        dassert(NamespaceString(cursorNamespace).isValid());
        dassert(NamespaceString(cursorNamespace).isListIndexesCursorNS());
        dassert(ns == NamespaceString(cursorNamespace).getTargetNSForListIndexes());

        auto statusWithPlanExecutor = PlanExecutor::make(
            txn, std::move(ws), std::move(root), cursorNamespace, PlanExecutor::YIELD_MANUAL);
        if (!statusWithPlanExecutor.isOK()) {
            return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
        }
        unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

        BSONArrayBuilder firstBatch;

        const int byteLimit = FindCommon::kMaxBytesToReturnToClientAtOnce;
        for (long long objCount = 0; objCount < batchSize && firstBatch.len() < byteLimit;
             objCount++) {
            BSONObj next;
            PlanExecutor::ExecState state = exec->getNext(&next, NULL);
            if (state == PlanExecutor::IS_EOF) {
                break;
            }
            invariant(state == PlanExecutor::ADVANCED);
            firstBatch.append(next);
        }

        CursorId cursorId = 0LL;
        if (!exec->isEOF()) {
            exec->saveState();
            exec->detachFromOperationContext();
            ClientCursor* cursor =
                new ClientCursor(CursorManager::getGlobalCursorManager(),
                                 exec.release(),
                                 cursorNamespace,
                                 txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot());
            cursorId = cursor->cursorid();
        }

        appendCursorResponseObject(cursorId, cursorNamespace, firstBatch.arr(), &result);

        return true;
    }
Esempio n. 8
0
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {
            Lock::GlobalWrite globalWriteLock(txn->lockState());
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            // We stay in source context the whole time. This is mostly to set the CurOp namespace.
            Client::Context ctx(txn, source);

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            if (NamespaceString(source).coll() == "system.indexes"
                || NamespaceString(target).coll() == "system.indexes") {
                errmsg = "renaming system.indexes is not allowed";
                return false;
            }

            Database* const sourceDB = dbHolder().get(txn, nsToDatabase(source));
            Collection* const sourceColl = sourceDB ? sourceDB->getCollection(txn, source)
                                                    : NULL;
            if (!sourceColl) {
                errmsg = "source namespace does not exist";
                return false;
            }

            {
                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(NamespaceString::MaxNsCollectionLen),
                        int(NamespaceString::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }
            }

            const std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, sourceDB, cmdObj);
            // Dismissed on success
            ScopeGuard indexBuildRestorer = MakeGuard(IndexBuilder::restoreIndexes, indexesInProg);

            Database* const targetDB = dbHolder().openDb(txn, nsToDatabase(target));

            {
                WriteUnitOfWork wunit(txn);

                // Check if the target namespace exists and if dropTarget is true.
                // If target exists and dropTarget is not true, return false.
                if (targetDB->getCollection(txn, target)) {
                    if (!cmdObj["dropTarget"].trueValue()) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = targetDB->dropCollection(txn, target);
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if (sourceDB == targetDB) {
                    Status s = targetDB->renameCollection(txn,
                                                          source,
                                                          target,
                                                          cmdObj["stayTemp"].trueValue() );
                    if (!s.isOK()) {
                        return appendCommandStatus(result, s);
                    }

                    if (!fromRepl) {
                        repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                    }

                    wunit.commit();
                    indexBuildRestorer.Dismiss();
                    return true;
                }

                wunit.commit();
            }

            // If we get here, we are renaming across databases, so we must copy all the data and
            // indexes, then remove the source collection.

            // Create the target collection. It will be removed if we fail to copy the collection.
            // TODO use a temp collection and unset the temp flag on success.
            Collection* targetColl = NULL;
            {
                CollectionOptions options;
                options.setNoIdIndex();

                if (sourceColl->isCapped()) {
                    const CollectionOptions sourceOpts =
                        sourceColl->getCatalogEntry()->getCollectionOptions(txn);

                    options.capped = true;
                    options.cappedSize = sourceOpts.cappedSize;
                    options.cappedMaxDocs = sourceOpts.cappedMaxDocs;
                }

                WriteUnitOfWork wunit(txn);

                // No logOp necessary because the entire renameCollection command is one logOp.
                targetColl = targetDB->createCollection(txn, target, options);
                if (!targetColl) {
                    errmsg = "Failed to create target collection.";
                    return false;
                }

                wunit.commit();
            }

            // Dismissed on success
            ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target);

            MultiIndexBlock indexer(txn, targetColl);
            indexer.allowInterruption();

            // Copy the index descriptions from the source collection, adjusting the ns field.
            {
                std::vector<BSONObj> indexesToCopy;
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                while (sourceIndIt.more()) {
                    const BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Process the source index.
                    BSONObjBuilder newIndex;
                    newIndex.append("ns", target);
                    newIndex.appendElementsUnique(currIndex);
                    indexesToCopy.push_back(newIndex.obj());
                }
                indexer.init(indexesToCopy);
            }

            {
                // Copy over all the data from source collection to target collection.
                boost::scoped_ptr<RecordIterator> sourceIt(sourceColl->getIterator(txn));
                while (!sourceIt->isEOF()) {
                    txn->checkForInterrupt(false);

                    const BSONObj obj = sourceColl->docFor(txn, sourceIt->getNext());

                    WriteUnitOfWork wunit(txn);
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status status = targetColl->insertDocument(txn, obj, &indexer, true).getStatus();
                    if (!status.isOK())
                        return appendCommandStatus(result, status);
                    wunit.commit();
                }
            }

            Status status = indexer.doneInserting();
            if (!status.isOK())
                return appendCommandStatus(result, status);

            {
                // Getting here means we successfully built the target copy. We now remove the
                // source collection and finalize the rename.
                WriteUnitOfWork wunit(txn);

                Status status = sourceDB->dropCollection(txn, source);
                if (!status.isOK())
                    return appendCommandStatus(result, status);

                indexer.commit();

                if (!fromRepl) {
                    repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                }

                wunit.commit();
            }

            indexBuildRestorer.Dismiss();
            targetCollectionDropper.Dismiss();
            return true;
        }
Esempio n. 9
0
 virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
     Status status = getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
     return appendCommandStatus(result, status);
 }
Esempio n. 10
0
 virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
     int secs = (int) cmdObj.firstElement().numberInt();
     return appendCommandStatus(
             result,
             getGlobalReplicationCoordinator()->processReplSetFreeze(secs, &result));
 }
Esempio n. 11
0
    bool WriteCmd::run(const string& dbName,
                       BSONObj& cmdObj,
                       int options,
                       string& errMsg,
                       BSONObjBuilder& result,
                       bool fromRepl) {

        // Can't be run on secondaries (logTheOp() == false, slaveOk() == false).
        dassert( !fromRepl );
        BatchedCommandRequest request( _writeType );
        BatchedCommandResponse response;

        if ( !request.parseBSON( cmdObj, &errMsg ) || !request.isValid( &errMsg ) ) {

            // Batch parse failure
            response.setOk( false );
            response.setN( 0 );
            response.setErrCode( ErrorCodes::FailedToParse );
            response.setErrMessage( errMsg );

            dassert( response.isValid( &errMsg ) );
            result.appendElements( response.toBSON() );

            // TODO
            // There's a pending issue about how to report response here. If we use
            // the command infra-structure, we should reuse the 'errmsg' field. But
            // we have already filed that message inside the BatchCommandResponse.
            // return response.getOk();
            return true;
        }

        // Note that this is a runCommmand, and therefore, the database and the collection name
        // are in different parts of the grammar for the command. But it's more convenient to
        // work with a NamespaceString. We built it here and replace it in the parsed command.
        // Internally, everything work with the namespace string as opposed to just the
        // collection name.
        NamespaceString nss(dbName, request.getNS());
        request.setNS(nss.ns());

        Status status = userAllowedWriteNS( nss );
        if ( !status.isOK() )
            return appendCommandStatus( result, status );

        if ( cc().curop() )
            cc().curop()->setNS( nss.ns() );

        if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) {
            // check all docs
            BatchedInsertRequest* insertRequest = request.getInsertRequest();
            vector<BSONObj>& docsToInsert = insertRequest->getDocuments();
            for ( size_t i = 0; i < docsToInsert.size(); i++ ) {
                StatusWith<BSONObj> fixed = fixDocumentForInsert( docsToInsert[i] );
                if ( !fixed.isOK() ) {
                    // we don't return early since each doc can be handled independantly
                    continue;
                }
                if ( fixed.getValue().isEmpty() ) {
                    continue;
                }
                docsToInsert[i] = fixed.getValue();
            }
        }

        BSONObj defaultWriteConcern;
        // This is really bad - it's only safe because we leak the defaults by overriding them with
        // new defaults and because we never reset to an empty default.
        // TODO: fix this for sane behavior where we query repl set object
        if ( getLastErrorDefault ) defaultWriteConcern = *getLastErrorDefault;
        if ( defaultWriteConcern.isEmpty() ) {
            BSONObjBuilder b;
            b.append( "w", 1 );
            defaultWriteConcern = b.obj();
        }

        WriteBatchExecutor writeBatchExecutor(defaultWriteConcern,
                                              &cc(),
                                              &globalOpCounters,
                                              lastError.get());

        writeBatchExecutor.executeBatch( request, &response );

        result.appendElements( response.toBSON() );

        // TODO
        // There's a pending issue about how to report response here. If we use
        // the command infra-structure, we should reuse the 'errmsg' field. But
        // we have already filed that message inside the BatchCommandResponse.
        // return response.getOk();
        return true;
    }
        /**
         * Generates the next batch of results for a ClientCursor.
         *
         * TODO: Do we need to support some equivalent of OP_REPLY responseFlags?
         *
         * TODO: Is it possible to support awaitData?
         */
        bool run(OperationContext* txn,
                 const std::string& dbname,
                 BSONObj& cmdObj,
                 int options,
                 std::string& errmsg,
                 BSONObjBuilder& result) override {
            // Counted as a getMore, not as a command.
            globalOpCounters.gotGetMore();

            if (txn->getClient()->isInDirectClient()) {
                return appendCommandStatus(result,
                                           Status(ErrorCodes::IllegalOperation,
                                                  "Cannot run getMore command from eval()"));
            }

            StatusWith<GetMoreRequest> parseStatus = GetMoreRequest::parseFromBSON(dbname, cmdObj);
            if (!parseStatus.isOK()) {
                return appendCommandStatus(result, parseStatus.getStatus());
            }
            const GetMoreRequest& request = parseStatus.getValue();

            // Depending on the type of cursor being operated on, we hold locks for the whole
            // getMore, or none of the getMore, or part of the getMore.  The three cases in detail:
            //
            // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore.
            // 2) Cursor owned by global cursor manager: we don't lock anything.  These cursors
            //    don't own any collection state.
            // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and
            //    "unpinCollLock".  This is because agg cursors handle locking internally (hence the
            //    release), but the pin and unpin of the cursor must occur under the collection
            //    lock. We don't use our AutoGetCollectionForRead "ctx" to relock, because
            //    AutoGetCollectionForRead checks the sharding version (and we want the relock for
            //    the unpin to succeed even if the sharding version has changed).
            //
            // Note that we declare our locks before our ClientCursorPin, in order to ensure that
            // the pin's destructor is called before the lock destructors (so that the unpin occurs
            // under the lock).
            std::unique_ptr<AutoGetCollectionForRead> ctx;
            std::unique_ptr<Lock::DBLock> unpinDBLock;
            std::unique_ptr<Lock::CollectionLock> unpinCollLock;

            CursorManager* cursorManager;
            CursorManager* globalCursorManager = CursorManager::getGlobalCursorManager();
            if (globalCursorManager->ownsCursorId(request.cursorid)) {
                cursorManager = globalCursorManager;
            }
            else {
                ctx.reset(new AutoGetCollectionForRead(txn, request.nss));
                Collection* collection = ctx->getCollection();
                if (!collection) {
                    return appendCommandStatus(result,
                                               Status(ErrorCodes::OperationFailed,
                                                      "collection dropped between getMore calls"));
                }
                cursorManager = collection->getCursorManager();
            }

            ClientCursorPin ccPin(cursorManager, request.cursorid);
            ClientCursor* cursor = ccPin.c();
            if (!cursor) {
                // We didn't find the cursor.
                return appendCommandStatus(result, Status(ErrorCodes::CursorNotFound, str::stream()
                    << "Cursor not found, cursor id: " << request.cursorid));
            }

            if (request.nss.ns() != cursor->ns()) {
                return appendCommandStatus(result, Status(ErrorCodes::Unauthorized, str::stream()
                    << "Requested getMore on namespace '" << request.nss.ns()
                    << "', but cursor belongs to a different namespace"));
            }

            // On early return, get rid of the the cursor.
            ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, ccPin);

            if (!cursor->hasRecoveryUnit()) {
                // Start using a new RecoveryUnit.
                cursor->setOwnedRecoveryUnit(
                    getGlobalServiceContext()->getGlobalStorageEngine()->newRecoveryUnit());
            }

            // Swap RecoveryUnit(s) between the ClientCursor and OperationContext.
            ScopedRecoveryUnitSwapper ruSwapper(cursor, txn);

            // Reset timeout timer on the cursor since the cursor is still in use.
            cursor->setIdleTime(0);

            // If the operation that spawned this cursor had a time limit set, apply leftover
            // time to this getmore.
            txn->getCurOp()->setMaxTimeMicros(cursor->getLeftoverMaxTimeMicros());
            txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

            if (cursor->isAggCursor()) {
                // Agg cursors handle their own locking internally.
                ctx.reset(); // unlocks
            }

            PlanExecutor* exec = cursor->getExecutor();
            exec->restoreState(txn);

            // TODO: Handle result sets larger than 16MB.
            BSONArrayBuilder nextBatch;
            BSONObj obj;
            PlanExecutor::ExecState state;
            int numResults = 0;
            while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
                // Add result to output buffer.
                nextBatch.append(obj);
                numResults++;

                if (enoughForGetMore(request.batchSize, numResults, nextBatch.len())) {
                    break;
                }
            }

            // If we are operating on an aggregation cursor, then we dropped our collection lock
            // earlier and need to reacquire it in order to clean up our ClientCursorPin.
            //
            // TODO: We need to ensure that this relock happens if we release the pin above in
            // response to PlanExecutor::getNext() throwing an exception.
            if (cursor->isAggCursor()) {
                invariant(NULL == ctx.get());
                unpinDBLock.reset(new Lock::DBLock(txn->lockState(), request.nss.db(), MODE_IS));
                unpinCollLock.reset(
                    new Lock::CollectionLock(txn->lockState(), request.nss.ns(), MODE_IS));
            }

            // Fail the command if the PlanExecutor reports execution failure.
            if (PlanExecutor::FAILURE == state) {
                const std::unique_ptr<PlanStageStats> stats(exec->getStats());
                error() << "GetMore executor error, stats: " << Explain::statsToBSON(*stats);
                return appendCommandStatus(result,
                                           Status(ErrorCodes::OperationFailed,
                                                  str::stream() << "GetMore executor error: "
                                                  << WorkingSetCommon::toStatusString(obj)));
            }

            CursorId respondWithId = 0;
            if (shouldSaveCursorGetMore(state, exec, isCursorTailable(cursor))) {
                respondWithId = request.cursorid;

                exec->saveState();

                cursor->setLeftoverMaxTimeMicros(txn->getCurOp()->getRemainingMaxTimeMicros());
                cursor->incPos(numResults);

                if (isCursorTailable(cursor) && state == PlanExecutor::IS_EOF) {
                    // Rather than swapping their existing RU into the client cursor, tailable
                    // cursors should get a new recovery unit.
                    ruSwapper.dismiss();
                }
            }
            else {
                txn->getCurOp()->debug().cursorExhausted = true;
            }

            appendGetMoreResponseObject(respondWithId, request.nss.ns(), nextBatch.arr(), &result);
            if (respondWithId) {
                cursorFreer.Dismiss();
            }
            return true;
        }
Esempio n. 13
0
    /**
     * Runs a query using the following steps:
     *   --Parsing.
     *   --Acquire locks.
     *   --Plan query, obtaining an executor that can run it.
     *   --Generate the first batch.
     *   --Save state for getMore, transferring ownership of the executor to a ClientCursor.
     *   --Generate response to send to the client.
     */
    bool run(OperationContext* txn,
             const std::string& dbname,
             BSONObj& cmdObj,
             int options,
             std::string& errmsg,
             BSONObjBuilder& result) override {
        const NamespaceString nss(parseNs(dbname, cmdObj));
        if (!nss.isValid() || nss.isCommand() || nss.isSpecialCommand()) {
            return appendCommandStatus(result,
                                       {ErrorCodes::InvalidNamespace,
                                        str::stream() << "Invalid collection name: " << nss.ns()});
        }

        // Although it is a command, a find command gets counted as a query.
        globalOpCounters.gotQuery();

        if (txn->getClient()->isInDirectClient()) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()"));
        }

        // Parse the command BSON to a QueryRequest.
        const bool isExplain = false;
        auto qrStatus = QueryRequest::makeFromFindCommand(nss, cmdObj, isExplain);
        if (!qrStatus.isOK()) {
            return appendCommandStatus(result, qrStatus.getStatus());
        }

        auto& qr = qrStatus.getValue();

        // Validate term before acquiring locks, if provided.
        if (auto term = qr->getReplicationTerm()) {
            auto replCoord = repl::ReplicationCoordinator::get(txn);
            Status status = replCoord->updateTerm(txn, *term);
            // Note: updateTerm returns ok if term stayed the same.
            if (!status.isOK()) {
                return appendCommandStatus(result, status);
            }
        }

        // Fill out curop information.
        //
        // We pass negative values for 'ntoreturn' and 'ntoskip' to indicate that these values
        // should be omitted from the log line. Limit and skip information is already present in the
        // find command parameters, so these fields are redundant.
        const int ntoreturn = -1;
        const int ntoskip = -1;
        beginQueryOp(txn, nss, cmdObj, ntoreturn, ntoskip);

        // Finish the parsing step by using the QueryRequest to create a CanonicalQuery.
        ExtensionsCallbackReal extensionsCallback(txn, &nss);
        auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback);
        if (!statusWithCQ.isOK()) {
            return appendCommandStatus(result, statusWithCQ.getStatus());
        }
        std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());

        // Acquire locks.
        AutoGetCollectionForRead ctx(txn, nss);
        Collection* collection = ctx.getCollection();

        // Get the execution plan for the query.
        auto statusWithPlanExecutor =
            getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO);
        if (!statusWithPlanExecutor.isOK()) {
            return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
        }

        std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

        {
            stdx::lock_guard<Client>(*txn->getClient());
            CurOp::get(txn)->setPlanSummary_inlock(Explain::getPlanSummary(exec.get()));
        }

        if (!collection) {
            // No collection. Just fill out curop indicating that there were zero results and
            // there is no ClientCursor id, and then return.
            const long long numResults = 0;
            const CursorId cursorId = 0;
            endQueryOp(txn, collection, *exec, numResults, cursorId);
            appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result);
            return true;
        }

        const QueryRequest& originalQR = exec->getCanonicalQuery()->getQueryRequest();

        // Stream query results, adding them to a BSONArray as we go.
        CursorResponseBuilder firstBatch(/*isInitialResponse*/ true, &result);
        BSONObj obj;
        PlanExecutor::ExecState state = PlanExecutor::ADVANCED;
        long long numResults = 0;
        while (!FindCommon::enoughForFirstBatch(originalQR, numResults) &&
               PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
            // If we can't fit this result inside the current batch, then we stash it for later.
            if (!FindCommon::haveSpaceForNext(obj, numResults, firstBatch.bytesUsed())) {
                exec->enqueue(obj);
                break;
            }

            // Add result to output buffer.
            firstBatch.append(obj);
            numResults++;
        }

        // Throw an assertion if query execution fails for any reason.
        if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
            firstBatch.abandon();
            error() << "Plan executor error during find command: " << PlanExecutor::statestr(state)
                    << ", stats: " << Explain::getWinningPlanStats(exec.get());

            return appendCommandStatus(result,
                                       Status(ErrorCodes::OperationFailed,
                                              str::stream()
                                                  << "Executor error during find command: "
                                                  << WorkingSetCommon::toStatusString(obj)));
        }

        // Before saving the cursor, ensure that whatever plan we established happened with the
        // expected collection version
        auto css = CollectionShardingState::get(txn, nss);
        css->checkShardVersionOrThrow(txn);

        // Set up the cursor for getMore.
        CursorId cursorId = 0;
        if (shouldSaveCursor(txn, collection, state, exec.get())) {
            // Register the execution plan inside a ClientCursor. Ownership of the PlanExecutor is
            // transferred to the ClientCursor.
            //
            // First unregister the PlanExecutor so it can be re-registered with ClientCursor.
            exec->deregisterExec();

            // Create a ClientCursor containing this plan executor. We don't have to worry about
            // leaking it as it's inserted into a global map by its ctor.
            ClientCursor* cursor =
                new ClientCursor(collection->getCursorManager(),
                                 exec.release(),
                                 nss.ns(),
                                 txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                                 originalQR.getOptions(),
                                 cmdObj.getOwned());
            cursorId = cursor->cursorid();

            invariant(!exec);
            PlanExecutor* cursorExec = cursor->getExecutor();

            // State will be restored on getMore.
            cursorExec->saveState();
            cursorExec->detachFromOperationContext();

            cursor->setLeftoverMaxTimeMicros(txn->getRemainingMaxTimeMicros());
            cursor->setPos(numResults);

            // Fill out curop based on the results.
            endQueryOp(txn, collection, *cursorExec, numResults, cursorId);
        } else {
            endQueryOp(txn, collection, *exec, numResults, cursorId);
        }

        // Generate the response object to send to the client.
        firstBatch.done(cursorId, nss.ns());
        return true;
    }
Esempio n. 14
0
        bool run(OperationContext* txn,
                 const string& dbname,
                 BSONObj& cmdObj,
                 int,
                 string& errmsg,
                 BSONObjBuilder& result) {

            BSONElement first = cmdObj.firstElement();
            uassert(
                28528,
                str::stream() << "Argument to listIndexes must be of type String, not "
                              << typeName(first.type()),
                first.type() == String);
            const NamespaceString ns(parseNs(dbname, cmdObj));
            uassert(
                28529,
                str::stream() << "Argument to listIndexes must be a collection name, "
                              << "not the empty string",
                !ns.coll().empty());

            const long long defaultBatchSize = std::numeric_limits<long long>::max();
            long long batchSize;
            Status parseCursorStatus = parseCommandCursorOptions(cmdObj,
                                                                 defaultBatchSize,
                                                                 &batchSize);
            if (!parseCursorStatus.isOK()) {
                return appendCommandStatus(result, parseCursorStatus);
            }

            AutoGetCollectionForRead autoColl(txn, ns);
            if (!autoColl.getDb()) {
                return appendCommandStatus( result, Status( ErrorCodes::NamespaceNotFound,
                                                            "no database" ) );
            }

            const Collection* collection = autoColl.getCollection();
            if (!collection) {
                return appendCommandStatus( result, Status( ErrorCodes::NamespaceNotFound,
                                                            "no collection" ) );
            }

            const CollectionCatalogEntry* cce = collection->getCatalogEntry();
            invariant(cce);

            vector<string> indexNames;
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                indexNames.clear();
                cce->getAllIndexes( txn, &indexNames );
            } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

            std::auto_ptr<WorkingSet> ws(new WorkingSet());
            std::auto_ptr<QueuedDataStage> root(new QueuedDataStage(ws.get()));

            for ( size_t i = 0; i < indexNames.size(); i++ ) {
                BSONObj indexSpec;
                MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                    indexSpec = cce->getIndexSpec( txn, indexNames[i] );
                } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

                WorkingSetMember member;
                member.state = WorkingSetMember::OWNED_OBJ;
                member.keyData.clear();
                member.loc = RecordId();
                member.obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned());
                root->pushBack(member);
            }

            std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name << "."
                                                        << ns.coll();
            dassert(NamespaceString(cursorNamespace).isValid());
            dassert(NamespaceString(cursorNamespace).isListIndexesGetMore());
            dassert(ns == NamespaceString(cursorNamespace).getTargetNSForListIndexesGetMore());

            PlanExecutor* rawExec;
            Status makeStatus = PlanExecutor::make(txn,
                                                   ws.release(),
                                                   root.release(),
                                                   cursorNamespace,
                                                   PlanExecutor::YIELD_MANUAL,
                                                   &rawExec);
            std::auto_ptr<PlanExecutor> exec(rawExec);
            if (!makeStatus.isOK()) {
                return appendCommandStatus( result, makeStatus );
            }

            BSONArrayBuilder firstBatch;

            const int byteLimit = MaxBytesToReturnToClientAtOnce;
            for (long long objCount = 0;
                 objCount < batchSize && firstBatch.len() < byteLimit;
                 objCount++) {
                BSONObj next;
                PlanExecutor::ExecState state = exec->getNext(&next, NULL);
                if ( state == PlanExecutor::IS_EOF ) {
                    break;
                }
                invariant( state == PlanExecutor::ADVANCED );
                firstBatch.append(next);
            }

            CursorId cursorId = 0LL;
            if ( !exec->isEOF() ) {
                exec->saveState();
                ClientCursor* cursor = new ClientCursor(CursorManager::getGlobalCursorManager(),
                                                        exec.release(),
                                                        cursorNamespace);
                cursorId = cursor->cursorid();
            }

            appendCursorResponseObject( cursorId, cursorNamespace, firstBatch.arr(), &result );

            return true;
        }
Esempio n. 15
0
        bool run(OperationContext* txn,
                 const string& dbname,
                 BSONObj& cmdObj,
                 int,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool /*fromRepl*/) {

            BSONElement first = cmdObj.firstElement();
            uassert(
                28528,
                str::stream() << "Argument to listIndexes must be of type String, not "
                              << typeName(first.type()),
                first.type() == String);
            const NamespaceString ns(parseNs(dbname, cmdObj));
            uassert(
                28529,
                str::stream() << "Argument to listIndexes must be a collection name, "
                              << "not the empty string",
                !ns.coll().empty());

            AutoGetCollectionForRead autoColl(txn, ns);
            if (!autoColl.getDb()) {
                return appendCommandStatus( result, Status( ErrorCodes::NamespaceNotFound,
                                                            "no database" ) );
            }

            const Collection* collection = autoColl.getCollection();
            if (!collection) {
                return appendCommandStatus( result, Status( ErrorCodes::NamespaceNotFound,
                                                            "no collection" ) );
            }

            const CollectionCatalogEntry* cce = collection->getCatalogEntry();
            invariant(cce);

            vector<string> indexNames;
            cce->getAllIndexes( txn, &indexNames );

            // TODO: Handle options specified in the command request object under the "cursor"
            // field.

            // TODO: If the full result set does not fit in one batch, allocate a cursor to store
            // the remainder of the results.

            BSONArrayBuilder arr;
            for ( size_t i = 0; i < indexNames.size(); i++ ) {
                arr.append( cce->getIndexSpec( txn, indexNames[i] ) );
            }

            if ( cmdObj["cursor"].type() == mongo::Object ) {
                const long long cursorId = 0LL;
                std::string cursorNamespace = str::stream()
                    << dbname << ".$cmd." << name << "." << ns.coll();
                Command::appendCursorResponseObject( cursorId, cursorNamespace, arr.arr(),
                                                     &result );
            } else {
                result.append( "indexes", arr.arr() );
            }

            return true;
        }
Esempio n. 16
0
    /**
     * Runs a query using the following steps:
     *   1) Parsing.
     *   2) Acquire locks.
     *   3) Plan query, obtaining an executor that can run it.
     *   4) Setup a cursor for the query, which may be used on subsequent getMores.
     *   5) Generate the first batch.
     *   6) Save state for getMore.
     *   7) Generate response to send to the client.
     *
     * TODO: Rather than using the sharding version available in thread-local storage (i.e. the
     *       call to ShardingState::needCollectionMetadata() below), shard version information
     *       should be passed as part of the command parameter.
     */
    bool run(OperationContext* txn,
             const std::string& dbname,
             BSONObj& cmdObj,
             int options,
             std::string& errmsg,
             BSONObjBuilder& result) override {
        const std::string fullns = parseNs(dbname, cmdObj);
        const NamespaceString nss(fullns);
        if (!nss.isValid()) {
            return appendCommandStatus(result,
                                       {ErrorCodes::InvalidNamespace,
                                        str::stream() << "Invalid collection name: " << nss.ns()});
        }

        // Although it is a command, a find command gets counted as a query.
        globalOpCounters.gotQuery();

        if (txn->getClient()->isInDirectClient()) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()"));
        }

        // 1a) Parse the command BSON to a LiteParsedQuery.
        const bool isExplain = false;
        auto lpqStatus = LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain);
        if (!lpqStatus.isOK()) {
            return appendCommandStatus(result, lpqStatus.getStatus());
        }

        auto& lpq = lpqStatus.getValue();

        // Validate term, if provided.
        if (auto term = lpq->getReplicationTerm()) {
            auto replCoord = repl::ReplicationCoordinator::get(txn);
            Status status = replCoord->updateTerm(*term);
            // Note: updateTerm returns ok if term stayed the same.
            if (!status.isOK()) {
                return appendCommandStatus(result, status);
            }
        }

        // Fill out curop information.
        long long ntoreturn = lpq->getBatchSize().value_or(0);
        beginQueryOp(txn, nss, cmdObj, ntoreturn, lpq->getSkip());

        // 1b) Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery.
        WhereCallbackReal whereCallback(txn, nss.db());
        auto statusWithCQ = CanonicalQuery::canonicalize(lpq.release(), whereCallback);
        if (!statusWithCQ.isOK()) {
            return appendCommandStatus(result, statusWithCQ.getStatus());
        }
        std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());

        // 2) Acquire locks.
        AutoGetCollectionForRead ctx(txn, nss);
        Collection* collection = ctx.getCollection();

        const int dbProfilingLevel =
            ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile;

        ShardingState* const shardingState = ShardingState::get(txn);

        // It is possible that the sharding version will change during yield while we are
        // retrieving a plan executor. If this happens we will throw an error and mongos will
        // retry.
        const ChunkVersion shardingVersionAtStart = shardingState->getVersion(nss.ns());

        // 3) Get the execution plan for the query.
        auto statusWithPlanExecutor =
            getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO);
        if (!statusWithPlanExecutor.isOK()) {
            return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
        }

        std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

        // TODO: Currently, chunk ranges are kept around until all ClientCursors created while
        // the chunk belonged on this node are gone. Separating chunk lifetime management from
        // ClientCursor should allow this check to go away.
        if (!shardingState->getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
            // Version changed while retrieving a PlanExecutor. Terminate the operation,
            // signaling that mongos should retry.
            throw SendStaleConfigException(nss.ns(),
                                           "version changed during find command",
                                           shardingVersionAtStart,
                                           shardingState->getVersion(nss.ns()));
        }

        if (!collection) {
            // No collection. Just fill out curop indicating that there were zero results and
            // there is no ClientCursor id, and then return.
            const long long numResults = 0;
            const CursorId cursorId = 0;
            endQueryOp(txn, *exec, dbProfilingLevel, numResults, cursorId);
            appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result);
            return true;
        }

        const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

        // 4) If possible, register the execution plan inside a ClientCursor, and pin that
        // cursor. In this case, ownership of the PlanExecutor is transferred to the
        // ClientCursor, and 'exec' becomes null.
        //
        // First unregister the PlanExecutor so it can be re-registered with ClientCursor.
        exec->deregisterExec();

        // Create a ClientCursor containing this plan executor. We don't have to worry
        // about leaking it as it's inserted into a global map by its ctor.
        ClientCursor* cursor =
            new ClientCursor(collection->getCursorManager(),
                             exec.release(),
                             nss.ns(),
                             txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                             pq.getOptions(),
                             pq.getFilter());
        CursorId cursorId = cursor->cursorid();
        ClientCursorPin ccPin(collection->getCursorManager(), cursorId);

        // On early return, get rid of the the cursor.
        ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, ccPin);

        invariant(!exec);
        PlanExecutor* cursorExec = cursor->getExecutor();

        // 5) Stream query results, adding them to a BSONArray as we go.
        BSONArrayBuilder firstBatch;
        BSONObj obj;
        PlanExecutor::ExecState state;
        long long numResults = 0;
        while (!enoughForFirstBatch(pq, numResults, firstBatch.len()) &&
               PlanExecutor::ADVANCED == (state = cursorExec->getNext(&obj, NULL))) {
            // If adding this object will cause us to exceed the BSON size limit, then we stash
            // it for later.
            if (firstBatch.len() + obj.objsize() > BSONObjMaxUserSize && numResults > 0) {
                cursorExec->enqueue(obj);
                break;
            }

            // Add result to output buffer.
            firstBatch.append(obj);
            numResults++;
        }

        // Throw an assertion if query execution fails for any reason.
        if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
            const std::unique_ptr<PlanStageStats> stats(cursorExec->getStats());
            error() << "Plan executor error during find command: " << PlanExecutor::statestr(state)
                    << ", stats: " << Explain::statsToBSON(*stats);

            return appendCommandStatus(result,
                                       Status(ErrorCodes::OperationFailed,
                                              str::stream()
                                                  << "Executor error during find command: "
                                                  << WorkingSetCommon::toStatusString(obj)));
        }

        // 6) Set up the cursor for getMore.
        if (shouldSaveCursor(txn, collection, state, cursorExec)) {
            // State will be restored on getMore.
            cursorExec->saveState();
            cursorExec->detachFromOperationContext();

            cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros());
            cursor->setPos(numResults);
        } else {
            cursorId = 0;
        }

        // Fill out curop based on the results.
        endQueryOp(txn, *cursorExec, dbProfilingLevel, numResults, cursorId);

        // 7) Generate the response object to send to the client.
        appendCursorResponseObject(cursorId, nss.ns(), firstBatch.arr(), &result);
        if (cursorId) {
            cursorFreer.Dismiss();
        }
        return true;
    }
Esempio n. 17
0
        bool run(OperationContext* txn,
                 const std::string& dbname,
                 BSONObj& cmdObj,
                 int options,
                 std::string& errmsg,
                 BSONObjBuilder& result) override {
            // Counted as a getMore, not as a command.
            globalOpCounters.gotGetMore();

            if (txn->getClient()->isInDirectClient()) {
                return appendCommandStatus(result,
                                           Status(ErrorCodes::IllegalOperation,
                                                  "Cannot run getMore command from eval()"));
            }

            StatusWith<GetMoreRequest> parseStatus = GetMoreRequest::parseFromBSON(dbname, cmdObj);
            if (!parseStatus.isOK()) {
                return appendCommandStatus(result, parseStatus.getStatus());
            }
            const GetMoreRequest& request = parseStatus.getValue();

            // Depending on the type of cursor being operated on, we hold locks for the whole
            // getMore, or none of the getMore, or part of the getMore.  The three cases in detail:
            //
            // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore.
            // 2) Cursor owned by global cursor manager: we don't lock anything.  These cursors
            //    don't own any collection state.
            // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and
            //    "unpinCollLock".  This is because agg cursors handle locking internally (hence the
            //    release), but the pin and unpin of the cursor must occur under the collection
            //    lock. We don't use our AutoGetCollectionForRead "ctx" to relock, because
            //    AutoGetCollectionForRead checks the sharding version (and we want the relock for
            //    the unpin to succeed even if the sharding version has changed).
            //
            // Note that we declare our locks before our ClientCursorPin, in order to ensure that
            // the pin's destructor is called before the lock destructors (so that the unpin occurs
            // under the lock).
            std::unique_ptr<AutoGetCollectionForRead> ctx;
            std::unique_ptr<Lock::DBLock> unpinDBLock;
            std::unique_ptr<Lock::CollectionLock> unpinCollLock;

            CursorManager* cursorManager;
            CursorManager* globalCursorManager = CursorManager::getGlobalCursorManager();
            if (globalCursorManager->ownsCursorId(request.cursorid)) {
                cursorManager = globalCursorManager;
            }
            else {
                ctx.reset(new AutoGetCollectionForRead(txn, request.nss));
                Collection* collection = ctx->getCollection();
                if (!collection) {
                    return appendCommandStatus(result,
                                               Status(ErrorCodes::OperationFailed,
                                                      "collection dropped between getMore calls"));
                }
                cursorManager = collection->getCursorManager();
            }

            ClientCursorPin ccPin(cursorManager, request.cursorid);
            ClientCursor* cursor = ccPin.c();
            if (!cursor) {
                // We didn't find the cursor.
                return appendCommandStatus(result, Status(ErrorCodes::CursorNotFound, str::stream()
                    << "Cursor not found, cursor id: " << request.cursorid));
            }

            if (request.nss.ns() != cursor->ns()) {
                return appendCommandStatus(result, Status(ErrorCodes::Unauthorized, str::stream()
                    << "Requested getMore on namespace '" << request.nss.ns()
                    << "', but cursor belongs to a different namespace"));
            }

            const bool hasOwnMaxTime = CurOp::get(txn)->isMaxTimeSet();

            // Validation related to awaitData.
            if (isCursorAwaitData(cursor)) {
                invariant(isCursorTailable(cursor));

                if (!hasOwnMaxTime) {
                    Status status(ErrorCodes::BadValue,
                                  str::stream() << "Must set maxTimeMS on a getMore if the initial "
                                                << "query had 'awaitData' set: " << cmdObj);
                    return appendCommandStatus(result, status);
                }

                if (cursor->isAggCursor()) {
                    Status status(ErrorCodes::BadValue,
                                  "awaitData cannot be set on an aggregation cursor");
                    return appendCommandStatus(result, status);
                }
            }

            // On early return, get rid of the cursor.
            ScopeGuard cursorFreer = MakeGuard(&GetMoreCmd::cleanupCursor, txn, &ccPin, request);

            if (!cursor->hasRecoveryUnit()) {
                // Start using a new RecoveryUnit.
                cursor->setOwnedRecoveryUnit(
                    getGlobalServiceContext()->getGlobalStorageEngine()->newRecoveryUnit());
            }

            // Swap RecoveryUnit(s) between the ClientCursor and OperationContext.
            ScopedRecoveryUnitSwapper ruSwapper(cursor, txn);

            // Reset timeout timer on the cursor since the cursor is still in use.
            cursor->setIdleTime(0);

            // If there is no time limit set directly on this getMore command, but the operation
            // that spawned this cursor had a time limit set, then we have to apply any leftover
            // time to this getMore.
            if (!hasOwnMaxTime) {
                CurOp::get(txn)->setMaxTimeMicros(cursor->getLeftoverMaxTimeMicros());
            }
            txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

            if (cursor->isAggCursor()) {
                // Agg cursors handle their own locking internally.
                ctx.reset(); // unlocks
            }

            PlanExecutor* exec = cursor->getExecutor();
            exec->restoreState(txn);

            // If we're tailing a capped collection, retrieve a monotonically increasing insert
            // counter.
            uint64_t lastInsertCount = 0;
            if (isCursorAwaitData(cursor)) {
                invariant(ctx->getCollection()->isCapped());
                lastInsertCount = ctx->getCollection()->getCappedInsertNotifier()->getCount();
            }

            CursorId respondWithId = 0;
            BSONArrayBuilder nextBatch;
            BSONObj obj;
            PlanExecutor::ExecState state;
            int numResults = 0;
            Status batchStatus = generateBatch(cursor, request, &nextBatch, &state, &numResults);
            if (!batchStatus.isOK()) {
                return appendCommandStatus(result, batchStatus);
            }

            // If this is an await data cursor, and we hit EOF without generating any results, then
            // we block waiting for new oplog data to arrive.
            if (isCursorAwaitData(cursor) && state == PlanExecutor::IS_EOF && numResults == 0) {
                // Retrieve the notifier which we will wait on until new data arrives. We make sure
                // to do this in the lock because once we drop the lock it is possible for the
                // collection to become invalid. The notifier itself will outlive the collection if
                // the collection is dropped, as we keep a shared_ptr to it.
                auto notifier = ctx->getCollection()->getCappedInsertNotifier();

                // Save the PlanExecutor and drop our locks.
                exec->saveState();
                ctx.reset();

                // Block waiting for data.
                Microseconds timeout(CurOp::get(txn)->getRemainingMaxTimeMicros());
                notifier->waitForInsert(lastInsertCount, timeout);
                notifier.reset();

                ctx.reset(new AutoGetCollectionForRead(txn, request.nss));
                exec->restoreState(txn);

                // We woke up because either the timed_wait expired, or there was more data. Either
                // way, attempt to generate another batch of results.
                batchStatus = generateBatch(cursor, request, &nextBatch, &state, &numResults);
                if (!batchStatus.isOK()) {
                    return appendCommandStatus(result, batchStatus);
                }
            }

            if (shouldSaveCursorGetMore(state, exec, isCursorTailable(cursor))) {
                respondWithId = request.cursorid;

                exec->saveState();

                // If maxTimeMS was set directly on the getMore rather than being rolled over
                // from a previous find, then don't roll remaining micros over to the next
                // getMore.
                if (!hasOwnMaxTime) {
                    cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros());
                }

                cursor->incPos(numResults);

                if (isCursorTailable(cursor) && state == PlanExecutor::IS_EOF) {
                    // Rather than swapping their existing RU into the client cursor, tailable
                    // cursors should get a new recovery unit.
                    ruSwapper.dismiss();
                }
            }
            else {
                CurOp::get(txn)->debug().cursorExhausted = true;
            }

            appendGetMoreResponseObject(respondWithId, request.nss.ns(), nextBatch.arr(), &result);

            if (respondWithId) {
                cursorFreer.Dismiss();

                // If we are operating on an aggregation cursor, then we dropped our collection lock
                // earlier and need to reacquire it in order to clean up our ClientCursorPin.
                if (cursor->isAggCursor()) {
                    invariant(NULL == ctx.get());
                    unpinDBLock.reset(
                        new Lock::DBLock(txn->lockState(), request.nss.db(), MODE_IS));
                    unpinCollLock.reset(
                        new Lock::CollectionLock(txn->lockState(), request.nss.ns(), MODE_IS));
                }
            }

            return true;
        }
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int options,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {

            const std::string ns = parseNsCollectionRequired(dbname, cmdObj);

            const BSONObj query = cmdObj.getObjectField("query");
            const BSONObj fields = cmdObj.getObjectField("fields");
            const BSONObj update = cmdObj.getObjectField("update");
            const BSONObj sort = cmdObj.getObjectField("sort");
            
            bool upsert = cmdObj["upsert"].trueValue();
            bool returnNew = cmdObj["new"].trueValue();
            bool remove = cmdObj["remove"].trueValue();

            if ( remove ) {
                if ( upsert ) {
                    errmsg = "remove and upsert can't co-exist";
                    return false;
                }
                if ( !update.isEmpty() ) {
                    errmsg = "remove and update can't co-exist";
                    return false;
                }
                if ( returnNew ) {
                    errmsg = "remove and returnNew can't co-exist";
                    return false;
                }
            }
            else if ( !cmdObj.hasField("update") ) {
                errmsg = "need remove or update";
                return false;
            }

            bool ok = false;
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                errmsg = "";

                // We can always retry because we only ever modify one document
                ok = runImpl(txn,
                             dbname,
                             ns,
                             query,
                             fields,
                             update,
                             sort,
                             upsert,
                             returnNew,
                             remove,
                             result,
                             errmsg);
            } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "findAndModify", ns);

            if ( !ok && errmsg == "no-collection" ) {
                // Take X lock so we can create collection, then re-run operation.
                ScopedTransaction transaction(txn, MODE_IX);
                Lock::DBLock lk(txn->lockState(), dbname, MODE_X);
                Client::Context ctx(txn, ns, false /* don't check version */);
                if (!fromRepl &&
                    !repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) {
                    return appendCommandStatus(result, Status(ErrorCodes::NotMaster, str::stream()
                        << "Not primary while creating collection " << ns
                        << " during findAndModify"));
                }
                Database* db = ctx.db();
                if ( db->getCollection( ns ) ) {
                    // someone else beat us to it, that's ok
                    // we might race while we unlock if someone drops
                    // but that's ok, we'll just do nothing and error out
                }
                else {
                    MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                        WriteUnitOfWork wuow(txn);
                        uassertStatusOK( userCreateNS( txn, db,
                                                       ns, BSONObj(),
                                                       !fromRepl ) );
                        wuow.commit();
                    } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "findAndModify", ns);
                }

                errmsg = "";
                ok = runImpl(txn,
                             dbname,
                             ns,
                             query,
                             fields,
                             update,
                             sort,
                             upsert,
                             returnNew,
                             remove,
                             result,
                             errmsg);
            }
Esempio n. 19
0
    virtual bool run(OperationContext* txn,
                     const string& dbname,
                     BSONObj& cmdObj,
                     int options,
                     string& errmsg,
                     BSONObjBuilder& result) {
        // ---  parse

        NamespaceString ns(dbname, cmdObj[name].String());
        Status status = userAllowedWriteNS(ns);
        if (!status.isOK())
            return appendCommandStatus(result, status);

        if (cmdObj["indexes"].type() != Array) {
            errmsg = "indexes has to be an array";
            result.append("cmdObj", cmdObj);
            return false;
        }

        std::vector<BSONObj> specs;
        {
            BSONObjIterator i(cmdObj["indexes"].Obj());
            while (i.more()) {
                BSONElement e = i.next();
                if (e.type() != Object) {
                    errmsg = "everything in indexes has to be an Object";
                    result.append("cmdObj", cmdObj);
                    return false;
                }
                specs.push_back(e.Obj());
            }
        }

        if (specs.size() == 0) {
            errmsg = "no indexes to add";
            return false;
        }

        // check specs
        for (size_t i = 0; i < specs.size(); i++) {
            BSONObj spec = specs[i];
            if (spec["ns"].eoo()) {
                spec = _addNsToSpec(ns, spec);
                specs[i] = spec;
            }

            if (spec["ns"].type() != String) {
                errmsg = "ns field must be a string";
                result.append("spec", spec);
                return false;
            }

            std::string nsFromUser = spec["ns"].String();
            if (nsFromUser.empty()) {
                errmsg = "ns field cannot be an empty string";
                result.append("spec", spec);
                return false;
            }

            if (ns != nsFromUser) {
                errmsg = str::stream() << "value of ns field '" << nsFromUser
                                       << "' doesn't match namespace " << ns.ns();
                result.append("spec", spec);
                return false;
            }
        }

        // now we know we have to create index(es)
        // Note: createIndexes command does not currently respect shard versioning.
        ScopedTransaction transaction(txn, MODE_IX);
        Lock::DBLock dbLock(txn->lockState(), ns.db(), MODE_X);
        if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::NotMaster,
                       str::stream() << "Not primary while creating indexes in " << ns.ns()));
        }

        Database* db = dbHolder().get(txn, ns.db());
        if (!db) {
            db = dbHolder().openDb(txn, ns.db());
        }

        Collection* collection = db->getCollection(ns.ns());
        if (collection) {
            result.appendBool("createdCollectionAutomatically", false);
        } else {
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                WriteUnitOfWork wunit(txn);
                collection = db->createCollection(txn, ns.ns(), CollectionOptions());
                invariant(collection);
                wunit.commit();
            }
            MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());
            result.appendBool("createdCollectionAutomatically", true);
        }

        const int numIndexesBefore = collection->getIndexCatalog()->numIndexesTotal(txn);
        result.append("numIndexesBefore", numIndexesBefore);

        auto client = txn->getClient();
        ScopeGuard lastOpSetterGuard =
            MakeObjGuard(repl::ReplClientInfo::forClient(client),
                         &repl::ReplClientInfo::setLastOpToSystemLastOpTime,
                         txn);

        MultiIndexBlock indexer(txn, collection);
        indexer.allowBackgroundBuilding();
        indexer.allowInterruption();

        const size_t origSpecsSize = specs.size();
        indexer.removeExistingIndexes(&specs);

        if (specs.size() == 0) {
            result.append("numIndexesAfter", numIndexesBefore);
            result.append("note", "all indexes already exist");
            return true;
        }

        if (specs.size() != origSpecsSize) {
            result.append("note", "index already exists");
        }

        for (size_t i = 0; i < specs.size(); i++) {
            const BSONObj& spec = specs[i];
            if (spec["unique"].trueValue()) {
                status = checkUniqueIndexConstraints(txn, ns.ns(), spec["key"].Obj());

                if (!status.isOK()) {
                    return appendCommandStatus(result, status);
                }
            }
            if (spec["v"].isNumber() && spec["v"].numberInt() == 0) {
                return appendCommandStatus(
                    result,
                    Status(ErrorCodes::CannotCreateIndex,
                           str::stream() << "illegal index specification: " << spec << ". "
                                         << "The option v:0 cannot be passed explicitly"));
            }
        }

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            uassertStatusOK(indexer.init(specs));
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());

        // If we're a background index, replace exclusive db lock with an intent lock, so that
        // other readers and writers can proceed during this phase.
        if (indexer.getBuildInBackground()) {
            txn->recoveryUnit()->abandonSnapshot();
            dbLock.relockWithMode(MODE_IX);
            if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
                return appendCommandStatus(
                    result,
                    Status(ErrorCodes::NotMaster,
                           str::stream() << "Not primary while creating background indexes in "
                                         << ns.ns()));
            }
        }

        try {
            Lock::CollectionLock colLock(txn->lockState(), ns.ns(), MODE_IX);
            uassertStatusOK(indexer.insertAllDocumentsInCollection());
        } catch (const DBException& e) {
            invariant(e.getCode() != ErrorCodes::WriteConflict);
            // Must have exclusive DB lock before we clean up the index build via the
            // destructor of 'indexer'.
            if (indexer.getBuildInBackground()) {
                try {
                    // This function cannot throw today, but we will preemptively prepare for
                    // that day, to avoid data corruption due to lack of index cleanup.
                    txn->recoveryUnit()->abandonSnapshot();
                    dbLock.relockWithMode(MODE_X);
                    if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
                        return appendCommandStatus(
                            result,
                            Status(ErrorCodes::NotMaster,
                                   str::stream()
                                       << "Not primary while creating background indexes in "
                                       << ns.ns() << ": cleaning up index build failure due to "
                                       << e.toString()));
                    }
                } catch (...) {
                    std::terminate();
                }
            }
            throw;
        }
        // Need to return db lock back to exclusive, to complete the index build.
        if (indexer.getBuildInBackground()) {
            txn->recoveryUnit()->abandonSnapshot();
            dbLock.relockWithMode(MODE_X);
            uassert(ErrorCodes::NotMaster,
                    str::stream() << "Not primary while completing index build in " << dbname,
                    repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns));

            Database* db = dbHolder().get(txn, ns.db());
            uassert(28551, "database dropped during index build", db);
            uassert(28552, "collection dropped during index build", db->getCollection(ns.ns()));
        }

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            WriteUnitOfWork wunit(txn);

            indexer.commit();

            for (size_t i = 0; i < specs.size(); i++) {
                std::string systemIndexes = ns.getSystemIndexesCollection();
                getGlobalServiceContext()->getOpObserver()->onCreateIndex(
                    txn, systemIndexes, specs[i]);
            }

            wunit.commit();
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());

        result.append("numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal(txn));

        lastOpSetterGuard.Dismiss();

        return true;
    }
Esempio n. 20
0
        bool wrappedRun(OperationContext* txn,
                        const string& dbname,
                        BSONObj& jsobj,
                        string& errmsg,
                        BSONObjBuilder& anObjBuilder) {
            const std::string coll = jsobj.firstElement().valuestrsafe();
            if (coll.empty()) {
                errmsg = "no collection name specified";
                return false;
            }

            const std::string toDeleteNs = dbname + '.' + coll;
            if (!serverGlobalParams.quiet) {
                LOG(0) << "CMD: dropIndexes " << toDeleteNs << endl;
            }

            Client::Context ctx(txn, toDeleteNs);
            Database* db = ctx.db();

            Collection* collection = db->getCollection( txn, toDeleteNs );
            if ( ! collection ) {
                errmsg = "ns not found";
                return false;
            }

            stopIndexBuilds(txn, db, jsobj);

            IndexCatalog* indexCatalog = collection->getIndexCatalog();
            anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal(txn) );


            BSONElement f = jsobj.getField("index");
            if ( f.type() == String ) {

                string indexToDelete = f.valuestr();

                if ( indexToDelete == "*" ) {
                    Status s = indexCatalog->dropAllIndexes(txn, false);
                    if ( !s.isOK() ) {
                        appendCommandStatus( anObjBuilder, s );
                        return false;
                    }
                    anObjBuilder.append("msg", "non-_id indexes dropped for collection");
                    return true;
                }

                IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( txn,
                                                                                        indexToDelete );
                if ( desc == NULL ) {
                    errmsg = str::stream() << "index not found with name [" << indexToDelete << "]";
                    return false;
                }

                if ( desc->isIdIndex() ) {
                    errmsg = "cannot drop _id index";
                    return false;
                }

                Status s = indexCatalog->dropIndex(txn, desc);
                if ( !s.isOK() ) {
                    appendCommandStatus( anObjBuilder, s );
                    return false;
                }

                return true;
            }

            if ( f.type() == Object ) {
                IndexDescriptor* desc =
                    collection->getIndexCatalog()->findIndexByKeyPattern( txn, f.embeddedObject() );
                if ( desc == NULL ) {
                    errmsg = "can't find index with key:";
                    errmsg += f.embeddedObject().toString();
                    return false;
                }

                if ( desc->isIdIndex() ) {
                    errmsg = "cannot drop _id index";
                    return false;
                }

                Status s = indexCatalog->dropIndex(txn, desc);
                if ( !s.isOK() ) {
                    appendCommandStatus( anObjBuilder, s );
                    return false;
                }

                return true;
            }

            errmsg = "invalid index name spec";
            return false;
        }
Esempio n. 21
0
        static bool runImpl(OperationContext* txn,
                            const string& dbname,
                            const string& ns,
                            const BSONObj& query,
                            const BSONObj& fields,
                            const BSONObj& update,
                            const BSONObj& sort,
                            bool upsert,
                            bool returnNew,
                            bool remove ,
                            BSONObjBuilder& result,
                            string& errmsg) {

            AutoGetOrCreateDb autoDb(txn, dbname, MODE_IX);
            Lock::CollectionLock collLock(txn->lockState(), ns, MODE_IX);
            Client::Context ctx(txn, ns, autoDb.getDb(), autoDb.justCreated());

            if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) {
                return appendCommandStatus(result, Status(ErrorCodes::NotMaster, str::stream()
                    << "Not primary while running findAndModify in " << ns));
            }

            Collection* collection = ctx.db()->getCollection(ns);

            const WhereCallbackReal whereCallback(txn, StringData(ns));

            if ( !collection ) {
                if ( !upsert ) {
                    // no collectio and no upsert, so can't possible do anything
                    _appendHelper( result, BSONObj(), false, fields, whereCallback );
                    return true;
                }
                // no collection, but upsert, so we want to create it
                // problem is we only have IX on db and collection :(
                // so we tell our caller who can do it
                errmsg = "no-collection";
                return false;
            }

            Snapshotted<BSONObj> snapshotDoc;
            RecordId loc;
            bool found = false;
            {
                CanonicalQuery* cq;
                const BSONObj projection;
                const long long skip = 0;
                const long long limit = -1; // 1 document requested; negative indicates hard limit.
                uassertStatusOK(CanonicalQuery::canonicalize(ns,
                                                             query,
                                                             sort,
                                                             projection,
                                                             skip,
                                                             limit,
                                                             &cq,
                                                             whereCallback));

                PlanExecutor* rawExec;
                uassertStatusOK(getExecutor(txn,
                                            collection,
                                            cq,
                                            PlanExecutor::YIELD_AUTO,
                                            &rawExec,
                                            QueryPlannerParams::DEFAULT));

                scoped_ptr<PlanExecutor> exec(rawExec);

                PlanExecutor::ExecState state = exec->getNextSnapshotted(&snapshotDoc, &loc);
                if (PlanExecutor::ADVANCED == state) {
                    found = true;
                }
                else if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
                    if (PlanExecutor::FAILURE == state &&
                        WorkingSetCommon::isValidStatusMemberObject(snapshotDoc.value())) {
                        const Status errorStatus =
                            WorkingSetCommon::getMemberObjectStatus(snapshotDoc.value());
                        invariant(!errorStatus.isOK());
                        uasserted(errorStatus.code(), errorStatus.reason());
                    }
                    uasserted(ErrorCodes::OperationFailed,
                              str::stream() << "executor returned " << PlanExecutor::statestr(state)
                                            << " while finding document to update");
                }
                else {
                    invariant(PlanExecutor::IS_EOF == state);
                }
            }

            WriteUnitOfWork wuow(txn);
            if (found) {
                // We found a doc, but it might not be associated with the active snapshot.
                // If the doc has changed or is no longer in the collection, we will throw a
                // write conflict exception and start again from the beginning.
                if (txn->recoveryUnit()->getSnapshotId() != snapshotDoc.snapshotId()) {
                    BSONObj oldObj = snapshotDoc.value();
                    if (!collection->findDoc(txn, loc, &snapshotDoc)) {
                        // Got deleted in the new snapshot.
                        throw WriteConflictException();
                    }

                    if (!oldObj.binaryEqual(snapshotDoc.value())) {
                        // Got updated in the new snapshot.
                        throw WriteConflictException();
                    }
                }

                // If we get here without throwing, then we should have the copy of the doc from
                // the latest snapshot.
                invariant(txn->recoveryUnit()->getSnapshotId() == snapshotDoc.snapshotId());
            }

            BSONObj doc = snapshotDoc.value();
            BSONObj queryModified = query;
            if (found && !doc["_id"].eoo() && !CanonicalQuery::isSimpleIdQuery(query)) {
                // we're going to re-write the query to be more efficient
                // we have to be a little careful because of positional operators
                // maybe we can pass this all through eventually, but right now isn't an easy way
                
                bool hasPositionalUpdate = false;
                {
                    // if the update has a positional piece ($)
                    // then we need to pull all query parts in
                    // so here we check for $
                    // a little hacky
                    BSONObjIterator i( update );
                    while ( i.more() ) {
                        const BSONElement& elem = i.next();
                        
                        if ( elem.fieldName()[0] != '$' || elem.type() != Object )
                            continue;

                        BSONObjIterator j( elem.Obj() );
                        while ( j.more() ) {
                            if ( str::contains( j.next().fieldName(), ".$" ) ) {
                                hasPositionalUpdate = true;
                                break;
                            }
                        }
                    }
                }

                BSONObjBuilder b(query.objsize() + 10);
                b.append( doc["_id"] );
                
                bool addedAtomic = false;

                BSONObjIterator i(query);
                while ( i.more() ) {
                    const BSONElement& elem = i.next();

                    if ( str::equals( "_id" , elem.fieldName() ) ) {
                        // we already do _id
                        continue;
                    }
                    
                    if ( ! hasPositionalUpdate ) {
                        // if there is a dotted field, accept we may need more query parts
                        continue;
                    }
                    
                    if ( ! addedAtomic ) {
                        b.appendBool( "$atomic" , true );
                        addedAtomic = true;
                    }

                    b.append( elem );
                }

                queryModified = b.obj();
            }

            if ( remove ) {
                _appendHelper(result, doc, found, fields, whereCallback);
                if ( found ) {
                    deleteObjects(txn, ctx.db(), ns, queryModified, PlanExecutor::YIELD_MANUAL,
                                  true, true);
                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendNumber( "n" , 1 );
                    le.done();
                }
            }
            else {
                // update
                if ( ! found && ! upsert ) {
                    // didn't have it, and am not upserting
                    _appendHelper(result, doc, found, fields, whereCallback);
                }
                else {
                    // we found it or we're updating
                    
                    if ( ! returnNew ) {
                        _appendHelper(result, doc, found, fields, whereCallback);
                    }
                    
                    const NamespaceString requestNs(ns);
                    UpdateRequest request(requestNs);

                    request.setQuery(queryModified);
                    request.setUpdates(update);
                    request.setUpsert(upsert);
                    request.setUpdateOpLog();
                    request.setStoreResultDoc(returnNew);

                    request.setYieldPolicy(PlanExecutor::YIELD_MANUAL);

                    // TODO(greg) We need to send if we are ignoring
                    // the shard version below, but for now no
                    UpdateLifecycleImpl updateLifecycle(false, requestNs);
                    request.setLifecycle(&updateLifecycle);
                    UpdateResult res = mongo::update(txn,
                                                     ctx.db(),
                                                     request,
                                                     &txn->getCurOp()->debug());

                    if (!found && res.existing) {
                        // No match was found during the read part of this find and modify, which
                        // means that we're here doing an upsert. But the update also told us that
                        // we modified an *already existing* document. This probably means that
                        // the query reported EOF based on an out-of-date snapshot. This should be
                        // a rare event, so we handle it by throwing a write conflict.
                        throw WriteConflictException();
                    }

                    if ( !collection ) {
                        // collection created by an upsert
                        collection = ctx.db()->getCollection(ns);
                    }

                    LOG(3) << "update result: "  << res ;
                    if (returnNew) {
                        dassert(!res.newObj.isEmpty());
                        _appendHelper(result, res.newObj, true, fields, whereCallback);
                    }

                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendBool( "updatedExisting" , res.existing );
                    le.appendNumber( "n" , res.numMatched );
                    if ( !res.upserted.isEmpty() ) {
                        le.append( res.upserted[kUpsertedFieldName] );
                    }
                    le.done();
                }
            }

            // Committing the WUOW can close the current snapshot. Until this happens, the
            // snapshot id should not have changed.
            if (found) {
                invariant(txn->recoveryUnit()->getSnapshotId() == snapshotDoc.snapshotId());
            }
            wuow.commit();

            return true;
        }
Esempio n. 22
0
        virtual bool run( const string& dbname, BSONObj& cmdObj, int options,
                          string& errmsg, BSONObjBuilder& result,
                          bool fromRepl = false ) {

            NamespaceString ns( dbname, cmdObj[name].String() );

            Client::ReadContext ctx(ns.ns());

            Database* db = ctx.ctx().db();
            Collection* collection = db->getCollection( ns );

            if ( !collection )
                return appendCommandStatus( result,
                                            Status( ErrorCodes::NamespaceNotFound,
                                                    str::stream() <<
                                                    "ns does not exist: " << ns.ns() ) );

            size_t numCursors = static_cast<size_t>( cmdObj["numCursors"].numberInt() );

            if ( numCursors == 0 || numCursors > 10000 )
                return appendCommandStatus( result,
                                            Status( ErrorCodes::BadValue,
                                                    str::stream() <<
                                                    "numCursors has to be between 1 and 10000" <<
                                                    " was: " << numCursors ) );

            OwnedPointerVector<RecordIterator> iterators(collection->getManyIterators());

            if (iterators.size() < numCursors) {
                numCursors = iterators.size();
            }

            OwnedPointerVector<MultiIteratorRunner> runners;
            for ( size_t i = 0; i < numCursors; i++ ) {
                runners.push_back(new MultiIteratorRunner(ns.ns(), collection));
            }

            // transfer iterators to runners using a round-robin distribution.
            // TODO consider using a common work queue once invalidation issues go away.
            for (size_t i = 0; i < iterators.size(); i++) {
                runners[i % runners.size()]->addIterator(iterators.releaseAt(i));
            }

            {
                BSONArrayBuilder bucketsBuilder;
                for (size_t i = 0; i < runners.size(); i++) {
                    // transfer ownership of a runner to the ClientCursor (which manages its own
                    // lifetime).
                    ClientCursor* cc = new ClientCursor( collection, runners.releaseAt(i) );

                    // we are mimicking the aggregation cursor output here
                    // that is why there are ns, ok and empty firstBatch
                    BSONObjBuilder threadResult;
                    {
                        BSONObjBuilder cursor;
                        cursor.appendArray( "firstBatch", BSONObj() );
                        cursor.append( "ns", ns );
                        cursor.append( "id", cc->cursorid() );
                        threadResult.append( "cursor", cursor.obj() );
                    }
                    threadResult.appendBool( "ok", 1 );

                    bucketsBuilder.append( threadResult.obj() );
                }
                result.appendArray( "cursors", bucketsBuilder.obj() );
            }

            return true;

        }
Esempio n. 23
0
    /**
     * Runs a query using the following steps:
     *   --Parsing.
     *   --Acquire locks.
     *   --Plan query, obtaining an executor that can run it.
     *   --Generate the first batch.
     *   --Save state for getMore, transferring ownership of the executor to a ClientCursor.
     *   --Generate response to send to the client.
     */
    bool run(OperationContext* txn,
             const std::string& dbname,
             BSONObj& cmdObj,
             int options,
             std::string& errmsg,
             BSONObjBuilder& result) override {
        const std::string fullns = parseNs(dbname, cmdObj);
        const NamespaceString nss(fullns);
        if (!nss.isValid() || nss.isCommand() || nss.isSpecialCommand()) {
            return appendCommandStatus(result,
                                       {ErrorCodes::InvalidNamespace,
                                        str::stream() << "Invalid collection name: " << nss.ns()});
        }

        // Although it is a command, a find command gets counted as a query.
        globalOpCounters.gotQuery();

        if (txn->getClient()->isInDirectClient()) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()"));
        }

        // Parse the command BSON to a LiteParsedQuery.
        const bool isExplain = false;
        auto lpqStatus = LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain);
        if (!lpqStatus.isOK()) {
            return appendCommandStatus(result, lpqStatus.getStatus());
        }

        auto& lpq = lpqStatus.getValue();

        // Validate term before acquiring locks, if provided.
        if (auto term = lpq->getReplicationTerm()) {
            auto replCoord = repl::ReplicationCoordinator::get(txn);
            Status status = replCoord->updateTerm(txn, *term);
            // Note: updateTerm returns ok if term stayed the same.
            if (!status.isOK()) {
                return appendCommandStatus(result, status);
            }
        }

        // Fill out curop information.
        //
        // We pass negative values for 'ntoreturn' and 'ntoskip' to indicate that these values
        // should be omitted from the log line. Limit and skip information is already present in the
        // find command parameters, so these fields are redundant.
        const int ntoreturn = -1;
        const int ntoskip = -1;
        beginQueryOp(txn, nss, cmdObj, ntoreturn, ntoskip);

        // Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery.
        ExtensionsCallbackReal extensionsCallback(txn, &nss);
        auto statusWithCQ = CanonicalQuery::canonicalize(lpq.release(), extensionsCallback);
        if (!statusWithCQ.isOK()) {
            return appendCommandStatus(result, statusWithCQ.getStatus());
        }
        std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());

        ShardingState* const shardingState = ShardingState::get(txn);

        if (OperationShardVersion::get(txn).hasShardVersion() && shardingState->enabled()) {
            ChunkVersion receivedVersion = OperationShardVersion::get(txn).getShardVersion(nss);
            ChunkVersion latestVersion;
            // Wait for migration completion to get the correct chunk version.
            const int maxTimeoutSec = 30;
            int timeoutSec = cq->getParsed().getMaxTimeMS() / 1000;
            if (!timeoutSec || timeoutSec > maxTimeoutSec) {
                timeoutSec = maxTimeoutSec;
            }

            if (!shardingState->waitTillNotInCriticalSection(timeoutSec)) {
                uasserted(ErrorCodes::LockTimeout, "Timeout while waiting for migration commit");
            }

            // If the received version is newer than the version cached in 'shardingState', then we
            // have to refresh 'shardingState' from the config servers. We do this before acquiring
            // locks so that we don't hold locks while waiting on the network.
            uassertStatusOK(shardingState->refreshMetadataIfNeeded(
                txn, nss.ns(), receivedVersion, &latestVersion));
        }

        // Acquire locks.
        AutoGetCollectionForRead ctx(txn, nss);
        Collection* collection = ctx.getCollection();

        const int dbProfilingLevel =
            ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile;

        // It is possible that the sharding version will change during yield while we are
        // retrieving a plan executor. If this happens we will throw an error and mongos will
        // retry.
        const ChunkVersion shardingVersionAtStart = shardingState->getVersion(nss.ns());

        // Get the execution plan for the query.
        auto statusWithPlanExecutor =
            getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO);
        if (!statusWithPlanExecutor.isOK()) {
            return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
        }

        std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

        if (!collection) {
            // No collection. Just fill out curop indicating that there were zero results and
            // there is no ClientCursor id, and then return.
            const long long numResults = 0;
            const CursorId cursorId = 0;
            endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, cursorId);
            appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result);
            return true;
        }

        const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

        // Stream query results, adding them to a BSONArray as we go.
        BSONArrayBuilder firstBatch;
        BSONObj obj;
        PlanExecutor::ExecState state = PlanExecutor::ADVANCED;
        long long numResults = 0;
        while (!FindCommon::enoughForFirstBatch(pq, numResults, firstBatch.len()) &&
               PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
            // If adding this object will cause us to exceed the BSON size limit, then we stash
            // it for later.
            if (firstBatch.len() + obj.objsize() > BSONObjMaxUserSize && numResults > 0) {
                exec->enqueue(obj);
                break;
            }

            // Add result to output buffer.
            firstBatch.append(obj);
            numResults++;
        }

        // Throw an assertion if query execution fails for any reason.
        if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
            const std::unique_ptr<PlanStageStats> stats(exec->getStats());
            error() << "Plan executor error during find command: " << PlanExecutor::statestr(state)
                    << ", stats: " << Explain::statsToBSON(*stats);

            return appendCommandStatus(result,
                                       Status(ErrorCodes::OperationFailed,
                                              str::stream()
                                                  << "Executor error during find command: "
                                                  << WorkingSetCommon::toStatusString(obj)));
        }

        // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the
        // chunk belonged on this node are gone. Separating chunk lifetime management from
        // ClientCursor should allow this check to go away.
        if (!shardingState->getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
            // Version changed while retrieving a PlanExecutor. Terminate the operation,
            // signaling that mongos should retry.
            throw SendStaleConfigException(nss.ns(),
                                           "version changed during find command",
                                           shardingVersionAtStart,
                                           shardingState->getVersion(nss.ns()));
        }

        // Set up the cursor for getMore.
        CursorId cursorId = 0;
        if (shouldSaveCursor(txn, collection, state, exec.get())) {
            // Register the execution plan inside a ClientCursor. Ownership of the PlanExecutor is
            // transferred to the ClientCursor.
            //
            // First unregister the PlanExecutor so it can be re-registered with ClientCursor.
            exec->deregisterExec();

            // Create a ClientCursor containing this plan executor. We don't have to worry about
            // leaking it as it's inserted into a global map by its ctor.
            ClientCursor* cursor =
                new ClientCursor(collection->getCursorManager(),
                                 exec.release(),
                                 nss.ns(),
                                 txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                                 pq.getOptions(),
                                 pq.getFilter());
            cursorId = cursor->cursorid();

            invariant(!exec);
            PlanExecutor* cursorExec = cursor->getExecutor();

            // State will be restored on getMore.
            cursorExec->saveState();
            cursorExec->detachFromOperationContext();

            cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros());
            cursor->setPos(numResults);

            // Fill out curop based on the results.
            endQueryOp(txn, collection, *cursorExec, dbProfilingLevel, numResults, cursorId);
        } else {
            endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, cursorId);
        }

        // Generate the response object to send to the client.
        appendCursorResponseObject(cursorId, nss.ns(), firstBatch.arr(), &result);
        return true;
    }
Esempio n. 24
0
        bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) {
            BSONElement e = jsobj.firstElement();
            const string toDeleteNs = dbname + '.' + e.valuestr();
            if (!serverGlobalParams.quiet) {
                MONGO_TLOG(0) << "CMD: dropIndexes " << toDeleteNs << endl;
            }

            Lock::DBWrite dbXLock(dbname);
            Client::Context ctx(toDeleteNs);

            Collection* collection = cc().database()->getCollection( toDeleteNs );
            if ( ! collection ) {
                errmsg = "ns not found";
                return false;
            }

            stopIndexBuilds(cc().database(), jsobj);

            IndexCatalog* indexCatalog = collection->getIndexCatalog();
            anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal() );


            BSONElement f = jsobj.getField("index");
            if ( f.type() == String ) {

                string indexToDelete = f.valuestr();

                if ( indexToDelete == "*" ) {
                    Status s = indexCatalog->dropAllIndexes( false );
                    if ( !s.isOK() ) {
                        appendCommandStatus( anObjBuilder, s );
                        return false;
                    }
                    anObjBuilder.append("msg", "non-_id indexes dropped for collection");
                    return true;
                }

                IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( indexToDelete );
                if ( desc == NULL ) {
                    errmsg = str::stream() << "index not found with name [" << indexToDelete << "]";
                    return false;
                }

                if ( desc->isIdIndex() ) {
                    errmsg = "cannot drop _id index";
                    return false;
                }

                Status s = indexCatalog->dropIndex( desc );
                if ( !s.isOK() ) {
                    appendCommandStatus( anObjBuilder, s );
                    return false;
                }

                return true;
            }

            if ( f.type() == Object ) {
                IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByKeyPattern( f.embeddedObject() );
                if ( desc == NULL ) {
                    errmsg = "can't find index with key:";
                    errmsg += f.embeddedObject().toString();
                    return false;
                }

                if ( desc->isIdIndex() ) {
                    errmsg = "cannot drop _id index";
                    return false;
                }

                Status s = indexCatalog->dropIndex( desc );
                if ( !s.isOK() ) {
                    appendCommandStatus( anObjBuilder, s );
                    return false;
                }

                return true;
            }

            errmsg = "invalid index name spec";
            return false;
        }
Esempio n. 25
0
        virtual bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string coll = cmdObj.firstElement().valuestr();
            if( coll.empty() || db.empty() ) {
                errmsg = "no collection name specified";
                return false;
            }

            if( isCurrentlyAReplSetPrimary() && !cmdObj["force"].trueValue() ) {
                errmsg = "will not run compact on an active replica set primary as this is a slow blocking operation. use force:true to force";
                return false;
            }

            NamespaceString ns(db,coll);
            if ( !ns.isNormal() ) {
                errmsg = "bad namespace name";
                return false;
            }

            if ( ns.isSystem() ) {
                // items in system.* cannot be moved as there might be pointers to them
                // i.e. system.indexes entries are pointed to from NamespaceDetails
                errmsg = "can't compact a system namespace";
                return false;
            }

            CompactOptions compactOptions;

            if ( cmdObj["preservePadding"].trueValue() ) {
                compactOptions.paddingMode = CompactOptions::PRESERVE;
                if ( cmdObj.hasElement( "paddingFactor" ) ||
                     cmdObj.hasElement( "paddingBytes" ) ) {
                    errmsg = "cannot mix preservePadding and paddingFactor|paddingBytes";
                    return false;
                }
            }
            else if ( cmdObj.hasElement( "paddingFactor" ) || cmdObj.hasElement( "paddingBytes" ) ) {
                compactOptions.paddingMode = CompactOptions::MANUAL;
                if ( cmdObj.hasElement("paddingFactor") ) {
                    compactOptions.paddingFactor = cmdObj["paddingFactor"].Number();
                    if ( compactOptions.paddingFactor < 1 ||
                         compactOptions.paddingFactor > 4 ){
                        errmsg = "invalid padding factor";
                        return false;
                    }
                }
                if ( cmdObj.hasElement("paddingBytes") ) {
                    compactOptions.paddingBytes = cmdObj["paddingBytes"].numberInt();
                    if ( compactOptions.paddingBytes < 0 ||
                         compactOptions.paddingBytes > ( 1024 * 1024 ) ) {
                        errmsg = "invalid padding bytes";
                        return false;
                    }
                }
            }

            if ( cmdObj.hasElement("validate") )
                compactOptions.validateDocuments = cmdObj["validate"].trueValue();


            Lock::DBWrite lk(ns.ns());
            BackgroundOperation::assertNoBgOpInProgForNs(ns.ns());
            Client::Context ctx(ns);

            Collection* collection = ctx.db()->getCollection(ns.ns());
            if( ! collection ) {
                errmsg = "namespace does not exist";
                return false;
            }

            if ( collection->isCapped() ) {
                errmsg = "cannot compact a capped collection";
                return false;
            }

            log() << "compact " << ns << " begin, options: " << compactOptions.toString();

            std::vector<BSONObj> indexesInProg = stopIndexBuilds(db, cmdObj);

            StatusWith<CompactStats> status = collection->compact( &compactOptions );
            if ( !status.isOK() )
                return appendCommandStatus( result, status.getStatus() );

            if ( status.getValue().corruptDocuments > 0 )
                result.append("invalidObjects", status.getValue().corruptDocuments );

            log() << "compact " << ns << " end";

            IndexBuilder::restoreIndexes(indexesInProg);

            return true;
        }
Esempio n. 26
0
    virtual bool run(OperationContext* txn,
                     const string& dbname,
                     BSONObj& cmdObj,
                     int options,
                     string& errmsg,
                     BSONObjBuilder& result) {
        // ---  parse

        NamespaceString ns(dbname, cmdObj[name].String());
        Status status = userAllowedWriteNS(ns);
        if (!status.isOK())
            return appendCommandStatus(result, status);

        if (cmdObj["indexes"].type() != Array) {
            errmsg = "indexes has to be an array";
            result.append("cmdObj", cmdObj);
            return false;
        }

        std::vector<BSONObj> specs;
        {
            BSONObjIterator i(cmdObj["indexes"].Obj());
            while (i.more()) {
                BSONElement e = i.next();
                if (e.type() != Object) {
                    errmsg = "everything in indexes has to be an Object";
                    result.append("cmdObj", cmdObj);
                    return false;
                }
                specs.push_back(e.Obj());
            }
        }

        if (specs.size() == 0) {
            errmsg = "no indexes to add";
            return false;
        }

        // check specs
        for (size_t i = 0; i < specs.size(); i++) {
            BSONObj spec = specs[i];
            if (spec["ns"].eoo()) {
                spec = _addNsToSpec(ns, spec);
                specs[i] = spec;
            }

            if (spec["ns"].type() != String) {
                errmsg = "spec has no ns";
                result.append("spec", spec);
                return false;
            }
            if (ns != spec["ns"].String()) {
                errmsg = "namespace mismatch";
                result.append("spec", spec);
                return false;
            }
        }

        // now we know we have to create index(es)
        // Note: createIndexes command does not currently respect shard versioning.
        ScopedTransaction transaction(txn, MODE_IX);
        Lock::DBLock dbLock(txn->lockState(), ns.db(), MODE_X);
        if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::NotMaster,
                       str::stream() << "Not primary while creating indexes in " << ns.ns()));
        }

        Database* db = dbHolder().get(txn, ns.db());
        if (!db) {
            db = dbHolder().openDb(txn, ns.db());
        }

        Collection* collection = db->getCollection(ns.ns());
        result.appendBool("createdCollectionAutomatically", collection == NULL);
        if (!collection) {
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                WriteUnitOfWork wunit(txn);
                collection = db->createCollection(txn, ns.ns(), CollectionOptions());
                invariant(collection);
                wunit.commit();
            }
            MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());
        }
Esempio n. 27
0
        bool run(OperationContext* txn, const string& dbname,
                  BSONObj& cmdObj,
                  int,
                  string& errmsg,
                  BSONObjBuilder& result,
                  bool fromRepl ) {

            //
            // Correct behavior here is very finicky.
            //
            // 1.  The first step is to append the error that occurred on the previous operation.
            // This adds an "err" field to the command, which is *not* the command failing.
            //
            // 2.  Next we parse and validate write concern options.  If these options are invalid
            // the command fails no matter what, even if we actually had an error earlier.  The
            // reason for checking here is to match legacy behavior on these kind of failures -
            // we'll still get an "err" field for the write error.
            //
            // 3.  If we had an error on the previous operation, we then return immediately.
            //
            // 4.  Finally, we actually enforce the write concern.  All errors *except* timeout are
            // reported with ok : 0.0, to match legacy behavior.
            //
            // There is a special case when "wOpTime" and "wElectionId" are explicitly provided by 
            // the client (mongos) - in this case we *only* enforce the write concern if it is 
            // valid.
            //
            // We always need to either report "err" (if ok : 1) or "errmsg" (if ok : 0), even if
            // err is null.
            //

            LastError *le = lastError.disableForCommand();

            // Always append lastOp and connectionId
            Client& c = cc();
            c.appendLastOp( result );

            // for sharding; also useful in general for debugging
            result.appendNumber( "connectionId" , c.getConnectionId() );

            OpTime lastOpTime;
            BSONField<OpTime> wOpTimeField("wOpTime");
            FieldParser::FieldState extracted = FieldParser::extract(cmdObj, wOpTimeField, 
                                                                     &lastOpTime, &errmsg);
            if (!extracted) {
                result.append("badGLE", cmdObj);
                appendCommandStatus(result, false, errmsg);
                return false;
            }
            bool lastOpTimePresent = extracted != FieldParser::FIELD_NONE;
            if (!lastOpTimePresent) {
                // Use the client opTime if no wOpTime is specified
                lastOpTime = cc().getLastOp();
            }
            
            OID electionId;
            BSONField<OID> wElectionIdField("wElectionId");
            extracted = FieldParser::extract(cmdObj, wElectionIdField, 
                                             &electionId, &errmsg);
            if (!extracted) {
                result.append("badGLE", cmdObj);
                appendCommandStatus(result, false, errmsg);
                return false;
            }

            bool electionIdPresent = extracted != FieldParser::FIELD_NONE;
            bool errorOccurred = false;

            // Errors aren't reported when wOpTime is used
            if ( !lastOpTimePresent ) {
                if ( le->nPrev != 1 ) {
                    errorOccurred = LastError::noError.appendSelf( result, false );
                    le->appendSelfStatus( result );
                }
                else {
                    errorOccurred = le->appendSelf( result, false );
                }
            }

            BSONObj writeConcernDoc = cmdObj;
            // Use the default options if we have no gle options aside from wOpTime/wElectionId
            const int nFields = cmdObj.nFields();
            bool useDefaultGLEOptions = (nFields == 1) || 
                (nFields == 2 && lastOpTimePresent) ||
                (nFields == 3 && lastOpTimePresent && electionIdPresent);

            if ( useDefaultGLEOptions && getLastErrorDefault ) {
                writeConcernDoc = *getLastErrorDefault;
            }

            //
            // Validate write concern no matter what, this matches 2.4 behavior
            //

            WriteConcernOptions writeConcern;
            Status status = writeConcern.parse( writeConcernDoc );

            if ( status.isOK() ) {
                // Ensure options are valid for this host
                status = validateWriteConcern( writeConcern );
            }

            if ( !status.isOK() ) {
                result.append( "badGLE", writeConcernDoc );
                return appendCommandStatus( result, status );
            }

            // Don't wait for replication if there was an error reported - this matches 2.4 behavior
            if ( errorOccurred ) {
                dassert( !lastOpTimePresent );
                return true;
            }

            // No error occurred, so we won't duplicate these fields with write concern errors
            dassert( result.asTempObj()["err"].eoo() );
            dassert( result.asTempObj()["code"].eoo() );

            // If we got an electionId, make sure it matches
            if (electionIdPresent) {
                if (!theReplSet) {
                    // Ignore electionIds of 0 from mongos.
                    if (electionId != OID()) {
                        errmsg = "wElectionId passed but no replication active";
                        result.append("code", ErrorCodes::BadValue);
                        return false;
                    }
                } 
                else {
                    if (electionId != theReplSet->getElectionId()) {
                        LOG(3) << "oid passed in is " << electionId
                               << ", but our id is " << theReplSet->getElectionId();
                        errmsg = "election occurred after write";
                        result.append("code", ErrorCodes::WriteConcernFailed);
                        return false;
                    }
                }
            }

            cc().curop()->setMessage( "waiting for write concern" );

            WriteConcernResult wcResult;
            status = waitForWriteConcern( txn, writeConcern, lastOpTime, &wcResult );
            wcResult.appendTo( writeConcern, &result );

            // For backward compatibility with 2.4, wtimeout returns ok : 1.0
            if ( wcResult.wTimedOut ) {
                dassert( !wcResult.err.empty() ); // so we always report err
                dassert( !status.isOK() );
                result.append( "errmsg", "timed out waiting for slaves" );
                result.append( "code", status.code() );
                return true;
            }

            return appendCommandStatus( result, status );
        }
Esempio n. 28
0
        virtual bool run(OperationContext* txn,  const string& dbname, BSONObj& cmdObj, int options,
                          string& errmsg, BSONObjBuilder& result,
                          bool fromRepl = false ) {

            // ---  parse

            NamespaceString ns( dbname, cmdObj[name].String() );
            Status status = userAllowedWriteNS( ns );
            if ( !status.isOK() )
                return appendCommandStatus( result, status );

            if ( cmdObj["indexes"].type() != Array ) {
                errmsg = "indexes has to be an array";
                result.append( "cmdObj", cmdObj );
                return false;
            }

            std::vector<BSONObj> specs;
            {
                BSONObjIterator i( cmdObj["indexes"].Obj() );
                while ( i.more() ) {
                    BSONElement e = i.next();
                    if ( e.type() != Object ) {
                        errmsg = "everything in indexes has to be an Object";
                        result.append( "cmdObj", cmdObj );
                        return false;
                    }
                    specs.push_back( e.Obj() );
                }
            }

            if ( specs.size() == 0 ) {
                errmsg = "no indexes to add";
                return false;
            }

            // check specs
            for ( size_t i = 0; i < specs.size(); i++ ) {
                BSONObj spec = specs[i];
                if ( spec["ns"].eoo() ) {
                    spec = _addNsToSpec( ns, spec );
                    specs[i] = spec;
                }

                if ( spec["ns"].type() != String ) {
                    errmsg = "spec has no ns";
                    result.append( "spec", spec );
                    return false;
                }
                if ( ns != spec["ns"].String() ) {
                    errmsg = "namespace mismatch";
                    result.append( "spec", spec );
                    return false;
                }
            }

            // now we know we have to create index(es)
            // Note: createIndexes command does not currently respect shard versioning.
            Lock::DBLock lk(txn->lockState(), ns.db(), MODE_X);
            Client::Context ctx(txn, ns.ns(), false /* doVersion */ );
            Database* db = ctx.db();

            Collection* collection = db->getCollection( txn, ns.ns() );
            result.appendBool( "createdCollectionAutomatically", collection == NULL );
            if ( !collection ) {
                WriteUnitOfWork wunit(txn);
                collection = db->createCollection( txn, ns.ns() );
                invariant( collection );
                if (!fromRepl) {
                    repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), BSON("create" << ns.coll()));
                }
                wunit.commit();
            }

            result.append( "numIndexesBefore", collection->getIndexCatalog()->numIndexesTotal(txn) );

            MultiIndexBlock indexer(txn, collection);
            indexer.allowBackgroundBuilding();
            indexer.allowInterruption();

            const size_t origSpecsSize = specs.size();
            indexer.removeExistingIndexes(&specs);

            if (specs.size() == 0) {
                result.append( "note", "all indexes already exist" );
                return true;
            }

            if (specs.size() != origSpecsSize) {
                result.append( "note", "index already exists" );
            }

            for ( size_t i = 0; i < specs.size(); i++ ) {
                const BSONObj& spec = specs[i];
                if ( spec["unique"].trueValue() ) {
                    status = checkUniqueIndexConstraints(txn, ns.ns(), spec["key"].Obj());

                    if ( !status.isOK() ) {
                        appendCommandStatus( result, status );
                        return false;
                    }
                }
            }

            uassertStatusOK(indexer.init(specs));
            uassertStatusOK(indexer.insertAllDocumentsInCollection());

            {
                WriteUnitOfWork wunit(txn);

                indexer.commit();

                if ( !fromRepl ) {
                    for ( size_t i = 0; i < specs.size(); i++ ) {
                        std::string systemIndexes = ns.getSystemIndexesCollection();
                        repl::logOp(txn, "i", systemIndexes.c_str(), specs[i]);
                    }
                }

                wunit.commit();
            }

            result.append( "numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal(txn) );

            return true;
        }
Esempio n. 29
0
    bool CmdExplain::run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj, int options,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {
        // Should never get explain commands issued from replication.
        if (fromRepl) {
            Status commandStat(ErrorCodes::IllegalOperation,
                               "explain command should not be from repl");
            appendCommandStatus(result, commandStat);
            return false;
        }

        // Get the verbosity. We use the executionStats verbosity by default.
        Explain::Verbosity verbosity = Explain::EXEC_STATS;
        if (!cmdObj["verbosity"].eoo()) {
            const char* verbStr = cmdObj["verbosity"].valuestrsafe();
            if (mongoutils::str::equals(verbStr, "queryPlanner")) {
                verbosity = Explain::QUERY_PLANNER;
            }
            else if (mongoutils::str::equals(verbStr, "allPlansExecution")) {
                verbosity = Explain::EXEC_ALL_PLANS;
            }
            else if (mongoutils::str::equals(verbStr, "full")) {
                verbosity = Explain::FULL;
            }
            else if (!mongoutils::str::equals(verbStr, "executionStats")) {
                Status commandStat(ErrorCodes::BadValue,
                                   "verbosity string must be one of "
                                   "{'queryPlanner', 'executionStats', 'allPlansExecution'}");
                appendCommandStatus(result, commandStat);
                return false;
            }
        }

        if (Object != cmdObj.firstElement().type()) {
            Status commandStat(ErrorCodes::BadValue,
                               "explain command requires a nested object");
            appendCommandStatus(result, commandStat);
            return false;
        }

        // This is the nested command which we are explaining.
        BSONObj explainObj = cmdObj.firstElement().Obj();

        Command* commToExplain = Command::findCommand(explainObj.firstElementFieldName());
        if (NULL == commToExplain) {
            mongoutils::str::stream ss;
            ss << "unknown command: " << explainObj.firstElementFieldName();
            Status explainStatus(ErrorCodes::CommandNotFound, ss);
            return appendCommandStatus(result, explainStatus);
        }

        // Check whether the child command is allowed to run here. TODO: this logic is
        // copied from Command::execCommand and should be abstracted. Until then, make
        // sure to keep it up to date.
        repl::ReplicationCoordinator* replCoord = repl::getGlobalReplicationCoordinator();
        bool canRunHere =
            replCoord->canAcceptWritesForDatabase(dbname) ||
            commToExplain->slaveOk() ||
            (commToExplain->slaveOverrideOk() && (options & QueryOption_SlaveOk));

        if (!canRunHere) {
            mongoutils::str::stream ss;
            ss << "Explain's child command cannot run on this node. "
               << "Are you explaining a write command on a secondary?";
            appendCommandStatus(result, false, ss);
            return false;
        }

        // Actually call the nested command's explain(...) method.
        Status explainStatus = commToExplain->explain(txn, dbname, explainObj, verbosity, &result);
        if (!explainStatus.isOK()) {
            return appendCommandStatus(result, explainStatus);
        }

        return true;
    }