Example #1
    Status WriteCmd::explain(OperationContext* txn,
                             const std::string& dbname,
                             const BSONObj& cmdObj,
                             ExplainCommon::Verbosity verbosity,
                             BSONObjBuilder* out) const {
        // For now we only explain update and delete write commands.
        if ( BatchedCommandRequest::BatchType_Update != _writeType &&
             BatchedCommandRequest::BatchType_Delete != _writeType ) {
            return Status( ErrorCodes::IllegalOperation,
                           "Only update and delete write ops can be explained" );
        }

        // Parse the batch request.
        BatchedCommandRequest request( _writeType );
        std::string errMsg;
        if ( !request.parseBSON( cmdObj, &errMsg ) || !request.isValid( &errMsg ) ) {
            return Status( ErrorCodes::FailedToParse, errMsg );
        }

        // Note that this is a runCommand, and therefore the database and the collection name
        // are in different parts of the grammar for the command. But it's more convenient to
        // work with a NamespaceString. We build it here and replace it in the parsed command.
        // Internally, everything works with the namespace string as opposed to just the
        // collection name.
        NamespaceString nsString(dbname, request.getNS());
        request.setNSS(nsString);

        // Do the validation of the batch that is shared with non-explained write batches.
        Status isValid = WriteBatchExecutor::validateBatch( request );
        if (!isValid.isOK()) {
            return isValid;
        }

        // Explain must do one additional piece of validation: For now we only explain
        // singleton batches.
        if ( request.sizeWriteOps() != 1u ) {
            return Status( ErrorCodes::InvalidLength,
                           "explained write batches must be of size 1" );
        }

        // Get a reference to the singleton batch item (it's the 0th item in the batch).
        BatchItemRef batchItem( &request, 0 );

        if ( BatchedCommandRequest::BatchType_Update == _writeType ) {
            // Create the update request.
            UpdateRequest updateRequest( nsString );
            updateRequest.setQuery( batchItem.getUpdate()->getQuery() );
            updateRequest.setUpdates( batchItem.getUpdate()->getUpdateExpr() );
            updateRequest.setMulti( batchItem.getUpdate()->getMulti() );
            updateRequest.setUpsert( batchItem.getUpdate()->getUpsert() );
            updateRequest.setUpdateOpLog( true );
            UpdateLifecycleImpl updateLifecycle( true, updateRequest.getNamespaceString() );
            updateRequest.setLifecycle( &updateLifecycle );
            updateRequest.setExplain();

            // Explained updates can yield.
            updateRequest.setYieldPolicy(PlanExecutor::YIELD_AUTO);

            OpDebug* debug = &txn->getCurOp()->debug();

            ParsedUpdate parsedUpdate( txn, &updateRequest );
            Status parseStatus = parsedUpdate.parseRequest();
            if ( !parseStatus.isOK() ) {
                return parseStatus;
            }

            // Explains of write commands are read-only, but we take write locks so
            // that timing info is more accurate.
            AutoGetDb autoDb( txn, nsString.db(), MODE_IX );
            Lock::CollectionLock colLock( txn->lockState(), nsString.ns(), MODE_IX );

            // We check the shard version explicitly here rather than using Client::Context,
            // as Context can do implicit database creation if the db does not exist. We want
            // explain to be a no-op that reports a trivial EOF plan against non-existent dbs
            // or collections.
            ensureShardVersionOKOrThrow( nsString.ns() );

            // Get a pointer to the (possibly NULL) collection.
            Collection* collection = NULL;
            if ( autoDb.getDb() ) {
                collection = autoDb.getDb()->getCollection( txn, nsString.ns() );
            }

            PlanExecutor* rawExec;
            uassertStatusOK(getExecutorUpdate(txn, collection, &parsedUpdate, debug, &rawExec));
            boost::scoped_ptr<PlanExecutor> exec(rawExec);

            // Explain the plan tree.
            Explain::explainStages( exec.get(), verbosity, out );
            return Status::OK();
        }
        else {
            invariant( BatchedCommandRequest::BatchType_Delete == _writeType );

            // Create the delete request.
            DeleteRequest deleteRequest( nsString );
            deleteRequest.setQuery( batchItem.getDelete()->getQuery() );
            deleteRequest.setMulti( batchItem.getDelete()->getLimit() != 1 );
            deleteRequest.setUpdateOpLog(true);
            deleteRequest.setGod( false );
            deleteRequest.setExplain();

            // Explained deletes can yield.
            deleteRequest.setYieldPolicy(PlanExecutor::YIELD_AUTO);

            ParsedDelete parsedDelete(txn, &deleteRequest);
            Status parseStatus = parsedDelete.parseRequest();
            if (!parseStatus.isOK()) {
                return parseStatus;
            }

            // Explains of write commands are read-only, but we take write locks so that timing
            // info is more accurate.
            AutoGetDb autoDb(txn, nsString.db(), MODE_IX);
            Lock::CollectionLock colLock(txn->lockState(), nsString.ns(), MODE_IX);

            // We check the shard version explicitly here rather than using Client::Context,
            // as Context can do implicit database creation if the db does not exist. We want
            // explain to be a no-op that reports a trivial EOF plan against non-existent dbs
            // or collections.
            ensureShardVersionOKOrThrow( nsString.ns() );

            // Get a pointer to the (possibly NULL) collection.
            Collection* collection = NULL;
            if (autoDb.getDb()) {
                collection = autoDb.getDb()->getCollection(txn, nsString.ns());
            }

            PlanExecutor* rawExec;
            uassertStatusOK(getExecutorDelete(txn, collection, &parsedDelete, &rawExec));
            boost::scoped_ptr<PlanExecutor> exec(rawExec);

            // Explain the plan tree.
            Explain::explainStages(exec.get(), verbosity, out);
            return Status::OK();
        }
    }
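
For context, here is a minimal sketch (not from the source) of the singleton update batch that this explain() hands to BatchedCommandRequest::parseBSON; the collection and field names are illustrative, and in practice the document arrives wrapped by the explain command (e.g. { explain: <write command>, verbosity: "queryPlanner" }):

    // Sketch only: a size-1 update batch; include path assumed from the same source tree.
    #include "mongo/db/jsobj.h"

    mongo::BSONObj singletonUpdateBatch() {
        return BSON("update" << "coll"
                    << "updates"
                    << BSON_ARRAY(BSON("q" << BSON("x" << 1)                     // query
                                       << "u" << BSON("$set" << BSON("y" << 2))  // update expression
                                       << "multi" << false
                                       << "upsert" << false)));
    }
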
Example #2
    virtual bool run(OperationContext* txn,
                     const string& dbname,
                     BSONObj& cmdObj,
                     int options,
                     string& errmsg,
                     BSONObjBuilder& result) {
        const NamespaceString ns(parseNs(dbname, cmdObj));

        Status status = userAllowedWriteNS(ns);
        if (!status.isOK())
            return appendCommandStatus(result, status);

        if (cmdObj["indexes"].type() != Array) {
            errmsg = "indexes has to be an array";
            result.append("cmdObj", cmdObj);
            return false;
        }

        std::vector<BSONObj> specs;
        {
            BSONObjIterator i(cmdObj["indexes"].Obj());
            while (i.more()) {
                BSONElement e = i.next();
                if (e.type() != Object) {
                    errmsg = "everything in indexes has to be an Object";
                    result.append("cmdObj", cmdObj);
                    return false;
                }
                specs.push_back(e.Obj());
            }
        }

        if (specs.size() == 0) {
            errmsg = "no indexes to add";
            return false;
        }

        // check specs
        for (size_t i = 0; i < specs.size(); i++) {
            BSONObj spec = specs[i];
            if (spec["ns"].eoo()) {
                spec = _addNsToSpec(ns, spec);
                specs[i] = spec;
            }

            if (spec["ns"].type() != String) {
                errmsg = "ns field must be a string";
                result.append("spec", spec);
                return false;
            }

            std::string nsFromUser = spec["ns"].String();
            if (nsFromUser.empty()) {
                errmsg = "ns field cannot be an empty string";
                result.append("spec", spec);
                return false;
            }

            if (ns != nsFromUser) {
                errmsg = str::stream() << "value of ns field '" << nsFromUser
                                       << "' doesn't match namespace " << ns.ns();
                result.append("spec", spec);
                return false;
            }
        }

        // now we know we have to create index(es)
        // Note: createIndexes command does not currently respect shard versioning.
        ScopedTransaction transaction(txn, MODE_IX);
        Lock::DBLock dbLock(txn->lockState(), ns.db(), MODE_X);
        if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::NotMaster,
                       str::stream() << "Not primary while creating indexes in " << ns.ns()));
        }

        Database* db = dbHolder().get(txn, ns.db());
        if (!db) {
            db = dbHolder().openDb(txn, ns.db());
        }

        Collection* collection = db->getCollection(ns.ns());
        if (collection) {
            result.appendBool("createdCollectionAutomatically", false);
        } else {
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                WriteUnitOfWork wunit(txn);
                collection = db->createCollection(txn, ns.ns(), CollectionOptions());
                invariant(collection);
                wunit.commit();
            }
            MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());
            result.appendBool("createdCollectionAutomatically", true);
        }

        const int numIndexesBefore = collection->getIndexCatalog()->numIndexesTotal(txn);
        result.append("numIndexesBefore", numIndexesBefore);

        auto client = txn->getClient();
        ScopeGuard lastOpSetterGuard =
            MakeObjGuard(repl::ReplClientInfo::forClient(client),
                         &repl::ReplClientInfo::setLastOpToSystemLastOpTime,
                         txn);

        MultiIndexBlock indexer(txn, collection);
        indexer.allowBackgroundBuilding();
        indexer.allowInterruption();

        const size_t origSpecsSize = specs.size();
        indexer.removeExistingIndexes(&specs);

        if (specs.size() == 0) {
            result.append("numIndexesAfter", numIndexesBefore);
            result.append("note", "all indexes already exist");
            return true;
        }

        if (specs.size() != origSpecsSize) {
            result.append("note", "index already exists");
        }

        for (size_t i = 0; i < specs.size(); i++) {
            const BSONObj& spec = specs[i];
            if (spec["unique"].trueValue()) {
                status = checkUniqueIndexConstraints(txn, ns.ns(), spec["key"].Obj());

                if (!status.isOK()) {
                    return appendCommandStatus(result, status);
                }
            }
            if (spec["v"].isNumber() && spec["v"].numberInt() == 0) {
                return appendCommandStatus(
                    result,
                    Status(ErrorCodes::CannotCreateIndex,
                           str::stream() << "illegal index specification: " << spec << ". "
                                         << "The option v:0 cannot be passed explicitly"));
            }
        }

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            uassertStatusOK(indexer.init(specs));
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());

        // If we're a background index, replace exclusive db lock with an intent lock, so that
        // other readers and writers can proceed during this phase.
        if (indexer.getBuildInBackground()) {
            txn->recoveryUnit()->abandonSnapshot();
            dbLock.relockWithMode(MODE_IX);
            if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
                return appendCommandStatus(
                    result,
                    Status(ErrorCodes::NotMaster,
                           str::stream() << "Not primary while creating background indexes in "
                                         << ns.ns()));
            }
        }

        try {
            Lock::CollectionLock colLock(txn->lockState(), ns.ns(), MODE_IX);
            uassertStatusOK(indexer.insertAllDocumentsInCollection());
        } catch (const DBException& e) {
            invariant(e.getCode() != ErrorCodes::WriteConflict);
            // Must have exclusive DB lock before we clean up the index build via the
            // destructor of 'indexer'.
            if (indexer.getBuildInBackground()) {
                try {
                    // This function cannot throw today, but we will preemptively prepare for
                    // that day, to avoid data corruption due to lack of index cleanup.
                    txn->recoveryUnit()->abandonSnapshot();
                    dbLock.relockWithMode(MODE_X);
                    if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
                        return appendCommandStatus(
                            result,
                            Status(ErrorCodes::NotMaster,
                                   str::stream()
                                       << "Not primary while creating background indexes in "
                                       << ns.ns() << ": cleaning up index build failure due to "
                                       << e.toString()));
                    }
                } catch (...) {
                    std::terminate();
                }
            }
            throw;
        }
        // Need to return db lock back to exclusive, to complete the index build.
        if (indexer.getBuildInBackground()) {
            txn->recoveryUnit()->abandonSnapshot();
            dbLock.relockWithMode(MODE_X);
            uassert(ErrorCodes::NotMaster,
                    str::stream() << "Not primary while completing index build in " << dbname,
                    repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns));

            Database* db = dbHolder().get(txn, ns.db());
            uassert(28551, "database dropped during index build", db);
            uassert(28552, "collection dropped during index build", db->getCollection(ns.ns()));
        }

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            WriteUnitOfWork wunit(txn);

            indexer.commit();

            for (size_t i = 0; i < specs.size(); i++) {
                std::string systemIndexes = ns.getSystemIndexesCollection();
                getGlobalServiceContext()->getOpObserver()->onCreateIndex(
                    txn, systemIndexes, specs[i]);
            }

            wunit.commit();
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());

        result.append("numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal(txn));

        lastOpSetterGuard.Dismiss();

        return true;
    }
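
A minimal sketch (collection name and key are assumptions) of the command document this run() parses: the target collection under the createIndexes field and an "indexes" array of specs. The "ns" field can be left out here, since the loop above fills it in via _addNsToSpec before validating it against the target namespace:

    // Sketch only: one index spec without "ns"; run() adds it when missing.
    #include "mongo/db/jsobj.h"

    mongo::BSONObj createIndexesCmd() {
        return BSON("createIndexes" << "coll"
                    << "indexes"
                    << BSON_ARRAY(BSON("key" << BSON("a" << 1)
                                       << "name" << "a_1")));
    }
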
Example #3
    Status IndexBuilder::_build(OperationContext* txn,
                                Database* db,
                                bool allowBackgroundBuilding,
                                Lock::DBLock* dbLock) const {
        const NamespaceString ns(_index["ns"].String());

        Collection* c = db->getCollection( txn, ns.ns() );
        if ( !c ) {
            WriteUnitOfWork wunit(txn);
            c = db->getOrCreateCollection( txn, ns.ns() );
            verify(c);
            wunit.commit();
        }

        // Show which index we're building in the curop display.
        txn->getCurOp()->setQuery(_index);

        MultiIndexBlock indexer(txn, c);
        indexer.allowInterruption();
        if (allowBackgroundBuilding)
            indexer.allowBackgroundBuilding();

        Status status = Status::OK();
        IndexDescriptor* descriptor(NULL);
        try {
            status = indexer.init(_index);
            if ( status.code() == ErrorCodes::IndexAlreadyExists )
                return Status::OK();

            if (status.isOK()) {
                if (allowBackgroundBuilding) {
                    descriptor = indexer.registerIndexBuild();
                    invariant(dbLock);
                    dbLock->relockWithMode(MODE_IX);
                }

                Lock::CollectionLock colLock(txn->lockState(), ns.ns(), MODE_IX);
                status = indexer.insertAllDocumentsInCollection();
            }

            if (status.isOK()) {
                if (allowBackgroundBuilding) {
                    dbLock->relockWithMode(MODE_X);
                }
                WriteUnitOfWork wunit(txn);
                indexer.commit();
                wunit.commit();
            }
        }
        catch (const DBException& e) {
            status = e.toStatus();
        }

        if (allowBackgroundBuilding) {
            dbLock->relockWithMode(MODE_X);
            Database* db = dbHolder().get(txn, ns.db());
            fassert(28553, db);
            fassert(28554, db->getCollection(txn, ns.ns()));
            indexer.unregisterIndexBuild(descriptor);
        }

        if (status.code() == ErrorCodes::InterruptedAtShutdown) {
            // Leave it as if kill -9 happened; this will be handled on restart.
            indexer.abortWithoutCleanup();
        }

        return status;
    }
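
Unlike Example #2, this _build() reads the target namespace straight from the spec (_index["ns"].String()), so a spec handed to IndexBuilder must already carry "ns". A hedged sketch of such a spec, with illustrative names:

    // Sketch only: an index spec as _build() expects it, including "ns".
    #include "mongo/db/jsobj.h"

    mongo::BSONObj indexSpecForBuilder() {
        return BSON("ns" << "test.coll"
                    << "key" << BSON("a" << 1)
                    << "name" << "a_1");
    }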