Example #1
    static void _buildHotIndex(const char *ns, Message &m, const vector<BSONObj> &objs) {
        uassert(16905, "Can only build one index at a time.", objs.size() == 1);

        DEV {
            // System.indexes cannot be sharded.
            Client::ShardedOperationScope sc;
            verify(!sc.handlePossibleShardedMessage(m, 0));
        }

        LOCK_REASON(lockReason, "building hot index");
        scoped_ptr<Lock::DBWrite> lk(new Lock::DBWrite(ns, lockReason));

        uassert(16902, "not master", isMasterNs(ns));

        const BSONObj &info = objs[0];
        const StringData &coll = info["ns"].Stringdata();

        scoped_ptr<Client::Transaction> transaction(new Client::Transaction(DB_SERIALIZABLE));
        shared_ptr<Collection::Indexer> indexer;

        // Prepare the index build. Performs index validation and marks
        // the collection as having an index build in progress.
        {
            Client::Context ctx(ns);
            Collection *cl = getOrCreateCollection(coll, true);
            if (cl->findIndexByKeyPattern(info["key"].Obj()) >= 0) {
                // No error or action if the index already exists. We need to commit
                // the transaction in case this is an ensure index on the _id field
                // and the ns was created by getOrCreateCollection()
                transaction->commit();
                return;
            }

            _insertObjects(ns, objs, false, 0, true);
            indexer = cl->newIndexer(info, true);
            indexer->prepare();
        }

        // Perform the index build
        {
            Lock::DBWrite::Downgrade dg(lk);
            uassert(16906, "not master: after indexer setup but before build", isMasterNs(ns));

            Client::Context ctx(ns);
            indexer->build();
        }

        uassert(16907, "not master: after indexer build but before commit", isMasterNs(ns));

        // Commit the index build
        {
            Client::Context ctx(ns);
            indexer->commit();
        }
        transaction->commit();
    }
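
The three isMasterNs checks above bracket each phase of the hot index build because the DB write lock is downgraded for the long-running build phase, and the node can step down whenever the exclusive lock is not held. A minimal standalone sketch of that phase structure, using only the standard library (all names here are illustrative, not TokuMX APIs):

    #include <atomic>
    #include <stdexcept>
    #include <string>

    std::atomic<bool> isMaster{true};  // stand-in for isMasterNs(ns)

    static void checkStillMaster(const char *phase) {
        if (!isMaster.load())
            throw std::runtime_error(std::string("not master: ") + phase);
    }

    static void buildIndexPhased() {
        checkStillMaster("before prepare");
        // ... prepare under the exclusive lock ...
        // the lock is downgraded here; a stepdown may now happen concurrently
        checkStillMaster("after setup but before build");
        // ... long-running build under the downgraded (shared) lock ...
        checkStillMaster("after build but before commit");
        // ... commit ...
    }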
Example #2
    void receivedUpdate(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();
        op.debug().ns = ns;
        int flags = d.pullInt();
        BSONObj query = d.nextJsObj();

        assert( d.moreJSObjs() );
        assert( query.objsize() < m.header()->dataLen() );
        BSONObj toupdate = d.nextJsObj();
        uassert( 10055 , "update object too large", toupdate.objsize() <= BSONObjMaxUserSize);
        assert( toupdate.objsize() < m.header()->dataLen() );
        assert( query.objsize() + toupdate.objsize() < m.header()->dataLen() );
        bool upsert = flags & UpdateOption_Upsert;
        bool multi = flags & UpdateOption_Multi;
        bool broadcast = flags & UpdateOption_Broadcast;
        
        op.debug().query = query;
        op.setQuery(query);

        writelock lk;

        // writelock is used to synchronize stepdowns w/ writes
        uassert( 10054 ,  "not master", isMasterNs( ns ) );
        
        // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
        if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) )
            return;

        Client::Context ctx( ns );

        UpdateResult res = updateObjects(ns, toupdate, query, upsert, multi, true, op.debug() );
        lastError.getSafe()->recordUpdate( res.existing , res.num , res.upserted ); // for getlasterror
    }
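
The upsert/multi/broadcast booleans are single bits unpacked from the wire-protocol flags integer. A self-contained sketch of that decoding (the enum values are illustrative, not the actual wire constants):

    #include <cstdio>

    enum UpdateOption {
        UpdateOption_Upsert    = 1 << 0,
        UpdateOption_Multi     = 1 << 1,
        UpdateOption_Broadcast = 1 << 2,
    };

    int main() {
        int flags = UpdateOption_Upsert | UpdateOption_Multi;
        bool upsert    = flags & UpdateOption_Upsert;
        bool multi     = flags & UpdateOption_Multi;
        bool broadcast = flags & UpdateOption_Broadcast;
        std::printf("upsert=%d multi=%d broadcast=%d\n", upsert, multi, broadcast);
        return 0;
    }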
Example #3
    void receivedDelete(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();
        assert(*ns);
        uassert( 10056 ,  "not master", isMasterNs( ns ) );
        op.debug().str << ns << ' ';
        int flags = d.pullInt();
        bool justOne = flags & RemoveOption_JustOne;
        bool broadcast = flags & RemoveOption_Broadcast;
        assert( d.moreJSObjs() );
        BSONObj pattern = d.nextJsObj();
        {
            string s = pattern.toString();
            op.debug().str << " query: " << s;
            op.setQuery(pattern);
        }        

        writelock lk(ns);
        // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
        if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) )
            return;
        
        Client::Context ctx(ns);
        
        long long n = deleteObjects(ns, pattern, justOne, true);
        lastError.getSafe()->recordDelete( n );
    }
Example #4
    void receivedUpdate(Message& m, stringstream& ss) {
        DbMessage d(m);
        const char *ns = d.getns();
        assert(*ns);
        uassert( "not master", isMasterNs( ns ) );
        setClient(ns);
        Client& client = cc();
        client.top.setWrite();
        ss << ns << ' ';
        int flags = d.pullInt();
        BSONObj query = d.nextJsObj();

        assert( d.moreJSObjs() );
        assert( query.objsize() < m.data->dataLen() );
        BSONObj toupdate = d.nextJsObj();
        uassert("update object too large", toupdate.objsize() <= MaxBSONObjectSize);
        assert( toupdate.objsize() < m.data->dataLen() );
        assert( query.objsize() + toupdate.objsize() < m.data->dataLen() );
        bool upsert = flags & Option_Upsert;
        bool multi = flags & Option_Multi;
        {
            string s = query.toString();
            /* todo: we shouldn't do all this ss stuff when we don't need it, it will slow us down. */
            ss << " query: " << s;
            CurOp& currentOp = *client.curop();
            strncpy(currentOp.query, s.c_str(), sizeof(currentOp.query)-2);
        }        
        UpdateResult res = updateObjects(ns, toupdate, query, upsert, multi, ss, true);
        recordUpdate( res.existing , res.num ); // for getlasterror
    }
Example #5
static bool checkIsMasterForCollection(const NamespaceString& ns, WriteErrorDetail** error) {
    if (!isMasterNs(ns.ns().c_str())) {
        WriteErrorDetail* errorDetail = *error = new WriteErrorDetail;
        errorDetail->setErrCode(ErrorCodes::NotMaster);
        errorDetail->setErrMessage(std::string(mongoutils::str::stream() <<
                                               "Not primary while writing to " << ns.ns()));
        return false;
    }
    return true;
}
static bool checkIsMasterForCollection(const std::string& ns, WriteOpResult* result) {
    if (!isMasterNs(ns.c_str())) {
        WriteErrorDetail* errorDetail = new WriteErrorDetail;
        result->setError(errorDetail);
        errorDetail->setErrCode(ErrorCodes::NotMaster);
        errorDetail->setErrMessage("Not primary while writing to " + ns);
        return false;
    }
    return true;
}
Example #7
    void receivedInsert(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();
        op.debug().ns = ns;

        // Auth checking for index writes happens later.
        if (NamespaceString(ns).coll != "system.indexes") {
            Status status = cc().getAuthorizationManager()->checkAuthForInsert(ns);
            uassert(16544, status.reason(), status.isOK());
        }

        if( !d.moreJSObjs() ) {
            // strange.  should we complain?
            return;
        }
        BSONObj first = d.nextJsObj();

        vector<BSONObj> multi;
        while (d.moreJSObjs()){
            if (multi.empty()) // first pass
                multi.push_back(first);
            multi.push_back( d.nextJsObj() );
        }

        PageFaultRetryableSection s;
        while ( true ) {
            try {
                Lock::DBWrite lk(ns);
                
                // CONCURRENCY TODO: is being read locked in big log sufficient here?
                // writelock is used to synchronize stepdowns w/ writes
                uassert( 10058 , "not master", isMasterNs(ns) );
                
                if ( handlePossibleShardedMessage( m , 0 ) )
                    return;
                
                Client::Context ctx(ns);
                
                if( !multi.empty() ) {
                    const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError;
                    insertMulti(keepGoing, ns, multi, op);
                    return;
                }
                
                checkAndInsert(ns, first);
                globalOpCounters.incInsertInWriteLock(1);
                op.debug().ninserted = 1;
                return;
            }
            catch ( PageFaultException& e ) {
                e.touch();
            }
        }
    }
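
The while (true) / catch (PageFaultException&) shape above is a retry loop: the attempt runs under the lock, and if a page fault escapes, the page is touched outside the lock and the whole attempt restarts, including the "not master" re-check. A standalone sketch of the pattern with illustrative types:

    #include <stdexcept>

    struct PageFaultException : std::runtime_error {
        PageFaultException() : std::runtime_error("page fault") {}
        void touch() const { /* fault the page in while no lock is held */ }
    };

    template <typename Attempt>
    void retryOnPageFault(Attempt attempt) {
        while (true) {
            try {
                attempt();   // acquire lock, re-check master, do the write
                return;
            } catch (const PageFaultException &e) {
                e.touch();   // bring the page in, then retry the whole attempt
            }
        }
    }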
Example #8
    static void lockedReceivedUpdate(const char *ns, Message &m, CurOp &op, const BSONObj &updateobj, const BSONObj &query,
                                     const bool upsert, const bool multi) {
        // void ReplSetImpl::relinquish() uses big write lock so 
        // this is thus synchronized given our lock above.
        uassert(10054,  "not master", isMasterNs(ns));

        Client::Context ctx(ns);
        scoped_ptr<Client::AlternateTransactionStack> altStack(opNeedsAltTxn(ns) ? new Client::AlternateTransactionStack : NULL);
        Client::Transaction transaction(DB_SERIALIZABLE);
        UpdateResult res = updateObjects(ns, updateobj, query, upsert, multi, true);
        transaction.commit();
        lastError.getSafe()->recordUpdate( res.existing , res.num , res.upserted ); // for getlasterror
    }
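
Example #8 relies on the scoped-transaction idiom: a stack-allocated transaction that aborts in its destructor unless commit() was called, so any exception between the write and the commit rolls the write back. A minimal sketch, assuming TokuMX-style semantics:

    #include <cstdio>

    class ScopedTxn {
        bool _committed = false;
    public:
        ~ScopedTxn() { if (!_committed) std::puts("abort"); }
        void commit() { _committed = true; std::puts("commit"); }
    };

    static void lockedWrite() {
        ScopedTxn txn;    // begin the serializable transaction
        // ... perform the write; a throw here aborts the transaction ...
        txn.commit();     // make the write durable
    }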
Example #9
    static void lockedReceivedInsert(const char *ns, Message &m, const vector<BSONObj> &objs, CurOp &op, const bool keepGoing) {
        // writelock is used to synchronize stepdowns w/ writes
        uassert(10058, "not master", isMasterNs(ns));

        Client::Context ctx(ns);
        scoped_ptr<Client::AlternateTransactionStack> altStack(opNeedsAltTxn(ns) ? new Client::AlternateTransactionStack : NULL);
        Client::Transaction transaction(DB_SERIALIZABLE);
        insertObjects(ns, objs, keepGoing, 0, true);
        transaction.commit();
        size_t n = objs.size();
        globalOpCounters.gotInsert(n);
        op.debug().ninserted = n;
    }
Example #10
    /** we allow queries to SimpleSlaves */
    void replVerifyReadsOk(const std::string& ns, const LiteParsedQuery* pq) {
        if( replSet ) {
            // todo: speed up the secondary case.  as written here there are 2 mutex entries, it
            // can be 1.
            if (isMasterNs(ns.c_str())) return;
            if ( cc().isGod() ) return;

            uassert(NotMasterNoSlaveOkCode, "not master and slaveOk=false",
                    !pq || pq->hasOption(QueryOption_SlaveOk) || pq->hasReadPref());
            uassert(NotMasterOrSecondaryCode,
                    "not master or secondary; cannot currently read from this replSet member",
                    theReplSet && theReplSet->isSecondary() );
        }
        else {
            // master/slave
            uassert(NotMaster,
                    "not master",
                    isMasterNs(ns.c_str()) ||
                        pq == NULL || 
                        pq->hasOption(QueryOption_SlaveOk) ||
                        replSettings.slave == SimpleSlave );
        }
    }
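
The replica-set branch above boils down to a small predicate: allow the read if this node is master or the client is god; otherwise require that the client opted into secondary reads (slaveOk or a read preference) and that the node really is a secondary. As a standalone, illustrative function (the no-LiteParsedQuery case is omitted):

    bool replSetReadOk(bool isMaster, bool isGod, bool slaveOk, bool hasReadPref,
                       bool isSecondary) {
        if (isMaster || isGod)
            return true;
        if (!(slaveOk || hasReadPref))
            return false;    // "not master and slaveOk=false"
        return isSecondary;  // else "not master or secondary; cannot read"
    }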
Example #11
    void receivedInsert(Message& m, stringstream& ss) {
        DbMessage d(m);
        const char *ns = d.getns();
        assert(*ns);
        uassert( "not master", isMasterNs( ns ) );
        setClient(ns);
        cc().top.setWrite();
        ss << ns;
		
        while ( d.moreJSObjs() ) {
            BSONObj js = d.nextJsObj();
            uassert("object to insert too large", js.objsize() <= MaxBSONObjectSize);
            theDataFileMgr.insert(ns, js, false);
            logOp("i", ns, js);
        }
    }
Example #12
    void receivedInsert(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();
        op.debug().ns = ns;

        if( !d.moreJSObjs() ) {
            // strange.  should we complain?
            return;
        }
        BSONObj first = d.nextJsObj();

        vector<BSONObj> multi;
        while (d.moreJSObjs()){
            if (multi.empty()) // first pass
                multi.push_back(first);
            multi.push_back( d.nextJsObj() );
        }

        PageFaultRetryableSection s;
        while ( true ) {
            try {
                Lock::DBWrite lk(ns);
                
                // CONCURRENCY TODO: is being read locked in big log sufficient here?
                // writelock is used to synchronize stepdowns w/ writes
                uassert( 10058 , "not master", isMasterNs(ns) );
                
                if ( handlePossibleShardedMessage( m , 0 ) )
                    return;
                
                Client::Context ctx(ns);
                
                if( !multi.empty() ) {
                    const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError;
                    insertMulti(keepGoing, ns, multi);
                    return;
                }
                
                checkAndInsert(ns, first);
                globalOpCounters.incInsertInWriteLock(1);
                return;
            }
            catch ( PageFaultException& e ) {
                e.touch();
            }
        }
    }
Example #13
    void receivedUpdate(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();
        op.debug().ns = ns;
        int flags = d.pullInt();
        BSONObj query = d.nextJsObj();

        verify( d.moreJSObjs() );
        verify( query.objsize() < m.header()->dataLen() );
        BSONObj toupdate = d.nextJsObj();
        uassert( 10055 , "update object too large", toupdate.objsize() <= BSONObjMaxUserSize);
        verify( toupdate.objsize() < m.header()->dataLen() );
        verify( query.objsize() + toupdate.objsize() < m.header()->dataLen() );
        bool upsert = flags & UpdateOption_Upsert;
        bool multi = flags & UpdateOption_Multi;
        bool broadcast = flags & UpdateOption_Broadcast;

        Status status = cc().getAuthorizationManager()->checkAuthForUpdate(ns, upsert);
        uassert(16538, status.reason(), status.isOK());

        op.debug().query = query;
        op.setQuery(query);

        PageFaultRetryableSection s;
        while ( 1 ) {
            try {
                Lock::DBWrite lk(ns);
                
                // void ReplSetImpl::relinquish() uses big write lock so 
                // this is thus synchronized given our lock above.
                uassert( 10054 ,  "not master", isMasterNs( ns ) );
                
                // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
                if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) )
                    return;
                
                Client::Context ctx( ns );
                
                UpdateResult res = updateObjects(ns, toupdate, query, upsert, multi, true, op.debug() );
                lastError.getSafe()->recordUpdate( res.existing , res.num , res.upserted ); // for getlasterror
                break;
            }
            catch ( PageFaultException& e ) {
                e.touch();
            }
        }
    }
Example #14
    void receivedDelete(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();

        Status status = cc().getAuthorizationManager()->checkAuthForDelete(ns);
        uassert(16542, status.reason(), status.isOK());

        op.debug().ns = ns;
        int flags = d.pullInt();
        verify(d.moreJSObjs());
        BSONObj pattern = d.nextJsObj();

        op.debug().query = pattern;
        op.setQuery(pattern);

        const bool justOne = flags & RemoveOption_JustOne;
        const bool broadcast = flags & RemoveOption_Broadcast;

        OpSettings settings;
        settings.setQueryCursorMode(WRITE_LOCK_CURSOR);
        settings.setJustOne(justOne);
        cc().setOpSettings(settings);

        Client::ShardedOperationScope sc;
        if (!broadcast && sc.handlePossibleShardedMessage(m, 0)) {
            return;
        }

        LOCK_REASON(lockReason, "delete");
        Lock::DBRead lk(ns, lockReason);

        // writelock is used to synchronize stepdowns w/ writes
        uassert(10056, "not master", isMasterNs(ns));

        Client::Context ctx(ns);
        long long n;
        scoped_ptr<Client::AlternateTransactionStack> altStack(opNeedsAltTxn(ns) ? new Client::AlternateTransactionStack : NULL);
        Client::Transaction transaction(DB_SERIALIZABLE);
        n = deleteObjects(ns, pattern, justOne, true);
        transaction.commit();

        lastError.getSafe()->recordDelete( n );
        op.debug().ndeleted = n;
    }
Example #15
    void receivedDelete(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();

        Status status = cc().getAuthorizationManager()->checkAuthForDelete(ns);
        uassert(16542, status.reason(), status.isOK());

        op.debug().ns = ns;
        int flags = d.pullInt();
        bool justOne = flags & RemoveOption_JustOne;
        bool broadcast = flags & RemoveOption_Broadcast;
        verify( d.moreJSObjs() );
        BSONObj pattern = d.nextJsObj();
        
        op.debug().query = pattern;
        op.setQuery(pattern);

        PageFaultRetryableSection s;
        while ( 1 ) {
            try {
                Lock::DBWrite lk(ns);
                
                // writelock is used to synchronize stepdowns w/ writes
                uassert( 10056 ,  "not master", isMasterNs( ns ) );
                
                // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
                if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) )
                    return;
                
                Client::Context ctx(ns);
                
                long long n = deleteObjects(ns, pattern, justOne, true);
                lastError.getSafe()->recordDelete( n );
                op.debug().ndeleted = n;
                break;
            }
            catch ( PageFaultException& e ) {
                LOG(2) << "recordDelete got a PageFaultException" << endl;
                e.touch();
            }
        }
    }
Example #16
    void receivedUpdate(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();
        assert(*ns);
        op.debug().str << ns << ' ';
        int flags = d.pullInt();
        BSONObj query = d.nextJsObj();

        assert( d.moreJSObjs() );
        assert( query.objsize() < m.header()->dataLen() );
        BSONObj toupdate = d.nextJsObj();
        uassert( 10055 , "update object too large", toupdate.objsize() <= BSONObjMaxUserSize);
        assert( toupdate.objsize() < m.header()->dataLen() );
        assert( query.objsize() + toupdate.objsize() < m.header()->dataLen() );
        bool upsert = flags & UpdateOption_Upsert;
        bool multi = flags & UpdateOption_Multi;
        bool broadcast = flags & UpdateOption_Broadcast;
        {
            string s = query.toString();
            /* todo: we shouldn't do all this ss stuff when we don't need it, it will slow us down.
               instead, let's just store the query BSON in the debug object, and it can toString()
               lazily
            */
            op.debug().str << " query: " << s;
            op.setQuery(query);
        }

        writelock lk;

        // writelock is used to synchronize stepdowns w/ writes
        uassert( 10054 ,  "not master", isMasterNs( ns ) );
        
        // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit
        if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) )
            return;

        Client::Context ctx( ns );

        UpdateResult res = updateObjects(ns, toupdate, query, upsert, multi, true, op.debug() );
        lastError.getSafe()->recordUpdate( res.existing , res.num , res.upserted ); // for getlasterror
    }
Example #17
 void receivedDelete(Message& m, stringstream &ss) {
     DbMessage d(m);
     const char *ns = d.getns();
     assert(*ns);
     uassert( "not master", isMasterNs( ns ) );
     setClient(ns);
     Client& client = cc();
     client.top.setWrite();
     int flags = d.pullInt();
     bool justOne = flags & 1;
     assert( d.moreJSObjs() );
     BSONObj pattern = d.nextJsObj();
     {
         string s = pattern.toString();
         ss << " query: " << s;
         CurOp& currentOp = *client.curop();
         strncpy(currentOp.query, s.c_str(), sizeof(currentOp.query)-2);
     }        
     int n = deleteObjects(ns, pattern, justOne, true);
     recordDelete( n );
 }
Example #18
    // throws DBException
    void buildAnIndex( OperationContext* txn,
                       Collection* collection,
                       IndexCatalogEntry* btreeState,
                       bool mayInterrupt ) {

        string ns = collection->ns().ns(); // our copy
        const IndexDescriptor* idx = btreeState->descriptor();
        const BSONObj& idxInfo = idx->infoObj();

        MONGO_TLOG(0) << "build index on: " << ns
                      << " properties: " << idx->toString() << endl;
        audit::logCreateIndex( currentClient.get(), &idxInfo, idx->indexName(), ns );

        Timer t;

        verify( Lock::isWriteLocked( ns ) );
        // this is so that people know there are more keys to look at when doing
        // things like in place updates, etc...
        collection->infoCache()->addedIndex();

        if ( collection->numRecords() == 0 ) {
            Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
            massert( 17343,
                     str::stream() << "IndexAccessMethod::initializeAsEmpty failed: " << status.toString(),
                     status.isOK() );
            MONGO_TLOG(0) << "\t added index to empty collection";
            return;
        }

        scoped_ptr<BackgroundOperation> backgroundOperation;
        bool doInBackground = false;

        if ( idxInfo["background"].trueValue() && !inDBRepair ) {
            doInBackground = true;
            backgroundOperation.reset( new BackgroundOperation(ns) );
            uassert( 13130,
                     "can't start bg index b/c in recursive lock (db.eval?)",
                     !Lock::nested() );
            log() << "\t building index in background";
        }

        Status status = btreeState->accessMethod()->initializeAsEmpty(txn);
        massert( 17342,
                 str::stream()
                 << "IndexAccessMethod::initializeAsEmpty failed: "
                 << status.toString(),
                 status.isOK() );

        IndexAccessMethod* bulk = doInBackground ?
            NULL : btreeState->accessMethod()->initiateBulk(txn,
                                                            collection->numRecords());
        scoped_ptr<IndexAccessMethod> bulkHolder(bulk);
        IndexAccessMethod* iam = bulk ? bulk : btreeState->accessMethod();

        if ( bulk )
            log() << "\t building index using bulk method";

        unsigned long long n = addExistingToIndex( txn,
                                                   collection,
                                                   btreeState->descriptor(),
                                                   iam,
                                                   doInBackground );

        if ( bulk ) {
            LOG(1) << "\t bulk commit starting";
            std::set<DiskLoc> dupsToDrop;

            Status status = btreeState->accessMethod()->commitBulk( bulk,
                                                                    mayInterrupt,
                                                                    &dupsToDrop );

            // Code above us expects a uassert in case of dupkey errors.
            if (ErrorCodes::DuplicateKey == status.code()) {
                uassertStatusOK(status);
            }

            // Any other errors are probably bad and deserve a massert.
            massert( 17398,
                     str::stream() << "commitBulk failed: " << status.toString(),
                     status.isOK() );

            if ( dupsToDrop.size() )
                log() << "\t bulk dropping " << dupsToDrop.size() << " dups";

            for( set<DiskLoc>::const_iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); ++i ) {
                BSONObj toDelete;
                collection->deleteDocument( txn,
                                            *i,
                                            false /* cappedOk */,
                                            true /* noWarn */,
                                            &toDelete );
                if (isMasterNs(ns.c_str())) {
                    logOp( txn, "d", ns.c_str(), toDelete );
                }
                
                txn->recoveryUnit()->commitIfNeeded();

                RARELY if ( mayInterrupt ) {
                    txn->checkForInterrupt();
                }
            }
        }

        verify( !btreeState->head().isNull() );
        MONGO_TLOG(0) << "build index done.  scanned " << n << " total records. "
                      << t.millis() / 1000.0 << " secs" << endl;

        // this one is so people know that the index is finished
        collection->infoCache()->addedIndex();
    }
Example #19
    long long DeleteExecutor::execute() {
        uassertStatusOK(prepare());
        uassert(17417,
                mongoutils::str::stream() <<
                "DeleteExecutor::prepare() failed to parse query " << _request->getQuery(),
                _isQueryParsed);
        const bool logop = _request->shouldCallLogOp();
        const NamespaceString& ns(_request->getNamespaceString());
        if (!_request->isGod()) {
            if (ns.isSystem()) {
                uassert(12050,
                        "cannot delete from system namespace",
                        legalClientSystemNS(ns.ns(), true));
            }
            if (ns.ns().find('$') != string::npos) {
                log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
                uasserted( 10100, "cannot delete from collection with reserved $ in name" );
            }
        }

        massert(17418,
                mongoutils::str::stream() <<
                "dbname = " << currentClient.get()->database()->name() <<
                "; ns = " << ns.ns(),
                currentClient.get()->database()->name() == nsToDatabaseSubstring(ns.ns()));
        Collection* collection = currentClient.get()->database()->getCollection(ns.ns());
        if (NULL == collection) {
            return 0;
        }

        uassert(10101,
                str::stream() << "cannot remove from a capped collection: " << ns.ns(),
                !collection->isCapped());

        uassert(ErrorCodes::NotMaster,
                str::stream() << "Not primary while removing from " << ns.ns(),
                !logop || isMasterNs(ns.ns().c_str()));

        long long nDeleted = 0;

        const bool canYield = !_request->isGod() && (
                _canonicalQuery.get() ?
                !QueryPlannerCommon::hasNode(_canonicalQuery->root(), MatchExpression::ATOMIC) :
                LiteParsedQuery::isQueryIsolated(_request->getQuery()));

        Runner* rawRunner;
        if (_canonicalQuery.get()) {
            uassertStatusOK(getRunner(collection, _canonicalQuery.release(), &rawRunner));
        }
        else {
            CanonicalQuery* ignored;
            uassertStatusOK(getRunner(collection,
                                      ns.ns(),
                                      _request->getQuery(),
                                      &rawRunner,
                                      &ignored));
        }

        auto_ptr<Runner> runner(rawRunner);
        auto_ptr<ScopedRunnerRegistration> safety;

        if (canYield) {
            safety.reset(new ScopedRunnerRegistration(runner.get()));
            runner->setYieldPolicy(Runner::YIELD_AUTO);
        }

        DiskLoc rloc;
        Runner::RunnerState state;
        CurOp* curOp = cc().curop();
        int oldYieldCount = curOp->numYields();
        while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &rloc))) {
            if (oldYieldCount != curOp->numYields()) {
                uassert(ErrorCodes::NotMaster,
                        str::stream() << "No longer primary while removing from " << ns.ns(),
                        !logop || isMasterNs(ns.ns().c_str()));
                oldYieldCount = curOp->numYields();
            }
            BSONObj toDelete;

            // TODO: do we want to buffer docs and delete them in a group rather than
            // saving/restoring state repeatedly?
            runner->saveState();
            collection->deleteDocument(rloc, false, false, logop ? &toDelete : NULL );
            runner->restoreState();

            nDeleted++;

            if (logop) {
                if ( toDelete.isEmpty() ) {
                    problem() << "deleted object without id, not logging" << endl;
                }
                else {
                    bool replJustOne = true;
                    logOp("d", ns.ns().c_str(), toDelete, 0, &replJustOne);
                }
            }

            if (!_request->isMulti()) {
                break;
            }

            if (!_request->isGod()) {
                getDur().commitIfNeeded();
            }

            if (debug && _request->isGod() && nDeleted == 100) {
                log() << "warning high number of deletes with god=true "
                      << " which could use significant memory b/c we don't commit journal";
            }
        }

        return nDeleted;
    }
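
Note how the delete loop re-checks primaryship only when curOp->numYields() has moved, i.e. only on iterations where the runner actually yielded its locks, rather than on every document. A minimal standalone sketch of that yield-count trick (types are stand-ins):

    #include <stdexcept>

    struct CurOpStub { int yields = 0; };

    void deleteLoop(CurOpStub &op, bool (*stillPrimary)()) {
        int oldYieldCount = op.yields;
        for (;;) {
            // ... advance the runner; it may yield and bump op.yields ...
            if (oldYieldCount != op.yields) {
                if (!stillPrimary())
                    throw std::runtime_error("No longer primary");
                oldYieldCount = op.yields;
            }
            // ... delete the current document ...
            break;  // illustration only: a real loop runs until the runner is EOF
        }
    }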
Example #20
    UpdateResult update(
            OperationContext* txn,
            Database* db,
            const UpdateRequest& request,
            OpDebug* opDebug,
            UpdateDriver* driver,
            CanonicalQuery* cq) {

        LOG(3) << "processing update : " << request;

        std::auto_ptr<CanonicalQuery> cqHolder(cq);
        const NamespaceString& nsString = request.getNamespaceString();
        UpdateLifecycle* lifecycle = request.getLifecycle();

        Collection* collection = db->getCollection(nsString.ns());

        validateUpdate(nsString.ns().c_str(), request.getUpdates(), request.getQuery());


        // TODO: This seems a bit circuitous.
        opDebug->updateobj = request.getUpdates();

        if (lifecycle) {
            lifecycle->setCollection(collection);
            driver->refreshIndexKeys(lifecycle->getIndexKeys());
        }

        Runner* rawRunner;
        Status status = cq ?
            getRunner(collection, cqHolder.release(), &rawRunner) :
            getRunner(collection, nsString.ns(), request.getQuery(), &rawRunner, &cq);
        uassert(17243,
                "could not get runner " + request.getQuery().toString() + "; " + causedBy(status),
                status.isOK());

        // Create the runner and setup all deps.
        auto_ptr<Runner> runner(rawRunner);

        // Register Runner with ClientCursor
        const ScopedRunnerRegistration safety(runner.get());

        //
        // We'll start assuming we have one or more documents for this update. (Otherwise,
        // we'll fall back to the insert case, if upsert is true.)
        //

        // We are an update until we fall into the insert case below.
        driver->setContext(ModifierInterface::ExecInfo::UPDATE_CONTEXT);

        int numMatched = 0;

        // If the update was in-place, we may see it again.  This only matters if we're doing
        // a multi-update; if we're not doing a multi-update we stop after one update and we
        // won't see any more docs.
        //
        // For example: If we're scanning an index {x:1} and performing {$inc:{x:5}}, we'll keep
        // moving the document forward and it will continue to reappear in our index scan.
        // Unless the index is multikey, the underlying query machinery won't de-dup.
        //
        // If the update wasn't in-place we may see it again.  Our query may return the new
        // document and we wouldn't want to update that.
        //
        // So, no matter what, we keep track of where the doc wound up.
        typedef unordered_set<DiskLoc, DiskLoc::Hasher> DiskLocSet;
        const scoped_ptr<DiskLocSet> updatedLocs(request.isMulti() ? new DiskLocSet : NULL);

        // Reset these counters on each call. We might re-enter this function to retry this
        // update if we throw a page fault exception below, and we rely on these counters
        // reflecting only the actions taken locally. In particular, we must have the no-op
        // counter reset so that we can meaningfully compare it with numMatched above.
        opDebug->nscanned = 0;
        opDebug->nscannedObjects = 0;
        opDebug->nModified = 0;

        // Get the cached document from the update driver.
        mutablebson::Document& doc = driver->getDocument();
        mutablebson::DamageVector damages;

        // Used during iteration of docs
        BSONObj oldObj;

        // Get first doc, and location
        Runner::RunnerState state = Runner::RUNNER_ADVANCED;

        uassert(ErrorCodes::NotMaster,
                mongoutils::str::stream() << "Not primary while updating " << nsString.ns(),
                !request.shouldCallLogOp() || isMasterNs(nsString.ns().c_str()));

        while (true) {
            // Get next doc, and location
            DiskLoc loc;
            state = runner->getNext(&oldObj, &loc);

            if (state != Runner::RUNNER_ADVANCED) {
                if (state == Runner::RUNNER_EOF) {
                    // We have reached the logical end of the loop, so do yielding recovery
                    break;
                }
                else {
                    uassertStatusOK(Status(ErrorCodes::InternalError,
                                           str::stream() << " Update query failed -- "
                                                         << Runner::statestr(state)));
                }
            }

            // We fill this with the new locs of moved doc so we don't double-update.
            if (updatedLocs && updatedLocs->count(loc) > 0) {
                continue;
            }

            // We count how many documents we scanned even though we may skip those that are
            // deemed duplicated. The final 'numMatched' and 'nscanned' numbers may differ for
            // that reason.
            // TODO: Do we want to pull this out of the underlying query plan?
            opDebug->nscanned++;

            // Found a matching document
            opDebug->nscannedObjects++;
            numMatched++;

            // Ask the driver to apply the mods. It may be that the driver can apply those "in
            // place", that is, some values of the old document just get adjusted without any
            // change to the binary layout on the bson layer. It may be that a whole new
            // document is needed to accommodate the new bson layout of the resulting document.
            doc.reset(oldObj, mutablebson::Document::kInPlaceEnabled);
            BSONObj logObj;


            FieldRefSet updatedFields;

            Status status = Status::OK();
            if (!driver->needMatchDetails()) {
                // If we don't need match details, avoid doing the rematch
                status = driver->update(StringData(), &doc, &logObj, &updatedFields);
            }
            else {
                // If there was a matched field, obtain it.
                MatchDetails matchDetails;
                matchDetails.requestElemMatchKey();

                dassert(cq);
                verify(cq->root()->matchesBSON(oldObj, &matchDetails));

                string matchedField;
                if (matchDetails.hasElemMatchKey())
                    matchedField = matchDetails.elemMatchKey();

                // TODO: Right now, each mod checks in 'prepare' that if it needs positional
                // data, that a non-empty StringData() was provided. In principle, we could do
                // that check here in an else clause to the above conditional and remove the
                // checks from the mods.

                status = driver->update(matchedField, &doc, &logObj, &updatedFields);
            }

            if (!status.isOK()) {
                uasserted(16837, status.reason());
            }

            // Ensure _id exists and is first
            uassertStatusOK(ensureIdAndFirst(doc));

            // If the driver applied the mods in place, we can ask the mutable for what
            // changed. We call those changes "damages". :) We use the damages to inform the
            // journal what was changed, and then apply them to the original document
            // ourselves. If, however, the driver applied the mods out of place, we ask it to
            // generate a new, modified document for us. In that case, the file manager will
            // take care of the journaling details for us.
            //
            // This code flow is admittedly odd. But, right now, journaling is baked in the file
            // manager. And if we aren't using the file manager, we have to do journaling
            // ourselves.
            bool docWasModified = false;
            BSONObj newObj;
            const char* source = NULL;
            bool inPlace = doc.getInPlaceUpdates(&damages, &source);

            // If something changed in the document, verify that no immutable fields were changed
            // and data is valid for storage.
            if ((!inPlace || !damages.empty()) ) {
                if (!(request.isFromReplication() || request.isFromMigration())) {
                    const std::vector<FieldRef*>* immutableFields = NULL;
                    if (lifecycle)
                        immutableFields = lifecycle->getImmutableFields();

                    uassertStatusOK(validate(oldObj,
                                             updatedFields,
                                             doc,
                                             immutableFields,
                                             driver->modOptions()) );
                }
            }

            // Save state before making changes
            runner->saveState();

            if (inPlace && !driver->modsAffectIndices()) {

                // If a set of modifiers were all no-ops, we are still 'in place', but there is
                // no work to do, in which case we want to consider the object unchanged.
                if (!damages.empty() ) {
                    collection->updateDocumentWithDamages( txn, loc, source, damages );
                    docWasModified = true;
                    opDebug->fastmod = true;
                }

                newObj = oldObj;
            }
            else {

                // The updates were not in place. Apply them through the file manager.
                newObj = doc.getObject();
                uassert(17419,
                        str::stream() << "Resulting document after update is larger than "
                                      << BSONObjMaxUserSize,
                        newObj.objsize() <= BSONObjMaxUserSize);
                StatusWith<DiskLoc> res = collection->updateDocument(txn,
                                                                     loc,
                                                                     newObj,
                                                                     true,
                                                                     opDebug);
                uassertStatusOK(res.getStatus());
                DiskLoc newLoc = res.getValue();
                docWasModified = true;

                // If the document moved, we might see it again in a collection scan (maybe it's
                // a document after our current document).
                //
                // If the document is indexed and the mod changes an indexed value, we might see it
                // again.  For an example, see the comment above near declaration of updatedLocs.
                if (updatedLocs && (newLoc != loc || driver->modsAffectIndices())) {
                    updatedLocs->insert(newLoc);
                }
            }

            // Restore state after modification
            uassert(17278,
                    "Update could not restore runner state after updating a document.",
                    runner->restoreState());

            // Call logOp if requested.
            if (request.shouldCallLogOp() && !logObj.isEmpty()) {
                BSONObj idQuery = driver->makeOplogEntryQuery(newObj, request.isMulti());
                logOp(txn, "u", nsString.ns().c_str(), logObj , &idQuery,
                      NULL, request.isFromMigration());
            }

            // Only record doc modifications if they wrote (exclude no-ops)
            if (docWasModified)
                opDebug->nModified++;

            if (!request.isMulti()) {
                break;
            }

            // Opportunity for journaling to write during the update.
            txn->recoveryUnit()->commitIfNeeded();
        }

        // TODO: Can this be simplified?
        if ((numMatched > 0) || (numMatched == 0 && !request.isUpsert()) ) {
            opDebug->nMatched = numMatched;
            return UpdateResult(numMatched > 0 /* updated existing object(s) */,
                                !driver->isDocReplacement() /* $mod or obj replacement */,
                                opDebug->nModified /* number of modified docs, no no-ops */,
                                numMatched /* # of docs matched/updated, even no-ops */,
                                BSONObj());
        }

        //
        // We haven't found any existing document so an insert is done
        // (upsert is true).
        //
        opDebug->upsert = true;

        // Since this is an insert (no docs found and upsert:true), we will be logging it
        // as an insert in the oplog. We don't need the driver's help to build the
        // oplog record, then. We also set the context of the update driver to the INSERT_CONTEXT.
        // Some mods may only work in that context (e.g. $setOnInsert).
        driver->setLogOp(false);
        driver->setContext(ModifierInterface::ExecInfo::INSERT_CONTEXT);

        // Reset the document we will be writing to
        doc.reset();

        // This remains the empty object in the case of an object replacement, but in the case
        // of an upsert where we are creating a base object from the query and applying mods,
        // we capture the query as the original so that we can detect immutable field mutations.
        BSONObj original = BSONObj();

        // Calling createFromQuery will populate the 'doc' with fields from the query which
        // creates the base of the update for the inserted doc (because upsert was true)
        if (cq) {
            uassertStatusOK(driver->populateDocumentWithQueryFields(cq, doc));
            // Validate the base doc, as taken from the query -- no fields means validate all.
            FieldRefSet noFields;
            uassertStatusOK(validate(BSONObj(), noFields, doc, NULL, driver->modOptions()));
            if (!driver->isDocReplacement()) {
                opDebug->fastmodinsert = true;
                // We need all the fields from the query to compare against for validation below.
                original = doc.getObject();
            }
            else {
                original = request.getQuery();
            }
        }
        else {
            fassert(17354, CanonicalQuery::isSimpleIdQuery(request.getQuery()));
            BSONElement idElt = request.getQuery()["_id"];
            original = idElt.wrap();
            fassert(17352, doc.root().appendElement(idElt));
        }

        // Apply the update modifications and then log the update as an insert manually.
        FieldRefSet updatedFields;
        status = driver->update(StringData(), &doc, NULL, &updatedFields);
        if (!status.isOK()) {
            uasserted(16836, status.reason());
        }

        // Ensure _id exists and is first
        uassertStatusOK(ensureIdAndFirst(doc));

        // Validate that the object replacement or modifiers resulted in a document
        // that contains all the immutable keys and can be stored.
        if (!(request.isFromReplication() || request.isFromMigration())){
            const std::vector<FieldRef*>* immutableFields = NULL;
            if (lifecycle)
                immutableFields = lifecycle->getImmutableFields();

            // This will only validate the modified fields if not a replacement.
            uassertStatusOK(validate(original,
                                     updatedFields,
                                     doc,
                                     immutableFields,
                                     driver->modOptions()) );
        }

        // Only create the collection if the doc will be inserted.
        if (!collection) {
            collection = db->getCollection(request.getNamespaceString().ns());
            if (!collection) {
                collection = db->createCollection(txn, request.getNamespaceString().ns());
            }
        }

        // Insert the doc
        BSONObj newObj = doc.getObject();
        uassert(17420,
                str::stream() << "Document to upsert is larger than " << BSONObjMaxUserSize,
                newObj.objsize() <= BSONObjMaxUserSize);

        StatusWith<DiskLoc> newLoc = collection->insertDocument(txn,
                                                                newObj,
                                                                !request.isGod() /*enforceQuota*/);
        uassertStatusOK(newLoc.getStatus());
        if (request.shouldCallLogOp()) {
            logOp(txn, "i", nsString.ns().c_str(), newObj,
                   NULL, NULL, request.isFromMigration());
        }

        opDebug->nMatched = 1;
        return UpdateResult(false /* updated a non existing document */,
                            !driver->isDocReplacement() /* $mod or obj replacement? */,
                            1 /* docs written*/,
                            1 /* count of updated documents */,
                            newObj /* object that was upserted */ );
    }
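
The updatedLocs set is the heart of multi-update correctness here: a document that moves, or whose indexed keys change, can reappear later in the same scan, so its new location is recorded and skipped if encountered again. A self-contained sketch with DiskLoc modeled as a plain integer:

    #include <unordered_set>
    #include <vector>

    using DiskLoc = long long;  // stand-in for the real DiskLoc

    int multiUpdate(const std::vector<DiskLoc> &scan) {
        std::unordered_set<DiskLoc> updatedLocs;
        int numMatched = 0;
        for (DiskLoc loc : scan) {
            if (updatedLocs.count(loc))
                continue;                // the doc moved here; already updated
            ++numMatched;
            DiskLoc newLoc = loc + 100;  // pretend the update moved the document
            updatedLocs.insert(newLoc);
        }
        return numMatched;
    }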
Example #21
        void doTTLForDB( const string& dbName ) {

            // check isMaster before becoming god
            bool isMaster = isMasterNs( dbName.c_str() );

            Client::GodScope god;

            vector<BSONObj> indexes;
            {
                auto_ptr<DBClientCursor> cursor =
                                db.query( dbName + ".system.indexes" ,
                                          BSON( secondsExpireField << BSON( "$exists" << true ) ) ,
                                          0 , /* default nToReturn */
                                          0 , /* default nToSkip */
                                          0 , /* default fieldsToReturn */
                                          QueryOption_SlaveOk ); /* perform on secondaries too */
                if ( cursor.get() ) {
                    while ( cursor->more() ) {
                        indexes.push_back( cursor->next().getOwned() );
                    }
                }
            }
            
            for ( unsigned i=0; i<indexes.size(); i++ ) {
                BSONObj idx = indexes[i];

                BSONObj key = idx["key"].Obj();
                if ( key.nFields() != 1 ) {
                    error() << "key for ttl index can only have 1 field" << endl;
                    continue;
                }

                BSONObj query;
                {
                    BSONObjBuilder b;
                    b.appendDate( "$lt" , curTimeMillis64() - ( 1000 * idx[secondsExpireField].numberLong() ) );
                    query = BSON( key.firstElement().fieldName() << b.obj() );
                }
                
                LOG(1) << "TTL: " << key << " \t " << query << endl;
                
                long long n = 0;
                {
                    string ns = idx["ns"].String();
                    Client::WriteContext ctx( ns );
                    NamespaceDetails* nsd = nsdetails( ns );
                    if ( ! nsd ) {
                        // collection was dropped
                        continue;
                    }
                    if ( nsd->setUserFlag( NamespaceDetails::Flag_UsePowerOf2Sizes ) ) {
                        nsd->syncUserFlags( ns );
                    }
                    // only do deletes if on master
                    if ( ! isMaster ) {
                        continue;
                    }

                    n = deleteObjects( ns.c_str() , query , false , true );
                    ttlDeletedDocuments.increment( n );
                }

                LOG(1) << "\tTTL deleted: " << n << endl;
            }
        }
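
The TTL query above selects documents whose indexed date field is below now minus the index's expireAfterSeconds. A standalone sketch of that cutoff computation using std::chrono in place of curTimeMillis64():

    #include <chrono>
    #include <cstdint>

    std::int64_t ttlCutoffMillis(std::int64_t expireAfterSeconds) {
        using namespace std::chrono;
        auto nowMs = duration_cast<milliseconds>(
                         system_clock::now().time_since_epoch()).count();
        return nowMs - 1000 * expireAfterSeconds;  // the $lt bound built above
    }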
Example #22
/**
 * @return true if not in sharded mode or if the version for this client is ok
 */
bool shardVersionOk( const string& ns , string& errmsg, ConfigVersion& received, ConfigVersion& wanted ) {
    if ( ! shardingState.enabled() )
        return true;

    if ( ! isMasterNs( ns.c_str() ) )  {
        // right now connections to secondaries aren't versioned at all
        return true;
    }

    ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

    if ( ! info ) {
        // this means the client has nothing sharded
        // so this allows direct connections to do whatever they want
        // which i think is the correct behavior
        return true;
    }

    if ( info->inForceVersionOkMode() ) {
        return true;
    }

    // TODO
    //   all collections at some point, be sharded or not, will have a version (and a ShardChunkManager)
    //   for now, we remove the sharding state of dropped collection
    //   so delayed request may come in. This has to be fixed.
    ConfigVersion clientVersion = info->getVersion(ns);
    ConfigVersion version;
    if ( ! shardingState.hasVersion( ns , version ) && ! clientVersion.isSet() ) {
        return true;
    }

    // The versions we're going to compare, saved for future use
    received = clientVersion;
    wanted = version;

    if ( ! version.isSet() && clientVersion.isSet() ) {
        stringstream ss;
        ss << "collection was dropped or this shard no longer valid version";
        errmsg = ss.str();
        return false;
    }

    if ( clientVersion >= version )
        return true;


    if ( ! clientVersion.isSet() ) {
        stringstream ss;
        ss << "client in sharded mode, but doesn't have version set for this collection";
        errmsg = ss.str();
        return false;
    }

    if ( version.majorVersion() == clientVersion.majorVersion() ) {
        // this means there was just a split
        // since on a split w/o a migrate this server is ok
        // going to accept
        return true;
    }

    stringstream ss;
    ss << "your version is too old";
    errmsg = ss.str();
    return false;
}
Example #23
    void receivedInsert(Message& m, CurOp& op) {
        DbMessage d(m);
        const char *ns = d.getns();
        op.debug().ns = ns;

        bool isIndexWrite = NamespaceString(ns).coll == "system.indexes";

        // Auth checking for index writes happens further down in this function.
        if (!isIndexWrite) {
            Status status = cc().getAuthorizationManager()->checkAuthForInsert(ns);
            uassert(16544, status.reason(), status.isOK());
        }

        if( !d.moreJSObjs() ) {
            // strange.  should we complain?
            return;
        }

        vector<BSONObj> multi;
        while (d.moreJSObjs()){
            BSONObj obj = d.nextJsObj();
            multi.push_back(obj);
            if (isIndexWrite) {
                string indexNS = obj.getStringField("ns");
                uassert(16548,
                        mongoutils::str::stream() << "not authorized to create index on "
                                << indexNS,
                        cc().getAuthorizationManager()->checkAuthorization(
                                indexNS, ActionType::ensureIndex));
            }
        }

        PageFaultRetryableSection s;
        while ( true ) {
            try {
                Lock::DBWrite lk(ns);
                
                // CONCURRENCY TODO: is being read locked in big log sufficient here?
                // writelock is used to synchronize stepdowns w/ writes
                uassert( 10058 , "not master", isMasterNs(ns) );
                
                if ( handlePossibleShardedMessage( m , 0 ) )
                    return;
                
                Client::Context ctx(ns);
                
                if (multi.size() > 1) {
                    const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError;
                    insertMulti(keepGoing, ns, multi, op);
                } else {
                    checkAndInsert(ns, multi[0]);
                    globalOpCounters.incInsertInWriteLock(1);
                    op.debug().ninserted = 1;
                }
                return;
            }
            catch ( PageFaultException& e ) {
                e.touch();
            }
        }
    }
Example #24
    /**
     * @return true if not in sharded mode or if the version for this client is ok
     */
    bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) {

        if ( ! shardingState.enabled() )
            return true;

        if ( ! isMasterNs( ns.c_str() ) )  {
            // right now connections to secondaries aren't versioned at all
            return true;
        }

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ) {
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which i think is the correct behavior
            return true;
        }

        if ( info->inForceVersionOkMode() ) {
            return true;
        }

        // TODO : all collections at some point, be sharded or not, will have a version
        //  (and a CollectionMetadata)
        received = info->getVersion( ns );
        wanted = shardingState.getVersion( ns );

        if( received.isWriteCompatibleWith( wanted ) ) return true;

        //
        // Figure out exactly why not compatible, send appropriate error message
        // The versions themselves are returned in the error, so not needed in messages here
        //

        // Check epoch first, to send more meaningful message, since other parameters probably
        // won't match either
        if( ! wanted.hasCompatibleEpoch( received ) ){
            errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", "
                                   << "the collection may have been dropped and recreated";
            return false;
        }

        if( ! wanted.isSet() && received.isSet() ){
            errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", "
                                   << "the collection may have been dropped";
            return false;
        }

        if( wanted.isSet() && ! received.isSet() ){
            errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", "
                                   << "but no version set in request";
            return false;
        }

        if( wanted.majorVersion() != received.majorVersion() ){

            //
            // Could be > or < - wanted is > if this is the source of a migration,
            // wanted < if this is the target of a migration
            //

            errmsg = str::stream() << "version mismatch detected for " << ns << ", "
                                   << "stored major version " << wanted.majorVersion()
                                   << " does not match received " << received.majorVersion();
            return false;
        }

        // Those are all the reasons the versions can mismatch
        verify( false );

        return false;
    }
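
The ladder of checks above is ordered to produce the most meaningful error first: epoch mismatch, then set/unset asymmetries, then the major version. A condensed standalone sketch over a toy version type (all fields and messages are illustrative):

    #include <string>

    struct ToyVersion {
        long long epoch = 0;
        int major = 0;
        bool isSet() const { return major != 0; }
    };

    bool versionOk(const ToyVersion &wanted, const ToyVersion &received,
                   std::string &errmsg) {
        if (wanted.epoch != received.epoch) {
            errmsg = "version epoch mismatch; collection may have been recreated";
            return false;
        }
        if (!wanted.isSet() && received.isSet()) {
            errmsg = "this shard no longer contains chunks";
            return false;
        }
        if (wanted.isSet() && !received.isSet()) {
            errmsg = "no version set in request";
            return false;
        }
        if (wanted.major != received.major) {
            errmsg = "major version mismatch";
            return false;
        }
        return true;  // write-compatible
    }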