Esempio n. 1
0
        void addOp(const string& op, BSONObj o, BSONObj* o2 = NULL, const char* coll = NULL,
                   int version = 0) {
            OpTime ts(getNextGlobalOptime());

            BSONObjBuilder b;
            b.appendTimestamp("ts", ts.asLL());
            if (version != 0) {
                b.append("v", version);
            }
            b.append("op", op);
            b.append("o", o);

            if (o2) {
                b.append("o2", *o2);
            }

            if (coll) {
                b.append("ns", coll);
            }
            else {
                b.append("ns", ns());
            }

            _bgsync->addDoc(b.done());
        }
Esempio n. 2
0
    static void _logOpOld(OperationContext* txn,
                          const char *opstr,
                          const char *ns,
                          const char *logNS,
                          const BSONObj& obj,
                          BSONObj *o2,
                          bool *bb,
                          bool fromMigrate ) {
        Lock::DBWrite lk(txn->lockState(), "local");
        WriteUnitOfWork wunit(txn);
        static BufBuilder bufbuilder(8*1024); // todo there is likely a mutex on this constructor

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
        }

        if ( localOplogMainCollection == 0 ) {
            Client::Context ctx(txn, logNS);
            localDB = ctx.db();
            verify( localDB );
            localOplogMainCollection = localDB->getCollection(txn, logNS);
            verify( localOplogMainCollection );
        }

        Client::Context ctx(txn, logNS , localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogMainCollection->insertDocument( txn, &writer, false ) );

        ctx.getClient()->setLastOp( ts );
        wunit.commit();
    }
Status ModifierCurrentDate::apply() const {
    const bool destExists = (_preparedState->elemFound.ok() &&
                             _preparedState->idxFound == (_updatePath.numParts() - 1));

    mutablebson::Document& doc = _preparedState->doc;
    StringData lastPart = _updatePath.getPart(_updatePath.numParts() - 1);
    // If the element exists and is the same type, then that is what we want to work with
    mutablebson::Element elemToSet = destExists ? _preparedState->elemFound : doc.end();

    if (!destExists) {
        // Creates the final element that's going to be $set in 'doc'.
        // fills in the value with place-holder/empty

        elemToSet = _typeIsDate ? doc.makeElementDate(lastPart, Date_t())
                                : doc.makeElementTimestamp(lastPart, OpTime());

        if (!elemToSet.ok()) {
            return Status(ErrorCodes::InternalError, "can't create new element");
        }

        // Now, we can be in two cases here, as far as attaching the element being set goes:
        // (a) none of the parts in the element's path exist, or (b) some parts of the path
        // exist but not all.
        if (!_preparedState->elemFound.ok()) {
            _preparedState->elemFound = doc.root();
            _preparedState->idxFound = 0;
        } else {
            _preparedState->idxFound++;
        }

        // createPathAt() will complete the path and attach 'elemToSet' at the end of it.
        Status s = pathsupport::createPathAt(
            _updatePath, _preparedState->idxFound, _preparedState->elemFound, elemToSet);
        if (!s.isOK())
            return s;
    }

    dassert(elemToSet.ok());

    // By the time we are here the element is in place and we just need to update the value
    if (_typeIsDate) {
        const mongo::Date_t now = mongo::jsTime();
        Status s = elemToSet.setValueDate(now);
        if (!s.isOK())
            return s;
    } else {
        Status s = elemToSet.setValueTimestamp(getNextGlobalOptime());
        if (!s.isOK())
            return s;
    }

    // Set the elemFound, idxFound to the changed element for oplog logging.
    _preparedState->elemFound = elemToSet;
    _preparedState->idxFound = (_updatePath.numParts() - 1);

    return Status::OK();
}
Esempio n. 4
0
        void insertSucceed() {
            BSONObjBuilder b;
            OpTime ts(getNextGlobalOptime());

            b.appendTimestamp("ts", ts.asLL());
            b.append("op", "i");
            b.append("o", BSON("_id" << 123 << "x" << 456));
            b.append("ns", cappedNs());
            verify(apply(b.obj()));
        }
Esempio n. 5
0
        void updateSucceed() {
            BSONObjBuilder b;
            OpTime ts(getNextGlobalOptime());

            b.appendTimestamp("ts", ts.asLL());
            b.append("op", "u");
            b.append("o", BSON("$set" << BSON("x" << 789)));
            b.append("o2", BSON("x" << 456));
            b.append("ns", cappedNs());

            verify(apply(b.obj()));
        }
Esempio n. 6
0
        BSONObj updateFail() {
            BSONObjBuilder b;
            OpTime ts(getNextGlobalOptime());

            b.appendTimestamp("ts", ts.asLL());
            b.append("op", "u");
            b.append("o", BSON("$set" << BSON("x" << 456)));
            b.append("o2", BSON("_id" << 123 << "x" << 123));
            b.append("ns", _cappedNs);
            BSONObj o = b.obj();

            verify(!apply(o));
            return o;
        }
Esempio n. 7
0
        void run() {
            OpTime o(getNextGlobalOptime());

            BSONObjBuilder b;
            b.append("ns","dummy");
            b.appendTimestamp("ts", o.asLL());
            BSONObj obj = b.obj();
            MockInitialSync mock;

            // all three should succeed
            std::vector<BSONObj> ops;
            ops.push_back(obj);
            repl::multiInitialSyncApply(ops, &mock);

            mock.failOnStep = MockInitialSync::FAIL_FIRST_APPLY;
            repl::multiInitialSyncApply(ops, &mock);

            mock.retry = false;
            repl::multiInitialSyncApply(ops, &mock);

            drop();
        }
Esempio n. 8
0
        void run() {
            OpTime o(getNextGlobalOptime());

            BSONObjBuilder b;
            b.appendTimestamp("ts", o.asLL());
            b.append("op", "u");
            b.append("o", BSON("$set" << BSON("x" << 456)));
            b.append("o2", BSON("_id" << 123));
            b.append("ns", ns());
            BSONObj obj = b.obj();
            SyncTest2 sync2;
            std::vector<BSONObj> ops;
            ops.push_back(obj);

            sync2.insertOnRetry = true;
            // succeeds
            multiInitialSyncApply(ops, &sync2);

            BSONObj fin = findOne();
            verify(fin["x"].Number() == 456);

            drop();
        }
Esempio n. 9
0
    static void _logOpRS(OperationContext* txn,
                         const char *opstr,
                         const char *ns,
                         const char *logNS,
                         const BSONObj& obj,
                         BSONObj *o2,
                         bool *bb,
                         bool fromMigrate ) {
        Lock::DBWrite lk1(txn->lockState(), "local");
        WriteUnitOfWork wunit(txn);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        long long hashNew;
        if( theReplSet ) {
            if (!theReplSet->box.getState().primary()) {
                log() << "replSet error : logOp() but not primary";
                fassertFailed(17405);
            }
            hashNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId();
        }
        else {
            // must be initiation
            verify( *ns == 0 );
            hashNew = 0;
        }

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("v", OPLOG_VERSION);
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();

        DEV verify( logNS == 0 ); // check this was never a master/slave master

        if ( localOplogRSCollection == 0 ) {
            Client::Context ctx(txn, rsoplog);
            localDB = ctx.db();
            verify( localDB );
            localOplogRSCollection = localDB->getCollection( txn, rsoplog );
            massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
        }

        Client::Context ctx(txn, rsoplog, localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

        /* todo: now() has code to handle clock skew.  but if the skew server to server is large it will get unhappy.
           this code (or code in now() maybe) should be improved.
        */
        if( theReplSet ) {
            if( !(theReplSet->lastOpTimeWritten<ts) ) {
                log() << "replication oplog stream went back in time. previous timestamp: "
                      << theReplSet->lastOpTimeWritten << " newest timestamp: " << ts
                      << ". attempting to sync directly from primary." << endl;
                BSONObjBuilder result;
                Status status = theReplSet->forceSyncFrom(theReplSet->box.getPrimary()->fullName(),
                                                          &result);
                if (!status.isOK()) {
                    log() << "Can't sync from primary: " << status;
                }
            }
            theReplSet->lastOpTimeWritten = ts;
            theReplSet->lastH = hashNew;
            ctx.getClient()->setLastOp( ts );
        }
        wunit.commit();

    }
Esempio n. 10
0
    void Consensus::_electSelf() {
        if( time(0) < steppedDown )
            return;

        {
            const OpTime ord = theReplSet->lastOpTimeWritten;
            if( ord == 0 ) {
                log() << "replSet info not trying to elect self, do not yet have a complete set of data from any point in time" << rsLog;
                return;
            }
        }

        bool allUp;
        int nTies;
        if( !_weAreFreshest(allUp, nTies) ) {
            return;
        }

        rs.sethbmsg("",9);

        if (!allUp && time(0) - serverGlobalParams.started < 60 * 5) {
            /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data
               if we don't have to -- we'd rather be offline and wait a little longer instead
               todo: make this configurable.
               */
            rs.sethbmsg("not electing self, not all members up and we have been up less than 5 minutes");
            return;
        }

        Member& me = *rs._self;

        if( nTies ) {
            /* tie?  we then randomly sleep to try to not collide on our voting. */
            /* todo: smarter. */
            if( me.id() == 0 || _sleptLast ) {
                // would be fine for one node not to sleep
                // todo: biggest / highest priority nodes should be the ones that get to not sleep
            }
            else {
                verify( !rs.lockedByMe() ); // bad to go to sleep locked
                unsigned ms = ((unsigned) rand()) % 1000 + 50;
                DEV log() << "replSet tie " << nTies << " sleeping a little " << ms << "ms" << rsLog;
                _sleptLast = true;
                sleepmillis(ms);
                throw RetryAfterSleepException();
            }
        }
        _sleptLast = false;

        time_t start = time(0);
        unsigned meid = me.id();
        int tally = _yea( meid );
        bool success = false;
        try {
            log() << "replSet info electSelf " << meid << rsLog;

            BSONObj electCmd = BSON(
                                   "replSetElect" << 1 <<
                                   "set" << rs.name() <<
                                   "who" << me.fullName() <<
                                   "whoid" << me.hbinfo().id() <<
                                   "cfgver" << rs._cfg->version <<
                                   "round" << OID::gen() /* this is just for diagnostics */
                               );

            int configVersion;
            list<Target> L;
            rs.getTargets(L, configVersion);
            _multiCommand(electCmd, L);

            {
                for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
                    LOG(1) << "replSet elect res: " << i->result.toString() << rsLog;
                    if( i->ok ) {
                        int v = i->result["vote"].Int();
                        tally += v;
                    }
                }
                if( tally*2 <= _totalVotes() ) {
                    log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog;
                }
                else if( time(0) - start > 30 ) {
                    // defensive; should never happen as we have timeouts on connection and operation for our conn
                    log() << "replSet too much time passed during our election, ignoring result" << rsLog;
                }
                else if( configVersion != rs.config().version ) {
                    log() << "replSet config version changed during our election, ignoring result" << rsLog;
                }
                else {
                    /* succeeded. */
                    LOG(1) << "replSet election succeeded, assuming primary role" << rsLog;
                    success = true;

                    setElectionTime(getNextGlobalOptime());

                    rs.assumePrimary();
                }
            }
        }
        catch( std::exception& ) {
            if( !success ) _electionFailed(meid);
            throw;
        }
        if( !success ) _electionFailed(meid);
    }
Esempio n. 11
0
    static void _logOpRS(OperationContext* txn,
                         const char *opstr,
                         const char *ns,
                         const char *logNS,
                         const BSONObj& obj,
                         BSONObj *o2,
                         bool *bb,
                         bool fromMigrate ) {
        Lock::DBLock lk1(txn->lockState(), "local", newlm::MODE_X);
        WriteUnitOfWork wunit(txn);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        long long hashNew = BackgroundSync::get()->getLastAppliedHash();

        // Check to make sure logOp() is legal at this point.
        if (*opstr == 'n') {
            // 'n' operations are always logged
            invariant(*ns == '\0');

            // 'n' operations do not advance the hash, since they are not rolled back
        }
        else {
            if (!replCoord->canAcceptWritesForDatabase(nsToDatabaseSubstring(ns))) {
                severe() << "replSet error : logOp() but can't accept write to collection " << ns;
                fassertFailed(17405);
            }

            // Advance the hash
            hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
        }


        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("v", OPLOG_VERSION);
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();

        DEV verify( logNS == 0 ); // check this was never a master/slave master

        if ( localOplogRSCollection == 0 ) {
            Client::Context ctx(txn, rsoplog);
            localDB = ctx.db();
            verify( localDB );
            localOplogRSCollection = localDB->getCollection( txn, rsoplog );
            massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
        }

        Client::Context ctx(txn, rsoplog, localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

        BackgroundSync::get()->setLastAppliedHash(hashNew);
        ctx.getClient()->setLastOp( ts );
        replCoord->setMyLastOptime(txn, ts);

        wunit.commit();

    }