Example #1
0
    static void _logOpOld(OperationContext* txn,
                          const char *opstr,
                          const char *ns,
                          const char *logNS,
                          const BSONObj& obj,
                          BSONObj *o2,
                          bool *bb,
                          bool fromMigrate ) {
        Lock::DBWrite lk(txn->lockState(), "local");
        WriteUnitOfWork wunit(txn);
        static BufBuilder bufbuilder(8*1024); // todo there is likely a mutex on this constructor

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
        }

        if ( localOplogMainCollection == 0 ) {
            Client::Context ctx(txn, logNS);
            localDB = ctx.db();
            verify( localDB );
            localOplogMainCollection = localDB->getCollection(txn, logNS);
            verify( localOplogMainCollection );
        }

        Client::Context ctx(txn, logNS , localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogMainCollection->insertDocument( txn, &writer, false ) );

        ctx.getClient()->setLastOp( ts );
        wunit.commit();
    }
Example #2
0
    /* we write to local.opload.$main:
         { ts : ..., op: ..., ns: ..., o: ... }
       ts: an OpTime timestamp
       op:
        "i" insert
        "u" update
        "d" delete
        "c" db cmd
        "db" declares presence of a database (ns is set to the db name + '.')
        "n" no op
       logNS - e.g. "local.oplog.$main"
       bb:
         if not null, specifies a boolean to pass along to the other side as b: param.
         used for "justOne" or "upsert" flags on 'd', 'u'
       first: true
         when set, indicates this is the first thing we have logged for this database.
         thus, the slave does not need to copy down all the data when it sees this.
    */
    static void _logOp(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb, const OpTime &ts ) {
        if ( strncmp(ns, "local.", 6) == 0 ){
            if ( strncmp(ns, "local.slaves", 12) == 0 ){
                resetSlaveCache();
            }
            return;
        }

        DEV assertInWriteLock();
        
        Client::Context context;
        
        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        BSONObjBuilder b;
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();
        int posz = partial.objsize();
        int len = posz + obj.objsize() + 1 + 2 /*o:*/;

        Record *r;
        if ( strncmp( logNS, "local.", 6 ) == 0 ) { // For now, assume this is olog main
            if ( localOplogMainDetails == 0 ) {
                Client::Context ctx("local.", dbpath, 0, false);
                localOplogDB = ctx.db();
                localOplogMainDetails = nsdetails(logNS);
            }
            Client::Context ctx( "" , localOplogDB, false );
            r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len);
        } else {
            Client::Context ctx( logNS, dbpath, 0, false );
            assert( nsdetails( logNS ) );
            r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len);
        }

        char *p = r->data;
        memcpy(p, partial.objdata(), posz);
        *((unsigned *)p) += obj.objsize() + 1 + 2;
        p += posz - 1;
        *p++ = (char) Object;
        *p++ = 'o';
        *p++ = 0;
        memcpy(p, obj.objdata(), obj.objsize());
        p += obj.objsize();
        *p = EOO;
        
        if ( logLevel >= 6 ) {
            BSONObj temp(r);
            log( 6 ) << "logging op:" << temp << endl;
        }
        
        context.getClient()->setLastOp( ts );
    }
Example #3
0
    /* we write to local.opload.$main:
         { ts : ..., op: ..., ns: ..., o: ... }
       ts: an OpTime timestamp
       op:
        "i" insert
        "u" update
        "d" delete
        "c" db cmd
        "db" declares presence of a database (ns is set to the db name + '.')
        "n" no op
       logNS - where to log it.  0/null means "local.oplog.$main".
       bb:
         if not null, specifies a boolean to pass along to the other side as b: param.
         used for "justOne" or "upsert" flags on 'd', 'u'
       first: true
         when set, indicates this is the first thing we have logged for this database.
         thus, the slave does not need to copy down all the data when it sees this.

       note this is used for single collection logging even when --replSet is enabled.
    */
    static void _logOpOld(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) {
        DEV assertInWriteLock();
        static BufBuilder bufbuilder(8*1024);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        const OpTime ts = OpTime::now();
        Client::Context context;

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        int po_sz = partial.objsize();
        int len = po_sz + obj.objsize() + 1 + 2 /*o:*/;

        Record *r;
        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
            if ( localOplogMainDetails == 0 ) {
                Client::Context ctx( logNS , dbpath, 0, false);
                localDB = ctx.db();
                assert( localDB );
                localOplogMainDetails = nsdetails(logNS);
                assert( localOplogMainDetails );
            }
            Client::Context ctx( logNS , localDB, false );
            r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len);
        }
        else {
            Client::Context ctx( logNS, dbpath, 0, false );
            assert( nsdetails( logNS ) );
            // first we allocate the space, then we fill it below.
            r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len);
        }

        append_O_Obj(r->data, partial, obj);

        context.getClient()->setLastOp( ts.asDate() );

        if ( logLevel >= 6 ) {
            BSONObj temp(r);
            log( 6 ) << "logging op:" << temp << endl;
        }

    }
Example #4
0
    static void _logOpRS(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) {
        DEV assertInWriteLock();

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }

        const OpTime ts = OpTime::now();
        long long hashNew;
        if( theReplSet ) {
            massert(13312, "replSet error : logOp() but not primary?", theReplSet->box.getState().primary());
            hashNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId();
        }
        else {
            // must be initiation
            assert( *ns == 0 );
            hashNew = 0;
        }

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("op", opstr);
        b.append("ns", ns);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();
        int posz = partial.objsize();
        int len = posz + obj.objsize() + 1 + 2 /*o:*/;

        Record *r;
        DEV assert( logNS == 0 );
        {
            const char *logns = rsoplog;
            if ( rsOplogDetails == 0 ) {
                Client::Context ctx( logns , dbpath, 0, false);
                localDB = ctx.db();
                assert( localDB );
                rsOplogDetails = nsdetails(logns);
                massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", rsOplogDetails);
            }
            Client::Context ctx( logns , localDB, false );
            r = theDataFileMgr.fast_oplog_insert(rsOplogDetails, logns, len);
            /* todo: now() has code to handle clock skew.  but if the skew server to server is large it will get unhappy.
                     this code (or code in now() maybe) should be improved.
                     */
            if( theReplSet ) {
                if( !(theReplSet->lastOpTimeWritten<ts) ) {
                    log() << "replSet ERROR possible failover clock skew issue? " << theReplSet->lastOpTimeWritten << ' ' << ts << rsLog;
                    log() << "replSet " << theReplSet->isPrimary() << rsLog;
                }
                theReplSet->lastOpTimeWritten = ts;
                theReplSet->lastH = hashNew;
                ctx.getClient()->setLastOp( ts.asDate() );
            }
        }

        append_O_Obj(r->data, partial, obj);

        if ( logLevel >= 6 ) {
            BSONObj temp(r);
            log( 6 ) << "logOp:" << temp << endl;
        }
    }
Example #5
0
    static void _logOpRS(OperationContext* txn,
                         const char *opstr,
                         const char *ns,
                         const char *logNS,
                         const BSONObj& obj,
                         BSONObj *o2,
                         bool *bb,
                         bool fromMigrate ) {
        Lock::DBWrite lk1(txn->lockState(), "local");
        WriteUnitOfWork wunit(txn);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        long long hashNew;
        if( theReplSet ) {
            if (!theReplSet->box.getState().primary()) {
                log() << "replSet error : logOp() but not primary";
                fassertFailed(17405);
            }
            hashNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId();
        }
        else {
            // must be initiation
            verify( *ns == 0 );
            hashNew = 0;
        }

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("v", OPLOG_VERSION);
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();

        DEV verify( logNS == 0 ); // check this was never a master/slave master

        if ( localOplogRSCollection == 0 ) {
            Client::Context ctx(txn, rsoplog);
            localDB = ctx.db();
            verify( localDB );
            localOplogRSCollection = localDB->getCollection( txn, rsoplog );
            massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
        }

        Client::Context ctx(txn, rsoplog, localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

        /* todo: now() has code to handle clock skew.  but if the skew server to server is large it will get unhappy.
           this code (or code in now() maybe) should be improved.
        */
        if( theReplSet ) {
            if( !(theReplSet->lastOpTimeWritten<ts) ) {
                log() << "replication oplog stream went back in time. previous timestamp: "
                      << theReplSet->lastOpTimeWritten << " newest timestamp: " << ts
                      << ". attempting to sync directly from primary." << endl;
                BSONObjBuilder result;
                Status status = theReplSet->forceSyncFrom(theReplSet->box.getPrimary()->fullName(),
                                                          &result);
                if (!status.isOK()) {
                    log() << "Can't sync from primary: " << status;
                }
            }
            theReplSet->lastOpTimeWritten = ts;
            theReplSet->lastH = hashNew;
            ctx.getClient()->setLastOp( ts );
        }
        wunit.commit();

    }
Example #6
0
    static void _logOpOld(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb, bool fromMigrate ) {
        Lock::DBWrite lk("local");
        static BufBuilder bufbuilder(8*1024); // todo there is likely a mutex on this constructor

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        mutex::scoped_lock lk2(OpTime::m);

        const OpTime ts = OpTime::now(lk2);
        Client::Context context("", 0);

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        int po_sz = partial.objsize();
        int len = po_sz + obj.objsize() + 1 + 2 /*o:*/;

        Record *r;
        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
            if ( localOplogMainDetails == 0 ) {
                Client::Context ctx(logNS , dbpath);
                localDB = ctx.db();
                verify( localDB );
                localOplogMainDetails = nsdetails(logNS);
                verify( localOplogMainDetails );
            }
            Client::Context ctx(logNS , localDB);
            r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len);
        }
        else {
            Client::Context ctx(logNS, dbpath);
            verify( nsdetails( logNS ) );
            // first we allocate the space, then we fill it below.
            r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len);
        }

        append_O_Obj(r->data(), partial, obj);

        context.getClient()->setLastOp( ts );

        LOG( 6 ) << "logging op:" << BSONObj::make(r) << endl;
    } 
    Status LegacyReplicationCoordinator::processReplSetReconfig(OperationContext* txn,
                                                                const ReplSetReconfigArgs& args,
                                                                BSONObjBuilder* resultObj) {

        if( args.force && !theReplSet ) {
            _settings.reconfig = args.newConfigObj.getOwned();
            resultObj->append("msg",
                              "will try this config momentarily, try running rs.conf() again in a "
                                      "few seconds");
            return Status::OK();
        }

        // TODO(dannenberg) once reconfig processing has been figured out in the impl, this should
        // be moved out of processReplSetReconfig and into the command body like all other cmds
        Status status = checkReplEnabledForCommand(resultObj);
        if (!status.isOK()) {
            return status;
        }

        if( !args.force && !theReplSet->box.getState().primary() ) {
            return Status(ErrorCodes::NotMaster,
                          "replSetReconfig command must be sent to the current replica set "
                                  "primary.");
        }

        try {
            {
                // just make sure we can get a write lock before doing anything else.  we'll
                // reacquire one later.  of course it could be stuck then, but this check lowers the
                // risk if weird things are up - we probably don't want a change to apply 30 minutes
                // after the initial attempt.
                time_t t = time(0);
                Lock::GlobalWrite lk(txn->lockState());
                if( time(0)-t > 20 ) {
                    return Status(ErrorCodes::ExceededTimeLimit,
                                  "took a long time to get write lock, so not initiating.  "
                                          "Initiate when server less busy?");
                }
            }


            scoped_ptr<ReplSetConfig> newConfig(ReplSetConfig::make(args.newConfigObj, args.force));

            log() << "replSet replSetReconfig config object parses ok, " <<
                    newConfig->members.size() << " members specified" << rsLog;

            Status status = ReplSetConfig::legalChange(theReplSet->getConfig(), *newConfig);
            if (!status.isOK()) {
                return status;
            }

            checkMembersUpForConfigChange(*newConfig, *resultObj, false);

            log() << "replSet replSetReconfig [2]" << rsLog;

            theReplSet->haveNewConfig(txn, *newConfig, true);
            ReplSet::startupStatusMsg.set("replSetReconfig'd");
        }
        catch(const DBException& e) {
            log() << "replSet replSetReconfig exception: " << e.what() << rsLog;
            return e.toStatus();
        }

        resetSlaveCache();
        return Status::OK();
    }
Example #8
0
    static void _logOpRS(OperationContext* txn,
                         const char *opstr,
                         const char *ns,
                         const char *logNS,
                         const BSONObj& obj,
                         BSONObj *o2,
                         bool *bb,
                         bool fromMigrate ) {
        Lock::DBLock lk1(txn->lockState(), "local", newlm::MODE_X);
        WriteUnitOfWork wunit(txn);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        long long hashNew = BackgroundSync::get()->getLastAppliedHash();

        // Check to make sure logOp() is legal at this point.
        if (*opstr == 'n') {
            // 'n' operations are always logged
            invariant(*ns == '\0');

            // 'n' operations do not advance the hash, since they are not rolled back
        }
        else {
            if (!replCoord->canAcceptWritesForDatabase(nsToDatabaseSubstring(ns))) {
                severe() << "replSet error : logOp() but can't accept write to collection " << ns;
                fassertFailed(17405);
            }

            // Advance the hash
            hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
        }


        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("v", OPLOG_VERSION);
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();

        DEV verify( logNS == 0 ); // check this was never a master/slave master

        if ( localOplogRSCollection == 0 ) {
            Client::Context ctx(txn, rsoplog);
            localDB = ctx.db();
            verify( localDB );
            localOplogRSCollection = localDB->getCollection( txn, rsoplog );
            massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
        }

        Client::Context ctx(txn, rsoplog, localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

        BackgroundSync::get()->setLastAppliedHash(hashNew);
        ctx.getClient()->setLastOp( ts );
        replCoord->setMyLastOptime(txn, ts);

        wunit.commit();

    }
Example #9
0
 void oplogCheckCloseDatabase( Database * db ){
     localDB = 0;
     localOplogMainDetails = 0;
     rsOplogDetails = 0;
     resetSlaveCache();
 }