Example #1
 void report() const {
     const OpTime &maxOpTimeSynced = _player->maxOpTimeSynced();
     LOG(0) << "synced up to " << fmtOpTime(maxOpTimeSynced);
     if (!_rconn) {
         LOG(0) << endl;
         return;
     }
     Query lastQuery;
     lastQuery.sort("$natural", -1);
     BSONObj lastFields = BSON("ts" << 1);
     BSONObj lastObj = _rconn->conn().findOne(_oplogns, lastQuery, &lastFields);
     BSONElement tsElt = lastObj["ts"];
     if (!tsElt.ok()) {
         warning() << "couldn't find last oplog entry on remote host" << endl;
         LOG(0) << endl;
         return;
     }
     OpTime lastOpTime = OpTime(tsElt.date());
     LOG(0) << ", source has up to " << fmtOpTime(lastOpTime);
     if (maxOpTimeSynced == lastOpTime) {
         LOG(0) << ", fully synced." << endl;
     }
     else {
         int diff = lastOpTime.getSecs() - maxOpTimeSynced.getSecs();
         if (diff > 0) {
             LOG(0) << ", " << (lastOpTime.getSecs() - maxOpTimeSynced.getSecs())
                    << " seconds behind source." << endl;
         }
         else {
             LOG(0) << ", less than 1 second behind source." << endl;
         }
     }
     _reportingTimer.reset();
 }
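
The report above reduces to integer arithmetic on the seconds component of two OpTimes. A minimal standalone sketch of that branch logic, with plain unsigned seconds standing in for OpTime::getSecs() (describeLag is an illustrative name, not from the original source; note the original's "fully synced" check compares the full OpTime, increment included):

    #include <iostream>
    #include <string>

    // Mirrors the three report() branches: fully synced, N seconds behind,
    // or less than one second behind the source.
    static std::string describeLag(unsigned syncedSecs, unsigned sourceSecs) {
        if (syncedSecs == sourceSecs)
            return "fully synced";
        int diff = static_cast<int>(sourceSecs) - static_cast<int>(syncedSecs);
        if (diff > 0)
            return std::to_string(diff) + " seconds behind source";
        return "less than 1 second behind source";
    }

    int main() {
        std::cout << describeLag(1400000100, 1400000100) << '\n'; // fully synced
        std::cout << describeLag(1400000100, 1400000142) << '\n'; // 42 seconds behind source
        std::cout << describeLag(1400000142, 1400000100) << '\n'; // less than 1 second behind source
    }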
Example #2
    // oplogdiags in web ui
    static void say(stringstream&ss, const bo& op) {
        ss << "<tr>";

        set<string> skip;
        be e = op["ts"];
        if( e.type() == Date || e.type() == Timestamp ) {
            OpTime ot = e._opTime();
            ss << td( time_t_to_String_short( ot.getSecs() ) );
            ss << td( ot.toString() );
            skip.insert("ts");
        }
        else ss << td("?") << td("?");

        e = op["h"];
        if( e.type() == NumberLong ) {
            ss << "<td>" << hex << e.Long() << "</td>\n";
            skip.insert("h");
        }
        else
            ss << td("?");

        ss << td(op["op"].valuestrsafe());
        ss << td(op["ns"].valuestrsafe());
        skip.insert("op");
        skip.insert("ns");

        ss << "<td>";
        for( bo::iterator i(op); i.more(); ) {
            be e = i.next();
            if( skip.count(e.fieldName()) ) continue;
            ss << e.toString() << ' ';
        }
        ss << "</td></tr>\n";
    }
Example #3
    void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
        int sd = theReplSet->myConfig().slaveDelay;

        // ignore slaveDelay if the box is still initializing. once
        // it becomes secondary we can worry about it.
        if( sd && theReplSet->isSecondary() ) {
            const OpTime ts = lastOp["ts"]._opTime();
            long long a = ts.getSecs();
            long long b = time(0);
            long long lag = b - a;
            long long sleeptime = sd - lag;
            if( sleeptime > 0 ) {
                uassert(12000, "rs slaveDelay differential too big check clocks and systems",
                        sleeptime < 0x40000000);
                if( sleeptime < 60 ) {
                    sleepsecs((int) sleeptime);
                }
                else {
                    log() << "replSet slavedelay sleep long time: " << sleeptime << rsLog;
                    // sleep(hours) would prevent reconfigs from taking effect & such!
                    long long waitUntil = b + sleeptime;
                    while( 1 ) {
                        sleepsecs(6);
                        if( time(0) >= waitUntil )
                            break;

                        if( theReplSet->myConfig().slaveDelay != sd ) // reconf
                            break;
                    }
                }
            }
        } // endif slaveDelay
    }
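
The heart of handleSlaveDelay is the arithmetic sleeptime = slaveDelay - (now - lastOpSecs), plus the trick of breaking a long sleep into short slices so a reconfig can interrupt it. A standalone sketch of that logic, assuming a caller-supplied hook that reports the currently configured delay (applySlaveDelay and currentDelay are illustrative names standing in for theReplSet->myConfig().slaveDelay):

    #include <chrono>
    #include <ctime>
    #include <functional>
    #include <iostream>
    #include <thread>

    // Wait until the last-applied op is at least slaveDelaySecs old, waking
    // every few seconds so a reconfig that changed the delay is noticed.
    void applySlaveDelay(long long lastOpSecs,
                         int slaveDelaySecs,
                         const std::function<int()>& currentDelay) {
        long long now = static_cast<long long>(std::time(0));
        long long lag = now - lastOpSecs;
        long long sleeptime = slaveDelaySecs - lag;
        if (sleeptime <= 0)
            return;                                   // already delayed enough
        if (sleeptime < 60) {
            std::this_thread::sleep_for(std::chrono::seconds(sleeptime));
            return;
        }
        // Long sleeps are chopped into 6-second slices, as in the original,
        // so the deadline or a changed delay is noticed promptly.
        long long waitUntil = now + sleeptime;
        while (true) {
            std::this_thread::sleep_for(std::chrono::seconds(6));
            if (std::time(0) >= waitUntil)
                break;
            if (currentDelay() != slaveDelaySecs)
                break;                                // reconfig changed the delay
        }
    }

    int main() {
        // Pretend the last op is 5 seconds old and the configured delay is 8
        // seconds: this sleeps roughly 3 seconds and returns.
        long long lastOpSecs = static_cast<long long>(std::time(0)) - 5;
        applySlaveDelay(lastOpSecs, 8, [] { return 8; });
        std::cout << "done\n";
    }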
Example #4
 void logPosition() const {
     if (_player) {
         if (_player->thisTime() != OpTime()) {
             log() << "Exiting while processing operation with OpTime " << _player->thisTimeStr() << endl;
         }
         report();
         OpTime t = _player->maxOpTimeSynced();
         string tsString = mongoutils::str::stream() << t.getSecs() << ":" << t.getInc();
         log() << "Use --ts=" << tsString << " to resume." << endl;
         try {
             std::ofstream tsFile;
             tsFile.exceptions(std::ifstream::badbit | std::ifstream::failbit);
             tsFile.open(_tsFilename, std::ofstream::trunc);
             tsFile << tsString;
             tsFile.close();
             log() << "Saved timestamp to file "
                   << (boost::filesystem::current_path() / _tsFilename).string() << "." << endl;
             log() << "I'll automatically use this value next time if you run from this directory "
                   << "and don't pass --ts." << endl;
         }
         catch (std::exception &e) {
             warning() << "Error saving timestamp to file " << _tsFilename << ": " << e.what() << endl;
             warning() << "Make sure you save the timestamp somewhere, because I couldn't!" << endl;
         }
     }
 }
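
logPosition relies on std::ofstream exception flags so any write failure surfaces in the catch block rather than being silently ignored. A small standalone sketch of that save-and-reload round trip for a "secs:inc" resume string (the file name "resume.ts" is illustrative, not the tool's actual _tsFilename):

    #include <fstream>
    #include <iostream>
    #include <string>

    int main() {
        const std::string tsString = "1400000000:7";   // the form --ts expects
        try {
            std::ofstream tsFile;
            // Any failure (bad path, full disk, ...) now throws instead of
            // just setting stream state, mirroring logPosition above.
            tsFile.exceptions(std::ofstream::badbit | std::ofstream::failbit);
            tsFile.open("resume.ts", std::ofstream::trunc);
            tsFile << tsString;
            tsFile.close();
        } catch (const std::exception& e) {
            std::cerr << "could not save timestamp: " << e.what() << '\n';
            return 1;
        }

        std::ifstream in("resume.ts");
        std::string loaded;
        in >> loaded;
        std::cout << "resume with --ts=" << loaded << '\n';
        return 0;
    }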
Example #5
    void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        int slaveDelaySecs = replCoord->getSlaveDelaySecs().total_seconds();

        // ignore slaveDelay if the box is still initializing. once
        // it becomes secondary we can worry about it.
        if( slaveDelaySecs > 0 && replCoord->getCurrentMemberState().secondary() ) {
            const OpTime ts = lastOp["ts"]._opTime();
            long long a = ts.getSecs();
            long long b = time(0);
            long long lag = b - a;
            long long sleeptime = slaveDelaySecs - lag;
            if( sleeptime > 0 ) {
                uassert(12000, "rs slaveDelay differential too big check clocks and systems",
                        sleeptime < 0x40000000);
                if( sleeptime < 60 ) {
                    sleepsecs((int) sleeptime);
                }
                else {
                    warning() << "replSet slavedelay causing a long sleep of " << sleeptime
                              << " seconds" << rsLog;
                    // sleep(hours) would prevent reconfigs from taking effect & such!
                    long long waitUntil = b + sleeptime;
                    while(time(0) < waitUntil) {
                        sleepsecs(6);

                        // Handle reconfigs that changed the slave delay
                        if (replCoord->getSlaveDelaySecs().total_seconds() != slaveDelaySecs)
                            break;
                    }
                }
            }
        } // endif slaveDelay
    }
Example #6
    bool ReplSetImpl::shouldChangeSyncTarget(const OpTime& targetOpTime) const {
        for (Member *m = _members.head(); m; m = m->next()) {
            if (m->syncable() &&
                targetOpTime.getSecs()+maxSyncSourceLagSecs < m->hbinfo().opTime.getSecs()) {
                return true;
            }
        }

        return false;
    }
Example #7
    bool ReplSetImpl::shouldChangeSyncTarget(const OpTime& targetOpTime) const {
        for (Member *m = _members.head(); m; m = m->next()) {
            if (m->syncable() &&
                targetOpTime.getSecs()+maxSyncSourceLagSecs < m->hbinfo().opTime.getSecs()) {
                log() << "changing sync target because current sync target's most recent OpTime is "
                      << targetOpTime.toStringPretty() << " which is more than "
                      << maxSyncSourceLagSecs << " seconds behind member " << m->fullName()
                      << " whose most recent OpTime is " << m->hbinfo().opTime.getSecs();
                return true;
            }
        }

        return false;
    }
Example #8
 bool ReplSetImpl::shouldChangeSyncTarget(const HostAndPort& currentTarget) {
     lock lk(this);
     OpTime targetOpTime = findByName(currentTarget.toString())->hbinfo().opTime;
     for (Member *m = _members.head(); m; m = m->next()) {
         if (m->syncable() &&
             targetOpTime.getSecs()+maxSyncSourceLagSecs < m->hbinfo().opTime.getSecs()) {
             log() << "changing sync target because current sync target's most recent OpTime is "
                   << targetOpTime.toStringPretty() << " which is more than "
                   << maxSyncSourceLagSecs << " seconds behind member " << m->fullName()
                   << " whose most recent OpTime is " << m->hbinfo().opTime.getSecs();
             return true;
         }
     }
     if (gotForceSync()) {
         return true;
     }
     return false;
 }
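
Stripped of the Member list and heartbeat plumbing (Examples #6 to #8), the decision is simply: switch sync targets if any syncable member's last op is more than maxSyncSourceLagSecs seconds ahead of the current target's. A minimal sketch over a plain vector (MemberInfo and the field names are illustrative):

    #include <vector>

    // Illustrative stand-in for a replica set member's heartbeat info.
    struct MemberInfo {
        bool syncable;          // would be m->syncable() in the original
        unsigned lastOpSecs;    // seconds part of m->hbinfo().opTime
    };

    // True if some syncable member is more than maxLagSecs ahead of the
    // current sync target's last applied op.
    bool shouldChangeSyncTarget(unsigned targetOpSecs,
                                unsigned maxLagSecs,
                                const std::vector<MemberInfo>& members) {
        for (const MemberInfo& m : members) {
            if (m.syncable && targetOpSecs + maxLagSecs < m.lastOpSecs)
                return true;
        }
        return false;
    }

    int main() {
        std::vector<MemberInfo> members = {
            {true, 1400000100},   // 100 seconds ahead of the target below
            {false, 1400000500},  // far ahead, but not syncable
        };
        // With a 30-second threshold the first member triggers a switch.
        return shouldChangeSyncTarget(1400000000, 30, members) ? 0 : 1;
    }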
Example #9
static string fmtOpTime(const OpTime &t) {
    stringstream ss;
    ss << t.getSecs() << ":" << t.getInc();
    return ss.str();
}
    const Member* ReplSetImpl::getMemberToSyncTo() {
        lock lk(this);

        // if we have a target we've requested to sync from, use it

        if (_forceSyncTarget) {
            Member* target = _forceSyncTarget;
            _forceSyncTarget = 0;
            sethbmsg( str::stream() << "syncing to: " << target->fullName() << " by request", 0);
            return target;
        }

        const Member* primary = box.getPrimary();

        // wait for 2N pings before choosing a sync target
        if (_cfg) {
            int needMorePings = config().members.size()*2 - HeartbeatInfo::numPings;

            if (needMorePings > 0) {
                OCCASIONALLY log() << "waiting for " << needMorePings << " pings from other members before syncing" << endl;
                return NULL;
            }

            // If we are only allowed to sync from the primary, return that
            if (!_cfg->chainingAllowed()) {
                // Returns NULL if we cannot reach the primary
                return primary;
            }
        }

        // find the member with the lowest ping time that has more data than me

        // Find primary's oplog time. Reject sync candidates that are more than
        // maxSyncSourceLagSecs seconds behind.
        OpTime primaryOpTime;
        if (primary)
            primaryOpTime = primary->hbinfo().opTime;
        else
            // choose a time that will exclude no candidates, since we don't see a primary
            primaryOpTime = OpTime(maxSyncSourceLagSecs, 0);

        if (primaryOpTime.getSecs() < static_cast<unsigned int>(maxSyncSourceLagSecs)) {
            // erh - I think this means there was just a new election
            // and we don't yet know the new primary's optime
            primaryOpTime = OpTime(maxSyncSourceLagSecs, 0);
        }

        OpTime oldestSyncOpTime(primaryOpTime.getSecs() - maxSyncSourceLagSecs, 0);

        Member *closest = 0;
        time_t now = 0;

        // Make two attempts.  The first attempt, we ignore those nodes with
        // slave delay higher than our own.  The second attempt includes such
        // nodes, in case those are the only ones we can reach.
        // This loop attempts to set 'closest'.
        for (int attempts = 0; attempts < 2; ++attempts) {
            for (Member *m = _members.head(); m; m = m->next()) {
                if (!m->syncable())
                    continue;

                if (m->state() == MemberState::RS_SECONDARY) {
                    // only consider secondaries that are ahead of where we are
                    if (m->hbinfo().opTime <= lastOpTimeWritten)
                        continue;
                    // omit secondaries that are excessively behind, on the first attempt at least.
                    if (attempts == 0 &&
                        m->hbinfo().opTime < oldestSyncOpTime)
                        continue;
                }

                // omit nodes that are more latent than anything we've already considered
                if (closest &&
                    (m->hbinfo().ping > closest->hbinfo().ping))
                    continue;

                if (attempts == 0 &&
                    (myConfig().slaveDelay < m->config().slaveDelay || m->config().hidden)) {
                    continue; // skip this one in the first attempt
                }

                map<string,time_t>::iterator vetoed = _veto.find(m->fullName());
                if (vetoed != _veto.end()) {
                    // Do some veto housekeeping
                    if (now == 0) {
                        now = time(0);
                    }

                    // if this was on the veto list, check if it was vetoed in the last "while".
                    // if it was, skip.
                    if (vetoed->second >= now) {
                        if (time(0) % 5 == 0) {
                            log() << "replSet not trying to sync from " << (*vetoed).first
                                  << ", it is vetoed for " << ((*vetoed).second - now) << " more seconds" << rsLog;
                        }
                        continue;
                    }
                    _veto.erase(vetoed);
                    // fall through, this is a valid candidate now
                }
                // This candidate has passed all tests; set 'closest'
                closest = m;
            }
            if (closest) break; // no need for second attempt
        }

        if (!closest) {
            return NULL;
        }

        sethbmsg( str::stream() << "syncing to: " << closest->fullName(), 0);

        return closest;
    }
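
Without the replica-set plumbing, the selection loop in getMemberToSyncTo is a two-pass scan: the first pass skips hidden members, members with a larger slaveDelay than ours, and members more than maxSyncSourceLagSecs behind the primary; both passes keep the lowest-ping member that is ahead of us and not vetoed. A compact sketch of that policy over plain structs (Candidate, chooseSyncSource, and the fields are illustrative names; it applies the ahead-of-us and lag checks to every candidate rather than only to secondaries, and omits the veto housekeeping and logging):

    #include <ctime>
    #include <vector>

    // Illustrative stand-in for the per-member state getMemberToSyncTo consults.
    struct Candidate {
        bool syncable;
        bool hidden;
        int slaveDelay;
        int pingMillis;
        unsigned opSecs;       // seconds part of the member's last OpTime
        time_t vetoedUntil;    // 0 if the member is not vetoed
    };

    // Pick a sync source: ahead of us, lowest ping, not vetoed; on the first
    // pass also skip hidden members, members with a larger slaveDelay than
    // ours, and members more than maxLagSecs behind the primary.
    const Candidate* chooseSyncSource(const std::vector<Candidate>& members,
                                      unsigned myOpSecs,
                                      unsigned primaryOpSecs,
                                      unsigned maxLagSecs,
                                      int mySlaveDelay) {
        unsigned oldestAllowed =
            primaryOpSecs > maxLagSecs ? primaryOpSecs - maxLagSecs : 0;
        const Candidate* closest = 0;
        time_t now = std::time(0);

        for (int attempt = 0; attempt < 2 && !closest; ++attempt) {
            for (const Candidate& m : members) {
                if (!m.syncable)
                    continue;
                if (m.opSecs <= myOpSecs)
                    continue;                              // not ahead of us
                if (attempt == 0 && m.opSecs < oldestAllowed)
                    continue;                              // too far behind the primary
                if (attempt == 0 && (mySlaveDelay < m.slaveDelay || m.hidden))
                    continue;                              // defer delayed/hidden members
                if (m.vetoedUntil >= now)
                    continue;                              // still vetoed
                if (closest && m.pingMillis > closest->pingMillis)
                    continue;                              // a closer member is already chosen
                closest = &m;
            }
        }
        return closest;    // may be null: no usable sync source right now
    }

    int main() {
        std::vector<Candidate> members = {
            {true, false, 0, 12, 1400000200, 0},   // ahead of us, modest ping
            {true, false, 0, 5, 1400000050, 0},    // lower ping, but behind us
            {true, true, 0, 3, 1400000300, 0},     // hidden: deferred to the second pass
        };
        const Candidate* src = chooseSyncSource(members, 1400000100, 1400000210, 30, 0);
        return src == &members[0] ? 0 : 1;         // the first member wins on the first pass
    }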
Example #11
void ReplSetImpl::_getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const {
    const Member *m = findById(server_id);
    if( m == 0 ) {
        ss << "Error : can't find a member with id: " << server_id << '\n';
        return;
    }

    ss << p("Server : " + m->fullName() + "<br>ns : " + rsoplog );

    //const bo fields = BSON( "o" << false << "o2" << false );
    const bo fields;

    /** todo: fix this; we might want a socket timeout here */
    DBClientConnection conn(false, 0, /*timeout*/ 20);
    {
        string errmsg;
        if( !conn.connect(m->fullName(), errmsg) ) {
            ss << "couldn't connect to " << m->fullName() << ' ' << errmsg;
            return;
        }
    }

    auto_ptr<DBClientCursor> c = conn.query(rsoplog, Query().sort("$natural",1), 20, 0, &fields);
    if( c.get() == 0 ) {
        ss << "couldn't query " << rsoplog;
        return;
    }
    static const char *h[] = {"ts","optime", "h","op","ns","rest",0};

    ss << "<style type=\"text/css\" media=\"screen\">"
       "table { font-size:75% }\n"
       // "th { background-color:#bbb; color:#000 }\n"
       // "td,th { padding:.25em }\n"
       "</style>\n";

    ss << table(h, true);
    //ss << "<pre>\n";
    int n = 0;
    // we save ts as a timestamp, so even though we don't really
    // use OpTimes anymore, this code is fine
    OpTime otFirst;
    OpTime otLast;
    OpTime otEnd;
    while( c->more() ) {
        bo o = c->next();
        otLast = o["ts"]._opTime();
        if( otFirst.isNull() )
            otFirst = otLast;
        say(ss, o);
        n++;
    }
    if( n == 0 ) {
        ss << rsoplog << " is empty\n";
    }
    else {
        auto_ptr<DBClientCursor> c = conn.query(rsoplog, Query().sort("$natural",-1), 20, 0, &fields);
        if( c.get() == 0 ) {
            ss << "couldn't query [2] " << rsoplog;
            return;
        }
        string x;
        bo o = c->next();
        otEnd = o["ts"]._opTime();
        while( 1 ) {
            stringstream z;
            if( o["ts"]._opTime() == otLast )
                break;
            say(z, o);
            x = z.str() + x;
            if( !c->more() )
                break;
            o = c->next();
        }
        if( !x.empty() ) {
            ss << "<tr><td>...</td><td>...</td><td>...</td><td>...</td><td>...</td></tr>\n" << x;
            //ss << "\n...\n\n" << x;
        }
    }
    ss << _table();
    ss << p(time_t_to_String_short(time(0)) + " current time");

    if( !otEnd.isNull() ) {
        ss << "<p>Log length in time: ";
        unsigned d = otEnd.getSecs() - otFirst.getSecs();
        double h = d / 3600.0;
        ss.precision(3);
        if( h < 72 )
            ss << h << " hours";
        else
            ss << h / 24.0 << " days";
        ss << "</p>\n";
    }
}
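
The "Log length in time" figure at the end is just the difference between the newest and oldest ts seconds, printed as hours below 72 and as days above. A tiny standalone sketch of that formatting (formatLogLength is an illustrative name):

    #include <iostream>
    #include <sstream>
    #include <string>

    // Difference of the newest and oldest oplog timestamps, in seconds,
    // rendered the way _getOplogDiagsAsHtml does: hours under 72, else days.
    std::string formatLogLength(unsigned firstSecs, unsigned lastSecs) {
        unsigned d = lastSecs - firstSecs;
        double h = d / 3600.0;
        std::ostringstream ss;
        ss.precision(3);
        if (h < 72)
            ss << h << " hours";
        else
            ss << h / 24.0 << " days";
        return ss.str();
    }

    int main() {
        std::cout << formatLogLength(1400000000, 1400090000) << '\n';  // 25 hours
        std::cout << formatLogLength(1400000000, 1400900000) << '\n';  // 10.4 days
    }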
Example #12
    /* slave: pull some data from the master's oplog
       note: not yet in db mutex at this point.
       @return -1 error
               0 ok, don't sleep
               1 ok, sleep
    */
    int ReplSource::sync_pullOpLog(int& nApplied) {
        int okResultCode = 1;
        string ns = string("local.oplog.$") + sourceName();
        log(2) << "repl: sync_pullOpLog " << ns << " syncedTo:" << syncedTo.toStringLong() << '\n';

        bool tailing = true;
        oplogReader.tailCheck();

        bool initial = syncedTo.isNull();

        if ( !oplogReader.haveCursor() || initial ) {
            if ( initial ) {
                // Important to grab last oplog timestamp before listing databases.
                syncToTailOfRemoteLog();
                BSONObj info;
                bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info );
                massert( 10389 ,  "Unable to get database list", ok );
                BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
                while( i.moreWithEOO() ) {
                    BSONElement e = i.next();
                    if ( e.eoo() )
                        break;
                    string name = e.embeddedObject().getField( "name" ).valuestr();
                    if ( !e.embeddedObject().getBoolField( "empty" ) ) {
                        if ( name != "local" ) {
                            if ( only.empty() || only == name ) {
                                log( 2 ) << "adding to 'addDbNextPass': " << name << endl;
                                addDbNextPass.insert( name );
                            }
                        }
                    }
                }
            }

            BSONObjBuilder q;
            q.appendDate("$gte", syncedTo.asDate());
            BSONObjBuilder query;
            query.append("ts", q.done());
            if ( !only.empty() ) {
                // note: this may let us skip a LOT of data table scanning, which is a lot of work for the master.
                query.appendRegex("ns", string("^") + only); // maybe append "\\." here?
            }
            BSONObj queryObj = query.done();
            // e.g. queryObj = { ts: { $gte: syncedTo } }

            oplogReader.tailingQuery(ns.c_str(), queryObj);
            tailing = false;
        }
        else {
            log(2) << "repl: tailing=true\n";
        }

        if( !oplogReader.haveCursor() ) {
            problem() << "repl: dbclient::query returns null (conn closed?)" << endl;
            oplogReader.resetConnection();
            return -1;
        }

        // show any deferred database creates from a previous pass
        {
            set<string>::iterator i = addDbNextPass.begin();
            if ( i != addDbNextPass.end() ) {
                BSONObjBuilder b;
                b.append("ns", *i + '.');
                b.append("op", "db");
                BSONObj op = b.done();
                sync_pullOpLog_applyOperation(op, false);
            }
        }

        if ( !oplogReader.more() ) {
            if ( tailing ) {
                log(2) << "repl: tailing & no new activity\n";
                if( oplogReader.awaitCapable() )
                    okResultCode = 0; // don't sleep

            }
            else {
                log() << "repl:   " << ns << " oplog is empty\n";
            }
            {
                dblock lk;
                save();
            }
            return okResultCode;
        }

        OpTime nextOpTime;
        {
            BSONObj op = oplogReader.next();
            BSONElement ts = op.getField("ts");
            if ( ts.type() != Date && ts.type() != Timestamp ) {
                string err = op.getStringField("$err");
                if ( !err.empty() ) {
                    // 13051 is "tailable cursor requested on non capped collection"
                    if (op.getIntField("code") == 13051) {
                        problem() << "trying to slave off of a non-master" << '\n';
                        massert( 13344 ,  "trying to slave off of a non-master", false );
                    }
                    else {
                        problem() << "repl: $err reading remote oplog: " + err << '\n';
                        massert( 10390 ,  "got $err reading remote oplog", false );
                    }
                }
                else {
                    problem() << "repl: bad object read from remote oplog: " << op.toString() << '\n';
                    massert( 10391 , "repl: bad object read from remote oplog", false);
                }
            }

            nextOpTime = OpTime( ts.date() );
            log(2) << "repl: first op time received: " << nextOpTime.toString() << '\n';
            if ( initial ) {
                log(1) << "repl:   initial run\n";
            }
            if( tailing ) {
                if( !( syncedTo < nextOpTime ) ) {
                    log() << "repl ASSERTION failed : syncedTo < nextOpTime" << endl;
                    log() << "repl syncTo:     " << syncedTo.toStringLong() << endl;
                    log() << "repl nextOpTime: " << nextOpTime.toStringLong() << endl;
                    assert(false);
                }
                oplogReader.putBack( op ); // op will be processed in the loop below
                nextOpTime = OpTime(); // will reread the op below
            }
            else if ( nextOpTime != syncedTo ) { // didn't get what we queried for - error
                Nullstream& l = log();
                l << "repl:   nextOpTime " << nextOpTime.toStringLong() << ' ';
                if ( nextOpTime < syncedTo )
                    l << "<??";
                else
                    l << ">";

                l << " syncedTo " << syncedTo.toStringLong() << '\n';
                log() << "repl:   time diff: " << (nextOpTime.getSecs() - syncedTo.getSecs()) << "sec\n";
                log() << "repl:   tailing: " << tailing << '\n';
                log() << "repl:   data too stale, halting replication" << endl;
                replInfo = replAllDead = "data too stale halted replication";
                assert( syncedTo < nextOpTime );
                throw SyncException();
            }
            else {
                /* t == syncedTo, so the first op was applied previously or it is the first op of initial query and need not be applied. */
            }
        }

        // apply operations
        {
            int n = 0;
            time_t saveLast = time(0);
            while ( 1 ) {

                bool moreInitialSyncsPending = !addDbNextPass.empty() && n; // we need "&& n" to assure we actually process at least one op to get a sync point recorded in the first place.

                if ( moreInitialSyncsPending || !oplogReader.more() ) {
                    dblock lk;

                    // NOTE aaron 2011-03-29 This block may be unnecessary, but I'm leaving it in place to avoid changing timing behavior.
                    {
                        dbtemprelease t;
                        if ( !moreInitialSyncsPending && oplogReader.more() ) {
                            continue;
                        }
                        // otherwise, break out of loop so we can set to completed or clone more dbs
                    }
                    
                    if( oplogReader.awaitCapable() && tailing )
                        okResultCode = 0; // don't sleep
                    syncedTo = nextOpTime;
                    save(); // note how far we are synced up to now
                    log() << "repl:   applied " << n << " operations" << endl;
                    nApplied = n;
                    log() << "repl:  end sync_pullOpLog syncedTo: " << syncedTo.toStringLong() << endl;
                    break;
                }

                OCCASIONALLY if( n > 0 && ( n > 100000 || time(0) - saveLast > 60 ) ) {
                    // periodically note our progress, in case we are doing a lot of work and crash
                    dblock lk;
                    syncedTo = nextOpTime;
                    // can't update local log ts since there are pending operations from our peer
                    save();
                    log() << "repl:   checkpoint applied " << n << " operations" << endl;
                    log() << "repl:   syncedTo: " << syncedTo.toStringLong() << endl;
                    saveLast = time(0);
                    n = 0;
                }

                BSONObj op = oplogReader.next();

                unsigned b = replApplyBatchSize;
                bool justOne = b == 1;
                scoped_ptr<writelock> lk( justOne ? 0 : new writelock() );
                while( 1 ) {

                    BSONElement ts = op.getField("ts");
                    if( !( ts.type() == Date || ts.type() == Timestamp ) ) {
                        log() << "sync error: problem querying remote oplog record" << endl;
                        log() << "op: " << op.toString() << endl;
                        log() << "halting replication" << endl;
                        replInfo = replAllDead = "sync error: no ts found querying remote oplog record";
                        throw SyncException();
                    }
                    OpTime last = nextOpTime;
                    nextOpTime = OpTime( ts.date() );
                    if ( !( last < nextOpTime ) ) {
                        log() << "sync error: last applied optime at slave >= nextOpTime from master" << endl;
                        log() << " last:       " << last.toStringLong() << endl;
                        log() << " nextOpTime: " << nextOpTime.toStringLong() << endl;
                        log() << " halting replication" << endl;
                        replInfo = replAllDead = "sync error last >= nextOpTime";
                        uassert( 10123 , "replication error last applied optime at slave >= nextOpTime from master", false);
                    }
                    if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) {
                        assert( justOne );
                        oplogReader.putBack( op );
                        _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1;
                        dblock lk;
                        if ( n > 0 ) {
                            syncedTo = last;
                            save();
                        }
                        log() << "repl:   applied " << n << " operations" << endl;
                        log() << "repl:   syncedTo: " << syncedTo.toStringLong() << endl;
                        log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl;
                        return okResultCode;
                    }

                    sync_pullOpLog_applyOperation(op, !justOne);
                    n++;

                    if( --b == 0 )
                        break;
                    // if we get here, we are doing multiple applications in a single write lock acquisition
                    if( !oplogReader.moreInCurrentBatch() ) {
                        // break if no more in batch so we release lock while reading from the master
                        break;
                    }
                    op = oplogReader.next();

                    getDur().commitIfNeeded();
                }
            }
        }

        return okResultCode;
    }
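
One reusable idea in sync_pullOpLog is the OCCASIONALLY block: while applying a long stream of operations, persist the syncedTo checkpoint whenever more than 100000 ops have been applied or more than 60 seconds have passed, so a crash loses little progress. A standalone sketch of that checkpointing pattern (Op and applyWithCheckpoints are illustrative names; persisting is reduced to a log line where the original takes a write lock and calls save()):

    #include <ctime>
    #include <iostream>
    #include <vector>

    // One oplog entry reduced to the only field this sketch needs.
    struct Op { unsigned tsSecs; };

    // Apply a stream of ops, noting progress (syncedTo) whenever more than
    // maxOps operations or more than maxSecs seconds have gone by since the
    // last checkpoint, mirroring the OCCASIONALLY block above.
    void applyWithCheckpoints(const std::vector<Op>& ops,
                              unsigned& syncedToSecs,
                              size_t maxOps = 100000,
                              long maxSecs = 60) {
        size_t sinceCheckpoint = 0;
        time_t lastSave = std::time(0);
        for (const Op& op : ops) {
            // ... apply the operation here ...
            syncedToSecs = op.tsSecs;
            ++sinceCheckpoint;
            if (sinceCheckpoint > maxOps || std::time(0) - lastSave > maxSecs) {
                // the original takes a write lock and calls save() here
                std::cout << "checkpoint: applied " << sinceCheckpoint
                          << " ops, syncedTo " << syncedToSecs << '\n';
                lastSave = std::time(0);
                sinceCheckpoint = 0;
            }
        }
        // the original persists syncedTo once more after the loop ends
    }

    int main() {
        std::vector<Op> ops = { {1400000001}, {1400000002}, {1400000003} };
        unsigned syncedTo = 1400000000;
        applyWithCheckpoints(ops, syncedTo, 2, 60);   // checkpoints once more than 2 ops are pending
        std::cout << "final syncedTo " << syncedTo << '\n';
    }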
Example #13
    virtual int doRun() {

        // authenticate
        enum Auth::Level authLevel = Auth::NONE;
        auth("", &authLevel);
        uassert(15935, "user does not have write access", authLevel == Auth::WRITE);

        boost::filesystem::path root = getParam("dir");

        // check if we're actually talking to a machine that can write
        if (!isMaster()) {
            return -1;
        }

        if (isMongos() && _db == "" && exists(root / "config")) {
            log() << "Cannot do a full restore on a sharded system" << endl;
            return -1;
        }

        _drop = hasParam( "drop" );
        _keepIndexVersion = hasParam("keepIndexVersion");
        _restoreOptions = !hasParam("noOptionsRestore");
        _restoreIndexes = !hasParam("noIndexRestore");
        _w = getParam( "w" , 1 );

        bool doOplog = hasParam( "oplogReplay" );

        if (doOplog) {
            // fail early if errors

            if (_db != "") {
                log() << "Can only replay oplog on full restore" << endl;
                return -1;
            }

            if ( ! exists(root / "oplog.bson") ) {
                log() << "No oplog file to replay. Make sure you run mongodump with --oplog." << endl;
                return -1;
            }


            BSONObj out;
            if (! conn().simpleCommand("admin", &out, "buildinfo")) {
                log() << "buildinfo command failed: " << out["errmsg"].String() << endl;
                return -1;
            }

            StringData version = out["version"].valuestr();
            if (versionCmp(version, "1.7.4-pre-") < 0) {
                log() << "Can only replay oplog to server version >= 1.7.4" << endl;
                return -1;
            }

            string oplogLimit = getParam( "oplogLimit", "" );
            string oplogInc = "0";

            if(!oplogLimit.empty()) {
                size_t i = oplogLimit.find_first_of(':');
                if ( i != string::npos ) {
                    if ( i + 1 < oplogLimit.length() ) {
                        oplogInc = oplogLimit.substr(i + 1);
                    }

                    oplogLimit = oplogLimit.substr(0, i);
                }

                try {
                    _oplogLimitTS.reset(new OpTime(
                        boost::lexical_cast<unsigned long>(oplogLimit.c_str()),
                        boost::lexical_cast<unsigned long>(oplogInc.c_str())));
                } catch( const boost::bad_lexical_cast& error) {
                    log() << "Could not parse oplogLimit into Timestamp from values ( "
                          << oplogLimit << " , " << oplogInc << " )"
                          << endl;
                    return -1;
                }

                if (!oplogLimit.empty()) {
                    // Only for a replica set, as a master will have no-op entries, so we would need
                    // to skip them all to find the real op
                    scoped_ptr<DBClientCursor> cursor(
                            conn().query("local.oplog.rs", Query().sort(BSON("$natural" << -1)),
                                         1 /*return first*/));
                    OpTime tsOptime;
                    // get newest oplog entry and make sure it is older than the limit to apply.
                    if (cursor->more()) {
                        tsOptime = cursor->next().getField("ts")._opTime();
                        if (tsOptime > *_oplogLimitTS.get()) {
                            log() << "The oplogLimit is not newer than"
                                  << " the last oplog entry on the server."
                                  << endl;
                            return -1;
                        }
                    }

                    BSONObjBuilder tsRestrictBldr;
                    if (!tsOptime.isNull())
                        tsRestrictBldr.appendTimestamp("$gt", tsOptime.asDate());
                    tsRestrictBldr.appendTimestamp("$lt", _oplogLimitTS->asDate());

                    BSONObj query = BSON("ts" << tsRestrictBldr.obj());

                    if (!tsOptime.isNull()) {
                        log() << "Latest oplog entry on the server is " << tsOptime.getSecs()
                                << ":" << tsOptime.getInc() << endl;
                        log() << "Only applying oplog entries matching this criteria: "
                                << query.jsonString() << endl;
                    }
                    _opmatcher.reset(new Matcher(query));
                }
            }
        }

        /* If _db is not "" then the user specified a db name to restore as.
         *
         * In that case we better be given either a root directory that
         * contains only .bson files or a single .bson file  (a db).
         *
         * In the case where a collection name is specified we better be
         * given either a root directory that contains only a single
         * .bson file, or a single .bson file itself (a collection).
         */
        drillDown(root, _db != "", _coll != "", !(_oplogLimitTS.get() == NULL), true);

        // should this happen for oplog replay as well?
        conn().getLastError(_db == "" ? "admin" : _db);

        if (doOplog) {
            log() << "\t Replaying oplog" << endl;
            _curns = OPLOG_SENTINEL;
            processFile( root / "oplog.bson" );
            log() << "Applied " << _oplogEntryApplies << " oplog entries out of "
                  << _oplogEntryApplies + _oplogEntrySkips << " (" << _oplogEntrySkips
                  << " skipped)." << endl;
        }

        return EXIT_CLEAN;
    }
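
The --oplogLimit handling above splits a "seconds[:increment]" string at the first ':' and parses the two halves as unsigned integers, bailing out on anything unparsable. A standalone sketch of just that parsing step, using std::stoul in place of boost::lexical_cast (parseOplogLimit is an illustrative name; it returns false where the original logs and returns -1):

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Parse an oplog limit of the form "seconds" or "seconds:increment".
    bool parseOplogLimit(const std::string& limit, unsigned long& secs, unsigned long& inc) {
        std::string secsPart = limit;
        std::string incPart = "0";                       // increment defaults to 0
        size_t i = limit.find_first_of(':');
        if (i != std::string::npos) {
            if (i + 1 < limit.length())
                incPart = limit.substr(i + 1);
            secsPart = limit.substr(0, i);
        }
        try {
            size_t pos = 0;
            secs = std::stoul(secsPart, &pos);
            if (pos != secsPart.size()) return false;    // trailing junk
            inc = std::stoul(incPart, &pos);
            if (pos != incPart.size()) return false;
        } catch (const std::exception&) {
            return false;                                // empty or non-numeric
        }
        return true;
    }

    int main() {
        unsigned long secs = 0, inc = 0;
        std::cout << parseOplogLimit("1400000000:5", secs, inc)       // ok: 1400000000, 5
                  << ' ' << parseOplogLimit("1400000000", secs, inc)  // ok: inc defaults to 0
                  << ' ' << parseOplogLimit("noon:5", secs, inc)      // rejected
                  << '\n';
    }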
Example #14
    static void syncRollbackFindCommonPoint(DBClientConnection *them, HowToFixUp& h) {
        static time_t last;
        if( time(0)-last < 60 ) {
            throw "findcommonpoint waiting a while before trying again";
        }
        last = time(0);

        assert( dbMutex.atLeastReadLocked() );
        Client::Context c(rsoplog, dbpath, 0, false);
        NamespaceDetails *nsd = nsdetails(rsoplog);
        assert(nsd);
        ReverseCappedCursor u(nsd);
        if( !u.ok() )
            throw "our oplog empty or unreadable";

        const Query q = Query().sort(reverseNaturalObj);
        const bo fields = BSON( "ts" << 1 << "h" << 1 );

        //auto_ptr<DBClientCursor> u = us->query(rsoplog, q, 0, 0, &fields, 0, 0);

        h.rbid = getRBID(them);
        auto_ptr<DBClientCursor> t = them->query(rsoplog, q, 0, 0, &fields, 0, 0);

        if( t.get() == 0 || !t->more() ) throw "remote oplog empty or unreadable";

        BSONObj ourObj = u.current();
        OpTime ourTime = ourObj["ts"]._opTime();
        BSONObj theirObj = t->nextSafe();
        OpTime theirTime = theirObj["ts"]._opTime();

        {
            long long diff = (long long) ourTime.getSecs() - ((long long) theirTime.getSecs());
            /* diff could be positive, negative, or zero */
            log() << "replSet info rollback our last optime:   " << ourTime.toStringPretty() << rsLog;
            log() << "replSet info rollback their last optime: " << theirTime.toStringPretty() << rsLog;
            log() << "replSet info rollback diff in end of log times: " << diff << " seconds" << rsLog;
            if( diff > 3600 ) {
                log() << "replSet rollback too long a time period for a rollback." << rsLog;
                throw "error not willing to roll back more than one hour of data";
            }
        }

        unsigned long long scanned = 0;
        while( 1 ) {
            scanned++;
            /* todo add code to assure no excessive scanning for too long */
            if( ourTime == theirTime ) {
                if( ourObj["h"].Long() == theirObj["h"].Long() ) {
                    // found the point back in time where we match.
                    // todo : check a few more just to be careful about hash collisions.
                    log() << "replSet rollback found matching events at " << ourTime.toStringPretty() << rsLog;
                    log() << "replSet rollback findcommonpoint scanned : " << scanned << rsLog;
                    h.commonPoint = ourTime;
                    h.commonPointOurDiskloc = u.currLoc();
                    return;
                }

                refetch(h, ourObj);

                if( !t->more() ) {
                    log() << "replSet rollback error RS100 reached beginning of remote oplog" << rsLog;
                    log() << "replSet   them:      " << them->toString() << " scanned: " << scanned << rsLog;
                    log() << "replSet   theirTime: " << theirTime.toStringLong() << rsLog;
                    log() << "replSet   ourTime:   " << ourTime.toStringLong() << rsLog;
                    throw "RS100 reached beginning of remote oplog [2]";
                }
                theirObj = t->nextSafe();
                theirTime = theirObj["ts"]._opTime();

                u.advance();
                if( !u.ok() ) {
                    log() << "replSet rollback error RS101 reached beginning of local oplog" << rsLog;
                    log() << "replSet   them:      " << them->toString() << " scanned: " << scanned << rsLog;
                    log() << "replSet   theirTime: " << theirTime.toStringLong() << rsLog;
                    log() << "replSet   ourTime:   " << ourTime.toStringLong() << rsLog;
                    throw "RS101 reached beginning of local oplog [1]";
                }
                ourObj = u.current();
                ourTime = ourObj["ts"]._opTime();
            }
            else if( theirTime > ourTime ) {
                if( !t->more() ) {
                    log() << "replSet rollback error RS100 reached beginning of remote oplog" << rsLog;
                    log() << "replSet   them:      " << them->toString() << " scanned: " << scanned << rsLog;
                    log() << "replSet   theirTime: " << theirTime.toStringLong() << rsLog;
                    log() << "replSet   ourTime:   " << ourTime.toStringLong() << rsLog;
                    throw "RS100 reached beginning of remote oplog [1]";
                }
                theirObj = t->nextSafe();
                theirTime = theirObj["ts"]._opTime();
            }
            else {
                // theirTime < ourTime
                refetch(h, ourObj);
                u.advance();
                if( !u.ok() ) {
                    log() << "replSet rollback error RS101 reached beginning of local oplog" << rsLog;
                    log() << "replSet   them:      " << them->toString() << " scanned: " << scanned << rsLog;
                    log() << "replSet   theirTime: " << theirTime.toStringLong() << rsLog;
                    log() << "replSet   ourTime:   " << ourTime.toStringLong() << rsLog;
                    throw "RS101 reached beginning of local oplog [2]";
                }
                ourObj = u.current();
                ourTime = ourObj["ts"]._opTime();
            }
        }
    }
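
Underneath the cursor handling, syncRollbackFindCommonPoint is a coordinated backward walk over two newest-first (ts, h) sequences: rewind whichever side currently has the newer timestamp, and stop when both the timestamp and the hash agree. A minimal sketch over two vectors sorted newest-first (Entry and findCommonPoint are illustrative names; the refetching, the RS100/RS101 error handling, and the one-hour limit are omitted):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // One oplog entry reduced to what the common-point search compares:
    // the timestamp (seconds here, for simplicity) and the 'h' hash.
    struct Entry {
        uint64_t ts;
        int64_t h;
    };

    // Walk two newest-first oplogs back in time; return the index into 'ours'
    // of the first entry whose (ts, h) also appears at the current position of
    // 'theirs', or -1 if the histories never meet.
    int findCommonPoint(const std::vector<Entry>& ours, const std::vector<Entry>& theirs) {
        size_t i = 0, j = 0;
        while (i < ours.size() && j < theirs.size()) {
            if (ours[i].ts == theirs[j].ts) {
                if (ours[i].h == theirs[j].h)
                    return static_cast<int>(i);   // matching event found
                // same time, different history: step both sides back
                ++i;
                ++j;
            } else if (theirs[j].ts > ours[i].ts) {
                ++j;                              // their entry is newer; rewind them
            } else {
                ++i;                              // our entry is newer; rewind us
            }
        }
        return -1;                                // reached the beginning of an oplog
    }

    int main() {
        std::vector<Entry> ours   = { {105, 7}, {103, 4}, {100, 1} };
        std::vector<Entry> theirs = { {106, 9}, {103, 8}, {100, 1} };
        std::cout << findCommonPoint(ours, theirs) << '\n';  // 2: the entries at ts=100 match
    }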
Example #15
 void BuilderObj::append(const OpTime& ot) {
     pBuilder->appendTimestamp(fieldName, ot.getSecs(), ot.getInc());
 }
Example #16
 void BuilderArray::append(const OpTime& ot) {
     pBuilder->appendTimestamp(ot.getSecs(), ot.getInc());
 }
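
Examples #15 and #16 serialize an OpTime by handing its two halves to appendTimestamp. The underlying BSON Timestamp is a single 64-bit value with the seconds in the high 32 bits and the increment in the low 32 bits; a small sketch of that packing and its inverse (packOpTime and unpackOpTime are my names, not MongoDB's):

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // Pack (seconds, increment) into the 64-bit layout a BSON Timestamp uses:
    // seconds in the high 32 bits, increment in the low 32 bits.
    uint64_t packOpTime(uint32_t secs, uint32_t inc) {
        return (static_cast<uint64_t>(secs) << 32) | inc;
    }

    // Inverse of packOpTime: recover (seconds, increment) from the packed value.
    std::pair<uint32_t, uint32_t> unpackOpTime(uint64_t packed) {
        return { static_cast<uint32_t>(packed >> 32),
                 static_cast<uint32_t>(packed & 0xFFFFFFFFu) };
    }

    int main() {
        uint64_t packed = packOpTime(1400000000u, 7u);
        std::pair<uint32_t, uint32_t> ot = unpackOpTime(packed);
        assert(ot.first == 1400000000u && ot.second == 7u);
        return 0;
    }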