Example #1
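    // Checks whether we must roll back before syncing from this source: reads the
    // first entry of the remote oplog and compares its GTID, timestamp, and hash
    // against our GTIDManager state. If they diverge, runs runRollback and returns
    // true; returns false when no rollback is needed.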
    bool BackgroundSync::isRollbackRequired(OplogReader& r) {
        string hn = r.conn()->getServerAddress();
        if (!r.more()) {
            // In vanilla Mongo, this happened for one of the
            // following reasons:
            //  - we were ahead of what we are syncing from (don't
            //    think that is possible anymore)
            //  - remote oplog is empty for some weird reason
            // in either case, if it (strangely) happens, we'll just return
            // and our caller will simply try again after a short sleep.
            log() << "replSet error empty query result from " << hn << " oplog, attempting rollback" << rsLog;
            return true;
        }

        BSONObj o = r.nextSafe();
        uint64_t ts = o["ts"]._numberLong();
        uint64_t lastHash = o["h"].numberLong();
        GTID gtid = getGTIDFromBSON("_id", o);

        if( !theReplSet->gtidManager->rollbackNeeded(gtid, ts, lastHash)) {
            log() << "Rollback NOT needed! Our GTID" << gtid << endl;
            return false;
        }

        log() << "Rollback needed! Our GTID" <<
            theReplSet->gtidManager->getLiveState().toString() <<
            " remote GTID: " << gtid.toString() << ". Attempting rollback." << rsLog;

        runRollback(r, ts);
        return true;
    }
Example #2
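    // Checks whether a rollback is required before resuming replication: either a
    // rollback is already recorded as in progress (a rollbackStatus document
    // exists), or the sync target's first oplog entry diverges from our GTID
    // state. On divergence, *lastTS is set to the remote entry's timestamp.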
    bool isRollbackRequired(OplogReader& r, uint64_t *lastTS) {
        string hn = r.conn()->getServerAddress();
        verify(r.more());
        BSONObj rollbackStatus;
        bool found = getRollbackStatus(rollbackStatus);
        if (found) {
            // we have a rollback in progress,
            // must complete it
            log() << "Rollback needed, found rollbackStatus: " << rollbackStatus << rsLog;
            return true;
        }

        BSONObj o = r.nextSafe();
        uint64_t ts = o["ts"]._numberLong();
        uint64_t lastHash = o["h"].numberLong();
        GTID gtid = getGTIDFromBSON("_id", o);

        if (!theReplSet->gtidManager->rollbackNeeded(gtid, ts, lastHash)) {
            log() << "Rollback NOT needed! " << gtid << endl;
            return false;
        }

        log() << "Rollback needed! Our GTID: " <<
            theReplSet->gtidManager->getLiveState().toString() <<
            ", remote GTID: " << gtid.toString() << ". Attempting rollback." << rsLog;

        *lastTS = ts;
        return true;
    }
Example #3
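// Scans the local oplog forward from minUnappliedGTID, collects every entry whose
// transaction has not yet been applied (the "a" field is false), and then applies
// those transactions once the read transaction has committed.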
void applyMissingOpsInOplog(GTID minUnappliedGTID) {
    std::deque<BSONObj> unappliedTransactions;
    {
        // accumulate a list of transactions that are unapplied
        LOCK_REASON(lockReason, "repl: initial sync applying missing ops");
        Client::ReadContext ctx(rsoplog, lockReason);
        Client::Transaction catchupTransaction(0);

        if (minUnappliedGTID.isInitial()) {
            // now we should have replInfo on this machine,
            // let's query the minUnappliedGTID to figure out from where
            // we should copy the opLog
            BSONObj result;
            const bool foundMinUnapplied = Collection::findOne(rsReplInfo, BSON("_id" << "minUnapplied"), result);
            verify(foundMinUnapplied);
            // update the GTID we were passed so the scan below starts from it
            minUnappliedGTID = getGTIDFromBSON("GTID", result);
        }
        // now we need to read the oplog forward
        GTID lastEntry;
        bool ret = getLastGTIDinOplog(&lastEntry);
        isyncassert("could not get last oplog entry after clone", ret);

        // at this point, we have got the oplog up to date,
        // now we need to read forward in the oplog
        // from minUnapplied
        BSONObjBuilder q;
        addGTIDToBSON("$gte", minUnappliedGTID, q);
        BSONObjBuilder query;
        query.append("_id", q.done());

        {
            shared_ptr<Cursor> c = getOptimizedCursor(rsoplog, query.done());
            while( c->ok() ) {
                if ( c->currentMatches()) {
                    BSONObj curr = c->current();
                    bool transactionAlreadyApplied = curr["a"].Bool();
                    if (!transactionAlreadyApplied) {
                        GTID currEntry = getGTIDFromBSON("_id", curr);
                        LOG(2) << "applying missing op gap " << currEntry.toString() << endl;
                        unappliedTransactions.push_back(curr.getOwned());
                    }
                }
                c->advance();
            }
        }
        catchupTransaction.commit(0);
    }
    while (unappliedTransactions.size() > 0) {
        BSONObj curr = unappliedTransactions.front();
        applyTransactionFromOplog(curr, NULL);
        unappliedTransactions.pop_front();
    }
}
Example #4
    // does some sanity checks that we are in a decent state before finishing
    // starting or stopping the opsync thread
    //
    // called with _mutex held
    void BackgroundSync::verifySettled() {
        // if the background sync has yet to be fully started,
        // no need to run this, we are still in initialization
        // of the replset. This can happen if
        // during initialization, after we start the manager, we
        // get a new config before we have fully started replication
        if (!_applierInProgress) {
            return;
        }
        verify(_deque.size() == 0);
        // do a sanity check on the GTID Manager
        GTID lastLiveGTID;
        GTID lastUnappliedGTID;
        theReplSet->gtidManager->getLiveGTIDs(
            &lastLiveGTID, 
            &lastUnappliedGTID
            );
        log() << "last GTIDs: " << 
            lastLiveGTID.toString() << " " << 
            lastUnappliedGTID.toString() << " " << endl;
        verify(GTID::cmp(lastUnappliedGTID, lastLiveGTID) == 0);

        GTID minLiveGTID;
        GTID minUnappliedGTID;
        theReplSet->gtidManager->getMins(
            &minLiveGTID, 
            &minUnappliedGTID
            );
        log() << "min GTIDs: " << 
            minLiveGTID.toString() << " " <<
            minUnappliedGTID.toString() << rsLog;
        verify(GTID::cmp(minUnappliedGTID, minLiveGTID) == 0);
    }
Example #5
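 // Rolls back the newest oplog entries one transaction at a time until the last
 // entry in the oplog is idToRollbackTo, collecting affected documents in docsMap
 // so their snapshots can be fetched later.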
 void rollbackToGTID(GTID idToRollbackTo, RollbackDocsMap* docsMap, RollbackSaveData* rsSave) {
     // at this point, everything should be settled, the applier should
     // have nothing left (and remain that way, because this is the only
     // thread that can put work on the applier). Now we can rollback
     // the data.
     while (true) {
         BSONObj o;
         {
             LOCK_REASON(lockReason, "repl: checking for oplog data");
             Client::ReadContext ctx(rsoplog, lockReason);
             Client::Transaction txn(DB_SERIALIZABLE);
             // if there is nothing in the oplog, break
             o = getLastEntryInOplog();
             if (o.isEmpty()) {
                 throw RollbackOplogException("Oplog empty when rolling back to a GTID");
             }
         }
         GTID lastGTID = getGTIDFromBSON("_id", o);
         // if we have rolled back enough, break from while loop
         if (GTID::cmp(lastGTID, idToRollbackTo) <= 0) {
             dassert(GTID::cmp(lastGTID, idToRollbackTo) == 0);
             break;
         }
         rollbackTransactionFromOplog(o, docsMap, rsSave);
     }
     log() << "Rolling back to " << idToRollbackTo.toString() << " produced " <<
         docsMap->size() << " documents for which we need to retrieve a snapshot of." << rsLog;
 }
Example #6
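    // Forwards a chained slave's replication position (lastGTID) upstream to our
    // own sync target so that slave progress keeps propagating toward the primary.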
    void GhostSync::percolate(const BSONObj& id, const GTID& lastGTID) {
        const OID rid = id["_id"].OID();
        shared_ptr<GhostSlave> slave;
        {
            rwlock lk( _lock , false );

            MAP::iterator i = _ghostCache.find( rid );
            if ( i == _ghostCache.end() ) {
                OCCASIONALLY log() << "couldn't percolate slave " << rid << " no entry" << rsLog;
                return;
            }

            slave = i->second;
            if (!slave->init) {
                OCCASIONALLY log() << "couldn't percolate slave " << rid << " not init" << rsLog;
                return;
            }
        }
        verify(slave->slave);

        const Member *target = BackgroundSync::get()->getSyncTarget();
        if (!target || rs->box.getState().primary()
            // we are currently syncing from someone who's syncing from us
            // the target might end up with a new Member, but s.slave never
            // changes so we'll compare the names
            || target == slave->slave || target->fullName() == slave->slave->fullName()) {
            LOG(1) << "replica set ghost target no good" << endl;
            return;
        }

        if ( GTID::cmp(slave->lastGTID, lastGTID) > 0 ) {
            return;
        }

        try {
            if (!slave->reader.haveConnection()) {
                if (!slave->reader.connect(id, slave->slave->id(), target->fullName())) {
                    // error message logged in OplogReader::connect
                    return;
                }
            }
            bool ret = slave->reader.propogateSlaveLocation(lastGTID);
            if (ret) {
                slave->lastGTID = lastGTID;
                LOG(2) << "now last is " << slave->lastGTID.toString() << rsLog;
            }
            else {
                LOG(0) << "failed to percolate to with new location" << lastGTID.toString() << rsLog;
                slave->reader.resetConnection();
            }
        }
        catch (DBException& e) {
            // we'll be back
            LOG(2) << "replSet ghost sync error: " << e.what() << " for "
                   << slave->slave->fullName() << rsLog;
            slave->reader.resetConnection();
        }
    }
Example #7
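// Tails the sync target's oplog starting from our minLive GTID and replicates any
// transactions missing from the local oplog, up to our last local entry.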
void ReplSetImpl::_fillGaps(OplogReader* r) {
    LOCK_REASON(lockReason, "repl: filling gaps");
    Client::ReadContext ctx(rsoplog, lockReason);
    Client::Transaction catchupTransaction(0);

    // now we should have replInfo on this machine,
    // let's query the minLiveGTID to figure out from where
    // we should copy the opLog
    BSONObj result;
    const bool foundMinLive = Collection::findOne(rsReplInfo, BSON("_id" << "minLive"), result);
    verify(foundMinLive);
    GTID minLiveGTID;
    minLiveGTID = getGTIDFromBSON("GTID", result);
    // now we need to read the oplog forward
    GTID lastEntry;
    bool ret = getLastGTIDinOplog(&lastEntry);
    isyncassert("could not get last oplog entry after clone", ret);

    GTID currEntry = minLiveGTID;
    LOG(2) << "starting to fill gaps currEntry: " << currEntry.toString() << " lastEntry: " << lastEntry.toString() <<endl;
    // first, we need to fill in the "gaps" in the oplog
    while (GTID::cmp(currEntry, lastEntry) < 0) {
        r->tailingQueryGTE(rsoplog, currEntry);
        while (GTID::cmp(currEntry, lastEntry) < 0) {
            bool hasMore = true;
            if (!r->moreInCurrentBatch()) {
                hasMore = r->more();
            }
            if (!hasMore) {
                break;
            }
            BSONObj op = r->nextSafe().getOwned();
            currEntry = getGTIDFromOplogEntry(op);
            // try inserting it into the oplog, if it does not
            // already exist
            if (!gtidExistsInOplog(currEntry)) {
                LOG(2) << "filling gap " << currEntry.toString() << endl;
                bool bigTxn;
                replicateFullTransactionToOplog(op, *r, &bigTxn);
            }
        }
    }
    catchupTransaction.commit(0);
}
Example #8
 bool canStartRollback(OplogReader& r, GTID idToRollbackTo) {
     shared_ptr<DBClientConnection> conn(r.conn_shared());
     // before we start rollback, let's make sure that the minUnapplied on the remote
     // server is past the id that we are rolling back to. Otherwise, the snapshot
     // we create will not be up to date, and the rollback algorithm will not work
     BSONObjBuilder b;
     b.append("_id", "minUnapplied");
     // Note that another way to get this information is to
     // request a heartbeat. That one will technically return
     // a more up to date value for minUnapplied
     BSONObj res = findOneFromConn(conn.get(), rsReplInfo, Query(b.done()));
     GTID minUnapplied = getGTIDFromBSON("GTID", res);
     if (GTID::cmp(minUnapplied, idToRollbackTo) < 0) {
         log() << "Remote server has minUnapplied " << minUnapplied.toString() << \
             " we want to rollback to " << idToRollbackTo.toString() << \
             ". Therefore, exiting and retrying." << rsLog;
         return false;
     }
     return true;
 }
Example #9
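// Builds the replSetGetStatus report: one document for ourselves plus one per
// member, with health, state, uptime, heartbeat info and, for non-arbiters, the
// last/min live and unapplied GTIDs.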
void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const {
    vector<BSONObj> v;

    const Member *_self = this->_self;
    verify( _self );

    MemberState myState = box.getState();

    // add self
    {
        BSONObjBuilder bb;
        bb.append("_id", (int) _self->id());
        bb.append("name", _self->fullName());
        bb.append("health", 1.0);
        bb.append("state", (int)myState.s);
        bb.append("stateStr", myState.toString());
        bb.append("uptime", (unsigned)(time(0) - cmdLine.started));
        if (!_self->config().arbiterOnly) {
            GTID lastLive;
            GTID lastUnapplied;
            GTID minLive;
            GTID minUnapplied;
            gtidManager->getGTIDs(
                &lastLive,
                &lastUnapplied,
                &minLive,
                &minUnapplied
            );
            bb.appendDate("optimeDate", gtidManager->getCurrTimestamp());
            bb.append("lastGTID", lastLive.toString());
            bb.append("lastUnappliedGTID", lastUnapplied.toString());
            bb.append("minLiveGTID", minLive.toString());
            bb.append("minUnappliedGTID", minUnapplied.toString());
            bb.append("oplogVersion", ReplSetConfig::OPLOG_VERSION);
        }

        int maintenance = _maintenanceMode;
        if (maintenance) {
            bb.append("maintenanceMode", maintenance);
        }

        if (theReplSet) {
            string s = theReplSet->hbmsg();
            if( !s.empty() )
                bb.append("errmsg", s);
        }
        bb.append("self", true);
        v.push_back(bb.obj());
    }

    Member *m = _members.head();
    while( m ) {
        BSONObjBuilder bb;
        bb.append("_id", (int) m->id());
        bb.append("name", m->fullName());
        double h = m->hbinfo().health;
        bb.append("health", h);
        bb.append("state", (int) m->state().s);
        if( h == 0 ) {
            // if we can't connect the state info is from the past and could be confusing to show
            bb.append("stateStr", "(not reachable/healthy)");
        }
        else {
            bb.append("stateStr", m->state().toString());
        }
        bb.append("uptime", (unsigned) (m->hbinfo().upSince ? (time(0)-m->hbinfo().upSince) : 0));
        if (!m->config().arbiterOnly) {
            bb.appendDate("optimeDate", m->hbinfo().opTime);
            bb.append("lastGTID", m->hbinfo().gtid.toString());
            bb.append("lastUnappliedGTID", m->hbinfo().lastUnappliedGTID.toString());
            bb.append("minLiveGTID", m->hbinfo().minLiveGTID.toString());
            bb.append("minUnappliedGTID", m->hbinfo().minUnappliedGTID.toString());
            bb.append("oplogVersion", m->hbinfo().oplogVersion);
        }
        bb.appendTimeT("lastHeartbeat", m->hbinfo().lastHeartbeat);
        bb.appendTimeT("lastHeartbeatRecv", m->getLastRecvHeartbeat());
        bb.append("pingMs", m->hbinfo().ping);
        string s = m->lhb();
        if( !s.empty() )
            bb.append("lastHeartbeatMessage", s);

        if (m->hbinfo().authIssue) {
            bb.append("authenticated", false);
        }

        string syncingTo = m->hbinfo().syncingTo;
        if (!syncingTo.empty()) {
            bb.append("syncingTo", syncingTo);
        }

        v.push_back(bb.obj());
        m = m->next();
    }
    sort(v.begin(), v.end());
    b.append("set", name());
    b.appendTimeT("date", time(0));
    b.append("myState", myState.s);
    const Member *syncTarget = BackgroundSync::get()->getSyncTarget();
    if ( syncTarget &&
            (myState != MemberState::RS_PRIMARY) &&
            (myState != MemberState::RS_SHUNNED) ) {
        b.append("syncingTo", syncTarget->fullName());
    }
    b.append("members", v);
    if( replSetBlind )
        b.append("blind",true); // to avoid confusion if set...normally never set except for testing.
}
Example #10
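        // Unit test for GTIDManager: verifies initialization, GTID assignment on a
        // primary, resetManager and highest-known-primary handling, and maintenance
        // of the last/min live and unapplied GTIDs as entries are added and applied.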
        void testGTIDManager() {
            GTID lastGTID(1,1);
            GTIDManager mgr(lastGTID, 0, 0, 0, 0);
            
            // make sure initialization is what we expect
            ASSERT(GTID::cmp(mgr._lastLiveGTID, lastGTID) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, lastGTID) > 0);
            lastGTID.inc();
            ASSERT(GTID::cmp(mgr._minLiveGTID, lastGTID) == 0);
            mgr.catchUnappliedToLive();
            ASSERT(GTID::cmp(mgr._lastLiveGTID, mgr._lastUnappliedGTID) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, mgr._minUnappliedGTID) == 0);
            GTID resetGTID(2,2);
            mgr.resetAfterInitialSync(resetGTID, 1, 1);
            mgr.verifyReadyToBecomePrimary();
            ASSERT(GTID::cmp(mgr._lastLiveGTID, resetGTID) == 0);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, mgr._lastUnappliedGTID) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, mgr._minUnappliedGTID) == 0);
            resetGTID.inc();
            ASSERT(GTID::cmp(mgr._minLiveGTID, resetGTID) == 0);

            // now test that it works as primary
            GTID currLast = mgr.getLiveState();
            GTID currMin = mgr._minLiveGTID;
            ASSERT(GTID::cmp(currLast, mgr._lastLiveGTID) == 0);

            uint64_t ts;
            uint64_t hash;
            GTID gtid;
            mgr.getGTIDForPrimary(&gtid, &ts, &hash);
            cerr << gtid.toString() << endl;
            cerr << currMin.toString() << endl;
            ASSERT(GTID::cmp(gtid, currMin) == 0);
            ASSERT(GTID::cmp(gtid, mgr._minLiveGTID) == 0);
            ASSERT(GTID::cmp(gtid, mgr._lastLiveGTID) == 0);
            mgr.noteLiveGTIDDone(gtid);
            ASSERT(GTID::cmp(gtid, mgr._lastLiveGTID) == 0);
            ASSERT(GTID::cmp(gtid, mgr._minLiveGTID) < 0);

            // simple test of resetManager
            currLast = mgr._lastLiveGTID;
            currMin = mgr._minLiveGTID;
            uint64_t currHkp = mgr.getHighestKnownPrimary();
            // just a sanity check, that hkp is 2
            ASSERT(currHkp == 2);
            ASSERT(mgr._newPrimaryValue == 0);
            ASSERT(!mgr.resetManager(1));
            ASSERT(!mgr.resetManager(2));
            ASSERT(mgr.resetManager(4));
            mgr.verifyReadyToBecomePrimary();
            // make sure that lastLive and minLive not changed yet
            ASSERT(GTID::cmp(currMin, mgr._minLiveGTID) == 0);
            ASSERT(GTID::cmp(currLast, mgr._lastLiveGTID) == 0);
            // now make sure that primary has increased
            ASSERT(mgr._newPrimaryValue == 4);
            mgr.getGTIDForPrimary(&gtid, &ts, &hash);
            ASSERT(mgr._newPrimaryValue == 0);

            ASSERT(gtid._primarySeqNo > currLast._primarySeqNo);
            ASSERT(gtid._primarySeqNo == 4);
            ASSERT(gtid._GTSeqNo == 0);

            mgr.noteLiveGTIDDone(gtid);
            mgr.verifyReadyToBecomePrimary();

            // now test that min is properly maintained
            currLast = mgr._lastLiveGTID;
            currMin = mgr._minLiveGTID;
            GTID gtid1, gtid2, gtid3, gtid4, gtid5;
            mgr.getGTIDForPrimary(&gtid1, &ts, &hash);
            mgr.getGTIDForPrimary(&gtid2, &ts, &hash);
            mgr.getGTIDForPrimary(&gtid3, &ts, &hash);
            mgr.getGTIDForPrimary(&gtid4, &ts, &hash);
            ASSERT(GTID::cmp(gtid1, gtid2) < 0);
            ASSERT(GTID::cmp(gtid2, gtid3) < 0);
            ASSERT(GTID::cmp(gtid3, gtid4) < 0);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid4) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtid1) == 0);
            // finish 2, nothing should change
            mgr.noteLiveGTIDDone(gtid2);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid4) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtid1) == 0);
            // finish 1, min should jump to 3
            mgr.noteLiveGTIDDone(gtid1);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid4) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtid3) == 0);
            // get 5, _lastLive should change
            mgr.getGTIDForPrimary(&gtid5, &ts, &hash);
            ASSERT(GTID::cmp(gtid4, gtid5) < 0);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid5) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtid3) == 0);
            
            // finish 3 and 4, should both jump to 5
            mgr.noteLiveGTIDDone(gtid3);
            mgr.noteLiveGTIDDone(gtid4);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid5) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtid5) == 0);
            // finish 5, min should jump up
            mgr.noteLiveGTIDDone(gtid5);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid5) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtid5) > 0);
            mgr.verifyReadyToBecomePrimary();

            GTID currLastUnapplied = mgr._lastUnappliedGTID;
            GTID currMinUnapplied = mgr._minUnappliedGTID;
            
            gtid5.inc();
            gtid5.inc();
            gtid5.inc();
            GTID gtidOther = gtid5;
            gtidOther.inc();

            GTID gtidUnapplied1 = gtid5;
            // now let's do a test for secondaries
            mgr.noteGTIDAdded(gtidUnapplied1, ts, hash);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtidUnapplied1) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtidOther) == 0);
            gtid5.inc();
            gtidOther.inc();
            GTID gtidUnapplied2 = gtid5;
            mgr.noteGTIDAdded(gtidUnapplied2, ts, hash);
            ASSERT(GTID::cmp(mgr._lastLiveGTID, gtidUnapplied2) == 0);
            ASSERT(GTID::cmp(mgr._minLiveGTID, gtidOther) == 0);
            // verify unapplied values not changed
            ASSERT(GTID::cmp(mgr._lastUnappliedGTID, currLastUnapplied) == 0);
            ASSERT(GTID::cmp(mgr._minUnappliedGTID, currMinUnapplied) == 0);
            gtid5.inc();
            GTID gtidUnapplied3 = gtid5;
            mgr.noteGTIDAdded(gtidUnapplied3, ts, hash);
            gtid5.inc();
            GTID gtidUnapplied4 = gtid5;
            mgr.noteGTIDAdded(gtidUnapplied4, ts, hash);
            // at this point, we have 4 GTIDs that have been added, but
            // yet to be applied
            mgr.noteApplyingGTID(gtidUnapplied1);
            mgr.noteApplyingGTID(gtidUnapplied2);
            ASSERT(GTID::cmp(mgr._lastUnappliedGTID, gtidUnapplied2) == 0);
            ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied1) == 0);
            mgr.noteGTIDApplied(gtidUnapplied2);
            ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied1) == 0);
            mgr.noteApplyingGTID(gtidUnapplied3);
            mgr.noteApplyingGTID(gtidUnapplied4);
            ASSERT(GTID::cmp(mgr._lastUnappliedGTID, gtidUnapplied4) == 0);
            ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied1) == 0);
            mgr.noteGTIDApplied(gtidUnapplied3);
            mgr.noteGTIDApplied(gtidUnapplied1);
            ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied4) == 0);
            mgr.noteGTIDApplied(gtidUnapplied4);
            ASSERT(GTID::cmp(mgr._lastUnappliedGTID, gtidUnapplied4) == 0);
            ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied4) > 0);
        }
Example #11
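    // Walks the sync target's oplog backwards from our last GTID to find a common
    // point (matching GTID, timestamp, and hash), switches the replica set into
    // RS_ROLLBACK, resets the GTIDManager to that point, and rolls back local
    // oplog entries until the oplog ends at it.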
    void BackgroundSync::runRollback(OplogReader& r, uint64_t oplogTS) {
        // starting from ourLast, we need to read the remote oplog
        // backwards until we find an entry in the remote oplog
        // that has the same GTID, timestamp, and hash as
        // what we have in our oplog. If we don't find one that is within
        // some reasonable timeframe, then we go fatal
        GTID ourLast = theReplSet->gtidManager->getLiveState();
        GTID idToRollbackTo;
        uint64_t rollbackPointTS = 0;
        uint64_t rollbackPointHash = 0;
        incRBID();
        try {
            shared_ptr<DBClientCursor> rollbackCursor = r.getRollbackCursor(ourLast);
            while (rollbackCursor->more()) {
                BSONObj remoteObj = rollbackCursor->next();
                GTID remoteGTID = getGTIDFromBSON("_id", remoteObj);
                uint64_t remoteTS = remoteObj["ts"]._numberLong();
                uint64_t remoteLastHash = remoteObj["h"].numberLong();
                if (remoteTS + 1800*1000 < oplogTS) {
                    log() << "Rollback takes us too far back, throwing exception. remoteTS: " << remoteTS << " oplogTS: " << oplogTS << rsLog;
                    throw RollbackOplogException("replSet rollback too long a time period for a rollback (at least 30 minutes).");
                }
                //now try to find an entry in our oplog with that GTID
                BSONObjBuilder localQuery;
                BSONObj localObj;
                addGTIDToBSON("_id", remoteGTID, localQuery);
                bool foundLocally = false;
                {
                    LOCK_REASON(lockReason, "repl: looking up oplog entry for rollback");
                    Client::ReadContext ctx(rsoplog, lockReason);
                    Client::Transaction transaction(DB_SERIALIZABLE);
                    foundLocally = Collection::findOne(rsoplog, localQuery.done(), localObj);
                    transaction.commit();
                }
                if (foundLocally) {
                    GTID localGTID = getGTIDFromBSON("_id", localObj);
                    uint64_t localTS = localObj["ts"]._numberLong();
                    uint64_t localLastHash = localObj["h"].numberLong();
                    if (localLastHash == remoteLastHash &&
                        localTS == remoteTS &&
                        GTID::cmp(localGTID, remoteGTID) == 0
                        )
                    {
                        idToRollbackTo = localGTID;
                        rollbackPointTS = localTS;
                        rollbackPointHash = localLastHash;
                        log() << "found id to rollback to " << idToRollbackTo << rsLog;
                        break;
                    }
                }
            }
            // At this point, either we have found the point to try to rollback to,
            // or we have determined that we cannot rollback
            if (idToRollbackTo.isInitial()) {
                // we cannot rollback
                throw RollbackOplogException("could not find ID to rollback to");
            }
        }
        catch (DBException& e) {
            log() << "Caught DBException during rollback " << e.toString() << rsLog;
            throw RollbackOplogException("DBException while trying to find ID to rollback to: " + e.toString());
        }
        catch (std::exception& e2) {
            log() << "Caught std::exception during rollback " << e2.what() << rsLog;
            throw RollbackOplogException(str::stream() << "Exception while trying to find ID to rollback to: " << e2.what());
        }

        // proceed with the rollback to point idToRollbackTo
        // probably ought to grab a global write lock while doing this
        // I don't think we want oplog cursors reading from this machine
        // while we are rolling back. Or at least do something to protect against this

        // first, let's get all the operations that are being applied out of the way,
        // we don't want to rollback an item in the oplog while simultaneously,
        // the applier thread is applying it to the oplog
        {
            boost::unique_lock<boost::mutex> lock(_mutex);
            while (_deque.size() > 0) {
                log() << "waiting for applier to finish work before doing rollback " << rsLog;
                _queueDone.wait(lock);
            }
            verifySettled();
        }

        // now let's tell the system we are going to rollback, to do so,
        // abort live multi statement transactions, invalidate cursors, and
        // change the state to RS_ROLLBACK
        {
            // so we know nothing is simultaneously occurring
            RWLockRecursive::Exclusive e(operationLock);
            LOCK_REASON(lockReason, "repl: killing all operations for rollback");
            Lock::GlobalWrite lk(lockReason);
            ClientCursor::invalidateAllCursors();
            Client::abortLiveTransactions();
            theReplSet->goToRollbackState();
        }

        try {
            // now that we are settled, we have to take care of the GTIDManager
            // and the repl info thread.
            // We need to reset the state of the GTIDManager to the point
            // we intend to rollback to, and we need to make sure that the repl info thread
            // has captured this information.
            theReplSet->gtidManager->resetAfterInitialSync(
                idToRollbackTo,
                rollbackPointTS,
                rollbackPointHash
                );
            // now force an update of the repl info thread
            theReplSet->forceUpdateReplInfo();

            // at this point, everything should be settled, the applier should
            // have nothing left (and remain that way, because this is the only
            // thread that can put work on the applier). Now we can rollback
            // the data.
            while (true) {
                BSONObj o;
                {
                    LOCK_REASON(lockReason, "repl: checking for oplog data");
                    Lock::DBRead lk(rsoplog, lockReason);
                    Client::Transaction txn(DB_SERIALIZABLE);
                    // if there is nothing in the oplog, break
                    o = getLastEntryInOplog();
                    if( o.isEmpty() ) {
                        break;
                    }
                }
                GTID lastGTID = getGTIDFromBSON("_id", o);
                // if we have rolled back enough, break from while loop
                if (GTID::cmp(lastGTID, idToRollbackTo) <= 0) {
                    dassert(GTID::cmp(lastGTID, idToRollbackTo) == 0);
                    break;
                }
                rollbackTransactionFromOplog(o, true);
            }
            theReplSet->leaveRollbackState();
        }
        catch (DBException& e) {
            log() << "Caught DBException during rollback " << e.toString() << rsLog;
            throw RollbackOplogException("DBException while trying to run rollback: " + e.toString());
        }
        catch (std::exception& e2) {
            log() << "Caught std::exception during rollback " << e2.what() << rsLog;
            throw RollbackOplogException(str::stream() << "Exception while trying to run rollback: " << e2.what());
        }
        
    }
Example #12
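    // Commits this transaction context: logs the transaction's replicated ops to
    // the oplog (root transactions) or spills them to the parent (child
    // transactions), commits the underlying storage transaction, and finally tells
    // the GTIDManager that the assigned GTID is done.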
    void TxnContext::commit(int flags) {
        verify(!_retired);
        bool gotGTID = false;
        GTID gtid;
        // do this in case we are writing the first entry
        // we put something in that can be distinguished from
        // an initialized GTID that has never been touched
        gtid.inc_primary(); 
        // handle work related to logging of transaction for replication
        // this piece must be done before the _txn.commit
        try {
            if (hasParent()) {
                // This does something
                // a bit dangerous in that it may spill parent's stuff
                // with this child transaction that is committing. If something
                // goes wrong and this child transaction aborts, we will miss
                // some ops
                //
                // This ought to be ok, because we are in this try/catch block
                // where if something goes wrong, we will crash the server.
                // NOTHING better go wrong here, unless under bad rare
                // circumstances
                _txnOps.finishChildCommit();
            }
            else if (!_txnOps.empty()) {
                uint64_t timestamp = 0;
                uint64_t hash = 0;
                if (!_initiatingRS) {
                    dassert(txnGTIDManager);
                    txnGTIDManager->getGTIDForPrimary(&gtid, &timestamp, &hash);
                }
                else {
                    dassert(!txnGTIDManager);
                    timestamp = curTimeMillis64();
                }
                gotGTID = true;
                // In this case, the transaction we are committing has
                // no parent, so we must write the transaction's 
                // logged operations to the opLog, as part of this transaction
                dassert(logTxnOpsForReplication());
                dassert(_logTxnToOplog);
                _txnOps.rootCommit(gtid, timestamp, hash);
            }
            // handle work related to logging of transaction for chunk migrations
            if (!_txnOpsForSharding.empty()) {
                if (hasParent()) {
                    transferOpsForShardingToParent();
                }
                else {
                    writeTxnOpsToMigrateLog();
                }
            }

            _clientCursorRollback.preComplete();
            _txn.commit(flags);

            // if the commit of this transaction got a GTID, then notify 
            // the GTIDManager that the commit is now done.
            if (gotGTID && !_initiatingRS) {
                dassert(txnGTIDManager);
                // save the GTID for the client so that
                // getLastError will know what GTID slaves
                // need to be caught up to.
                cc().setLastOp(gtid);
                txnGTIDManager->noteLiveGTIDDone(gtid);
            }
        }
        catch (std::exception &e) {
            log() << "exception during critical section of txn commit, aborting system: " << e.what() << endl;
            printStackTrace();
            logflush();
            ::abort();
        }

        // These rollback items must be processed after the ydb transaction completes.
        if (hasParent()) {
            _cappedRollback.transfer(_parent->_cappedRollback);
            _nsIndexRollback.transfer(_parent->_nsIndexRollback);
        } else {
            _cappedRollback.commit();
            _nsIndexRollback.commit();
        }
        _retired = true;
    }