コード例 #1
0
ファイル: bgsync.cpp プロジェクト: igagnidz/tokumx
    void BackgroundSync::runRollback(OplogReader& r, uint64_t oplogTS) {
        // starting from ourLast, we need to read the remote oplog
        // backwards until we find an entry in the remote oplog
        // that has the same GTID, timestamp, and hash as
        // what we have in our oplog. If we don't find one that is within
        // some reasonable timeframe, then we go fatal
        GTID ourLast = theReplSet->gtidManager->getLiveState();
        GTID idToRollbackTo;
        uint64_t rollbackPointTS = 0;
        uint64_t rollbackPointHash = 0;
        incRBID();
        try {
            shared_ptr<DBClientCursor> rollbackCursor = r.getRollbackCursor(ourLast);
            while (rollbackCursor->more()) {
                BSONObj remoteObj = rollbackCursor->next();
                GTID remoteGTID = getGTIDFromBSON("_id", remoteObj);
                uint64_t remoteTS = remoteObj["ts"]._numberLong();
                uint64_t remoteLastHash = remoteObj["h"].numberLong();
                if (remoteTS + 1800*1000 < oplogTS) {
                    log() << "Rollback takes us too far back, throwing exception. remoteTS: " << remoteTS << " oplogTS: " << oplogTS << rsLog;
                    throw RollbackOplogException("replSet rollback too long a time period for a rollback (at least 30 minutes).");
                    break;
                }
                //now try to find an entry in our oplog with that GTID
                BSONObjBuilder localQuery;
                BSONObj localObj;
                addGTIDToBSON("_id", remoteGTID, localQuery);
                bool foundLocally = false;
                {
                    LOCK_REASON(lockReason, "repl: looking up oplog entry for rollback");
                    Client::ReadContext ctx(rsoplog, lockReason);
                    Client::Transaction transaction(DB_SERIALIZABLE);
                    foundLocally = Collection::findOne(rsoplog, localQuery.done(), localObj);
                    transaction.commit();
                }
                if (foundLocally) {
                    GTID localGTID = getGTIDFromBSON("_id", localObj);
                    uint64_t localTS = localObj["ts"]._numberLong();
                    uint64_t localLastHash = localObj["h"].numberLong();
                    if (localLastHash == remoteLastHash &&
                        localTS == remoteTS &&
                        GTID::cmp(localGTID, remoteGTID) == 0
                        )
                    {
                        idToRollbackTo = localGTID;
                        rollbackPointTS = localTS;
                        rollbackPointHash = localLastHash;
                        log() << "found id to rollback to " << idToRollbackTo << rsLog;
                        break;
                    }
                }
            }
            // At this point, either we have found the point to try to rollback to,
            // or we have determined that we cannot rollback
            if (idToRollbackTo.isInitial()) {
                // we cannot rollback
                throw RollbackOplogException("could not find ID to rollback to");
            }
        }
        catch (DBException& e) {
            log() << "Caught DBException during rollback " << e.toString() << rsLog;
            throw RollbackOplogException("DBException while trying to find ID to rollback to: " + e.toString());
        }
        catch (std::exception& e2) {
            log() << "Caught std::exception during rollback " << e2.what() << rsLog;
            throw RollbackOplogException(str::stream() << "Exception while trying to find ID to rollback to: " << e2.what());
        }

        // proceed with the rollback to point idToRollbackTo
        // probably ought to grab a global write lock while doing this
        // I don't think we want oplog cursors reading from this machine
        // while we are rolling back. Or at least do something to protect against this

        // first, let's get all the operations that are being applied out of the way,
        // we don't want to rollback an item in the oplog while simultaneously,
        // the applier thread is applying it to the oplog
        {
            boost::unique_lock<boost::mutex> lock(_mutex);
            while (_deque.size() > 0) {
                log() << "waiting for applier to finish work before doing rollback " << rsLog;
                _queueDone.wait(lock);
            }
            verifySettled();
        }

        // now let's tell the system we are going to rollback, to do so,
        // abort live multi statement transactions, invalidate cursors, and
        // change the state to RS_ROLLBACK
        {
            // so we know nothing is simultaneously occurring
            RWLockRecursive::Exclusive e(operationLock);
            LOCK_REASON(lockReason, "repl: killing all operations for rollback");
            Lock::GlobalWrite lk(lockReason);
            ClientCursor::invalidateAllCursors();
            Client::abortLiveTransactions();
            theReplSet->goToRollbackState();
        }

        try {
            // now that we are settled, we have to take care of the GTIDManager
            // and the repl info thread.
            // We need to reset the state of the GTIDManager to the point
            // we intend to rollback to, and we need to make sure that the repl info thread
            // has captured this information.
            theReplSet->gtidManager->resetAfterInitialSync(
                idToRollbackTo,
                rollbackPointTS,
                rollbackPointHash
                );
            // now force an update of the repl info thread
            theReplSet->forceUpdateReplInfo();

            // at this point, everything should be settled, the applier should
            // have nothing left (and remain that way, because this is the only
            // thread that can put work on the applier). Now we can rollback
            // the data.
            while (true) {
                BSONObj o;
                {
                    LOCK_REASON(lockReason, "repl: checking for oplog data");
                    Lock::DBRead lk(rsoplog, lockReason);
                    Client::Transaction txn(DB_SERIALIZABLE);
                    // if there is nothing in the oplog, break
                    o = getLastEntryInOplog();
                    if( o.isEmpty() ) {
                        break;
                    }
                }
                GTID lastGTID = getGTIDFromBSON("_id", o);
                // if we have rolled back enough, break from while loop
                if (GTID::cmp(lastGTID, idToRollbackTo) <= 0) {
                    dassert(GTID::cmp(lastGTID, idToRollbackTo) == 0);
                    break;
                }
                rollbackTransactionFromOplog(o, true);
            }
            theReplSet->leaveRollbackState();
        }
        catch (DBException& e) {
            log() << "Caught DBException during rollback " << e.toString() << rsLog;
            throw RollbackOplogException("DBException while trying to run rollback: " + e.toString());
        }
        catch (std::exception& e2) {
            log() << "Caught std::exception during rollback " << e2.what() << rsLog;
            throw RollbackOplogException(str::stream() << "Exception while trying to run rollback: " << e2.what());
        }
        
    }
コード例 #2
0
ファイル: rs_rollback.cpp プロジェクト: 7segments/mongo
 void findRollbackPoint(
     OplogReader& r, uint64_t oplogTS,
     GTID* idToRollbackTo,
     uint64_t* rollbackPointTS,
     uint64_t* rollbackPointHash
     )
 {
     bool gtidFound = false;
     try {
         GTID ourLast = theReplSet->gtidManager->getLiveState();
         shared_ptr<DBClientCursor> rollbackCursor = r.getRollbackCursor(ourLast);
         uassert(17350, "rollback failed to get a cursor to start reading backwards from.", rollbackCursor.get());
         while (rollbackCursor->more()) {
             BSONObj remoteObj = rollbackCursor->next();
             GTID remoteGTID = getGTIDFromBSON("_id", remoteObj);
             uint64_t remoteTS = remoteObj["ts"]._numberLong();
             uint64_t remoteLastHash = remoteObj["h"].numberLong();
             if (remoteTS + 1800*1000 < oplogTS) {
                 log() << "Rollback takes us too far back, throwing exception. remoteTS: " << remoteTS << " oplogTS: " << oplogTS << rsLog;
                 throw RollbackOplogException("replSet rollback too long a time period for a rollback (at least 30 minutes).");
                 break;
             }
             //now try to find an entry in our oplog with that GTID
             BSONObjBuilder localQuery;
             BSONObj localObj;
             addGTIDToBSON("_id", remoteGTID, localQuery);
             bool foundLocally = false;
             {
                 LOCK_REASON(lockReason, "repl: looking up oplog entry for rollback");
                 Client::ReadContext ctx(rsoplog, lockReason);
                 Client::Transaction transaction(DB_SERIALIZABLE);
                 foundLocally = Collection::findOne(rsoplog, localQuery.done(), localObj);
                 transaction.commit();
             }
             if (foundLocally) {
                 GTID localGTID = getGTIDFromBSON("_id", localObj);
                 uint64_t localTS = localObj["ts"]._numberLong();
                 uint64_t localLastHash = localObj["h"].numberLong();
                 if (localLastHash == remoteLastHash &&
                     localTS == remoteTS &&
                     GTID::cmp(localGTID, remoteGTID) == 0
                     )
                 {
                     *idToRollbackTo = localGTID;
                     *rollbackPointTS = localTS;
                     *rollbackPointHash = localLastHash;
                     gtidFound = true;
                     log() << "found id to rollback to " << idToRollbackTo->toString() << rsLog;
                     break;
                 }
             }
         }
         // At this point, either we have found the point to try to rollback to,
         // or we have determined that we cannot rollback
         if (!gtidFound) {
             // we cannot rollback
             throw RollbackOplogException("could not find ID to rollback to");
         }
     }
     catch (DBException& e) {
         log() << "Caught DBException during rollback " << e.toString() << rsLog;
         throw RollbackOplogException("DBException while trying to find ID to rollback to: " + e.toString());
     }
     catch (std::exception& e2) {
         log() << "Caught std::exception during rollback " << e2.what() << rsLog;
         throw RollbackOplogException(str::stream() << "Exception while trying to find ID to rollback to: " << e2.what());
     }
 }