Exemple #1
0
 // Undoes local oplog entries, one per iteration, until the entry returned by
 // getLastEntryInOplog() has a GTID that is no longer greater than
 // idToRollbackTo.
 //
 // docsMap and rsSave are passed through unchanged to
 // rollbackTransactionFromOplog for every entry that is undone; the size of
 // docsMap is reported in the log once the loop finishes.
 //
 // Throws RollbackOplogException if the oplog is found to be empty before the
 // target GTID has been reached.
 void rollbackToGTID(GTID idToRollbackTo, RollbackDocsMap* docsMap, RollbackSaveData* rsSave) {
     // By the time we get here everything is expected to be settled: the
     // applier should have no work left, and it should stay that way because
     // this is the only thread that can hand work to the applier. It is
     // therefore safe to start undoing data.
     for (;;) {
         BSONObj newestEntry;
         {
             LOCK_REASON(lockReason, "repl: checking for oplog data");
             Client::ReadContext ctx(rsoplog, lockReason);
             Client::Transaction txn(DB_SERIALIZABLE);
             newestEntry = getLastEntryInOplog();
             // an empty oplog means the rollback point can never be reached
             if (newestEntry.isEmpty()) {
                 throw RollbackOplogException("Oplog empty when rolling back to a GTID");
             }
         }
         GTID newestGTID = getGTIDFromBSON("_id", newestEntry);
         if (GTID::cmp(newestGTID, idToRollbackTo) > 0) {
             // still ahead of the rollback point: undo this entry and look again
             rollbackTransactionFromOplog(newestEntry, docsMap, rsSave);
             continue;
         }
         // we have rolled back far enough; we expect to land exactly on the target
         dassert(GTID::cmp(newestGTID, idToRollbackTo) == 0);
         break;
     }
     log() << "Rolling back to " << idToRollbackTo.toString() << " produced "
           << docsMap->size() << " documents for which we need to retrieve a snapshot of." << rsLog;
 }
Exemple #2
0
 // Undoes an insert that was recorded in the oplog for namespace ns.
 //
 // An insert into a namespace ending in ".system.indexes" (an add-index) is
 // not rolled back; a RollbackOplogException describing the op is thrown
 // instead. Every other insert is undone by handing it to runDeleteFromOplog.
 static void runRollbackInsertFromOplog(const char* ns, BSONObj op) {
     const bool isAddIndex = mongoutils::str::endsWith(ns, ".system.indexes");
     if (!isAddIndex) {
         // the rollback of a normal insert is the matching delete
         runDeleteFromOplog(ns, op);
         return;
     }
     throw RollbackOplogException(
         str::stream() << "Not rolling back an add index on " << ns << ". Op: " << op.toString(false, true)
         );
 }
Exemple #3
0
 // Walks every DocID produced by RollbackDocsMapIterator and, for each one,
 // reads the matching document from the remote server over conn and inserts
 // it into the local collection.
 //
 // On input, conn is a connection over which the caller has already created a
 // multi-statement mvcc transaction.
 //
 // Throws RollbackOplogException if a document belongs to a collection whose
 // primary key is hidden.
 void applySnapshotOfDocsMap(shared_ptr<DBClientConnection> conn) {
     // number of DocIDs visited by the loop below
     size_t numDocs = 0;
     log() << "Applying documents to collections for rollback." << rsLog;
     for (RollbackDocsMapIterator it; it.ok(); it.advance()){
         numDocs++;
         DocID curr = it.current();
         LOCK_REASON(lockReason, "repl: appling snapshot of doc during rollback");
         Client::ReadContext ctx(curr.ns, lockReason);
         // NOTE(review): cl is dereferenced without a null check -- confirm that
         // getCollection cannot return NULL for a namespace found in the docs map.
         Collection* cl = getCollection(curr.ns);
         // a document cannot be looked up remotely by a hidden PK, so give up
         if (cl->isPKHidden()) {
             log() << "Collection " << curr.ns << " has a hidden PK, yet it has \
                 a document for which we want to apply a snapshot of: " << \
                 curr.pk << rsLog;
             throw RollbackOplogException("Collection for which we are applying a document has a hidden PK");
         }
         // build the query for the remote lookup from the stored primary key
         BSONObj pkWithFields = cl->fillPKWithFields(curr.pk);
         BSONObj remoteImage = findOneFromConn(conn.get(), curr.ns, Query(pkWithFields));
         // nothing is inserted when the remote lookup comes back empty
         if (!remoteImage.isEmpty()) {
             const uint64_t flags = Collection::NO_UNIQUE_CHECKS | Collection::NO_LOCKTREE;
             insertOneObject(cl, remoteImage, flags);
         }
     }
Exemple #4
0
 static void rollbackCommandFromOplog(const char* ns, BSONObj op) {
     BSONObj command = op[KEY_STR_ROW].embeddedObject();
     log() << "Cannot rollback command " << op << rsLog;
     throw RollbackOplogException(str::stream() << "Could not rollback command " << command << " on ns " << ns);
 }
Exemple #5
0
    // Rolls this member back to the most recent oplog entry it shares with the
    // remote oplog read through r.
    //
    // Steps, in order:
    //   1. Read the remote oplog through r.getRollbackCursor() until an entry
    //      is found whose GTID, "ts" and "h" all match a local oplog entry.
    //   2. Wait until the applier queue (_deque) is empty.
    //   3. Under the operation lock and a global write lock: invalidate all
    //      cursors, abort live transactions, and enter the rollback state.
    //   4. Reset the GTIDManager to the common point, undo every local oplog
    //      entry newer than it, then leave the rollback state.
    //
    // oplogTS is compared with each remote entry's "ts"; once a remote entry is
    // more than 1800*1000 behind it, the rollback is refused.
    //
    // Throws RollbackOplogException when no common point can be found, when the
    // search goes too far back, when no rollback cursor could be obtained, or
    // when any DBException / std::exception escapes steps 1 or 4. Note that
    // leaveRollbackState() is not reached when step 4 throws.
    void BackgroundSync::runRollback(OplogReader& r, uint64_t oplogTS) {
        // starting from ourLast, we need to read the remote oplog
        // backwards until we find an entry in the remote oplog
        // that has the same GTID, timestamp, and hash as
        // what we have in our oplog. If we don't find one that is within
        // some reasonable timeframe, then we go fatal
        GTID ourLast = theReplSet->gtidManager->getLiveState();
        GTID idToRollbackTo;
        uint64_t rollbackPointTS = 0;
        uint64_t rollbackPointHash = 0;
        // Tracked explicitly instead of being inferred from
        // idToRollbackTo.isInitial(), so that a matched entry whose GTID is
        // itself the initial value is not mistaken for "nothing found".
        bool foundRollbackPoint = false;
        incRBID();
        try {
            shared_ptr<DBClientCursor> rollbackCursor = r.getRollbackCursor(ourLast);
            // without this guard a missing cursor would be dereferenced below
            if (!rollbackCursor.get()) {
                throw RollbackOplogException("rollback failed to get a cursor to start reading backwards from.");
            }
            while (rollbackCursor->more()) {
                BSONObj remoteObj = rollbackCursor->next();
                GTID remoteGTID = getGTIDFromBSON("_id", remoteObj);
                uint64_t remoteTS = remoteObj["ts"]._numberLong();
                uint64_t remoteLastHash = remoteObj["h"].numberLong();
                if (remoteTS + 1800*1000 < oplogTS) {
                    log() << "Rollback takes us too far back, throwing exception. remoteTS: " << remoteTS << " oplogTS: " << oplogTS << rsLog;
                    throw RollbackOplogException("replSet rollback too long a time period for a rollback (at least 30 minutes).");
                }
                //now try to find an entry in our oplog with that GTID
                BSONObjBuilder localQuery;
                BSONObj localObj;
                addGTIDToBSON("_id", remoteGTID, localQuery);
                bool foundLocally = false;
                {
                    LOCK_REASON(lockReason, "repl: looking up oplog entry for rollback");
                    Client::ReadContext ctx(rsoplog, lockReason);
                    Client::Transaction transaction(DB_SERIALIZABLE);
                    foundLocally = Collection::findOne(rsoplog, localQuery.done(), localObj);
                    transaction.commit();
                }
                if (foundLocally) {
                    GTID localGTID = getGTIDFromBSON("_id", localObj);
                    uint64_t localTS = localObj["ts"]._numberLong();
                    uint64_t localLastHash = localObj["h"].numberLong();
                    // the entry only counts as common history when hash,
                    // timestamp and GTID all agree
                    if (localLastHash == remoteLastHash &&
                        localTS == remoteTS &&
                        GTID::cmp(localGTID, remoteGTID) == 0
                        )
                    {
                        idToRollbackTo = localGTID;
                        rollbackPointTS = localTS;
                        rollbackPointHash = localLastHash;
                        foundRollbackPoint = true;
                        log() << "found id to rollback to " << idToRollbackTo << rsLog;
                        break;
                    }
                }
            }
            // At this point, either we have found the point to try to rollback to,
            // or we have determined that we cannot rollback
            if (!foundRollbackPoint) {
                // we cannot rollback
                throw RollbackOplogException("could not find ID to rollback to");
            }
        }
        catch (DBException& e) {
            log() << "Caught DBException during rollback " << e.toString() << rsLog;
            throw RollbackOplogException("DBException while trying to find ID to rollback to: " + e.toString());
        }
        catch (std::exception& e2) {
            log() << "Caught std::exception during rollback " << e2.what() << rsLog;
            throw RollbackOplogException(str::stream() << "Exception while trying to find ID to rollback to: " << e2.what());
        }

        // proceed with the rollback to point idToRollbackTo
        // probably ought to grab a global write lock while doing this
        // I don't think we want oplog cursors reading from this machine
        // while we are rolling back. Or at least do something to protect against this

        // first, let's get all the operations that are being applied out of the way,
        // we don't want to rollback an item in the oplog while simultaneously,
        // the applier thread is applying it to the oplog
        {
            boost::unique_lock<boost::mutex> lock(_mutex);
            while (_deque.size() > 0) {
                log() << "waiting for applier to finish work before doing rollback " << rsLog;
                _queueDone.wait(lock);
            }
            verifySettled();
        }

        // now let's tell the system we are going to rollback, to do so,
        // abort live multi statement transactions, invalidate cursors, and
        // change the state to RS_ROLLBACK
        {
            // so we know nothing is simultaneously occurring
            RWLockRecursive::Exclusive e(operationLock);
            LOCK_REASON(lockReason, "repl: killing all operations for rollback");
            Lock::GlobalWrite lk(lockReason);
            ClientCursor::invalidateAllCursors();
            Client::abortLiveTransactions();
            theReplSet->goToRollbackState();
        }

        try {
            // now that we are settled, we have to take care of the GTIDManager
            // and the repl info thread.
            // We need to reset the state of the GTIDManager to the point
            // we intend to rollback to, and we need to make sure that the repl info thread
            // has captured this information.
            theReplSet->gtidManager->resetAfterInitialSync(
                idToRollbackTo,
                rollbackPointTS,
                rollbackPointHash
                );
            // now force an update of the repl info thread
            theReplSet->forceUpdateReplInfo();

            // at this point, everything should be settled, the applier should
            // have nothing left (and remain that way, because this is the only
            // thread that can put work on the applier). Now we can rollback
            // the data.
            while (true) {
                BSONObj o;
                {
                    LOCK_REASON(lockReason, "repl: checking for oplog data");
                    Lock::DBRead lk(rsoplog, lockReason);
                    Client::Transaction txn(DB_SERIALIZABLE);
                    // if there is nothing in the oplog, break
                    // NOTE(review): this ends the rollback silently even though
                    // idToRollbackTo was never reached -- confirm that is intended.
                    o = getLastEntryInOplog();
                    if( o.isEmpty() ) {
                        break;
                    }
                }
                GTID lastGTID = getGTIDFromBSON("_id", o);
                // if we have rolled back enough, break from while loop
                if (GTID::cmp(lastGTID, idToRollbackTo) <= 0) {
                    dassert(GTID::cmp(lastGTID, idToRollbackTo) == 0);
                    break;
                }
                rollbackTransactionFromOplog(o, true);
            }
            theReplSet->leaveRollbackState();
        }
        catch (DBException& e) {
            log() << "Caught DBException during rollback " << e.toString() << rsLog;
            throw RollbackOplogException("DBException while trying to run rollback: " + e.toString());
        }
        catch (std::exception& e2) {
            log() << "Caught std::exception during rollback " << e2.what() << rsLog;
            throw RollbackOplogException(str::stream() << "Exception while trying to run rollback: " << e2.what());
        }

    }
Exemple #6
0
 // Searches the remote oplog, read through r.getRollbackCursor(), for the
 // first entry that also exists in the local oplog with the same GTID, "ts"
 // and "h" values. That entry is the point to roll back to.
 //
 // Parameters:
 //   r                  reader used to obtain the rollback cursor
 //   oplogTS            compared with each remote entry's "ts"; once a remote
 //                      entry is more than 1800*1000 behind it, the search is
 //                      abandoned with an exception
 //   idToRollbackTo     out: GTID of the matching entry
 //   rollbackPointTS    out: "ts" of the matching entry
 //   rollbackPointHash  out: "h" of the matching entry
 //
 // The out-parameters are written only when a match is found.
 //
 // Throws RollbackOplogException when no match is found, when the search goes
 // too far back, or when any DBException / std::exception (including the
 // uassert on a missing cursor) is raised during the search.
 void findRollbackPoint(
     OplogReader& r, uint64_t oplogTS,
     GTID* idToRollbackTo,
     uint64_t* rollbackPointTS,
     uint64_t* rollbackPointHash
     )
 {
     bool gtidFound = false;
     try {
         GTID ourLast = theReplSet->gtidManager->getLiveState();
         shared_ptr<DBClientCursor> rollbackCursor = r.getRollbackCursor(ourLast);
         uassert(17350, "rollback failed to get a cursor to start reading backwards from.", rollbackCursor.get());
         while (rollbackCursor->more()) {
             BSONObj remoteObj = rollbackCursor->next();
             GTID remoteGTID = getGTIDFromBSON("_id", remoteObj);
             uint64_t remoteTS = remoteObj["ts"]._numberLong();
             uint64_t remoteLastHash = remoteObj["h"].numberLong();
             if (remoteTS + 1800*1000 < oplogTS) {
                 log() << "Rollback takes us too far back, throwing exception. remoteTS: " << remoteTS << " oplogTS: " << oplogTS << rsLog;
                 throw RollbackOplogException("replSet rollback too long a time period for a rollback (at least 30 minutes).");
             }
             //now try to find an entry in our oplog with that GTID
             BSONObjBuilder localQuery;
             BSONObj localObj;
             addGTIDToBSON("_id", remoteGTID, localQuery);
             bool foundLocally = false;
             {
                 LOCK_REASON(lockReason, "repl: looking up oplog entry for rollback");
                 Client::ReadContext ctx(rsoplog, lockReason);
                 Client::Transaction transaction(DB_SERIALIZABLE);
                 foundLocally = Collection::findOne(rsoplog, localQuery.done(), localObj);
                 transaction.commit();
             }
             if (foundLocally) {
                 GTID localGTID = getGTIDFromBSON("_id", localObj);
                 uint64_t localTS = localObj["ts"]._numberLong();
                 uint64_t localLastHash = localObj["h"].numberLong();
                 // the entry only counts as common history when hash,
                 // timestamp and GTID all agree
                 if (localLastHash == remoteLastHash &&
                     localTS == remoteTS &&
                     GTID::cmp(localGTID, remoteGTID) == 0
                     )
                 {
                     *idToRollbackTo = localGTID;
                     *rollbackPointTS = localTS;
                     *rollbackPointHash = localLastHash;
                     gtidFound = true;
                     log() << "found id to rollback to " << idToRollbackTo->toString() << rsLog;
                     break;
                 }
             }
         }
         // At this point, either we have found the point to try to rollback to,
         // or we have determined that we cannot rollback
         if (!gtidFound) {
             // we cannot rollback
             throw RollbackOplogException("could not find ID to rollback to");
         }
     }
     catch (DBException& e) {
         log() << "Caught DBException during rollback " << e.toString() << rsLog;
         throw RollbackOplogException("DBException while trying to find ID to rollback to: " + e.toString());
     }
     catch (std::exception& e2) {
         log() << "Caught std::exception during rollback " << e2.what() << rsLog;
         throw RollbackOplogException(str::stream() << "Exception while trying to find ID to rollback to: " << e2.what());
     }
 }