// Undoes local oplog entries, newest first, until the newest remaining entry
// has a GTID that is not greater than idToRollbackTo.
//
// docsMap and rsSave are passed through to rollbackTransactionFromOplog() for
// every entry that gets undone. Throws RollbackOplogException if the oplog is
// found to be empty before the target GTID is reached.
void rollbackToGTID(GTID idToRollbackTo, RollbackDocsMap* docsMap, RollbackSaveData* rsSave) {
    // By now everything should be settled: the applier should have no work
    // left, and it stays that way because this thread is the only one that can
    // hand it work. That makes it safe to start undoing data.
    for (;;) {
        BSONObj newestEntry;
        {
            LOCK_REASON(lockReason, "repl: checking for oplog data");
            Client::ReadContext ctx(rsoplog, lockReason);
            Client::Transaction txn(DB_SERIALIZABLE);
            newestEntry = getLastEntryInOplog();
            // an empty oplog is an error here, not a stopping condition
            if (newestEntry.isEmpty()) {
                throw RollbackOplogException("Oplog empty when rolling back to a GTID");
            }
        }

        GTID newestGTID = getGTIDFromBSON("_id", newestEntry);
        if (GTID::cmp(newestGTID, idToRollbackTo) > 0) {
            // still past the target: undo this entry and look at the next one
            rollbackTransactionFromOplog(newestEntry, docsMap, rsSave);
            continue;
        }

        // we have rolled back far enough; debug builds also check that we
        // stopped exactly on the target
        dassert(GTID::cmp(newestGTID, idToRollbackTo) == 0);
        break;
    }

    log() << "Rolling back to " << idToRollbackTo.toString()
          << " produced " << docsMap->size()
          << " documents for which we need to retrieve a snapshot of." << rsLog;
}
// Rolls the local oplog back to the most recent entry it has in common with
// the remote oplog read through `r`.
//
// Steps, in order:
//   1. Walk the remote rollback cursor and look each remote GTID up in the
//      local oplog. The first entry whose GTID, "ts" and "h" fields all match
//      becomes the rollback point.
//   2. Wait until the applier queue (_deque) is empty.
//   3. While holding operationLock exclusively and a global write lock,
//      invalidate all cursors, abort live transactions, and switch the replica
//      set to the rollback state.
//   4. Reset the GTIDManager to the rollback point, force a repl info update,
//      then undo local oplog entries, newest first, until the rollback point
//      is the last entry. Finally leave the rollback state.
//
// oplogTS is the timestamp remote entries are compared against for the age
// limit (see maxRollbackWindow below).
//
// Throws RollbackOplogException when no common point is found, when the scan
// goes past the age limit, when the local oplog runs empty during the undo
// loop, or when a DBException / std::exception is caught in step 1 or step 4.
// If step 4 throws, leaveRollbackState() is not called.
void BackgroundSync::runRollback(OplogReader& r, uint64_t oplogTS) {
    // Remote entries whose "ts" is older than oplogTS by more than this are
    // too far back to roll back to. The error message below describes it as
    // 30 minutes, so the unit is presumably milliseconds.
    const uint64_t maxRollbackWindow = 1800*1000;

    // Starting from ourLast, read the remote oplog backwards until we find an
    // entry that has the same GTID, timestamp, and hash as an entry in our own
    // oplog. If none turns up within the allowed window, give up by throwing.
    GTID ourLast = theReplSet->gtidManager->getLiveState();
    GTID idToRollbackTo;
    uint64_t rollbackPointTS = 0;
    uint64_t rollbackPointHash = 0;
    incRBID();
    try {
        shared_ptr<DBClientCursor> rollbackCursor = r.getRollbackCursor(ourLast);
        while (rollbackCursor->more()) {
            BSONObj remoteObj = rollbackCursor->next();
            GTID remoteGTID = getGTIDFromBSON("_id", remoteObj);
            uint64_t remoteTS = remoteObj["ts"]._numberLong();
            uint64_t remoteLastHash = remoteObj["h"].numberLong();
            if (remoteTS + maxRollbackWindow < oplogTS) {
                log() << "Rollback takes us too far back, throwing exception. remoteTS: " << remoteTS << " oplogTS: " << oplogTS << rsLog;
                throw RollbackOplogException("replSet rollback too long a time period for a rollback (at least 30 minutes).");
            }

            // now try to find an entry in our oplog with that GTID
            BSONObjBuilder localQuery;
            BSONObj localObj;
            addGTIDToBSON("_id", remoteGTID, localQuery);
            bool foundLocally = false;
            {
                LOCK_REASON(lockReason, "repl: looking up oplog entry for rollback");
                Client::ReadContext ctx(rsoplog, lockReason);
                Client::Transaction transaction(DB_SERIALIZABLE);
                foundLocally = Collection::findOne(rsoplog, localQuery.done(), localObj);
                transaction.commit();
            }
            if (foundLocally) {
                GTID localGTID = getGTIDFromBSON("_id", localObj);
                uint64_t localTS = localObj["ts"]._numberLong();
                uint64_t localLastHash = localObj["h"].numberLong();
                // GTID, timestamp and hash must all agree for this to be a
                // common point
                if (localLastHash == remoteLastHash &&
                    localTS == remoteTS &&
                    GTID::cmp(localGTID, remoteGTID) == 0
                    )
                {
                    idToRollbackTo = localGTID;
                    rollbackPointTS = localTS;
                    rollbackPointHash = localLastHash;
                    log() << "found id to rollback to " << idToRollbackTo << rsLog;
                    break;
                }
            }
        }
        // At this point, either we have found the point to try to rollback to,
        // or we have determined that we cannot rollback. idToRollbackTo is
        // only assigned when a match was found.
        if (idToRollbackTo.isInitial()) {
            // we cannot rollback
            throw RollbackOplogException("could not find ID to rollback to");
        }
    }
    catch (DBException& e) {
        log() << "Caught DBException during rollback " << e.toString() << rsLog;
        throw RollbackOplogException("DBException while trying to find ID to rollback to: " + e.toString());
    }
    catch (std::exception& e2) {
        log() << "Caught std::exception during rollback " << e2.what() << rsLog;
        throw RollbackOplogException(str::stream() << "Exception while trying to find ID to rollback to: " << e2.what());
    }

    // Proceed with the rollback to point idToRollbackTo.
    // NOTE: we probably ought to grab a global write lock while doing this.
    // We likely do not want oplog cursors reading from this machine while we
    // are rolling back, or at least we should do something to protect
    // against that.

    // First, get all the operations that are being applied out of the way.
    // We don't want to rollback an item in the oplog while simultaneously
    // the applier thread is applying it.
    {
        boost::unique_lock<boost::mutex> lock(_mutex);
        while (_deque.size() > 0) {
            log() << "waiting for applier to finish work before doing rollback " << rsLog;
            _queueDone.wait(lock);
        }
        verifySettled();
    }

    // Now tell the system we are going to rollback: abort live multi
    // statement transactions, invalidate cursors, and change the state to
    // RS_ROLLBACK.
    {
        // so we know nothing is simultaneously occurring
        RWLockRecursive::Exclusive e(operationLock);
        LOCK_REASON(lockReason, "repl: killing all operations for rollback");
        Lock::GlobalWrite lk(lockReason);
        ClientCursor::invalidateAllCursors();
        Client::abortLiveTransactions();
        theReplSet->goToRollbackState();
    }

    try {
        // Now that we are settled, take care of the GTIDManager and the repl
        // info thread. The GTIDManager is reset to the point we intend to
        // rollback to, and the repl info thread must capture that.
        theReplSet->gtidManager->resetAfterInitialSync(
            idToRollbackTo,
            rollbackPointTS,
            rollbackPointHash
            );
        // now force an update of the repl info thread
        theReplSet->forceUpdateReplInfo();

        // At this point everything should be settled. The applier should have
        // nothing left, and remain that way, because this is the only thread
        // that can put work on the applier. Now we can rollback the data.
        while (true) {
            BSONObj o;
            {
                LOCK_REASON(lockReason, "repl: checking for oplog data");
                Lock::DBRead lk(rsoplog, lockReason);
                Client::Transaction txn(DB_SERIALIZABLE);
                o = getLastEntryInOplog();
                // The rollback point was found in the local oplog above, so
                // running out of entries means it was never reached. Report
                // that as a failure, as rollbackToGTID does, instead of
                // leaving the rollback state as if the rollback had succeeded.
                if( o.isEmpty() ) {
                    throw RollbackOplogException("Oplog empty when rolling back to a GTID");
                }
            }
            GTID lastGTID = getGTIDFromBSON("_id", o);
            // if we have rolled back enough, break from while loop
            if (GTID::cmp(lastGTID, idToRollbackTo) <= 0) {
                dassert(GTID::cmp(lastGTID, idToRollbackTo) == 0);
                break;
            }
            rollbackTransactionFromOplog(o, true);
        }
        theReplSet->leaveRollbackState();
    }
    catch (DBException& e) {
        log() << "Caught DBException during rollback " << e.toString() << rsLog;
        throw RollbackOplogException("DBException while trying to run rollback: " + e.toString());
    }
    catch (std::exception& e2) {
        log() << "Caught std::exception during rollback " << e2.what() << rsLog;
        throw RollbackOplogException(str::stream() << "Exception while trying to run rollback: " << e2.what());
    }
}
virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << "replSet replSetInitiate admin command received from client" << rsLog; if( !replSet ) { errmsg = "server is not running with --replSet"; return false; } if( theReplSet ) { errmsg = "already initialized"; result.append("info", "try querying " + rsConfigNs + " to see current configuration"); return false; } { // just make sure we can get a write lock before doing anything else. we'll reacquire one // later. of course it could be stuck then, but this check lowers the risk if weird things // are up. time_t t = time(0); Lock::GlobalWrite lk; if( time(0)-t > 10 ) { errmsg = "took a long time to get write lock, so not initiating. Initiate when server less busy?"; return false; } /* check that we don't already have an oplog. that could cause issues. it is ok if the initiating member has *other* data than that. */ Client::Transaction transaction(DB_SERIALIZABLE); BSONObj o = getLastEntryInOplog(); if( !o.isEmpty() ) { errmsg = rsoplog + string(" is not empty on the initiating member. cannot initiate."); return false; } transaction.commit(); } if( ReplSet::startupStatus == ReplSet::BADCONFIG ) { errmsg = "server already in BADCONFIG state (check logs); not initiating"; result.append("info", ReplSet::startupStatusMsg.get()); return false; } if( ReplSet::startupStatus != ReplSet::EMPTYCONFIG ) { result.append("startupStatus", ReplSet::startupStatus); errmsg = "all members and seeds must be reachable to initiate set"; result.append("info", cmdLine._replSet); return false; } BSONObj configObj; if( cmdObj["replSetInitiate"].type() != Object ) { result.append("info2", "no configuration explicitly specified -- making one"); log() << "replSet info initiate : no configuration specified. 
Using a default configuration for the set" << rsLog; string name; vector<HostAndPort> seeds; set<HostAndPort> seedSet; parseReplsetCmdLine(cmdLine._replSet, name, seeds, seedSet); // may throw... bob b; b.append("_id", name); b.append("protocolVersion", ReplSetConfig::CURRENT_PROTOCOL_VERSION); bob members; members.append("0", BSON( "_id" << 0 << "host" << HostAndPort::me().toString() )); result.append("me", HostAndPort::me().toString()); for( unsigned i = 0; i < seeds.size(); i++ ) members.append(bob::numStr(i+1), BSON( "_id" << i+1 << "host" << seeds[i].toString())); b.appendArray("members", members.obj()); configObj = b.obj(); log() << "replSet created this configuration for initiation : " << configObj.toString() << rsLog; } else { configObj = ReplSetConfig::addProtocolVersionIfMissing(cmdObj["replSetInitiate"].Obj()); } bool parsed = false; try { ReplSetConfig newConfig(configObj); parsed = true; if( newConfig.version > 1 ) { errmsg = "can't initiate with a version number greater than 1"; return false; } log() << "replSet replSetInitiate config object parses ok, " << newConfig.members.size() << " members specified" << rsLog; checkMembersUpForConfigChange(newConfig, result, true); log() << "replSet replSetInitiate all members seem up" << rsLog; Lock::GlobalWrite lk; { Client::Transaction transaction(DB_SERIALIZABLE); createOplog(); openOplogFiles(); GTID minLiveGTID; GTID minUnappliedGTID; logToReplInfo(minLiveGTID, minUnappliedGTID); transaction.commit(); } bo comment = BSON( "msg" << "initiating set"); newConfig.saveConfigLocally(comment, true); log() << "replSet replSetInitiate config now saved locally. Should come online in about a minute." << rsLog; result.append("info", "Config now saved locally. 
Should come online in about a minute."); ReplSet::startupStatus = ReplSet::SOON; ReplSet::startupStatusMsg.set("Received replSetInitiate - should come online shortly."); } catch( DBException& e ) { log() << "replSet replSetInitiate exception: " << e.what() << rsLog; if( !parsed ) errmsg = string("couldn't parse cfg object ") + e.what(); else errmsg = string("couldn't initiate : ") + e.what(); return false; } catch( string& e2 ) { log() << e2 << rsLog; errmsg = e2; return false; } return true; }