// Looks at the next entry the reader has on the sync source's oplog and asks
// the GTID manager whether that entry means we must roll back. When it does,
// the rollback is run from here via runRollback().
//
// r: reader positioned on the remote oplog.
//
// Returns false only when the GTID manager reports that no rollback is
// needed. Returns true when the reader yields nothing (in that case
// runRollback() is NOT invoked; only a message is logged), and also after
// runRollback() has been called.
bool BackgroundSync::isRollbackRequired(OplogReader& r) {
    string remoteHost = r.conn()->getServerAddress();

    if (r.more()) {
        BSONObj remoteEntry = r.nextSafe();
        uint64_t remoteTS = remoteEntry["ts"]._numberLong();
        uint64_t remoteHash = remoteEntry["h"].numberLong();
        GTID remoteGTID = getGTIDFromBSON("_id", remoteEntry);

        if (theReplSet->gtidManager->rollbackNeeded(remoteGTID, remoteTS, remoteHash)) {
            log() << "Rollback needed! Our GTID"
                  << theReplSet->gtidManager->getLiveState().toString()
                  << " remote GTID: " << remoteGTID.toString()
                  << ". Attempting rollback." << rsLog;
            runRollback(r, remoteTS);
            return true;
        }

        log() << "Rollback NOT needed! Our GTID" << remoteGTID << endl;
        return false;
    }

    // The query came back empty. Historically (vanilla Mongo) this meant
    // either that we were ahead of the sync source or that the remote oplog
    // was empty. Nothing is rolled back here; we only log and report true.
    log() << "replSet error empty query result from " << remoteHost
          << " oplog, attempting rollback" << rsLog;
    return true;
}
// Decides whether a rollback has to be performed, without performing it.
//
// r:      reader on the remote oplog; it must have an entry available
//         (enforced with verify()).
// lastTS: out-parameter. Written with the "ts" of the remote entry only when
//         the GTID manager reports that a rollback is needed. It is left
//         untouched on every other path, including the path where a stored
//         rollbackStatus document is found.
//
// Returns true when a rollbackStatus document already exists (a rollback is
// in progress and has to be completed) or when the GTID manager says the
// remote entry requires a rollback; false otherwise.
bool isRollbackRequired(OplogReader& r, uint64_t *lastTS) {
    // Not referenced below; kept so the same calls are made on the reader.
    string remoteHost = r.conn()->getServerAddress();
    verify(r.more());

    BSONObj pendingRollback;
    if (getRollbackStatus(pendingRollback)) {
        log() << "Rollback needed, found rollbackStatus: " << pendingRollback << rsLog;
        return true;
    }

    BSONObj remoteEntry = r.nextSafe();
    uint64_t remoteTS = remoteEntry["ts"]._numberLong();
    uint64_t remoteHash = remoteEntry["h"].numberLong();
    GTID remoteGTID = getGTIDFromBSON("_id", remoteEntry);

    const bool mustRollback =
        theReplSet->gtidManager->rollbackNeeded(remoteGTID, remoteTS, remoteHash);
    if (mustRollback) {
        log() << "Rollback needed! Our GTID: "
              << theReplSet->gtidManager->getLiveState().toString()
              << ", remote GTID: " << remoteGTID.toString()
              << ". Attempting rollback." << rsLog;
        *lastTS = remoteTS;
        return true;
    }

    log() << "Rollback NOT needed! " << remoteGTID << endl;
    return false;
}
// Applies every oplog transaction at or after minUnappliedGTID whose "a"
// (applied) flag is false.
//
// minUnappliedGTID: GTID to start scanning from. When it is the initial GTID,
//                   the starting point is instead read from the
//                   "minUnapplied" document in rsReplInfo (which must exist;
//                   enforced with verify()).
//
// The unapplied entries are first collected under a read context and a
// transaction; they are applied, in oplog order, only after that scope has
// been closed.
void applyMissingOpsInOplog(GTID minUnappliedGTID) {
    std::deque<BSONObj> unappliedTransactions;
    {
        // accumulate a list of transactions that are unapplied
        LOCK_REASON(lockReason, "repl: initial sync applying missing ops");
        Client::ReadContext ctx(rsoplog, lockReason);
        Client::Transaction catchupTransaction(0);

        if (minUnappliedGTID.isInitial()) {
            // now we should have replInfo on this machine,
            // let's query the minUnappliedGTID to figure out from where
            // we should copy the opLog
            BSONObj result;
            const bool foundMinUnapplied =
                Collection::findOne(rsReplInfo, BSON("_id" << "minUnapplied"), result);
            verify(foundMinUnapplied);
            // Store into the parameter itself. Declaring a new local of the
            // same name here would shadow it and throw the looked-up value
            // away, leaving the query below to start from the initial GTID.
            minUnappliedGTID = getGTIDFromBSON("GTID", result);
        }

        // make sure the oplog has a last entry at all
        GTID lastEntry;
        bool ret = getLastGTIDinOplog(&lastEntry);
        isyncassert("could not get last oplog entry after clone", ret);

        // at this point, we have got the oplog up to date,
        // now we need to read forward in the oplog
        // from minUnapplied: { _id: { $gte: <minUnappliedGTID> } }
        BSONObjBuilder q;
        addGTIDToBSON("$gte", minUnappliedGTID, q);
        BSONObjBuilder query;
        query.append("_id", q.done());

        {
            shared_ptr<Cursor> c = getOptimizedCursor(rsoplog, query.done());
            while (c->ok()) {
                if (c->currentMatches()) {
                    BSONObj curr = c->current();
                    bool transactionAlreadyApplied = curr["a"].Bool();
                    if (!transactionAlreadyApplied) {
                        GTID currEntry = getGTIDFromBSON("_id", curr);
                        LOG(2) << "applying missing op gap " << currEntry.toString() << endl;
                        // own the buffer: the entry outlives the cursor
                        unappliedTransactions.push_back(curr.getOwned());
                    }
                }
                c->advance();
            }
        }
        catchupTransaction.commit(0);
    }

    // apply in the order the entries were read from the oplog
    while (!unappliedTransactions.empty()) {
        BSONObj curr = unappliedTransactions.front();
        applyTransactionFromOplog(curr, NULL);
        unappliedTransactions.pop_front();
    }
}
// does some sanity checks before finishing starting and stopping the opsync // thread that we are in a decent state // // called with _mutex held void BackgroundSync::verifySettled() { // if the background sync has yet to be fully started, // no need to run this, we are still in initialization // of the replset. This can happen if // during initialization, after we start the manager, we // get a new config before we have fully started replication if (!_applierInProgress) { return; } verify(_deque.size() == 0); // do a sanity check on the GTID Manager GTID lastLiveGTID; GTID lastUnappliedGTID; theReplSet->gtidManager->getLiveGTIDs( &lastLiveGTID, &lastUnappliedGTID ); log() << "last GTIDs: " << lastLiveGTID.toString() << " " << lastUnappliedGTID.toString() << " " << endl; verify(GTID::cmp(lastUnappliedGTID, lastLiveGTID) == 0); GTID minLiveGTID; GTID minUnappliedGTID; theReplSet->gtidManager->getMins( &minLiveGTID, &minUnappliedGTID ); log() << "min GTIDs: " << minLiveGTID.toString() << " " << minUnappliedGTID.toString() << rsLog; verify(GTID::cmp(minUnappliedGTID, minLiveGTID) == 0); }
// Repeatedly rolls back the newest oplog transaction until the newest entry
// left in the oplog is idToRollbackTo.
//
// idToRollbackTo: GTID that must end up as the last oplog entry.
// docsMap:        passed through to rollbackTransactionFromOplog(); its
//                 size() is reported in the final log line.
// rsSave:         passed through to rollbackTransactionFromOplog().
//
// Throws RollbackOplogException if the oplog turns out to be empty before
// idToRollbackTo is reached.
//
// The caller is expected to have settled the applier beforehand: this thread
// is the only one that can hand it work, so nothing else touches the oplog
// tail while we are in here.
void rollbackToGTID(GTID idToRollbackTo, RollbackDocsMap* docsMap, RollbackSaveData* rsSave) {
    for (;;) {
        BSONObj newestEntry;
        {
            LOCK_REASON(lockReason, "repl: checking for oplog data");
            Client::ReadContext ctx(rsoplog, lockReason);
            Client::Transaction txn(DB_SERIALIZABLE);
            newestEntry = getLastEntryInOplog();
            // an empty oplog means we ran past the rollback point
            if (newestEntry.isEmpty()) {
                throw RollbackOplogException("Oplog empty when rolling back to a GTID");
            }
        }

        GTID newestGTID = getGTIDFromBSON("_id", newestEntry);
        if (GTID::cmp(newestGTID, idToRollbackTo) > 0) {
            // still past the target: undo this transaction and look again
            rollbackTransactionFromOplog(newestEntry, docsMap, rsSave);
            continue;
        }

        // we are done; in debug builds insist that we landed exactly on target
        dassert(GTID::cmp(newestGTID, idToRollbackTo) == 0);
        break;
    }

    log() << "Rolling back to " << idToRollbackTo.toString()
          << " produced " << docsMap->size()
          << " documents for which we need to retrieve a snapshot of." << rsLog;
}
void GhostSync::percolate(const BSONObj& id, const GTID& lastGTID) { const OID rid = id["_id"].OID(); shared_ptr<GhostSlave> slave; { rwlock lk( _lock , false ); MAP::iterator i = _ghostCache.find( rid ); if ( i == _ghostCache.end() ) { OCCASIONALLY log() << "couldn't percolate slave " << rid << " no entry" << rsLog; return; } slave = i->second; if (!slave->init) { OCCASIONALLY log() << "couldn't percolate slave " << rid << " not init" << rsLog; return; } } verify(slave->slave); const Member *target = BackgroundSync::get()->getSyncTarget(); if (!target || rs->box.getState().primary() // we are currently syncing from someone who's syncing from us // the target might end up with a new Member, but s.slave never // changes so we'll compare the names || target == slave->slave || target->fullName() == slave->slave->fullName()) { LOG(1) << "replica set ghost target no good" << endl; return; } if ( GTID::cmp(slave->lastGTID, lastGTID) > 0 ) { return; } try { if (!slave->reader.haveConnection()) { if (!slave->reader.connect(id, slave->slave->id(), target->fullName())) { // error message logged in OplogReader::connect return; } } bool ret = slave->reader.propogateSlaveLocation(lastGTID); if (ret) { slave->lastGTID = lastGTID; LOG(2) << "now last is " << slave->lastGTID.toString() << rsLog; } else { LOG(0) << "failed to percolate to with new location" << lastGTID.toString() << rsLog; slave->reader.resetConnection(); } } catch (DBException& e) { // we'll be back LOG(2) << "replSet ghost sync error: " << e.what() << " for " << slave->slave->fullName() << rsLog; slave->reader.resetConnection(); } }
// Copies into the local oplog every remote transaction between the stored
// "minLive" GTID and the local oplog's last GTID that is not present locally.
//
// r: reader used to query the remote oplog; it is also handed to
//    replicateFullTransactionToOplog() for each missing transaction.
//
// The whole operation runs under one read context and one transaction, which
// is committed at the end. The "minLive" document must exist in rsReplInfo
// (enforced with verify()).
void ReplSetImpl::_fillGaps(OplogReader* r) {
    LOCK_REASON(lockReason, "repl: filling gaps");
    Client::ReadContext ctx(rsoplog, lockReason);
    Client::Transaction catchupTransaction(0);

    // replInfo should be on this machine by now; minLive tells us where to
    // start copying the oplog from
    BSONObj minLiveDoc;
    const bool foundMinLive =
        Collection::findOne(rsReplInfo, BSON("_id" << "minLive"), minLiveDoc);
    verify(foundMinLive);
    GTID position = getGTIDFromBSON("GTID", minLiveDoc);

    // upper bound: whatever is currently last in our oplog
    GTID upperBound;
    bool haveLast = getLastGTIDinOplog(&upperBound);
    isyncassert("could not get last oplog entry after clone", haveLast);

    LOG(2) << "starting to fill gaps currEntry: " << position.toString()
           << " lastEntry: " << upperBound.toString() <<endl;

    // walk the remote oplog forward, re-issuing the query from the current
    // position whenever the cursor runs dry before we reach the upper bound
    while (GTID::cmp(position, upperBound) < 0) {
        r->tailingQueryGTE(rsoplog, position);

        while (GTID::cmp(position, upperBound) < 0) {
            // only ask the server for more when the current batch is used up
            if (!r->moreInCurrentBatch() && !r->more()) {
                break;
            }

            BSONObj op = r->nextSafe().getOwned();
            position = getGTIDFromOplogEntry(op);

            // already have it locally: nothing to fill
            if (gtidExistsInOplog(position)) {
                continue;
            }

            LOG(2) << "filling gap " << position.toString() << endl;
            bool bigTxn;
            replicateFullTransactionToOplog(op, *r, &bigTxn);
        }
    }

    catchupTransaction.commit(0);
}
// Checks that the remote server's minUnapplied GTID is not behind the GTID we
// want to roll back to. If it were, the snapshot we create would not be up to
// date and the rollback algorithm would not work.
//
// r:              reader whose connection is used to query the remote
//                 rsReplInfo collection.
// idToRollbackTo: the intended rollback point.
//
// Returns true when the rollback may start; false (after logging) when the
// caller should exit and retry.
bool canStartRollback(OplogReader& r, GTID idToRollbackTo) {
    shared_ptr<DBClientConnection> remote(r.conn_shared());

    // A heartbeat request would be another source for this value, and would
    // technically return a more up to date minUnapplied.
    BSONObjBuilder selector;
    selector.append("_id", "minUnapplied");
    BSONObj replInfoDoc = findOneFromConn(remote.get(), rsReplInfo, Query(selector.done()));

    GTID remoteMinUnapplied = getGTIDFromBSON("GTID", replInfoDoc);
    if (GTID::cmp(remoteMinUnapplied, idToRollbackTo) >= 0) {
        return true;
    }

    log() << "Remote server has minUnapplied " << remoteMinUnapplied.toString()
          << " we want to rollback to " << idToRollbackTo.toString()
          << ". Therefore, exiting and retrying." << rsLog;
    return false;
}
// Appends a status summary of the replica set to b: the set name, the
// current date, our own state, the member we are syncing from (if any), and
// a sorted "members" array with one document for ourselves and one for every
// other member.
//
// b: builder that receives the summary fields.
void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const {
    vector<BSONObj> memberDocs;

    const Member *me = this->_self;
    verify( me );

    MemberState myState = box.getState();

    // --- our own entry ---
    {
        BSONObjBuilder selfDoc;
        selfDoc.append("_id", static_cast<int>(me->id()));
        selfDoc.append("name", me->fullName());
        selfDoc.append("health", 1.0);
        selfDoc.append("state", static_cast<int>(myState.s));
        selfDoc.append("stateStr", myState.toString());
        selfDoc.append("uptime", static_cast<unsigned>(time(0) - cmdLine.started));

        // GTID and oplog details are reported for non-arbiters only
        if (!me->config().arbiterOnly) {
            GTID lastLive;
            GTID lastUnapplied;
            GTID minLive;
            GTID minUnapplied;
            gtidManager->getGTIDs(&lastLive, &lastUnapplied, &minLive, &minUnapplied);

            selfDoc.appendDate("optimeDate", gtidManager->getCurrTimestamp());
            selfDoc.append("lastGTID", lastLive.toString());
            selfDoc.append("lastUnappliedGTID", lastUnapplied.toString());
            selfDoc.append("minLiveGTID", minLive.toString());
            selfDoc.append("minUnappliedGTID", minUnapplied.toString());
            selfDoc.append("oplogVersion", ReplSetConfig::OPLOG_VERSION);
        }

        int maintenance = _maintenanceMode;
        if (maintenance) {
            selfDoc.append("maintenanceMode", maintenance);
        }

        if (theReplSet) {
            string heartbeatMsg = theReplSet->hbmsg();
            if (!heartbeatMsg.empty()) {
                selfDoc.append("errmsg", heartbeatMsg);
            }
        }

        selfDoc.append("self", true);
        memberDocs.push_back(selfDoc.obj());
    }

    // --- one entry per other member ---
    for (Member *m = _members.head(); m; m = m->next()) {
        BSONObjBuilder doc;
        doc.append("_id", static_cast<int>(m->id()));
        doc.append("name", m->fullName());

        double health = m->hbinfo().health;
        doc.append("health", health);
        doc.append("state", static_cast<int>(m->state().s));
        if (health == 0) {
            // if we can't connect the state info is from the past and could
            // be confusing to show
            doc.append("stateStr", "(not reachable/healthy)");
        }
        else {
            doc.append("stateStr", m->state().toString());
        }

        doc.append("uptime", static_cast<unsigned>(m->hbinfo().upSince ?
                                                   (time(0)-m->hbinfo().upSince) : 0));

        if (!m->config().arbiterOnly) {
            doc.appendDate("optimeDate", m->hbinfo().opTime);
            doc.append("lastGTID", m->hbinfo().gtid.toString());
            doc.append("lastUnappliedGTID", m->hbinfo().lastUnappliedGTID.toString());
            doc.append("minLiveGTID", m->hbinfo().minLiveGTID.toString());
            doc.append("minUnappliedGTID", m->hbinfo().minUnappliedGTID.toString());
            doc.append("oplogVersion", m->hbinfo().oplogVersion);
        }

        doc.appendTimeT("lastHeartbeat", m->hbinfo().lastHeartbeat);
        doc.appendTimeT("lastHeartbeatRecv", m->getLastRecvHeartbeat());
        doc.append("pingMs", m->hbinfo().ping);

        string lastHeartbeatMsg = m->lhb();
        if (!lastHeartbeatMsg.empty()) {
            doc.append("lastHeartbeatMessage", lastHeartbeatMsg);
        }

        if (m->hbinfo().authIssue) {
            doc.append("authenticated", false);
        }

        string syncingTo = m->hbinfo().syncingTo;
        if (!syncingTo.empty()) {
            doc.append("syncingTo", syncingTo);
        }

        memberDocs.push_back(doc.obj());
    }

    sort(memberDocs.begin(), memberDocs.end());

    // --- set-level fields ---
    b.append("set", name());
    b.appendTimeT("date", time(0));
    b.append("myState", myState.s);

    // only report a sync target when we are neither primary nor shunned
    const Member *syncTarget = BackgroundSync::get()->getSyncTarget();
    if (syncTarget
        && (myState != MemberState::RS_PRIMARY)
        && (myState != MemberState::RS_SHUNNED)) {
        b.append("syncingTo", syncTarget->fullName());
    }

    b.append("members", memberDocs);

    // to avoid confusion if set...normally never set except for testing.
    if (replSetBlind) {
        b.append("blind",true);
    }
}
void testGTIDManager() { GTID lastGTID(1,1); GTIDManager mgr(lastGTID, 0, 0, 0, 0); // make sure initialization is what we expect ASSERT(GTID::cmp(mgr._lastLiveGTID, lastGTID) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, lastGTID) > 0); lastGTID.inc(); ASSERT(GTID::cmp(mgr._minLiveGTID, lastGTID) == 0); mgr.catchUnappliedToLive(); ASSERT(GTID::cmp(mgr._lastLiveGTID, mgr._lastUnappliedGTID) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, mgr._minUnappliedGTID) == 0); GTID resetGTID(2,2); mgr.resetAfterInitialSync(resetGTID, 1, 1); mgr.verifyReadyToBecomePrimary(); ASSERT(GTID::cmp(mgr._lastLiveGTID, resetGTID) == 0); ASSERT(GTID::cmp(mgr._lastLiveGTID, mgr._lastUnappliedGTID) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, mgr._minUnappliedGTID) == 0); resetGTID.inc(); ASSERT(GTID::cmp(mgr._minLiveGTID, resetGTID) == 0); // now test that it works as primary GTID currLast = mgr.getLiveState(); GTID currMin = mgr._minLiveGTID; ASSERT(GTID::cmp(currLast, mgr._lastLiveGTID) == 0); uint64_t ts; uint64_t hash; GTID gtid; mgr.getGTIDForPrimary(>id, &ts, &hash); cerr << gtid.toString() << endl; cerr << currMin.toString() <<endl; ASSERT(GTID::cmp(gtid, currMin) == 0); ASSERT(GTID::cmp(gtid, mgr._minLiveGTID) == 0); ASSERT(GTID::cmp(gtid, mgr._lastLiveGTID) == 0); mgr.noteLiveGTIDDone(gtid); ASSERT(GTID::cmp(gtid, mgr._lastLiveGTID) == 0); ASSERT(GTID::cmp(gtid, mgr._minLiveGTID) < 0); // simple test of resetManager currLast = mgr._lastLiveGTID; currMin = mgr._minLiveGTID; uint64_t currHkp = mgr.getHighestKnownPrimary(); // just a sanity check, that hkp is 2 ASSERT(currHkp == 2); ASSERT(mgr._newPrimaryValue == 0); ASSERT(!mgr.resetManager(1)); ASSERT(!mgr.resetManager(2)); ASSERT(mgr.resetManager(4)); mgr.verifyReadyToBecomePrimary(); // make sure that lastLive and minLive not changed yet ASSERT(GTID::cmp(currMin, mgr._minLiveGTID) == 0); ASSERT(GTID::cmp(currLast, mgr._lastLiveGTID) == 0); // now make sure that primary has increased ASSERT(mgr._newPrimaryValue == 4); 
mgr.getGTIDForPrimary(>id, &ts, &hash); ASSERT(mgr._newPrimaryValue == 0); ASSERT(gtid._primarySeqNo > currLast._primarySeqNo); ASSERT(gtid._primarySeqNo == 4); ASSERT(gtid._GTSeqNo == 0); mgr.noteLiveGTIDDone(gtid); mgr.verifyReadyToBecomePrimary(); // now test that min is properly maintained currLast = mgr._lastLiveGTID; currMin = mgr._minLiveGTID; GTID gtid1, gtid2, gtid3, gtid4, gtid5; mgr.getGTIDForPrimary(>id1, &ts, &hash); mgr.getGTIDForPrimary(>id2, &ts, &hash); mgr.getGTIDForPrimary(>id3, &ts, &hash); mgr.getGTIDForPrimary(>id4, &ts, &hash); ASSERT(GTID::cmp(gtid1, gtid2) < 0); ASSERT(GTID::cmp(gtid2, gtid3) < 0); ASSERT(GTID::cmp(gtid3, gtid4) < 0); ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid4) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtid1) == 0); // finish 2, nothing should change mgr.noteLiveGTIDDone(gtid2); ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid4) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtid1) == 0); // finish 1, min should jump to 3 mgr.noteLiveGTIDDone(gtid1); ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid4) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtid3) == 0); // get 5, _lastLive should change mgr.getGTIDForPrimary(>id5, &ts, &hash); ASSERT(GTID::cmp(gtid4, gtid5) < 0); ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid5) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtid3) == 0); // finish 3 and 4, should both jump to 5 mgr.noteLiveGTIDDone(gtid3); mgr.noteLiveGTIDDone(gtid4); ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid5) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtid5) == 0); // finish 5, min should jump up mgr.noteLiveGTIDDone(gtid5); ASSERT(GTID::cmp(mgr._lastLiveGTID, gtid5) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtid5) > 0); mgr.verifyReadyToBecomePrimary(); GTID currLastUnapplied = mgr._lastUnappliedGTID; GTID currMinUnapplied = mgr._minUnappliedGTID; gtid5.inc(); gtid5.inc(); gtid5.inc(); GTID gtidOther = gtid5; gtidOther.inc(); GTID gtidUnapplied1 = gtid5; // now let's do a test for secondaries mgr.noteGTIDAdded(gtidUnapplied1, ts, hash); 
ASSERT(GTID::cmp(mgr._lastLiveGTID, gtidUnapplied1) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtidOther) == 0); gtid5.inc(); gtidOther.inc(); GTID gtidUnapplied2 = gtid5; mgr.noteGTIDAdded(gtidUnapplied2, ts, hash); ASSERT(GTID::cmp(mgr._lastLiveGTID, gtidUnapplied2) == 0); ASSERT(GTID::cmp(mgr._minLiveGTID, gtidOther) == 0); // verify unapplied values not changed ASSERT(GTID::cmp(mgr._lastUnappliedGTID, currLastUnapplied) == 0); ASSERT(GTID::cmp(mgr._minUnappliedGTID, currMinUnapplied) == 0); gtid5.inc(); GTID gtidUnapplied3 = gtid5; mgr.noteGTIDAdded(gtidUnapplied3, ts, hash); gtid5.inc(); GTID gtidUnapplied4 = gtid5; mgr.noteGTIDAdded(gtidUnapplied4, ts, hash); // at this point, we have 4 GTIDs that have been added, but // yet to be applied mgr.noteApplyingGTID(gtidUnapplied1); mgr.noteApplyingGTID(gtidUnapplied2); ASSERT(GTID::cmp(mgr._lastUnappliedGTID, gtidUnapplied2) == 0); ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied1) == 0); mgr.noteGTIDApplied(gtidUnapplied2); ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied1) == 0); mgr.noteApplyingGTID(gtidUnapplied3); mgr.noteApplyingGTID(gtidUnapplied4); ASSERT(GTID::cmp(mgr._lastUnappliedGTID, gtidUnapplied4) == 0); ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied1) == 0); mgr.noteGTIDApplied(gtidUnapplied3); mgr.noteGTIDApplied(gtidUnapplied1); ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied4) == 0); mgr.noteGTIDApplied(gtidUnapplied4); ASSERT(GTID::cmp(mgr._lastUnappliedGTID, gtidUnapplied4) == 0); ASSERT(GTID::cmp(mgr._minUnappliedGTID, gtidUnapplied4) > 0); }
// Rolls the local oplog (and data) back to the most recent entry that we
// share with the sync source.
//
// r:       reader on the sync source; used to obtain a cursor that walks the
//          remote oplog backwards starting from our last GTID.
// oplogTS: timestamp of the remote entry that triggered the rollback; remote
//          entries more than 30 minutes older than this are considered too
//          far back.
//
// Throws RollbackOplogException when no common point is found, when the
// search goes too far back in time, or when a DBException / std::exception
// is raised during either phase (those are logged and rewrapped).
//
// Phases:
//   1. find the rollback point by comparing remote and local entries,
//   2. wait for the applier queue to drain,
//   3. under the exclusive operation lock and a global write lock,
//      invalidate cursors, abort live transactions and enter rollback state,
//   4. reset the GTID manager to the rollback point and undo oplog
//      transactions until the rollback point is the last entry.
void BackgroundSync::runRollback(OplogReader& r, uint64_t oplogTS) {
    // how far back (relative to oplogTS) a remote entry may be: 30 minutes,
    // expressed in the same unit as the "ts" fields being compared
    static const uint64_t maxRollbackWindow = 1800*1000;

    // starting from ourLast, we need to read the remote oplog
    // backwards until we find an entry in the remote oplog
    // that has the same GTID, timestamp, and hash as
    // what we have in our oplog. If we don't find one that is within
    // some reasonable timeframe, then we go fatal
    GTID ourLast = theReplSet->gtidManager->getLiveState();
    GTID idToRollbackTo;
    uint64_t rollbackPointTS = 0;
    uint64_t rollbackPointHash = 0;
    incRBID();
    try {
        shared_ptr<DBClientCursor> rollbackCursor = r.getRollbackCursor(ourLast);
        while (rollbackCursor->more()) {
            BSONObj remoteObj = rollbackCursor->next();
            GTID remoteGTID = getGTIDFromBSON("_id", remoteObj);
            uint64_t remoteTS = remoteObj["ts"]._numberLong();
            uint64_t remoteLastHash = remoteObj["h"].numberLong();
            if (remoteTS + maxRollbackWindow < oplogTS) {
                log() << "Rollback takes us too far back, throwing exception. remoteTS: " << remoteTS << " oplogTS: " << oplogTS << rsLog;
                throw RollbackOplogException("replSet rollback too long a time period for a rollback (at least 30 minutes).");
            }
            //now try to find an entry in our oplog with that GTID
            BSONObjBuilder localQuery;
            BSONObj localObj;
            addGTIDToBSON("_id", remoteGTID, localQuery);
            bool foundLocally = false;
            {
                LOCK_REASON(lockReason, "repl: looking up oplog entry for rollback");
                Client::ReadContext ctx(rsoplog, lockReason);
                Client::Transaction transaction(DB_SERIALIZABLE);
                foundLocally = Collection::findOne(rsoplog, localQuery.done(), localObj);
                transaction.commit();
            }
            if (foundLocally) {
                GTID localGTID = getGTIDFromBSON("_id", localObj);
                uint64_t localTS = localObj["ts"]._numberLong();
                uint64_t localLastHash = localObj["h"].numberLong();
                // a common point requires hash, timestamp and GTID to all agree
                if (localLastHash == remoteLastHash &&
                    localTS == remoteTS &&
                    GTID::cmp(localGTID, remoteGTID) == 0
                    )
                {
                    idToRollbackTo = localGTID;
                    rollbackPointTS = localTS;
                    rollbackPointHash = localLastHash;
                    log() << "found id to rollback to " << idToRollbackTo << rsLog;
                    break;
                }
            }
        }
        // At this point, either we have found the point to try to rollback to,
        // or we have determined that we cannot rollback
        if (idToRollbackTo.isInitial()) {
            // we cannot rollback
            throw RollbackOplogException("could not find ID to rollback to");
        }
    }
    catch (DBException& e) {
        log() << "Caught DBException during rollback " << e.toString() << rsLog;
        throw RollbackOplogException("DBException while trying to find ID to rollback to: " + e.toString());
    }
    catch (std::exception& e2) {
        log() << "Caught std::exception during rollback " << e2.what() << rsLog;
        throw RollbackOplogException(str::stream() << "Exception while trying to find ID to rollback to: " << e2.what());
    }

    // proceed with the rollback to point idToRollbackTo
    // probably ought to grab a global write lock while doing this
    // I don't think we want oplog cursors reading from this machine
    // while we are rolling back. Or at least do something to protect against this

    // first, let's get all the operations that are being applied out of the way,
    // we don't want to rollback an item in the oplog while simultaneously,
    // the applier thread is applying it to the oplog
    {
        boost::unique_lock<boost::mutex> lock(_mutex);
        while (_deque.size() > 0) {
            log() << "waiting for applier to finish work before doing rollback " << rsLog;
            _queueDone.wait(lock);
        }
        verifySettled();
    }

    // now let's tell the system we are going to rollback, to do so,
    // abort live multi statement transactions, invalidate cursors, and
    // change the state to RS_ROLLBACK
    {
        // so we know nothing is simultaneously occurring
        RWLockRecursive::Exclusive e(operationLock);
        LOCK_REASON(lockReason, "repl: killing all operations for rollback");
        Lock::GlobalWrite lk(lockReason);
        ClientCursor::invalidateAllCursors();
        Client::abortLiveTransactions();
        theReplSet->goToRollbackState();
    }

    try {
        // now that we are settled, we have to take care of the GTIDManager
        // and the repl info thread.
        // We need to reset the state of the GTIDManager to the point
        // we intend to rollback to, and we need to make sure that the repl info thread
        // has captured this information.
        theReplSet->gtidManager->resetAfterInitialSync(
            idToRollbackTo,
            rollbackPointTS,
            rollbackPointHash
            );
        // now force an update of the repl info thread
        theReplSet->forceUpdateReplInfo();

        // at this point, everything should be settled, the applier should
        // have nothing left (and remain that way, because this is the only
        // thread that can put work on the applier). Now we can rollback
        // the data.
        while (true) {
            BSONObj o;
            {
                LOCK_REASON(lockReason, "repl: checking for oplog data");
                Lock::DBRead lk(rsoplog, lockReason);
                Client::Transaction txn(DB_SERIALIZABLE);
                // if there is nothing in the oplog, break
                o = getLastEntryInOplog();
                if( o.isEmpty() ) {
                    break;
                }
            }
            GTID lastGTID = getGTIDFromBSON("_id", o);
            // if we have rolled back enough, break from while loop
            if (GTID::cmp(lastGTID, idToRollbackTo) <= 0) {
                dassert(GTID::cmp(lastGTID, idToRollbackTo) == 0);
                break;
            }
            rollbackTransactionFromOplog(o, true);
        }
        // NOTE(review): only reached on success; on an exception we stay in
        // rollback state. That looks intentional, confirm with callers.
        theReplSet->leaveRollbackState();
    }
    catch (DBException& e) {
        log() << "Caught DBException during rollback " << e.toString() << rsLog;
        throw RollbackOplogException("DBException while trying to run rollback: " + e.toString());
    }
    catch (std::exception& e2) {
        log() << "Caught std::exception during rollback " << e2.what() << rsLog;
        throw RollbackOplogException(str::stream() << "Exception while trying to run rollback: " << e2.what());
    }
}
void TxnContext::commit(int flags) { verify(!_retired); bool gotGTID = false; GTID gtid; // do this in case we are writing the first entry // we put something in that can be distinguished from // an initialized GTID that has never been touched gtid.inc_primary(); // handle work related to logging of transaction for replication // this piece must be done before the _txn.commit try { if (hasParent()) { // This does something // a bit dangerous in that it may spill parent's stuff // with this child transaction that is committing. If something // goes wrong and this child transaction aborts, we will miss // some ops // // This ought to be ok, because we are in this try/catch block // where if something goes wrong, we will crash the server. // NOTHING better go wrong here, unless under bad rare // circumstances _txnOps.finishChildCommit(); } else if (!_txnOps.empty()) { uint64_t timestamp = 0; uint64_t hash = 0; if (!_initiatingRS) { dassert(txnGTIDManager); txnGTIDManager->getGTIDForPrimary(>id, ×tamp, &hash); } else { dassert(!txnGTIDManager); timestamp = curTimeMillis64(); } gotGTID = true; // In this case, the transaction we are committing has // no parent, so we must write the transaction's // logged operations to the opLog, as part of this transaction dassert(logTxnOpsForReplication()); dassert(_logTxnToOplog); _txnOps.rootCommit(gtid, timestamp, hash); } // handle work related to logging of transaction for chunk migrations if (!_txnOpsForSharding.empty()) { if (hasParent()) { transferOpsForShardingToParent(); } else { writeTxnOpsToMigrateLog(); } } _clientCursorRollback.preComplete(); _txn.commit(flags); // if the commit of this transaction got a GTID, then notify // the GTIDManager that the commit is now done. if (gotGTID && !_initiatingRS) { dassert(txnGTIDManager); // save the GTID for the client so that // getLastError will know what GTID slaves // need to be caught up to. 
cc().setLastOp(gtid); txnGTIDManager->noteLiveGTIDDone(gtid); } } catch (std::exception &e) { log() << "exception during critical section of txn commit, aborting system: " << e.what() << endl; printStackTrace(); logflush(); ::abort(); } // These rollback items must be processed after the ydb transaction completes. if (hasParent()) { _cappedRollback.transfer(_parent->_cappedRollback); _nsIndexRollback.transfer(_parent->_nsIndexRollback); } else { _cappedRollback.commit(); _nsIndexRollback.commit(); } _retired = true; }