bool getLastGTIDinOplog(GTID* gtid) {
    Client::ReadContext ctx(rsoplog);
    // TODO: Should this be using rsOplogDetails, verifying non-null?
    NamespaceDetails *d = nsdetails(rsoplog);
    shared_ptr<Cursor> c( BasicCursor::make(d, -1) );
    if (c->ok()) {
        *gtid = getGTIDFromOplogEntry(c->current());
        return true;
    }
    return false;
}
bool getLastGTIDinOplog(GTID* gtid) {
    LOCK_REASON(lockReason, "repl: looking up last GTID in oplog");
    Client::ReadContext ctx(rsoplog, lockReason);
    // TODO: Should this be using rsOplogDetails, verifying non-null?
    Collection *cl = getCollection(rsoplog);
    shared_ptr<Cursor> c( Cursor::make(cl, -1) );
    if (c->ok()) {
        *gtid = getGTIDFromOplogEntry(c->current());
        return true;
    }
    return false;
}
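For orientation, here is a minimal, hypothetical call site; the caller shown here is an assumption, but it reflects the one contract visible in the code above: the out-parameter is valid only when the function returns true (i.e., the oplog has at least one entry).

// Hypothetical call site (illustration only): the GTID written through the
// out-parameter is meaningful only when getLastGTIDinOplog() returns true.
GTID lastGTID;
if (getLastGTIDinOplog(&lastGTID)) {
    // oplog is non-empty: lastGTID is the GTID of the newest oplog entry
}
else {
    // oplog is empty: fall back to an initial/null GTID
}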
// returns number of seconds to sleep, if any
uint32_t BackgroundSync::produce() {
    // normally msgCheckNewState gets called periodically, but in a single node repl set
    // there are no heartbeat threads, so we do it here to be sure. this is relevant if the
    // singleton member has done a stepDown() and needs to come back up.
    if (theReplSet->config().members.size() == 1 &&
        theReplSet->myConfig().potentiallyHot()) {
        Manager* mgr = theReplSet->mgr;
        // When would mgr be null? During replsettest'ing, in which case we should
        // fall through and actually apply ops as if we were a real secondary.
        if (mgr) {
            mgr->send(boost::bind(&Manager::msgCheckNewState, theReplSet->mgr));
            // There should never be ops to sync in a 1-member set, anyway
            return 1;
        }
    }

    OplogReader r(true /* doHandshake */);

    // find a target to sync from the last op time written
    getOplogReader(r);

    // no server found
    GTID lastGTIDFetched = theReplSet->gtidManager->getLiveState();
    {
        boost::unique_lock<boost::mutex> lock(_mutex);
        if (_currentSyncTarget == NULL) {
            // if there is no one to sync from
            return 1; // sleep one second
        }
    }
    r.tailingQueryGTE(rsoplog, lastGTIDFetched);

    // if target cut connections between connecting and querying (for
    // example, because it stepped down) we might not have a cursor
    if (!r.haveCursor()) {
        return 0;
    }

    try {
        // this method may actually run rollback, yes, the name is bad
        if (isRollbackRequired(r)) {
            // sleep 2 seconds and try again. (The 2 is arbitrary).
            // If we are not fatal, then we will keep trying to sync
            // from another machine
            return 2;
        }
    }
    catch (RollbackOplogException& re) {
        // we attempted a rollback and failed, we must go fatal.
        log() << "Caught a RollbackOplogException during rollback, going fatal" << rsLog;
        theReplSet->fatal();
        return 2; // 2 is arbitrary, if we are going fatal, we are done
    }

    while (!_opSyncShouldExit) {
        while (!_opSyncShouldExit) {
            {
                // check if we should bail out
                boost::unique_lock<boost::mutex> lck(_mutex);
                if (!_opSyncShouldRun) {
                    return 0;
                }
            }
            if (!r.moreInCurrentBatch()) {
                // check to see if we have a request to sync
                // from a specific target. If so, get out so that
                // we can restart the act of syncing and
                // do so from the correct target
                if (theReplSet->gotForceSync()) {
                    return 0;
                }

                verify(!theReplSet->isPrimary());

                if (shouldChangeSyncTarget()) {
                    return 0;
                }
                // record time for each getmore
                {
                    TimerHolder batchTimer(&getmoreReplStats);
                    r.more();
                }
                // increment network byte stats
                networkByteStats.increment(r.currentBatchMessageSize());
            }

            if (!r.more()) {
                break;
            }

            // This is the operation we have received from the target
            // that we must put in our oplog with an applied field of false
            BSONObj o = r.nextSafe().getOwned();
            opsReadStats.increment();
            LOG(3) << "replicating " << o.toString(false, true) << " from " << _currentSyncTarget->fullName() << endl;
            uint64_t ts = o["ts"]._numberLong();

            // now that we have the element in o, let's check
            // if a delay is required (via slaveDelay) before
            // writing it to the oplog
            if (theReplSet->myConfig().slaveDelay > 0) {
                handleSlaveDelay(ts);
                {
                    boost::unique_lock<boost::mutex> lck(_mutex);
                    if (!_opSyncShouldRun) {
                        break;
                    }
                }
            }

            {
                Timer timer;
                bool bigTxn = false;
                {
                    Client::Transaction transaction(DB_SERIALIZABLE);
                    replicateFullTransactionToOplog(o, r, &bigTxn);
                    // we are operating as a secondary. We don't have to fsync
                    transaction.commit(DB_TXN_NOSYNC);
                }
                {
                    GTID currEntry = getGTIDFromOplogEntry(o);
                    uint64_t lastHash = o["h"].numberLong();
                    boost::unique_lock<boost::mutex> lock(_mutex);
                    // update counters
                    theReplSet->gtidManager->noteGTIDAdded(currEntry, ts, lastHash);
                    // notify applier thread that data exists
                    if (_deque.size() == 0) {
                        _queueCond.notify_all();
                    }
                    _deque.push_back(o);
                    bufferCountGauge.increment();
                    bufferSizeGauge.increment(o.objsize());
                    // this is a flow control mechanism, with bad numbers
                    // hard coded for now just to get something going.
                    // If the opSync thread notices that we have over 20000
                    // transactions in the queue, it waits until we get below
                    // 10000. This is where we wait if we get too high
                    // Once we have spilling of transactions working, this
                    // logic will need to be redone
                    if (_deque.size() > 20000) {
                        _queueCond.wait(lock);
                    }
                    if (bigTxn) {
                        // if we have a large transaction, we don't want
                        // to let it pile up. We want to process it immediately
                        // before processing anything else.
                        while (_deque.size() > 0) {
                            _queueDone.wait(lock);
                        }
                    }
                }
            }
        } // end while

        if (shouldChangeSyncTarget()) {
            return 0;
        }

        r.tailCheck();
        if (!r.haveCursor()) {
            LOG(1) << "replSet end opSync pass" << rsLog;
            return 0;
        }
        // looping back is ok because this is a tailable cursor
    }
    return 0;
}
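The slaveDelay branch above calls handleSlaveDelay(ts), which is not shown here. As a rough, standalone sketch of what such a wait does conceptually (an assumption, not the actual implementation), assuming the op timestamp is in milliseconds:

#include <ctime>
#include <unistd.h>
#include <stdint.h>

// Illustrative stand-in for a slaveDelay wait: hold off until the op is at
// least delaySecs old. The real handleSlaveDelay() also has to honor shutdown
// flags such as _opSyncShouldRun, as the caller above re-checks after it.
static void waitForSlaveDelay(uint64_t opTsMillis, int delaySecs) {
    const uint64_t delayMillis = static_cast<uint64_t>(delaySecs) * 1000ULL;
    for (;;) {
        const uint64_t nowMillis = static_cast<uint64_t>(time(0)) * 1000ULL;
        if (nowMillis >= opTsMillis + delayMillis) {
            break; // the op is now at least delaySecs old; safe to write it
        }
        sleep(1); // sleep in short increments so shutdown checks stay responsive
    }
}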
void BackgroundSync::applyOpsFromOplog() {
    GTID lastLiveGTID;
    GTID lastUnappliedGTID;
    while (1) {
        try {
            BSONObj curr;
            {
                boost::unique_lock<boost::mutex> lck(_mutex);
                // wait until we know an item has been produced
                while (_deque.size() == 0 && !_applierShouldExit) {
                    _queueDone.notify_all();
                    _queueCond.wait(lck);
                }
                if (_deque.size() == 0 && _applierShouldExit) {
                    return;
                }
                curr = _deque.front();
            }
            GTID currEntry = getGTIDFromOplogEntry(curr);
            theReplSet->gtidManager->noteApplyingGTID(currEntry);
            // we must do applyTransactionFromOplog in a loop
            // because once we have called noteApplyingGTID, we must
            // continue until we are successful in applying the transaction.
            for (uint32_t numTries = 0; numTries <= 100; numTries++) {
                try {
                    TimerHolder timer(&applyBatchStats);
                    applyTransactionFromOplog(curr);
                    opsAppliedStats.increment();
                    break;
                }
                catch (std::exception &e) {
                    log() << "exception during applying transaction from oplog: " << e.what() << endl;
                    log() << "oplog entry: " << curr.str() << endl;
                    if (numTries == 100) {
                        // something is really wrong if we fail 100 times, let's abort
                        ::abort();
                    }
                    sleepsecs(1);
                }
            }
            LOG(3) << "applied " << curr.toString(false, true) << endl;
            theReplSet->gtidManager->noteGTIDApplied(currEntry);

            {
                boost::unique_lock<boost::mutex> lck(_mutex);
                dassert(_deque.size() > 0);
                _deque.pop_front();
                bufferCountGauge.increment(-1);
                bufferSizeGauge.increment(-curr.objsize());
                // this is a flow control mechanism, with bad numbers
                // hard coded for now just to get something going.
                // If the opSync thread notices that we have over 20000
                // transactions in the queue, it waits until we get below
                // 10000. This is where we signal that we have gotten there
                // Once we have spilling of transactions working, this
                // logic will need to be redone
                if (_deque.size() == 10000) {
                    _queueCond.notify_all();
                }
            }
        }
        catch (DBException& e) {
            sethbmsg(str::stream() << "db exception in producer on applier thread: " << e.toString());
            sleepsecs(2);
        }
        catch (std::exception& e2) {
            sethbmsg(str::stream() << "exception in producer on applier thread: " << e2.what());
            sleepsecs(2);
        }
    }
}
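The flow-control comments in produce() and applyOpsFromOplog() describe a high/low watermark handshake between the opSync (producer) thread and the applier (consumer) thread. The following self-contained sketch shows that pattern in isolation; the class name, the int payload, and the std:: synchronization primitives are assumptions made for the example, since the real code shares _deque, _mutex, _queueCond, and _queueDone on BackgroundSync and uses the boost equivalents.

#include <deque>
#include <mutex>
#include <condition_variable>

// Illustrative producer/consumer queue with the same high/low watermark
// handshake described above: the producer stalls once the queue exceeds the
// high watermark, and only resumes after the consumer drains it to the low one.
class OpQueue {
public:
    // Producer side (opSync thread): wake a waiting consumer when the queue
    // goes non-empty, then block if the queue has grown past the high watermark.
    void push(int op) {
        std::unique_lock<std::mutex> lock(_mutex);
        if (_deque.empty()) {
            _cond.notify_all();
        }
        _deque.push_back(op);
        while (_deque.size() > kHighWatermark) {
            _cond.wait(lock); // flow control: wait for the consumer to drain
        }
    }

    // Consumer side (applier thread): wait for work, take one op, and signal
    // the producer once the queue has drained down to the low watermark.
    int pop() {
        std::unique_lock<std::mutex> lock(_mutex);
        while (_deque.empty()) {
            _cond.wait(lock);
        }
        const int op = _deque.front();
        _deque.pop_front();
        if (_deque.size() == kLowWatermark) {
            _cond.notify_all();
        }
        return op;
    }

private:
    enum { kHighWatermark = 20000, kLowWatermark = 10000 };
    std::deque<int> _deque;
    std::mutex _mutex;
    std::condition_variable _cond;
};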