bool BackgroundSync::hasCursor() {
    {
        // prevent writers from blocking readers during fsync
        SimpleMutex::scoped_lock fsynclk(filesLockedFsync);
        // we don't need the local write lock yet, but it's needed by OplogReader::connect
        // so we take it preemptively to avoid deadlocking.
        Lock::DBWrite lk("local");

        boost::unique_lock<boost::mutex> lock(_mutex);

        if (!_oplogMarkerTarget || _currentSyncTarget != _oplogMarkerTarget) {
            if (!_currentSyncTarget) {
                return false;
            }

            log() << "replset setting oplog notifier to "
                  << _currentSyncTarget->fullName() << rsLog;
            _oplogMarkerTarget = _currentSyncTarget;

            _oplogMarker.resetConnection();

            if (!_oplogMarker.connect(_oplogMarkerTarget->fullName())) {
                LOG(1) << "replset could not connect to "
                       << _oplogMarkerTarget->fullName() << rsLog;
                _oplogMarkerTarget = NULL;
                return false;
            }
        }
    }

    if (!_oplogMarker.haveCursor()) {
        BSONObj fields = BSON("ts" << 1);
        _oplogMarker.tailingQueryGTE(rsoplog, theReplSet->lastOpTimeWritten, &fields);
    }

    return _oplogMarker.haveCursor();
}
bool BackgroundSync::connectOplogNotifier() {
    // prevent writers from blocking readers during fsync
    SimpleMutex::scoped_lock fsynclk(filesLockedFsync);
    // we don't need the local write lock yet, but it's needed by OplogReader::connect
    // so we take it preemptively to avoid deadlocking.
    Lock::DBWrite lk("local");

    boost::unique_lock<boost::mutex> lock(_mutex);

    if (!_oplogMarkerTarget || _currentSyncTarget != _oplogMarkerTarget) {
        if (!_currentSyncTarget) {
            return false;
        }

        log() << "replset setting oplog notifier to "
              << _currentSyncTarget->fullName() << rsLog;
        _oplogMarkerTarget = _currentSyncTarget;

        if (!theReplSet->syncSourceFeedback.connect(_oplogMarkerTarget)) {
            _oplogMarkerTarget = NULL;
            return false;
        }
    }
    return true;
}
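/*
 * The two functions above acquire locks in a fixed order (fsync lock, then the
 * "local" database write lock, then the member mutex), taking the DB write lock
 * preemptively even though it is not needed yet, so that no two threads ever wait
 * on each other's locks in opposite orders. The sketch below illustrates that
 * lock-ordering idea with plain std::mutex; the names (fsyncLock, localDbLock,
 * stateMutex, connectNotifier) are illustrative stand-ins, not the MongoDB
 * primitives used above.
 */
#include <mutex>

std::mutex fsyncLock;    // analogous to filesLockedFsync
std::mutex localDbLock;  // analogous to Lock::DBWrite("local")
std::mutex stateMutex;   // analogous to _mutex

bool connectNotifier() {
    // Always acquire in the same global order: fsync -> local DB -> state.
    // Taking localDbLock here "preemptively" mirrors the comment in the code
    // above: a callee will need it later, and taking it early keeps the order fixed.
    std::lock_guard<std::mutex> fsync(fsyncLock);
    std::lock_guard<std::mutex> local(localDbLock);
    std::lock_guard<std::mutex> state(stateMutex);

    // ... work that may call into code requiring localDbLock ...
    return true;
}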
// Doles out all the work to the writer pool threads and waits for them to complete
void SyncTail::multiApply(std::deque<BSONObj>& ops, MultiSyncApplyFunc applyFunc) {
    // Use a ThreadPool to prefetch all the operations in a batch.
    prefetchOps(ops);

    std::vector< std::vector<BSONObj> > writerVectors(theReplSet->replWriterThreadCount);
    fillWriterVectors(ops, &writerVectors);
    LOG(2) << "replication batch size is " << ops.size() << endl;

    // We must grab this because we're going to grab write locks later.
    // We hold this mutex the entire time we're writing; it doesn't matter
    // because all readers are blocked anyway.
    SimpleMutex::scoped_lock fsynclk(filesLockedFsync);

    // stop all readers until we're done
    Lock::ParallelBatchWriterMode pbwm;

    applyOps(writerVectors, applyFunc);
}
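/*
 * fillWriterVectors above splits the batch into one vector per writer thread.
 * Below is a minimal sketch of one way such partitioning can work: hash each
 * op's namespace so every op for a given collection lands on the same writer
 * and stays ordered. This is an assumption about the strategy, not MongoDB's
 * exact implementation; the Op struct and fillWriterVectorsSketch name are
 * illustrative only.
 */
#include <deque>
#include <functional>
#include <string>
#include <vector>

struct Op {
    std::string ns;  // target namespace, e.g. "test.coll"
    // ... rest of the oplog entry ...
};

void fillWriterVectorsSketch(const std::deque<Op>& ops,
                             std::vector<std::vector<Op>>* writerVectors) {
    std::hash<std::string> hasher;
    for (const Op& op : ops) {
        // Same namespace -> same writer vector, so ops on one collection keep their order.
        size_t idx = hasher(op.ns) % writerVectors->size();
        (*writerVectors)[idx].push_back(op);
    }
}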
// Applies a batch of oplog entries, by using a set of threads to apply the operations and then
// writes the oplog entries to the local oplog.
OpTime SyncTail::multiApply(OperationContext* txn, const OpQueue& ops) {
    invariant(_applyFunc);

    if (getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1()) {
        // Use a ThreadPool to prefetch all the operations in a batch.
        prefetchOps(ops.getDeque(), &_prefetcherPool);
    }

    std::vector<std::vector<BSONObj>> writerVectors(replWriterThreadCount);

    fillWriterVectors(txn, ops.getDeque(), &writerVectors);
    LOG(2) << "replication batch size is " << ops.getDeque().size() << endl;

    // We must grab this because we're going to grab write locks later.
    // We hold this mutex the entire time we're writing; it doesn't matter
    // because all readers are blocked anyway.
    stdx::lock_guard<SimpleMutex> fsynclk(filesLockedFsync);

    // stop all readers until we're done
    Lock::ParallelBatchWriterMode pbwm(txn->lockState());

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    if (replCoord->getMemberState().primary() && !replCoord->isWaitingForApplierToDrain()) {
        severe() << "attempting to replicate ops while primary";
        fassertFailed(28527);
    }

    applyOps(writerVectors, &_writerPool, _applyFunc, this);

    OpTime lastOpTime;
    {
        ON_BLOCK_EXIT([&] { _writerPool.join(); });
        std::vector<BSONObj> raws;
        raws.reserve(ops.getDeque().size());
        for (auto&& op : ops.getDeque()) {
            raws.emplace_back(op.raw);
        }
        lastOpTime = writeOpsToOplog(txn, raws);
        if (inShutdown()) {
            return OpTime();
        }
    }

    // We have now written all database writes and updated the oplog to match.
    return lastOpTime;
}
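/*
 * In the newer multiApply above, ON_BLOCK_EXIT guarantees that the writer pool
 * is joined even when the function returns early because of shutdown. Below is
 * a minimal, self-contained sketch of that scope-guard idea; ScopeGuard and the
 * commented usage names are illustrative, not MongoDB's actual utilities.
 */
#include <functional>
#include <utility>

class ScopeGuard {
public:
    explicit ScopeGuard(std::function<void()> fn) : _fn(std::move(fn)) {}
    ~ScopeGuard() { _fn(); }  // runs on every exit path: normal return or early return
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;

private:
    std::function<void()> _fn;
};

// Usage mirroring the block above: the pool is always joined before leaving the scope.
// bool applyBatch(WriterPool& pool) {
//     ScopeGuard joinPool([&] { pool.join(); });
//     if (shuttingDown()) {
//         return false;   // joinPool still joins the pool here
//     }
//     writeOps();
//     return true;        // ...and here
// }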
void BackgroundSync::getOplogReader(OplogReader& r) {
    const Member *target = NULL, *stale = NULL;
    BSONObj oldest;

    {
        boost::unique_lock<boost::mutex> lock(_mutex);
        if (_lastOpTimeFetched.isNull()) {
            // then we're initial syncing and we're still waiting for this to be set
            _currentSyncTarget = NULL;
            return;
        }

        // Wait until we've applied the ops we have before we choose a sync target
        while (!_appliedBuffer) {
            _condvar.wait(lock);
        }
    }

    while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
        sleepmillis(0);
    }

    verify(r.conn() == NULL);

    while ((target = theReplSet->getMemberToSyncTo()) != NULL) {
        string current = target->fullName();

        if (!r.connect(current)) {
            LOG(2) << "replSet can't connect to " << current
                   << " to read operations" << rsLog;
            r.resetConnection();
            theReplSet->veto(current);
            continue;
        }

        if (isStale(r, oldest)) {
            r.resetConnection();
            theReplSet->veto(current, 600);
            stale = target;
            continue;
        }

        // if we made it here, the target is up and not stale
        {
            boost::unique_lock<boost::mutex> lock(_mutex);
            _currentSyncTarget = target;
        }

        {
            // prevent writers from blocking readers during fsync
            SimpleMutex::scoped_lock fsynclk(filesLockedFsync);
            // we don't need the local write lock yet, but it's needed by ensureMe()
            // so we take it preemptively to avoid deadlocking.
            Lock::DBWrite lk("local");
            theReplSet->syncSourceFeedback.connect(target);
        }

        return;
    }

    // the only viable sync target was stale
    if (stale) {
        theReplSet->goStale(stale, oldest);
        sleepsecs(120);
    }

    {
        boost::unique_lock<boost::mutex> lock(_mutex);
        _currentSyncTarget = NULL;
    }
}
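/*
 * getOplogReader above loops over candidate sync sources, vetoing any member it
 * cannot connect to (or that is too stale) and moving on to the next one. The
 * sketch below shows that select/veto/retry shape in isolation; Candidate,
 * pickCandidate, tryConnect, and chooseSyncSource are hypothetical stand-ins,
 * not the replica-set API used above.
 */
#include <set>
#include <string>
#include <vector>

struct Candidate {
    std::string host;
};

// Placeholder for a real connection attempt.
bool tryConnect(const Candidate&) { return true; }

const Candidate* pickCandidate(const std::vector<Candidate>& members,
                               const std::set<std::string>& vetoed) {
    for (const Candidate& c : members) {
        if (vetoed.count(c.host) == 0) {
            return &c;  // first non-vetoed member wins in this sketch
        }
    }
    return nullptr;  // no viable sync source left
}

const Candidate* chooseSyncSource(const std::vector<Candidate>& members) {
    std::set<std::string> vetoed;
    const Candidate* target = nullptr;
    while ((target = pickCandidate(members, vetoed)) != nullptr) {
        if (!tryConnect(*target)) {
            vetoed.insert(target->host);  // analogous to theReplSet->veto(current)
            continue;
        }
        return target;  // connected and usable
    }
    return nullptr;
}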