BSONObj SyncTail::oplogApplySegment(const BSONObj& applyGTEObj,
                                    const BSONObj& minValidObj,
                                    MultiSyncApplyFunc func) {
    OpTime applyGTE = applyGTEObj["ts"]._opTime();
    OpTime minValid = minValidObj["ts"]._opTime();

    // We have to keep track of the last op applied to the data, because there's no other easy
    // way of getting this data synchronously.  Batches may go past minValidObj, so we need to
    // know to bump minValid past minValidObj.
    BSONObj lastOp = applyGTEObj;
    OpTime ts = applyGTE;

    time_t start = time(0);
    time_t now = start;

    unsigned long long n = 0, lastN = 0;

    while( ts < minValid ) {
        OpQueue ops;
        time_t batchStart = time(0);

        while (ops.getSize() < replBatchLimitBytes) {
            if (tryPopAndWaitForMore(&ops)) {
                break;
            }

            // apply replication batch limits
            now = time(0);
            if (!ops.empty()) {
                // compare elapsed time within this batch against the limit; comparing the
                // absolute wall-clock time (now) against replBatchLimitSeconds would always
                // be true and would end every batch after a single pop
                if (now - batchStart > replBatchLimitSeconds)
                    break;
                if (ops.getDeque().size() > replBatchLimitOperations)
                    break;
            }
        }
        setOplogVersion(ops.getDeque().front());

        multiApply(ops.getDeque(), func);

        n += ops.getDeque().size();

        if ( n > lastN + 1000 ) {
            if (now - start > 10) {
                // simple progress metering
                log() << "replSet initialSyncOplogApplication applied " << n
                      << " operations, synced to " << ts.toStringPretty() << rsLog;
                start = now;
                lastN = n;
            }
        }

        // we want to keep a record of the last op applied, to compare with minvalid
        lastOp = ops.getDeque().back();
        OpTime tempTs = lastOp["ts"]._opTime();
        applyOpsToOplog(&ops.getDeque());

        ts = tempTs;
    }

    return lastOp;
}
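The func parameter is what lets this one segment applier serve both initial sync and steady-state replication, which differ only in how forgiving the per-op apply function is. A minimal sketch of the two call sites, assuming wrappers along the lines of the 2.x tree; the names InitialSync::oplogApplication, multiInitialSyncApply, and multiSyncApply are assumptions here, not confirmed by this excerpt:

// Sketch, not verbatim source: assumed wrappers that parametrize
// oplogApplySegment with the appropriate apply function.
BSONObj InitialSync::oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj) {
    // initial sync tolerates and repairs certain apply failures on the fly
    return oplogApplySegment(applyGTEObj, minValidObj, multiInitialSyncApply);
}

BSONObj SyncTail::oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj) {
    // steady-state replication applies ops strictly
    return oplogApplySegment(applyGTEObj, minValidObj, multiSyncApply);
}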
/* tail an oplog.  ok to return, will be re-called. */
void SyncTail::oplogApplication() {
    while( 1 ) {
        OpQueue ops;

        verify( !Lock::isLocked() );

        Timer batchTimer;
        int lastTimeChecked = 0;

        do {
            if (theReplSet->isPrimary()) {
                massert(16620, "there are ops to sync, but I'm primary", ops.empty());
                return;
            }

            int now = batchTimer.seconds();

            // apply replication batch limits
            if (!ops.empty()) {
                if (now > replBatchLimitSeconds)
                    break;
                if (ops.getDeque().size() > replBatchLimitOperations)
                    break;
            }

            // occasionally check some things
            // (always checked in the first iteration of this do-while loop, because
            // ops is empty)
            if (ops.empty() || now > lastTimeChecked) {
                {
                    boost::unique_lock<boost::mutex> lock(theReplSet->initialSyncMutex);
                    if (theReplSet->initialSyncRequested) {
                        // got a resync command
                        return;
                    }
                }
                lastTimeChecked = now;

                // can we become secondary?
                // we have to check this before calling mgr, as we must be a secondary to
                // become primary
                if (!theReplSet->isSecondary()) {
                    OpTime minvalid;
                    OperationContextImpl txn;

                    theReplSet->tryToGoLiveAsASecondary(&txn, minvalid);
                }

                // normally msgCheckNewState gets called periodically, but in a single node
                // replset there are no heartbeat threads, so we do it here to be sure.  this
                // is relevant if the singleton member has done a stepDown() and needs to come
                // back up.
                if (theReplSet->config().members.size() == 1 &&
                    theReplSet->myConfig().potentiallyHot()) {
                    Manager* mgr = theReplSet->mgr;
                    // When would mgr be null?  During replsettest'ing, in which case we should
                    // fall through and actually apply ops as if we were a real secondary.
                    if (mgr) {
                        mgr->send(stdx::bind(&Manager::msgCheckNewState, theReplSet->mgr));
                        sleepsecs(1);
                        // There should never be ops to sync in a 1-member set, anyway
                        return;
                    }
                }
            }

            const int slaveDelaySecs = theReplSet->myConfig().slaveDelay;
            if (!ops.empty() && slaveDelaySecs > 0) {
                const BSONObj& lastOp = ops.getDeque().back();
                const unsigned int opTimestampSecs = lastOp["ts"]._opTime().getSecs();

                // Stop the batch as the lastOp is too new to be applied.  If we continue
                // on, we can get ops that are way ahead of the delay and this will
                // make this thread sleep longer when handleSlaveDelay is called
                // and apply ops much sooner than we like.
                if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
                    break;
                }
            }
            // keep fetching more ops as long as we haven't filled up a full batch yet
        } while (!tryPopAndWaitForMore(&ops) &&  // tryPopAndWaitForMore returns true
                                                 // when we need to end a batch early
                 (ops.getSize() < replBatchLimitBytes));

        // For pausing replication in tests
        while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
            sleepmillis(0);
        }

        const BSONObj& lastOp = ops.getDeque().back();
        setOplogVersion(lastOp);
        handleSlaveDelay(lastOp);

        // Set minValid to the last op to be applied in this next batch.
        // This will cause this node to go into RECOVERING state
        // if we should crash and restart before updating the oplog
        theReplSet->setMinValid(lastOp);

        if (BackgroundSync::get()->isAssumingPrimary()) {
            LOG(1) << "about to apply batch up to optime: "
                   << ops.getDeque().back()["ts"]._opTime().toStringPretty();
        }

        multiApply(ops.getDeque(), multiSyncApply);

        if (BackgroundSync::get()->isAssumingPrimary()) {
            LOG(1) << "about to update oplog to optime: "
                   << ops.getDeque().back()["ts"]._opTime().toStringPretty();
        }

        applyOpsToOplog(&ops.getDeque());

        // If we're just testing (no manager), don't keep looping if we exhausted the bgqueue
        if (!theReplSet->mgr) {
            BSONObj op;
            if (!peek(&op)) {
                return;
            }
        }
    }
}
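Both variants of oplogApplication (the one above and the earlier one below) bound their batches in two different units: ops.getSize() is a running byte total checked against replBatchLimitBytes, while ops.getDeque().size() is the operation count checked against replBatchLimitOperations. A sketch of an OpQueue consistent with that usage, inferred from the call sites rather than taken verbatim:

// Sketch, inferred from usage: buffers a batch of oplog entries and tracks
// their total BSON size, so callers can bound a batch both by bytes and by count.
class OpQueue {
public:
    OpQueue() : _size(0) {}

    size_t getSize() const { return _size; }            // total bytes buffered
    std::deque<BSONObj>& getDeque() { return _deque; }  // the ops themselves
    bool empty() const { return _deque.empty(); }

    void push_back(BSONObj& op) {
        _deque.push_back(op);
        _size += op.objsize();  // accumulate the BSON size of each op
    }

private:
    std::deque<BSONObj> _deque;
    size_t _size;
};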
/* tail an oplog.  ok to return, will be re-called. */
void SyncTail::oplogApplication() {
    while( 1 ) {
        OpQueue ops;

        verify( !Lock::isLocked() );

        Timer batchTimer;
        int lastTimeChecked = 0;

        // always fetch a few ops first
        // tryPopAndWaitForMore returns true when we need to end a batch early
        while (!tryPopAndWaitForMore(&ops) &&
               (ops.getSize() < replBatchLimitBytes)) {

            if (theReplSet->isPrimary()) {
                massert(16620, "there are ops to sync, but I'm primary", ops.empty());
                return;
            }

            int now = batchTimer.seconds();

            // apply replication batch limits
            if (!ops.empty()) {
                if (now > replBatchLimitSeconds)
                    break;
                if (ops.getDeque().size() > replBatchLimitOperations)
                    break;
            }

            // occasionally check some things
            if (ops.empty() || now > lastTimeChecked) {
                lastTimeChecked = now;

                // can we become secondary?
                // we have to check this before calling mgr, as we must be a secondary to
                // become primary
                if (!theReplSet->isSecondary()) {
                    OpTime minvalid;
                    theReplSet->tryToGoLiveAsASecondary(minvalid);
                }

                // normally msgCheckNewState gets called periodically, but in a single node
                // repl set there are no heartbeat threads, so we do it here to be sure.  this
                // is relevant if the singleton member has done a stepDown() and needs to come
                // back up.
                if (theReplSet->config().members.size() == 1 &&
                    theReplSet->myConfig().potentiallyHot()) {
                    Manager* mgr = theReplSet->mgr;
                    // When would mgr be null?  During replsettest'ing.
                    if (mgr)
                        mgr->send(boost::bind(&Manager::msgCheckNewState, theReplSet->mgr));
                    sleepsecs(1);
                    // There should never be ops to sync in a 1-member set, anyway
                    return;
                }
            }

            const int slaveDelaySecs = theReplSet->myConfig().slaveDelay;
            if (!ops.empty() && slaveDelaySecs > 0) {
                const BSONObj& lastOp = ops.getDeque().back();
                const unsigned int opTimestampSecs = lastOp["ts"]._opTime().getSecs();

                // Stop the batch as the lastOp is too new to be applied.  If we continue
                // on, we can get ops that are way ahead of the delay and this will
                // make this thread sleep longer when handleSlaveDelay is called
                // and apply ops much sooner than we like.
                if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
                    break;
                }
            }
        }

        // For pausing replication in tests
        while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
            sleepmillis(0);
        }

        const BSONObj& lastOp = ops.getDeque().back();
        setOplogVersion(lastOp);
        handleSlaveDelay(lastOp);

        // Set minValid to the last op to be applied in this next batch.
        // This will cause this node to go into RECOVERING state
        // if we should crash and restart before updating the oplog
        theReplSet->setMinValid(lastOp);

        multiApply(ops.getDeque(), multiSyncApply);

        applyOpsToOplog(&ops.getDeque());
    }
}
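For reference, the batch limits and the fail point used throughout these loops. The declarations below are assumptions based on the 2.x source tree; the exact values may differ by version:

// Assumed declarations (values from the 2.x tree, may differ by version):
static const int replBatchLimitBytes = 100 * 1024 * 1024;   // cap a batch at ~100 MB of oplog
static const int replBatchLimitSeconds = 1;                 // ...or about a second of fetching
static const unsigned int replBatchLimitOperations = 5000;  // ...or 5000 operations

// The fail point used to pause replication in tests; with test commands
// enabled it is toggled at runtime via the configureFailPoint admin command:
//   db.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'})
//   db.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'off'})
MONGO_FP_DECLARE(rsSyncApplyStop);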