Status LegacyReplicationCoordinator::canServeReadsFor(OperationContext* txn,
                                                      const NamespaceString& ns,
                                                      bool slaveOk) {
    // Internal (god-mode) clients may always read.
    if (txn->getClient()->isGod()) {
        return Status::OK();
    }
    // Anything we can accept writes for, we can also serve reads for.
    if (canAcceptWritesForDatabase(ns.db())) {
        return Status::OK();
    }
    if (getReplicationMode() == modeMasterSlave && _settings.slave == SimpleSlave) {
        return Status::OK();
    }
    if (slaveOk) {
        if (getReplicationMode() == modeMasterSlave || getReplicationMode() == modeNone) {
            return Status::OK();
        }
        if (getCurrentMemberState().secondary()) {
            return Status::OK();
        }
        return Status(ErrorCodes::NotMasterOrSecondaryCode,
                      "not master or secondary; cannot currently read from this replSet member");
    }
    return Status(ErrorCodes::NotMasterNoSlaveOkCode, "not master and slaveOk=false");
}
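// Illustrative sketch (not from the source above): a read path would gate the
// query on canServeReadsFor() and surface the Status to the client on failure.
// The names "replCoord", "handleQuery", and "runQuery" are hypothetical.
Status handleQuery(OperationContext* txn, const NamespaceString& ns, bool slaveOk) {
    Status readStatus = replCoord->canServeReadsFor(txn, ns, slaveOk);
    if (!readStatus.isOK()) {
        // e.g. NotMasterNoSlaveOk or NotMasterOrSecondary, as returned above
        return readStatus;
    }
    return runQuery(txn, ns);
}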
void MongoDSessionCatalog::invalidateSessions(OperationContext* opCtx,
                                              boost::optional<BSONObj> singleSessionDoc) {
    const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    bool isReplSet = replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
    if (isReplSet) {
        uassert(40528,
                str::stream() << "Direct writes against "
                              << NamespaceString::kSessionTransactionsTableNamespace.ns()
                              << " cannot be performed using a transaction or on a session.",
                !opCtx->getLogicalSessionId());
    }

    const auto catalog = SessionCatalog::get(opCtx);

    // The use of shared_ptr here is in order to work around the limitation of stdx::function that
    // the functor must be copyable.
    auto sessionKillTokens = std::make_shared<std::vector<SessionCatalog::KillToken>>();

    if (singleSessionDoc) {
        sessionKillTokens->emplace_back(catalog->killSession(LogicalSessionId::parse(
            IDLParserErrorContext("lsid"), singleSessionDoc->getField("_id").Obj())));
    } else {
        SessionKiller::Matcher matcher(
            KillAllSessionsByPatternSet{makeKillAllSessionsByPattern(opCtx)});
        catalog->scanSessions(matcher, [&sessionKillTokens](const ObservableSession& session) {
            sessionKillTokens->emplace_back(session.kill());
        });
    }

    killSessionTokensFunction(opCtx, sessionKillTokens);
}
void appendReplyMetadata(OperationContext* opCtx,
                         const OpMsgRequest& request,
                         BSONObjBuilder* metadataBob) const override {
    const bool isShardingAware = ShardingState::get(opCtx)->enabled();
    const bool isConfig = serverGlobalParams.clusterRole == ClusterRole::ConfigServer;
    auto const replCoord = repl::ReplicationCoordinator::get(opCtx);
    const bool isReplSet =
        replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;

    if (isReplSet) {
        // Attach our own last opTime.
        repl::OpTime lastOpTimeFromClient =
            repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
        replCoord->prepareReplMetadata(request.body, lastOpTimeFromClient, metadataBob);

        // For commands from mongos, append some info to help getLastError(w) work.
        // TODO: refactor out of here as part of SERVER-18236
        if (isShardingAware || isConfig) {
            rpc::ShardingMetadata(lastOpTimeFromClient, replCoord->getElectionId())
                .writeToMetadata(metadataBob)
                .transitional_ignore();
        }

        if (isShardingAware || isConfig) {
            auto lastCommittedOpTime = replCoord->getLastCommittedOpTime();
            metadataBob->append(kLastCommittedOpTimeFieldName,
                                lastCommittedOpTime.getTimestamp());
        }
    }

    // If we're a shard other than the config shard, attach the last configOpTime we know about.
    if (isShardingAware && !isConfig) {
        auto opTime = Grid::get(opCtx)->configOpTime();
        rpc::ConfigServerMetadata(opTime).writeToMetadata(metadataBob);
    }
}
void LegacyReplicationCoordinator::prepareReplSetUpdatePositionCommandHandshakes(
        OperationContext* txn, std::vector<BSONObj>* handshakes) {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lock(_mutex);

    // Handshake obj for us.
    BSONObjBuilder cmd;
    cmd.append("replSetUpdatePosition", 1);
    BSONObjBuilder sub(cmd.subobjStart("handshake"));
    sub.append("handshake", getMyRID(txn));
    sub.append("member", theReplSet->selfId());
    sub.append("config", theReplSet->myConfig().asBson());
    sub.doneFast();
    handshakes->push_back(cmd.obj());

    // Handshake objs for all chained members.
    for (OIDMemberMap::const_iterator itr = _ridMemberMap.begin();
         itr != _ridMemberMap.end();
         ++itr) {
        BSONObjBuilder cmd;
        cmd.append("replSetUpdatePosition", 1);
        // The outer handshake indicates this is a handshake command;
        // the inner one is needed as part of the structure to be passed to gotHandshake.
        BSONObjBuilder subCmd(cmd.subobjStart("handshake"));
        subCmd.append("handshake", itr->first);
        subCmd.append("member", itr->second->id());
        subCmd.append("config", itr->second->config().asBson());
        subCmd.doneFast();
        handshakes->push_back(cmd.obj());
    }
}
void LegacyReplicationCoordinator::prepareReplSetUpdatePositionCommand(
        OperationContext* txn, BSONObjBuilder* cmdBuilder) {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lock(_mutex);
    cmdBuilder->append("replSetUpdatePosition", 1);

    // Create an array containing one object for each member connected to us and one for ourself.
    BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
    OID myID = getMyRID(txn);
    {
        for (SlaveOpTimeMap::const_iterator itr = _slaveOpTimeMap.begin();
             itr != _slaveOpTimeMap.end();
             ++itr) {
            const OID& rid = itr->first;
            const BSONObj& config = mapFindWithDefault(_ridConfigMap, rid, BSONObj());
            BSONObjBuilder entry(arrayBuilder.subobjStart());
            entry.append("_id", rid);
            entry.append("optime", itr->second);
            // SERVER-14550 Even though the "config" field isn't used on the other end in 2.8,
            // we need to keep sending it for 2.6 compatibility.
            // TODO(spencer): Remove this after 2.8 is released.
            if (rid == myID) {
                entry.append("config", theReplSet->myConfig().asBson());
            } else {
                entry.append("config", config);
            }
        }
    }
}
bool ReplicationCoordinatorImpl::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
    if (!idx->unique()) {
        return false;
    }
    // Never ignore _id index
    if (idx->isIdIndex()) {
        return false;
    }
    if (getReplicationMode() != modeReplSet) {
        return false;
    }
    // see SERVER-6671
    MemberState ms = getCurrentMemberState();
    if (!((ms == MemberState::RS_STARTUP2) ||
          (ms == MemberState::RS_RECOVERING) ||
          (ms == MemberState::RS_ROLLBACK))) {
        return false;
    }
    // TODO(spencer): SERVER-14233 Remove support for old oplog versions, or move oplogVersion
    // into the repl coordinator
    /*
    // 2 is the oldest oplog version where operations
    // are fully idempotent.
    if (theReplSet->oplogVersion < 2) {
        return false;
    }*/
    return true;
}
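// Illustrative sketch (hypothetical caller, not from the source above): an
// index-insertion path might relax unique-key enforcement while the member
// state checks above hold, i.e. during STARTUP2/RECOVERING/ROLLBACK.
// "index", "key", "loc", and "insertKey" are assumed names for illustration.
Status insertIndexKey(OperationContext* txn, const IndexDescriptor* descriptor) {
    const bool dupsAllowed =
        repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
    return index->insertKey(txn, key, loc, dupsAllowed);
}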
Status LegacyReplicationCoordinator::_stepDownHelper(OperationContext* txn,
                                                     bool force,
                                                     const Milliseconds& initialWaitTime,
                                                     const Milliseconds& stepdownTime,
                                                     const Milliseconds& postStepdownWaitTime) {
    invariant(getReplicationMode() == modeReplSet);
    if (!getCurrentMemberState().primary()) {
        return Status(ErrorCodes::NotMaster, "not primary so can't step down");
    }

    if (!force) {
        Status status = _waitForSecondary(initialWaitTime, Milliseconds(10 * 1000));
        if (!status.isOK()) {
            return status;
        }
    }

    // step down
    bool worked = repl::theReplSet->stepDown(txn, stepdownTime.total_seconds());
    if (!worked) {
        return Status(ErrorCodes::NotMaster, "not primary so can't step down");
    }

    if (postStepdownWaitTime.total_milliseconds() > 0) {
        log() << "waiting for secondaries to catch up" << endl;
        // The only caller of this with a non-zero postStepdownWaitTime is
        // stepDownAndWaitForSecondary, and the only caller of that is the shutdown command,
        // which doesn't actually care if secondaries failed to catch up here, so we ignore the
        // return status of _waitForSecondary.
        _waitForSecondary(postStepdownWaitTime, Milliseconds(0));
    }
    return Status::OK();
}
void ShardingInitializationMongoD::initializeShardingEnvironmentOnShardServer(
        OperationContext* opCtx, const ShardIdentity& shardIdentity, StringData distLockProcessId) {
    initializeGlobalShardingStateForMongoD(
        opCtx, shardIdentity.getConfigsvrConnectionString(), distLockProcessId);

    _replicaSetChangeListener =
        ReplicaSetMonitor::getNotifier().makeListener<ShardingReplicaSetChangeListener>(
            opCtx->getServiceContext());

    // Determine primary/secondary/standalone state in order to properly initialize sharding
    // components.
    const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    bool isReplSet = replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
    bool isStandaloneOrPrimary =
        !isReplSet || (replCoord->getMemberState() == repl::MemberState::RS_PRIMARY);

    CatalogCacheLoader::get(opCtx).initializeReplicaSetRole(isStandaloneOrPrimary);
    ChunkSplitter::get(opCtx).onShardingInitialization(isStandaloneOrPrimary);
    PeriodicBalancerConfigRefresher::get(opCtx).onShardingInitialization(
        opCtx->getServiceContext(), isStandaloneOrPrimary);

    // Start the transaction coordinator service only if the node is the primary of a replica set.
    TransactionCoordinatorService::get(opCtx)->onShardingInitialization(
        opCtx, isReplSet && isStandaloneOrPrimary);

    Grid::get(opCtx)->setShardingInitialized();

    LOG(0) << "Finished initializing sharding components for "
           << (isStandaloneOrPrimary ? "primary" : "secondary") << " node.";
}
bool ReplicationCoordinator::isOplogDisabledFor(OperationContext* opCtx,
                                                const NamespaceString& nss) {
    // With replication disabled entirely, there is no oplog at all.
    if (getReplicationMode() == ReplicationCoordinator::modeNone) {
        return true;
    }
    if (!opCtx->writesAreReplicated()) {
        return true;
    }
    // Writes to the local database, system.profile collections, and drop-pending
    // namespaces are never oplogged.
    if (nss.db() == "local") {
        return true;
    }
    if (nss.isSystemDotProfile()) {
        return true;
    }
    if (nss.isDropPendingNamespace()) {
        return true;
    }
    fassert(28626, opCtx->recoveryUnit());
    return false;
}
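// Illustrative sketch (hypothetical caller, not from the source above): a
// write observer would consult isOplogDisabledFor() before appending an oplog
// entry. "onWrite" and "logOperation" are assumed helper names.
void onWrite(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& doc) {
    auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    if (replCoord->isOplogDisabledFor(opCtx, nss)) {
        return;  // per the checks above, this write must not be oplogged
    }
    logOperation(opCtx, nss, doc);
}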
const ConnectionString ShardLocal::getConnString() const {
    auto replCoord = repl::getGlobalReplicationCoordinator();

    // Currently ShardLocal only works for config servers, which must be replica sets. If we
    // ever start using ShardLocal on shards we'll need to consider how to handle shards that are
    // not replica sets.
    invariant(replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet);

    return replCoord->getConfig().getConnectionString();
}
OID LegacyReplicationCoordinator::getMyRID(OperationContext* txn) {
    Mode mode = getReplicationMode();
    if (mode == modeReplSet) {
        return theReplSet->syncSourceFeedback.getMyRID();
    } else if (mode == modeMasterSlave) {
        ReplSource source(txn);
        return source.getMyRID();
    }
    invariant(false);  // Don't have an RID if no replication is enabled
}
ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
        const OperationContext* txn, const OpTime& opId, const WriteConcernOptions& writeConcern) {
    // TODO(spencer): handle killop
    if (writeConcern.wNumNodes <= 1 && writeConcern.wMode.empty()) {
        // no desired replication check
        return StatusAndDuration(Status::OK(), Milliseconds(0));
    }

    const Mode replMode = getReplicationMode();
    if (replMode == modeNone || serverGlobalParams.configsvr) {
        // no replication check needed (validated above)
        return StatusAndDuration(Status::OK(), Milliseconds(0));
    }

    if (writeConcern.wMode == "majority" && replMode == modeMasterSlave) {
        // with master/slave, majority is equivalent to w=1
        return StatusAndDuration(Status::OK(), Milliseconds(0));
    }

    Timer timer;
    boost::condition_variable condVar;
    boost::unique_lock<boost::mutex> lk(_mutex);
    // Must hold _mutex before constructing waitInfo as it will modify _replicationWaiterList.
    WaiterInfo waitInfo(&_replicationWaiterList, &opId, &writeConcern, &condVar);
    while (!_opReplicatedEnough_inlock(opId, writeConcern)) {
        const int elapsed = timer.millis();
        if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout &&
            elapsed > writeConcern.wTimeout) {
            return StatusAndDuration(
                Status(ErrorCodes::ExceededTimeLimit, "waiting for replication timed out"),
                Milliseconds(elapsed));
        }

        if (_inShutdown) {
            return StatusAndDuration(
                Status(ErrorCodes::ShutdownInProgress, "Replication is being shut down"),
                Milliseconds(elapsed));
        }

        try {
            if (writeConcern.wTimeout == WriteConcernOptions::kNoTimeout) {
                condVar.wait(lk);
            } else {
                condVar.timed_wait(lk, Milliseconds(writeConcern.wTimeout - elapsed));
            }
        } catch (const boost::thread_interrupted&) {
        }
    }

    return StatusAndDuration(Status::OK(), Milliseconds(timer.millis()));
}
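// Illustrative sketch (hypothetical caller, not from the source above): a
// write path would pass the client's last opTime and write concern, then
// propagate the Status from the StatusAndDuration pair constructed above.
// "replCoord", "waitForWriteConcern", and the member names are assumptions.
Status waitForWriteConcern(OperationContext* txn,
                           const repl::OpTime& lastOpTime,
                           const WriteConcernOptions& writeConcern) {
    auto result = replCoord->awaitReplication(txn, lastOpTime, writeConcern);
    // result.duration records how long the wait took; result.status carries
    // ExceededTimeLimit / ShutdownInProgress on failure, as returned above.
    return result.status;
}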
Status LegacyReplicationCoordinator::setLastOptime(const OID& rid,
                                                   const OpTime& ts,
                                                   const BSONObj& config) {
    std::string oplogNs = getReplicationMode() == modeReplSet ? "local.oplog.rs"
                                                              : "local.oplog.$main";
    if (!updateSlaveTracking(BSON("_id" << rid), config, oplogNs, ts)) {
        return Status(ErrorCodes::NodeNotFound,
                      str::stream() << "could not update node with _id: "
                                    << config["_id"].Int()
                                    << " because it cannot be found in current ReplSetConfig");
    }

    if (getReplicationMode() == modeReplSet && !getCurrentMemberState().primary()) {
        // pass along if we are not primary
        LOG(2) << "received notification that " << config
               << " has reached optime: " << ts.toStringPretty();
        theReplSet->syncSourceFeedback.updateMap(rid, ts);
    }
    return Status::OK();
}
void ReplicationCoordinatorImpl::setCurrentReplicaSetConfig(const ReplicaSetConfig& newConfig,
                                                            int myIndex) {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lk(_mutex);
    _rsConfig = newConfig;
    _thisMembersConfigIndex = myIndex;

    cancelHeartbeats();
    _startHeartbeats();

    // TODO(SERVER-14591): instead of this, use WriteConcernOptions and store in replcoord;
    // in getLastError command, fetch the defaults via a getter in replcoord.
    // replcoord is responsible for replacing its gledefault with a new config's.
    /*
    if (getLastErrorDefault || !c.getLastErrorDefaults.isEmpty()) {
        // see comment in dbcommands.cpp for getlasterrordefault
        getLastErrorDefault = new BSONObj(c.getLastErrorDefaults);
    }
    */
}
Status LegacyReplicationCoordinator::setLastOptime(OperationContext* txn,
                                                   const OID& rid,
                                                   const OpTime& ts) {
    {
        boost::lock_guard<boost::mutex> lock(_mutex);
        if (ts <= mapFindWithDefault(_slaveOpTimeMap, rid, OpTime())) {
            // Only update if ts is newer than what we have already.
            return Status::OK();
        }
        BSONObj config = mapFindWithDefault(_ridConfigMap, rid, BSONObj());
        LOG(2) << "received notification that node with RID " << rid << " and config " << config
               << " has reached optime: " << ts.toStringPretty();

        if (rid != getMyRID(txn)) {
            // TODO(spencer): Remove this invariant for backwards compatibility
            invariant(!config.isEmpty());
            // This is what updates the progress information used for satisfying write concern
            // and wakes up threads waiting for replication.
            if (!updateSlaveTracking(BSON("_id" << rid), config, ts)) {
                return Status(ErrorCodes::NodeNotFound,
                              str::stream() << "could not update node with _id: "
                                            << config["_id"].Int()
                                            << " because it cannot be found in current "
                                               "ReplSetConfig");
            }
        }

        // This updates the _slaveOpTimeMap, which is used for forwarding slave progress
        // upstream in chained replication.
        LOG(2) << "Updating our knowledge of the replication progress for node with RID " << rid
               << " to be at optime " << ts;
        _slaveOpTimeMap[rid] = ts;
    }

    if (getReplicationMode() == modeReplSet && !getCurrentMemberState().primary()) {
        // pass along if we are not primary
        theReplSet->syncSourceFeedback.forwardSlaveProgress();
    }
    return Status::OK();
}
void ReplicationCoordinatorImpl::setCurrentMemberState(const MemberState& newState) {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lk(_mutex);
    _currentState = newState;
}
void ReplicationCoordinatorImpl::setCurrentReplicaSetConfig(
        const TopologyCoordinator::ReplicaSetConfig& newConfig) {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lk(_mutex);
    _rsConfig = newConfig;
}
MemberState ReplicationCoordinatorImpl::getCurrentMemberState() const {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lk(_mutex);
    return _currentState;
}