void startMasterSlave() {
    oldRepl();
    if (!replSettings.slave && !replSettings.master)
        return;

    {
        Lock::GlobalWrite lk;
        replLocalAuth();
    }

    if (replSettings.slave) {
        verify(replSettings.slave == SimpleSlave);
        LOG(1) << "slave=true" << endl;
        boost::thread repl_thread(replSlaveThread);
    }

    if (replSettings.master) {
        LOG(1) << "master=true" << endl;
        replSettings.master = true;
        createOplog();
        boost::thread t(replMasterThread);
    }

    while (replSettings.fastsync)  // don't allow writes until we've set up from log
        sleepmillis(50);
}
Status ReplicationCoordinatorExternalStateImpl::initializeReplSetStorage(OperationContext* txn,
                                                                         const BSONObj& config) {
    try {
        createOplog(txn);

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            ScopedTransaction scopedXact(txn, MODE_X);
            Lock::GlobalWrite globalWrite(txn->lockState());

            WriteUnitOfWork wuow(txn);
            Helpers::putSingleton(txn, configCollectionName, config);
            const auto msgObj = BSON("msg"
                                     << "initiating set");
            getGlobalServiceContext()->getOpObserver()->onOpMessage(txn, msgObj);
            wuow.commit();
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");

        // This initializes the minvalid document with a null "ts" because older versions (<= 3.2)
        // get angry if the minValid document is present but doesn't have a "ts" field.
        // Consider removing this once we no longer need to support downgrading to 3.2.
        _storageInterface->setMinValidToAtLeast(txn, {});

        FeatureCompatibilityVersion::setIfCleanStartup(txn, _storageInterface);
    } catch (const DBException& ex) {
        return ex.toStatus();
    }
    return Status::OK();
}
void syncDoInitialSync(ReplicationCoordinatorExternalState* replicationCoordinatorExternalState) {
    stdx::unique_lock<stdx::mutex> lk(_initialSyncMutex, stdx::defer_lock);
    if (!lk.try_lock()) {
        uasserted(34474, "Initial Sync Already Active.");
    }

    std::unique_ptr<BackgroundSync> bgsync;
    {
        log() << "Starting replication fetcher thread for initial sync";
        auto txn = cc().makeOperationContext();
        bgsync = stdx::make_unique<BackgroundSync>(
            replicationCoordinatorExternalState,
            replicationCoordinatorExternalState->makeInitialSyncOplogBuffer(txn.get()));
        bgsync->startup(txn.get());
        createOplog(txn.get());
    }
    ON_BLOCK_EXIT([&bgsync]() {
        log() << "Stopping replication fetcher thread for initial sync";
        auto txn = cc().makeOperationContext();
        bgsync->shutdown(txn.get());
        bgsync->join(txn.get());
    });

    int failedAttempts = 0;
    while (failedAttempts < kMaxFailedAttempts) {
        try {
            // leave loop when successful
            Status status = _initialSync(bgsync.get());
            if (status.isOK()) {
                break;
            } else {
                error() << status;
            }
        } catch (const DBException& e) {
            error() << e;
            // Return if in shutdown
            if (inShutdown()) {
                return;
            }
        }

        if (inShutdown()) {
            return;
        }
        error() << "initial sync attempt failed, " << (kMaxFailedAttempts - ++failedAttempts)
                << " attempts remaining";
        sleepmillis(durationCount<Milliseconds>(kInitialSyncRetrySleepDuration));
    }

    // No need to print a stack
    if (failedAttempts >= kMaxFailedAttempts) {
        severe() << "The maximum number of retries have been exhausted for initial sync.";
        fassertFailedNoTrace(16233);
    }
}
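// Hedged side note (not MongoDB source): ON_BLOCK_EXIT above registers a cleanup lambda that
// runs when the enclosing scope exits, whether by return, exception, or falling off the end.
// The ScopeGuard class below is a minimal standalone sketch of that idea; the name and shape
// are illustrative only and do not match MongoDB's actual implementation.
#include <functional>
#include <utility>

class ScopeGuard {
public:
    explicit ScopeGuard(std::function<void()> fn) : _fn(std::move(fn)) {}
    ~ScopeGuard() {
        if (_fn) {
            _fn();  // cleanup runs on every exit path from the enclosing scope
        }
    }
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;

private:
    std::function<void()> _fn;
};

// Usage sketch: ensure the fetcher thread is stopped no matter how the function returns.
//   ScopeGuard stopFetcher([&] { /* stop and join the fetcher thread */ });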
void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
    createOplog(txn);

    ScopedTransaction scopedXact(txn, MODE_X);
    Lock::GlobalWrite globalWrite(txn->lockState());

    WriteUnitOfWork wuow(txn);
    getGlobalServiceContext()->getOpObserver()->onOpMessage(txn,
                                                            BSON("msg"
                                                                 << "initiating set"));
    wuow.commit();
}
Status ReplicationCoordinatorExternalStateImpl::initializeReplSetStorage(OperationContext* opCtx,
                                                                         const BSONObj& config) {
    try {
        createOplog(opCtx);

        writeConflictRetry(opCtx,
                           "initiate oplog entry",
                           NamespaceString::kRsOplogNamespace.toString(),
                           [this, &opCtx, &config] {
                               Lock::GlobalWrite globalWrite(opCtx);

                               WriteUnitOfWork wuow(opCtx);
                               Helpers::putSingleton(opCtx, configCollectionName, config);
                               const auto msgObj = BSON("msg"
                                                        << "initiating set");
                               _service->getOpObserver()->onOpMessage(opCtx, msgObj);
                               wuow.commit();
                               // ReplSetTest assumes that immediately after the replSetInitiate
                               // command returns, it can allow other nodes to initial sync with
                               // no retries and they will succeed. Unfortunately, initial sync
                               // will fail if it finds its sync source has an empty oplog. Thus,
                               // we need to wait here until the seed document is visible in our
                               // oplog.
                               AutoGetCollection oplog(
                                   opCtx, NamespaceString::kRsOplogNamespace, MODE_IS);
                               waitForAllEarlierOplogWritesToBeVisible(opCtx);
                           });

        // Set UUIDs for all non-replicated collections. This is necessary for independent replica
        // sets and config server replica sets started with no data files because collections in
        // local are created prior to the featureCompatibilityVersion being set to 3.6, so the
        // collections are not created with UUIDs. We exclude ShardServers when adding UUIDs to
        // non-replicated collections on the primary because ShardServers are started up by default
        // with featureCompatibilityVersion 3.4, so we don't want to assign UUIDs to them until the
        // cluster's featureCompatibilityVersion is explicitly set to 3.6 by the config server. The
        // below UUID addition for non-replicated collections only occurs on the primary; UUIDs are
        // added to non-replicated collections on secondaries during InitialSync. When the config
        // server sets the featureCompatibilityVersion to 3.6, the shard primary will add UUIDs to
        // all the collections that need them. One special case here is if a shard is already in
        // featureCompatibilityVersion 3.6 and a new node is started up with --shardsvr and added
        // to that shard, the new node will still start up with featureCompatibilityVersion 3.4 and
        // need to have UUIDs added to each collection. These UUIDs are added during InitialSync,
        // because the new node is a secondary.
        if (serverGlobalParams.clusterRole != ClusterRole::ShardServer &&
            FeatureCompatibilityVersion::isCleanStartUp()) {
            auto schemaStatus = updateUUIDSchemaVersionNonReplicated(opCtx, true);
            if (!schemaStatus.isOK()) {
                return schemaStatus;
            }
        }
        FeatureCompatibilityVersion::setIfCleanStartup(opCtx, _storageInterface);
    } catch (const DBException& ex) {
        return ex.toStatus();
    }
    return Status::OK();
}
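// Hedged side note (not MongoDB source): writeConflictRetry above takes the unit of work as a
// lambda and re-runs it whenever the storage engine reports a write conflict. The helper below is
// a generic standalone sketch of that retry-on-conflict pattern; WriteConflictSignal and
// retryOnConflict are hypothetical names, not MongoDB's API.
struct WriteConflictSignal {};  // stand-in for the storage engine's conflict exception

template <typename WorkFn>
void retryOnConflict(WorkFn&& work) {
    while (true) {
        try {
            work();  // attempt the whole unit of work
            return;  // it committed without a conflict
        } catch (const WriteConflictSignal&) {
            // a concurrent writer won; discard this attempt and run the work again
        }
    }
}

// Usage sketch:
//   retryOnConflict([&] { /* begin unit of work, write, commit */ });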
void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
    createOplog(txn);

    MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
        ScopedTransaction scopedXact(txn, MODE_X);
        Lock::GlobalWrite globalWrite(txn->lockState());

        WriteUnitOfWork wuow(txn);
        getGlobalServiceContext()->getOpObserver()->onOpMessage(txn,
                                                                BSON("msg"
                                                                     << "initiating set"));
        wuow.commit();
    }
    MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");
}
void RollbackResyncsCollectionOptionsTest::resyncCollectionOptionsTest(
    CollectionOptions localCollOptions,
    BSONObj remoteCollOptionsObj,
    BSONObj collModCmd,
    std::string collName) {
    createOplog(_opCtx.get());

    auto dbName = "test";
    auto nss = NamespaceString(dbName, collName);

    auto coll = _createCollection(_opCtx.get(), nss.toString(), localCollOptions);

    auto commonOpUuid = unittest::assertGet(UUID::parse("f005ba11-cafe-bead-f00d-123456789abc"));
    auto commonOpBson = BSON("ts" << Timestamp(1, 1) << "t" << 1LL << "op"
                                  << "n"
                                  << "o"
                                  << BSONObj()
                                  << "ns"
                                  << "rollback_test.test"
                                  << "ui"
                                  << commonOpUuid);

    auto commonOperation = std::make_pair(commonOpBson, RecordId(1));

    auto collectionModificationOperation =
        makeCommandOp(Timestamp(Seconds(2), 0), coll->uuid(), nss.toString(), collModCmd, 2);

    RollbackSourceWithCollectionOptions rollbackSource(
        std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation})),
        remoteCollOptionsObj);

    ASSERT_OK(syncRollback(_opCtx.get(),
                           OplogInterfaceMock({collectionModificationOperation, commonOperation}),
                           rollbackSource,
                           {},
                           _coordinator,
                           _replicationProcess.get()));

    // Make sure the collection options are correct.
    AutoGetCollectionForReadCommand autoColl(_opCtx.get(), NamespaceString(nss.toString()));
    auto collAfterRollbackOptions =
        autoColl.getCollection()->getCatalogEntry()->getCollectionOptions(_opCtx.get());

    BSONObjBuilder expectedOptionsBob;
    if (localCollOptions.uuid) {
        localCollOptions.uuid.get().appendToBuilder(&expectedOptionsBob, "uuid");
    }
    expectedOptionsBob.appendElements(remoteCollOptionsObj);

    ASSERT_BSONOBJ_EQ(expectedOptionsBob.obj(), collAfterRollbackOptions.toBSON());
}
void syncDoInitialSync(BackgroundSync* bgsync) {
    stdx::unique_lock<stdx::mutex> lk(_initialSyncMutex, stdx::defer_lock);
    if (!lk.try_lock()) {
        uasserted(34474, "Initial Sync Already Active.");
    }

    {
        const ServiceContext::UniqueOperationContext txnPtr = cc().makeOperationContext();
        OperationContext& txn = *txnPtr;
        createOplog(&txn);
    }

    int failedAttempts = 0;
    while (failedAttempts < kMaxFailedAttempts) {
        try {
            // leave loop when successful
            Status status = _initialSync(bgsync);
            if (status.isOK()) {
                break;
            } else {
                error() << status;
            }
        } catch (const DBException& e) {
            error() << e;
            // Return if in shutdown
            if (inShutdown()) {
                return;
            }
        }

        if (inShutdown()) {
            return;
        }
        error() << "initial sync attempt failed, " << (kMaxFailedAttempts - ++failedAttempts)
                << " attempts remaining";
        sleepmillis(durationCount<Milliseconds>(kInitialSyncRetrySleepDuration));
    }

    // No need to print a stack
    if (failedAttempts >= kMaxFailedAttempts) {
        severe() << "The maximum number of retries have been exhausted for initial sync.";
        fassertFailedNoTrace(16233);
    }
}
static void setup() {
    replSettings.replSet = "foo";
    replSettings.oplogSize = 5 * 1024 * 1024;
    createOplog();

    // setup background sync instance
    _bgsync = new BackgroundSyncTest();

    // setup tail
    _tailer = new repl::SyncTail(_bgsync);

    // setup theReplSet
    ReplSetTest* rst = ReplSetTest::make();
    rst->setSyncTail(_bgsync);
    delete repl::theReplSet;
    repl::theReplSet = rst;
}
void syncDoInitialSync() {
    static const int maxFailedAttempts = 10;

    {
        OperationContextImpl txn;
        createOplog(&txn);
    }

    int failedAttempts = 0;
    while (failedAttempts < maxFailedAttempts) {
        try {
            // leave loop when successful
            Status status = _initialSync();
            if (status.isOK()) {
                break;
            }
            if (status == ErrorCodes::InitialSyncOplogSourceMissing) {
                sleepsecs(1);
                return;
            }
        } catch (const DBException& e) {
            error() << e;
            // Return if in shutdown
            if (inShutdown()) {
                return;
            }
        }

        if (inShutdown()) {
            return;
        }
        error() << "initial sync attempt failed, " << (maxFailedAttempts - ++failedAttempts)
                << " attempts remaining";
        sleepsecs(5);
    }

    // No need to print a stack
    if (failedAttempts >= maxFailedAttempts) {
        severe() << "The maximum number of retries have been exhausted for initial sync.";
        fassertFailedNoTrace(16233);
    }
}
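// Hedged side note (not MongoDB source): the loop above retries initial sync a bounded number of
// times, sleeping between attempts, and aborts only after the budget is spent. Below is a
// standalone sketch of that capped-retry pattern with illustrative names (retryWithLimit is not
// part of the code base).
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>

bool retryWithLimit(const std::function<bool()>& attempt,
                    int maxAttempts,
                    std::chrono::seconds pause) {
    for (int failed = 0; failed < maxAttempts;) {
        try {
            if (attempt()) {
                return true;  // success: leave the loop, like the `break` above
            }
        } catch (const std::exception& e) {
            std::cerr << "attempt threw: " << e.what() << '\n';
        }
        ++failed;
        std::cerr << "attempt failed, " << (maxAttempts - failed) << " attempts remaining\n";
        std::this_thread::sleep_for(pause);
    }
    return false;  // budget exhausted; the caller decides whether to abort (fassert)
}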
void startReplication() {
    /* if we are going to be a replica set, we aren't doing other forms of replication. */
    if (!cmdLine._replSet.empty()) {
        if (replSettings.slave || replSettings.master) {
            log() << "***" << endl;
            log() << "ERROR: can't use --slave or --master replication options with --replSet"
                  << endl;
            log() << "***" << endl;
        }
        newRepl();
        return;
    }

    oldRepl();

    /* this was just to see if anything locks for longer than it should -- we need to be
       careful not to be locked when trying to connect() or query() the other side. */
    //boost::thread tempt(tempThread);

    if (!replSettings.slave && !replSettings.master)
        return;

    {
        dblock lk;
        cc().getAuthenticationInfo()->authorize("admin");
    }

    if (replSettings.slave) {
        assert(replSettings.slave == SimpleSlave);
        log(1) << "slave=true" << endl;
        boost::thread repl_thread(replSlaveThread);
    }

    if (replSettings.master) {
        log(1) << "master=true" << endl;
        replSettings.master = true;
        createOplog();
        boost::thread t(replMasterThread);
    }

    while (replSettings.fastsync)  // don't allow writes until we've set up from log
        sleepmillis(50);
}
Status ReplicationCoordinatorExternalStateImpl::initializeReplSetStorage(OperationContext* txn,
                                                                         const BSONObj& config,
                                                                         bool updateReplOpTime) {
    try {
        createOplog(txn, rsOplogName, true);

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            ScopedTransaction scopedXact(txn, MODE_X);
            Lock::GlobalWrite globalWrite(txn->lockState());

            WriteUnitOfWork wuow(txn);
            Helpers::putSingleton(txn, configCollectionName, config);
            const auto msgObj = BSON("msg"
                                     << "initiating set");
            if (updateReplOpTime) {
                getGlobalServiceContext()->getOpObserver()->onOpMessage(txn, msgObj);
            } else {
                // 'updateReplOpTime' is false when called from the replSetInitiate command while
                // the server is running with replication disabled. We bypass onOpMessage to
                // invoke _logOp directly so that we can override the replication mode and keep
                // _logOp from updating the replication coordinator's op time (an illegal
                // operation when replication is not enabled).
                repl::oplogCheckCloseDatabase(txn, nullptr);
                repl::_logOp(txn,
                             "n",
                             "",
                             msgObj,
                             nullptr,
                             false,
                             rsOplogName,
                             ReplicationCoordinator::modeReplSet,
                             updateReplOpTime);
                repl::oplogCheckCloseDatabase(txn, nullptr);
            }
            wuow.commit();
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");
    } catch (const DBException& ex) {
        return ex.toStatus();
    }
    return Status::OK();
}
Status ReplicationCoordinatorExternalStateImpl::initializeReplSetStorage(OperationContext* txn,
                                                                         const BSONObj& config) {
    try {
        createOplog(txn);

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            ScopedTransaction scopedXact(txn, MODE_X);
            Lock::GlobalWrite globalWrite(txn->lockState());

            WriteUnitOfWork wuow(txn);
            Helpers::putSingleton(txn, configCollectionName, config);
            const auto msgObj = BSON("msg"
                                     << "initiating set");
            getGlobalServiceContext()->getOpObserver()->onOpMessage(txn, msgObj);
            wuow.commit();
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");
    } catch (const DBException& ex) {
        return ex.toStatus();
    }
    return Status::OK();
}
void syncDoInitialSync() {
    static const int maxFailedAttempts = 10;
    OperationContextImpl txn;
    createOplog(&txn);

    int failedAttempts = 0;
    while (failedAttempts < maxFailedAttempts) {
        try {
            _initialSync();
            break;
        } catch (DBException& e) {
            failedAttempts++;
            mongoutils::str::stream msg;
            error() << "initial sync exception: " << e.toString() << " "
                    << (maxFailedAttempts - failedAttempts) << " attempts remaining";
            sleepsecs(5);
        }
    }
    fassert(16233, failedAttempts < maxFailedAttempts);
}
virtual bool run(const string&,
                 BSONObj& cmdObj,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool fromRepl) {
    log() << "replSet replSetInitiate admin command received from client" << rsLog;

    if (!replSet) {
        errmsg = "server is not running with --replSet";
        return false;
    }
    if (theReplSet) {
        errmsg = "already initialized";
        result.append("info", "try querying " + rsConfigNs + " to see current configuration");
        return false;
    }

    {
        // just make sure we can get a write lock before doing anything else. we'll reacquire one
        // later. of course it could be stuck then, but this check lowers the risk if weird things
        // are up.
        time_t t = time(0);
        writelock lk("");
        if (time(0) - t > 10) {
            errmsg = "took a long time to get write lock, so not initiating. "
                     "Initiate when server less busy?";
            return false;
        }

        /* check that we don't already have an oplog.  that could cause issues.
           it is ok if the initiating member has *other* data than that.
        */
        BSONObj o;
        if (Helpers::getFirst(rsoplog, o)) {
            errmsg = rsoplog + string(" is not empty on the initiating member. cannot initiate.");
            return false;
        }
    }

    if (ReplSet::startupStatus == ReplSet::BADCONFIG) {
        errmsg = "server already in BADCONFIG state (check logs); not initiating";
        result.append("info", ReplSet::startupStatusMsg.get());
        return false;
    }
    if (ReplSet::startupStatus != ReplSet::EMPTYCONFIG) {
        result.append("startupStatus", ReplSet::startupStatus);
        errmsg = "all members and seeds must be reachable to initiate set";
        result.append("info", cmdLine._replSet);
        return false;
    }

    BSONObj configObj;
    if (cmdObj["replSetInitiate"].type() != Object) {
        result.append("info2", "no configuration explicitly specified -- making one");
        log() << "replSet info initiate : no configuration specified. "
                 "Using a default configuration for the set" << rsLog;

        string name;
        vector<HostAndPort> seeds;
        set<HostAndPort> seedSet;
        parseReplsetCmdLine(cmdLine._replSet, name, seeds, seedSet);  // may throw...

        bob b;
        b.append("_id", name);
        bob members;
        members.append("0", BSON("_id" << 0 << "host" << HostAndPort::Me().toString()));
        for (unsigned i = 0; i < seeds.size(); i++)
            members.append(bob::numStr(i + 1),
                           BSON("_id" << i + 1 << "host" << seeds[i].toString()));
        b.appendArray("members", members.obj());
        configObj = b.obj();

        log() << "replSet created this configuration for initiation : " << configObj.toString()
              << rsLog;
    } else {
        configObj = cmdObj["replSetInitiate"].Obj();
    }

    bool parsed = false;
    try {
        ReplSetConfig newConfig(configObj);
        parsed = true;

        if (newConfig.version > 1) {
            errmsg = "can't initiate with a version number greater than 1";
            return false;
        }

        log() << "replSet replSetInitiate config object parses ok, " << newConfig.members.size()
              << " members specified" << rsLog;

        checkMembersUpForConfigChange(newConfig, true);

        log() << "replSet replSetInitiate all members seem up" << rsLog;

        createOplog();

        writelock lk("");
        bo comment = BSON("msg" << "initiating set");
        newConfig.saveConfigLocally(comment);
        log() << "replSet replSetInitiate config now saved locally. "
                 "Should come online in about a minute." << rsLog;
        result.append("info", "Config now saved locally. Should come online in about a minute.");
        ReplSet::startupStatus = ReplSet::SOON;
        ReplSet::startupStatusMsg.set("Received replSetInitiate - should come online shortly.");
    } catch (DBException& e) {
        log() << "replSet replSetInitiate exception: " << e.what() << rsLog;
        if (!parsed)
            errmsg = string("couldn't parse cfg object ") + e.what();
        else
            errmsg = string("couldn't initiate : ") + e.what();
        return false;
    }

    return true;
}
Status LegacyReplicationCoordinator::processReplSetInitiate(OperationContext* txn,
                                                            const BSONObj& givenConfig,
                                                            BSONObjBuilder* resultObj) {
    log() << "replSet replSetInitiate admin command received from client" << rsLog;

    if (!_settings.usingReplSets()) {
        return Status(ErrorCodes::NoReplicationEnabled, "server is not running with --replSet");
    }

    if (theReplSet) {
        resultObj->append("info", "try querying " + rsConfigNs + " to see current configuration");
        return Status(ErrorCodes::AlreadyInitialized, "already initialized");
    }

    try {
        {
            // just make sure we can get a write lock before doing anything else. we'll
            // reacquire one later. of course it could be stuck then, but this check lowers the
            // risk if weird things are up.
            time_t t = time(0);
            Lock::GlobalWrite lk(txn->lockState());
            if (time(0) - t > 10) {
                return Status(ErrorCodes::ExceededTimeLimit,
                              "took a long time to get write lock, so not initiating. "
                              "Initiate when server less busy?");
            }

            /* check that we don't already have an oplog.  that could cause issues.
               it is ok if the initiating member has *other* data than that.
            */
            BSONObj o;
            if (Helpers::getFirst(txn, rsoplog, o)) {
                return Status(ErrorCodes::AlreadyInitialized,
                              rsoplog +
                                  string(" is not empty on the initiating member. "
                                         "cannot initiate."));
            }
        }

        if (ReplSet::startupStatus == ReplSet::BADCONFIG) {
            resultObj->append("info", ReplSet::startupStatusMsg.get());
            return Status(ErrorCodes::InvalidReplicaSetConfig,
                          "server already in BADCONFIG state (check logs); not initiating");
        }
        if (ReplSet::startupStatus != ReplSet::EMPTYCONFIG) {
            resultObj->append("startupStatus", ReplSet::startupStatus);
            resultObj->append("info", _settings.replSet);
            return Status(ErrorCodes::InvalidReplicaSetConfig,
                          "all members and seeds must be reachable to initiate set");
        }

        BSONObj configObj;
        if (!givenConfig.isEmpty()) {
            configObj = givenConfig;
        } else {
            resultObj->append("info2", "no configuration explicitly specified -- making one");
            log() << "replSet info initiate : no configuration specified. "
                     "Using a default configuration for the set" << rsLog;

            string name;
            vector<HostAndPort> seeds;
            set<HostAndPort> seedSet;
            parseReplSetSeedList(_settings.replSet, name, seeds, seedSet);  // may throw...

            BSONObjBuilder b;
            b.append("_id", name);
            BSONObjBuilder members;
            HostAndPort me = someHostAndPortForMe();
            members.append("0", BSON("_id" << 0 << "host" << me.toString()));
            resultObj->append("me", me.toString());
            for (unsigned i = 0; i < seeds.size(); i++) {
                members.append(BSONObjBuilder::numStr(i + 1),
                               BSON("_id" << i + 1 << "host" << seeds[i].toString()));
            }
            b.appendArray("members", members.obj());
            configObj = b.obj();

            log() << "replSet created this configuration for initiation : "
                  << configObj.toString() << rsLog;
        }

        scoped_ptr<ReplSetConfig> newConfig;
        try {
            newConfig.reset(ReplSetConfig::make(configObj));
        } catch (const DBException& e) {
            log() << "replSet replSetInitiate exception: " << e.what() << rsLog;
            return Status(ErrorCodes::InvalidReplicaSetConfig,
                          mongoutils::str::stream() << "couldn't parse cfg object " << e.what());
        }

        if (newConfig->version > 1) {
            return Status(ErrorCodes::InvalidReplicaSetConfig,
                          "can't initiate with a version number greater than 1");
        }

        log() << "replSet replSetInitiate config object parses ok, " << newConfig->members.size()
              << " members specified" << rsLog;

        checkMembersUpForConfigChange(*newConfig, *resultObj, true);

        log() << "replSet replSetInitiate all members seem up" << rsLog;

        createOplog(txn);

        Lock::GlobalWrite lk(txn->lockState());
        BSONObj comment = BSON("msg" << "initiating set");
        newConfig->saveConfigLocally(txn, comment);
        log() << "replSet replSetInitiate config now saved locally. "
                 "Should come online in about a minute." << rsLog;
        resultObj->append("info",
                          "Config now saved locally. Should come online in about a minute.");
        ReplSet::startupStatus = ReplSet::SOON;
        ReplSet::startupStatusMsg.set("Received replSetInitiate - should come online shortly.");
    } catch (const DBException& e) {
        return e.toStatus();
    }

    return Status::OK();
}
Base() {
    cmdLine._replSet = "foo";
    cmdLine.oplogSize = 5;
    createOplog();
}
void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
    createOplog(txn);
    logOpInitiate(txn, BSON("msg" << "initiating set"));
}