void ReplicaSetMonitor::checkAll() { set<string> seen; while ( true ) { ReplicaSetMonitorPtr m; { for ( map<string,ReplicaSetMonitorPtr>::iterator i=_sets.begin(); i!=_sets.end(); ++i ) { string name = i->first; if ( seen.count( name ) ) continue; LOG(0) << "checking replica set: " << name << endl; seen.insert( name ); m = i->second; break; } } if ( ! m ) break; m->check(); } }
// Appends connection-pool statistics to 'b':
//   "hosts"          -- per-host/timeout available and created counts
//   "replicaSets"    -- info for every tracked replica set monitor
//   "createdByType"  -- created-connection totals keyed by connection type
//   "totalAvailable" / "totalCreated" -- pool-wide totals
void DBConnectionPool::appendInfo( BSONObjBuilder& b ) {

    int avail = 0;
    long long created = 0;
    map<ConnectionString::ConnectionType,long long> createdByType;

    BSONObjBuilder bb( b.subobjStart( "hosts" ) );
    {
        boost::lock_guard<boost::mutex> lk( _mutex );
        for ( PoolMap::iterator i=_pools.begin(); i!=_pools.end(); ++i ) {
            if ( i->second.numCreated() == 0 )
                continue;

            // Key is "<ident>::<timeout>" so pools to the same host with
            // different timeouts get separate entries.
            string s = str::stream() << i->first.ident << "::" << i->first.timeout;

            BSONObjBuilder temp( bb.subobjStart( s ) );
            temp.append( "available" , i->second.numAvailable() );
            temp.appendNumber( "created" , i->second.numCreated() );
            temp.done();

            avail += i->second.numAvailable();
            created += i->second.numCreated();

            long long& x = createdByType[i->second.type()];
            x += i->second.numCreated();
        }
    }
    bb.done();

    // Always report all replica sets being tracked
    set<string> replicaSets = ReplicaSetMonitor::getAllTrackedSets();

    BSONObjBuilder setBuilder( b.subobjStart( "replicaSets" ) );
    for ( set<string>::iterator i=replicaSets.begin(); i!=replicaSets.end(); ++i ) {
        string rs = *i;
        ReplicaSetMonitorPtr m = ReplicaSetMonitor::get( rs );
        if ( ! m ) {
            warning() << "no monitor for set: " << rs << endl;
            continue;
        }

        BSONObjBuilder temp( setBuilder.subobjStart( rs ) );
        m->appendInfo( temp );
        temp.done();
    }
    setBuilder.done();

    {
        // BUGFIX: this subobject used to be opened on 'bb' ("hosts"), which had
        // already been closed by bb.done() above -- appending to a builder after
        // done() corrupts the output. Open it on the parent builder 'b' instead.
        BSONObjBuilder temp( b.subobjStart( "createdByType" ) );
        for ( map<ConnectionString::ConnectionType,long long>::iterator i=createdByType.begin();
              i!=createdByType.end(); ++i ) {
            temp.appendNumber( ConnectionString::typeToString( i->first ) , i->second );
        }
        temp.done();
    }

    b.append( "totalAvailable" , avail );
    b.appendNumber( "totalCreated" , created );
}
/**
 * Selects a node matching the given read preference and returns a connection
 * to it, or NULL when the monitor finds no compatible host.
 *
 * Fast path: reuses _lastSlaveOkConn when the cached host is still compatible
 * with 'readPref'.  If the monitor reports the chosen host as primary, the
 * shared _master connection is reused instead of opening a new one (see the
 * comment below).  Otherwise a fresh connection is opened, wired with this
 * object's callbacks/hooks, and authenticated.
 */
DBClientConnection* DBClientReplicaSet::selectNodeUsingTags(
        shared_ptr<ReadPreferenceSetting> readPref) {
    if (!shouldReevaluate() && checkLastHost(readPref.get())) {
        LOG( 3 ) << "dbclient_rs selecting compatible last used node "
                 << _lastSlaveOkHost << endl;
        return _lastSlaveOkConn.get();
    }

    ReplicaSetMonitorPtr monitor = _getMonitor();
    _lastSlaveOkHost = monitor->getHostOrRefresh(*readPref);

    if ( _lastSlaveOkHost.empty() ){
        LOG( 3 ) << "dbclient_rs no compatible node found" << endl;
        return NULL;
    }

    _lastReadPref = readPref;

    // Primary connection is special because it is the only connection that is
    // versioned in mongos. Therefore, we have to make sure that this object
    // maintains only one connection to the primary and use that connection
    // every time we need to talk to the primary.
    if (monitor->isPrimary(_lastSlaveOkHost)) {
        checkMaster();
        _lastSlaveOkConn = _master;
        _lastSlaveOkHost = _masterHost; // implied, but still assign just to be safe

        LOG( 3 ) << "dbclient_rs selecting primary node " << _lastSlaveOkHost << endl;
        return _master.get();
    }

    string errmsg;
    ConnectionString connStr(_lastSlaveOkHost);
    // Needs to perform a dynamic_cast because we need to set the replSet
    // callback. We should eventually not need this after we remove the
    // callback.
    DBClientConnection* newConn = dynamic_cast<DBClientConnection*>(
            connStr.connect(errmsg, _so_timeout));

    // Assert here instead of returning NULL since the contract of this method is such
    // that returning NULL means none of the nodes were good, which is not the case here.
    uassert(16532, str::stream() << "Failed to connect to " << _lastSlaveOkHost.toString(),
            newConn != NULL);

    _lastSlaveOkConn.reset(newConn);
    _lastSlaveOkConn->setReplSetClientCallback(this);
    _lastSlaveOkConn->setRunCommandHook(_runCommandHook);
    _lastSlaveOkConn->setPostRunCommandHook(_postRunCommandHook);

    // Authenticate the fresh connection with any credentials this object holds.
    _auth(_lastSlaveOkConn.get());

    LOG( 3 ) << "dbclient_rs selecting node " << _lastSlaveOkHost << endl;
    return _lastSlaveOkConn.get();
}
// This can't throw an exception because it is called in the destructor of ScopedDbConnection string DBClientReplicaSet::getServerAddress() const { ReplicaSetMonitorPtr rsm = ReplicaSetMonitor::get( _setName, true ); if ( !rsm ) { warning() << "Trying to get server address for DBClientReplicaSet, but no " "ReplicaSetMonitor exists for " << _setName << endl; return str::stream() << _setName << "/" ; } return rsm->getServerAddress(); }
void DBClientReplicaSet::isntMaster() { log() << "got not master for: " << _masterHost << endl; // Can't use _getMonitor because that will create a new monitor from the cached seed if // the monitor doesn't exist. ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get( _setName ); if ( monitor ) { monitor->failedHost( _masterHost ); } _master.reset(); }
// Prepares shard versioning for 'state' before dispatching a cluster query,
// with special handling so a secondary-allowed (slaveOk) query can proceed
// while the replica set primary is down.
// NOTE(review): this definition is truncated in this chunk -- the else-branch
// at the end continues beyond the visible text.
void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(
    OperationContext* txn,
    PCStatePtr state,
    const ShardId& shardId,
    std::shared_ptr<Shard> primary,
    const NamespaceString& ns,
    const string& vinfo,
    std::shared_ptr<ChunkManager> manager) {
    if (manager) {
        state->manager = manager;
    } else if (primary) {
        state->primary = primary;
    }

    verify(!primary || shardId == primary->getId());

    // Setup conn
    if (!state->conn) {
        const auto shard = grid.shardRegistry()->getShard(txn, shardId);
        state->conn.reset(new ShardConnection(shard->getConnString(), ns.ns(), manager));
    }

    const DBClientBase* rawConn = state->conn->getRawConn();
    // Version failure is only tolerated for replica-set connections running a
    // query that may target a secondary.
    bool allowShardVersionFailure = rawConn->type() == ConnectionString::SET &&
        DBClientReplicaSet::isSecondaryQuery(_qSpec.ns(), _qSpec.query(), _qSpec.options());
    bool connIsDown = rawConn->isFailed();
    if (allowShardVersionFailure && !connIsDown) {
        // If the replica set connection believes that it has a valid primary that is up,
        // confirm that the replica set monitor agrees that the suspected primary is indeed up.
        const DBClientReplicaSet* replConn = dynamic_cast<const DBClientReplicaSet*>(rawConn);
        invariant(replConn);
        ReplicaSetMonitorPtr rsMonitor = ReplicaSetMonitor::get(replConn->getSetName());
        // NOTE(review): other call sites in this file show ReplicaSetMonitor::get()
        // may return an empty pointer -- confirm rsMonitor cannot be null here,
        // otherwise this dereference would crash.
        if (!rsMonitor->isHostUp(replConn->getSuspectedPrimaryHostAndPort())) {
            connIsDown = true;
        }
    }

    if (allowShardVersionFailure && connIsDown) {
        // If we're doing a secondary-allowed query and the primary is down, don't attempt to
        // set the shard version.

        state->conn->donotCheckVersion();

        // A side effect of this short circuiting is the mongos will not be able figure out that
        // the primary is now up on it's own and has to rely on other threads to refresh node
        // states.

        OCCASIONALLY {
            const DBClientReplicaSet* repl = dynamic_cast<const DBClientReplicaSet*>(rawConn);
            dassert(repl);
            warning() << "Primary for " << repl->getServerAddress()
                      << " was down before, bypassing setShardVersion."
                      << " The local replica set view and targeting may be stale.";
        }
    } else {
bool Shard::containsNode( const string& node ) const { if ( _addr == node ) return true; if ( _cs.type() == ConnectionString::SET ) { ReplicaSetMonitorPtr rs = ReplicaSetMonitor::get( _cs.getSetName(), true ); return rs->contains( node ); } return false; }
// Shell helper: returns the ReplicaSetMonitor's stats for the named set, or a
// plain string object when no monitor is registered under that name.
BSONObj replMonitorStats(const BSONObj& a, void* data) {
    uassert(17134,
            "replMonitorStats requires a single string argument (the ReplSet name)",
            a.nFields() == 1 && a.firstElement().type() == String);

    ReplicaSetMonitorPtr monitor =
        ReplicaSetMonitor::get(a.firstElement().valuestrsafe(), true);
    if (!monitor)
        return BSON("" << "no ReplSetMonitor exists by that name");

    BSONObjBuilder bob;
    monitor->appendInfo(bob);
    return bob.obj();
}
// Prepares shard versioning for 'state' before dispatching a cluster query.
// For replica-set connections running a secondary-eligible query, shard
// version checking is skipped entirely when the set is not known to have a
// good primary.
// NOTE(review): this definition is truncated in this chunk -- the actual
// setShardVersion path after the early return continues beyond the visible
// text.
void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(
    OperationContext* txn,
    PCStatePtr state,
    const ShardId& shardId,
    std::shared_ptr<Shard> primary,
    const NamespaceString& ns,
    const string& vinfo,
    std::shared_ptr<ChunkManager> manager) {
    if (manager) {
        state->manager = manager;
    } else if (primary) {
        state->primary = primary;
    }

    verify(!primary || shardId == primary->getId());

    // Setup conn
    if (!state->conn) {
        const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId));
        state->conn.reset(new ShardConnection(shard->getConnString(), ns.ns(), manager));
    }

    const DBClientBase* rawConn = state->conn->getRawConn();
    // Version failure is only tolerated for replica-set connections running a
    // query that may target a secondary.
    bool allowShardVersionFailure = rawConn->type() == ConnectionString::SET &&
        DBClientReplicaSet::isSecondaryQuery(_qSpec.ns(), _qSpec.query(), _qSpec.options());

    // Skip shard version checking if primary is known to be down.
    if (allowShardVersionFailure) {
        const DBClientReplicaSet* replConn = dynamic_cast<const DBClientReplicaSet*>(rawConn);
        invariant(replConn);
        ReplicaSetMonitorPtr rsMonitor = ReplicaSetMonitor::get(replConn->getSetName());
        // Guard against a missing monitor before dereferencing it below.
        uassert(16388,
                str::stream() << "cannot access unknown replica set: " << replConn->getSetName(),
                rsMonitor != nullptr);
        if (!rsMonitor->isKnownToHaveGoodPrimary()) {
            state->conn->donotCheckVersion();

            // A side effect of this short circuiting is the mongos will not be able figure out
            // that the primary is now up on it's own and has to rely on other threads to refresh
            // node states.

            OCCASIONALLY {
                const DBClientReplicaSet* repl = dynamic_cast<const DBClientReplicaSet*>(rawConn);
                dassert(repl);
                warning() << "Primary for " << repl->getServerAddress()
                          << " was down before, bypassing setShardVersion."
                          << " The local replica set view and targeting may be stale.";
            }

            return;
        }
    }
// Resolves 'connString' to the host writes should target: the string itself
// for a single-server (MASTER) string, or the set's current primary for a
// replica-set (SET) string.  Creates the set's monitor on first use.
Status DBClientShardResolver::findMaster( const std::string connString,
                                          ConnectionString* resolvedHost ) {
    std::string parseErr;

    ConnectionString target = ConnectionString::parse( connString, parseErr );
    dassert( parseErr == "" );
    dassert( target.type() == ConnectionString::SET
             || target.type() == ConnectionString::MASTER );

    if ( target.type() == ConnectionString::MASTER ) {
        // Nothing to resolve for a standalone server.
        *resolvedHost = target;
        return Status::OK();
    }

    //
    // If we need to, then get the particular node we're targeting in the replica set
    //

    // Don't create the monitor unless we need to - fast path
    ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get(target.getSetName());

    if (!monitor) {
        // Slow path: seed a monitor from the servers in the connection string.
        std::set<HostAndPort> seeds(target.getServers().begin(),
                                    target.getServers().end());
        ReplicaSetMonitor::createIfNeeded(target.getSetName(), seeds);
        monitor = ReplicaSetMonitor::get(target.getSetName());
    }

    if (!monitor) {
        return Status( ErrorCodes::ReplicaSetNotFound,
                       string("unknown replica set ") + target.getSetName() );
    }

    try {
        // This can throw when we don't find a master!
        HostAndPort primary = monitor->getMasterOrUassert();
        *resolvedHost = ConnectionString::parse( primary.toString(), parseErr );
        dassert( parseErr == "" );
        return Status::OK();
    }
    catch ( const DBException& ) {
        return Status( ErrorCodes::HostNotFound,
                       string("could not contact primary for replica set ")
                           + monitor->getName() );
    }

    // Unreachable
    dassert( false );
    return Status( ErrorCodes::UnknownError, "" );
}
// Returns a live connection to the set's primary, (re)connecting when the
// cached master connection is stale or has failed.  Throws (uassert 13639)
// when a fresh connection to the reported primary cannot be established.
DBClientConnection * DBClientReplicaSet::checkMaster() {
    ReplicaSetMonitorPtr monitor = _getMonitor();
    HostAndPort masterHost = monitor->getMasterOrUassert();

    if ( masterHost == _masterHost && _master ) {
        // a master is selected. let's just make sure connection didn't die
        if ( ! _master->isFailed() )
            return _master.get();

        // Cached connection is dead: report it, then re-ask the monitor.
        monitor->failedHost( _masterHost );
        masterHost = monitor->getMasterOrUassert(); // old master failed, try again.
    }

    _masterHost = masterHost;

    ConnectionString connStr(_masterHost);

    string errmsg;
    DBClientConnection* newConn = NULL;

    try {
        // Needs to perform a dynamic_cast because we need to set the replSet
        // callback. We should eventually not need this after we remove the
        // callback.
        newConn = dynamic_cast<DBClientConnection*>(
                connStr.connect(errmsg, _so_timeout));
    }
    catch (const AssertionException& ex) {
        errmsg = ex.toString();
    }

    if (newConn == NULL || !errmsg.empty()) {
        // Tell the monitor before throwing so the host gets rescanned.
        monitor->failedHost(_masterHost);
        uasserted(13639, str::stream() << "can't connect to new replica set master ["
                  << _masterHost.toString() << "]"
                  << (errmsg.empty()? "" : ", err: ") << errmsg);
    }

    _master.reset(newConn);
    _master->setReplSetClientCallback(this);
    _master->setRunCommandHook(_runCommandHook);
    _master->setPostRunCommandHook(_postRunCommandHook);

    _auth( _master.get() );
    return _master.get();
}
bool Shard::containsNode( const string& node ) const { if ( _addr == node ) return true; if ( _cs.type() == ConnectionString::SET ) { ReplicaSetMonitorPtr rs = ReplicaSetMonitor::get( _cs.getSetName(), true ); if (!rs) { // Possibly still yet to be initialized. See SERVER-8194. warning() << "Monitor not found for a known shard: " << _cs.getSetName() << endl; return false; } return rs->contains( node ); } return false; }
// Resolves a connection string to the concrete host writes should target.
// A single-server (MASTER) string resolves to itself; a replica-set (SET)
// string resolves to the set's current primary.  The set's monitor must
// already exist -- this variant never creates one.
Status DBClientShardResolver::findMaster( const std::string connString,
                                          ConnectionString* resolvedHost ) {
    std::string parseErr;

    ConnectionString host = ConnectionString::parse( connString, parseErr );
    dassert( parseErr == "" );
    dassert( host.type() == ConnectionString::SET
             || host.type() == ConnectionString::MASTER );

    if ( host.type() == ConnectionString::MASTER ) {
        // Nothing to resolve for a standalone server.
        *resolvedHost = host;
        return Status::OK();
    }

    //
    // If we need to, then get the particular node we're targeting in the replica set
    //

    // Does not reload the monitor if it doesn't currently exist
    ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get( host.getSetName(), false );
    if ( !monitor ) {
        return Status( ErrorCodes::ReplicaSetNotFound,
                       string("unknown replica set ") + host.getSetName() );
    }

    try {
        // This can throw when we don't find a master!
        HostAndPort primary = monitor->getMasterOrUassert();
        *resolvedHost = ConnectionString::parse( primary.toString( true ), parseErr );
        dassert( parseErr == "" );
        return Status::OK();
    }
    catch ( const DBException& ) {
        return Status( ErrorCodes::HostNotFound,
                       string("could not contact primary for replica set ")
                           + monitor->getName() );
    }

    // Unreachable
    dassert( false );
    return Status( ErrorCodes::UnknownError, "" );
}
void DBClientConnection::handleNotMasterResponse(const BSONObj& replyBody, StringData errorMsgFieldName) { const BSONElement errorMsgElem = replyBody[errorMsgFieldName]; const BSONElement codeElem = replyBody["code"]; if (!isNotMasterErrorString(errorMsgElem) && !ErrorCodes::isNotMasterError(ErrorCodes::Error(codeElem.numberInt()))) { return; } ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get(_parentReplSetName); if (monitor) { monitor->failedHost(_serverAddress, {ErrorCodes::NotMaster, str::stream() << "got not master from: " << _serverAddress << " of repl set: " << _parentReplSetName}); } _markFailed(kSetFlag); }
// Ensure nothing breaks when out-of-band failedHost is called during scan TEST(ReplicaSetMonitorTests, OutOfBandFailedHost) { SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet); ReplicaSetMonitorPtr rsm = boost::make_shared<ReplicaSetMonitor>(state); Refresher refresher = rsm->startOrContinueRefresh(); for (size_t i = 0; i != basicSeeds.size(); ++i) { NextStep ns = refresher.getNextStep(); } for (size_t i = 0; i != basicSeeds.size(); ++i) { bool primary = (i == 0); refresher.receivedIsMaster(basicSeeds[i], -1, BSON( "setName" << "name" << "ismaster" << primary << "secondary" << !primary << "hosts" << BSON_ARRAY("a" << "b" << "c") << "ok" << true )); if (i >= 1) { HostAndPort a("a"); rsm->failedHost(a); Node* node = state->findNode(a); ASSERT(node); ASSERT(!node->isUp); ASSERT(!node->isMaster); } else { Node* node = state->findNode(HostAndPort("a")); ASSERT(node); ASSERT(node->isUp); ASSERT(node->isMaster); } } }
/**
 * Adds a new shard to the cluster.
 *
 * @param name    in/out: desired shard name; may be NULL or empty, in which
 *                case the replica set name or a generated name is used.
 * @param servers connection string of the mongod / replica set to add.
 * @param maxSize maximum shard data size; only recorded when > 0.
 * @param errMsg  out: human-readable reason on failure.
 * @return true on success, false (with errMsg set) otherwise.
 */
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
    // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
    string nameInternal;
    if ( ! name ) {
        name = &nameInternal;
    }

    ReplicaSetMonitorPtr rsMonitor;

    // Check whether the host (or set) exists and run several sanity checks on this request.
    // There are two set of sanity checks: making sure adding this particular shard is consistent
    // with the replica set state (if it exists) and making sure this shards databases can be
    // brought into the grid without conflict.

    vector<string> dbNames;
    try {
        ScopedDbConnection newShardConn(servers.toString());
        // Round trip to verify the host is actually reachable.
        newShardConn->getLastError();

        if ( newShardConn->type() == ConnectionString::SYNC ) {
            newShardConn.done();
            errMsg = "can't use sync cluster as a shard. for replica set, have to use <setname>/<server1>,<server2>,...";
            return false;
        }

        BSONObj resIsMongos;
        bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos );

        // should return ok=0, cmd not found if it's a normal mongod
        if ( ok ) {
            errMsg = "can't add a mongos process as a shard";
            newShardConn.done();
            return false;
        }

        BSONObj resIsMaster;
        ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
        if ( !ok ) {
            ostringstream ss;
            ss << "failed running isMaster: " << resIsMaster;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // if the shard has only one host, make sure it is not part of a replica set
        string setName = resIsMaster["setName"].str();
        string commandSetName = servers.getSetName();
        if ( commandSetName.empty() && ! setName.empty() ) {
            ostringstream ss;
            ss << "host is part of set " << setName << ", use replica set url format <setname>/<server1>,<server2>,....";
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }
        if ( !commandSetName.empty() && setName.empty() ) {
            ostringstream ss;
            ss << "host did not return a set name, is the replica set still initializing? 
" << resIsMaster;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // if the shard is part of replica set, make sure it is the right one
        if ( ! commandSetName.empty() && ( commandSetName != setName ) ) {
            ostringstream ss;
            ss << "host is part of a different set: " << setName;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        if( setName.empty() ) {
            // check this isn't a --configsvr
            BSONObj res;
            bool ok = newShardConn->runCommand("admin",BSON("replSetGetStatus"<<1),res);
            ostringstream ss;
            if( !ok && res["info"].type() == String && res["info"].String() == "configsvr" ) {
                errMsg = "the specified mongod is a --configsvr and should thus not be a shard server";
                newShardConn.done();
                return false;
            }
        }

        // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
        // the set. It is fine if not all members of the set are present in 'servers'.
        bool foundAll = true;
        string offendingHost;
        if ( ! commandSetName.empty() ) {
            set<string> hostSet;
            BSONObjIterator iter( resIsMaster["hosts"].Obj() );
            while ( iter.more() ) {
                hostSet.insert( iter.next().String() ); // host:port
            }
            if ( resIsMaster["passives"].isABSONObj() ) {
                BSONObjIterator piter( resIsMaster["passives"].Obj() );
                while ( piter.more() ) {
                    hostSet.insert( piter.next().String() ); // host:port
                }
            }
            if ( resIsMaster["arbiters"].isABSONObj() ) {
                BSONObjIterator piter( resIsMaster["arbiters"].Obj() );
                while ( piter.more() ) {
                    hostSet.insert( piter.next().String() ); // host:port
                }
            }

            vector<HostAndPort> hosts = servers.getServers();
            for ( size_t i = 0 ; i < hosts.size() ; i++ ) {
                // Normalize to host:port before comparing against the set's own view.
                if (!hosts[i].hasPort()) {
                    hosts[i].setPort(ServerGlobalParams::DefaultDBPort);
                }
                string host = hosts[i].toString(); // host:port
                if ( hostSet.find( host ) == hostSet.end() ) {
                    offendingHost = host;
                    foundAll = false;
                    break;
                }
            }
        }
        if ( ! foundAll ) {
            ostringstream ss;
            ss << "in seed list " << servers.toString() << ", host " << offendingHost
               << " does not belong to replica set " << setName;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // shard name defaults to the name of the replica set
        if ( name->empty() && ! setName.empty() )
            *name = setName;

        // In order to be accepted as a new shard, that mongod must not have any database name that exists already
        // in any other shards. If that test passes, the new shard's databases are going to be entered as
        // non-sharded db's whose primary is the newly added shard.

        BSONObj resListDB;
        ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
        if ( !ok ) {
            ostringstream ss;
            ss << "failed listing " << servers.toString() << "'s databases:" << resListDB;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        BSONObjIterator i( resListDB["databases"].Obj() );
        while ( i.more() ) {
            BSONObj dbEntry = i.next().Obj();
            const string& dbName = dbEntry["name"].String();
            if ( _isSpecialLocalDB( dbName ) ) {
                // 'local', 'admin', and 'config' are system DBs and should be excluded here
                continue;
            }
            else {
                dbNames.push_back( dbName );
            }
        }

        if ( newShardConn->type() == ConnectionString::SET )
            rsMonitor = ReplicaSetMonitor::get( setName );

        newShardConn.done();
    }
    catch ( DBException& e ) {
        if ( servers.type() == ConnectionString::SET ) {
            // Drop any monitor created for the set we failed to reach.
            ReplicaSetMonitor::remove( servers.getSetName() );
        }
        ostringstream ss;
        ss << "couldn't connect to new shard ";
        ss << e.what();
        errMsg = ss.str();
        return false;
    }

    // check that none of the existing shard candidate's db's exist elsewhere
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , false );
        if ( config.get() != NULL ) {
            ostringstream ss;
            ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
            ss << "' exists in another " << config->getPrimary().toString();
            errMsg = ss.str();
            return false;
        }
    }

    // if a name for a shard wasn't provided, pick one.
    if ( name->empty() && ! _getNewShardName( name ) ) {
        errMsg = "error generating new shard name";
        return false;
    }

    // build the ConfigDB shard document
    BSONObjBuilder b;
    b.append(ShardType::name(), *name);
    // For replica sets, record the monitor's canonical address rather than
    // the raw seed list the caller supplied.
    b.append(ShardType::host(),
             rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
    if (maxSize > 0) {
        b.append(ShardType::maxSize(), maxSize);
    }
    BSONObj shardDoc = b.obj();

    {
        ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

        // check whether the set of hosts (or single host) is not an already a known shard
        BSONObj old = conn->findOne(ShardType::ConfigNS,
                                    BSON(ShardType::host(servers.toString())));

        if ( ! old.isEmpty() ) {
            errMsg = "host already used";
            conn.done();
            return false;
        }
        conn.done();
    }

    log() << "going to add shard: " << shardDoc << endl;

    Status result = clusterInsert( ShardType::ConfigNS,
                                   shardDoc,
                                   WriteConcernOptions::AllConfigs,
                                   NULL );

    if ( !result.isOK() ) {
        errMsg = result.reason();
        log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
        return false;
    }

    Shard::reloadShardInfo();

    // add all databases of the new shard
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , true , *name );
        if ( ! config ) {
            log() << "adding shard " << servers << " even though could not add database " << *it << endl;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", *name);
    shardDetails.append("host", servers.toString());
    configServer.logChange("addShard", "", shardDetails.obj());

    return true;
}
// Picks the host that writes for 'shardName' should be sent to and returns it
// via 'shardHost'.  "config"/"admin" map to the config server; replica-set
// shards resolve to the set's current primary; standalone shards resolve to
// themselves.
Status DBClientShardResolver::chooseWriteHost( const string& shardName,
                                               ConnectionString* shardHost ) const {

    // Declare up here for parsing later
    string errMsg;

    // Special-case for config and admin
    if ( shardName == "config" || shardName == "admin" ) {
        *shardHost = ConnectionString::parse( configServer.modelServer(), errMsg );
        dassert( errMsg == "" );
        return Status::OK();
    }

    //
    // First get the information about the shard from the shard cache
    //

    // Internally uses our shard cache, does no reload
    Shard shard = Shard::findIfExists( shardName );
    if ( shard.getName() == "" ) {
        return Status( ErrorCodes::ShardNotFound,
                       string("unknown shard name ") + shardName );
    }

    ConnectionString shardAddress = ConnectionString::parse( shard.getConnString(), errMsg );
    dassert( errMsg == "" );
    dassert( shardAddress.type() == ConnectionString::SET
             || shardAddress.type() == ConnectionString::MASTER );

    if ( shardAddress.type() == ConnectionString::MASTER ) {
        // Standalone shard: nothing further to resolve.
        *shardHost = shardAddress;
        return Status::OK();
    }

    //
    // If we need to, then get the particular node we're targeting in the replica set
    //

    // Does not reload the monitor if it doesn't currently exist
    ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get( shardAddress.getSetName(), false );
    if ( !monitor ) {
        return Status( ErrorCodes::ReplicaSetNotFound,
                       string("unknown replica set ") + shardAddress.getSetName() );
    }

    try {
        // This can throw when we don't find a master!
        HostAndPort primaryHost = monitor->getMaster();
        *shardHost = ConnectionString::parse( primaryHost.toString( true ), errMsg );
        dassert( errMsg == "" );
        return Status::OK();
    }
    catch ( const DBException& ) {
        return Status( ErrorCodes::HostNotFound,
                       string("could not contact primary for replica set ")
                           + monitor->getName() );
    }

    // Unreachable
    dassert( false );
    return Status( ErrorCodes::UnknownError, "" );
}
/**
 * Adds a new shard to the cluster (refactored variant: each remote command
 * uses its own short-lived ScopedDbConnection, and replica-set validation is
 * delegated to addReplSetShardCheck).
 *
 * @param name    in/out: desired shard name; may be NULL or empty, in which
 *                case the replica set name or a generated name is used.
 * @param servers connection string of the mongod / replica set to add.
 * @param maxSize maximum shard data size; only recorded when > 0.
 * @param errMsg  out: human-readable reason on failure.
 * @return true on success, false (with errMsg set) otherwise.
 */
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
    // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
    string nameInternal;
    if ( ! name ) {
        name = &nameInternal;
    }

    ReplicaSetMonitorPtr rsMonitor;

    // Check whether the host (or set) exists and run several sanity checks on this request.
    // There are two set of sanity checks: making sure adding this particular shard is consistent
    // with the replica set state (if it exists) and making sure this shards databases can be
    // brought into the grid without conflict.

    if ( servers.type() == ConnectionString::SYNC ) {
        errMsg = "can't use sync cluster as a shard for replica set, "
                "have to use <setname>/<server1>,<server2>,...";
        return false;
    }

    vector<string> dbNames;
    try {
        bool ok = false;

        {
            ScopedDbConnection newShardConn(servers.toString());

            BSONObj resIsMongos;
            ok = newShardConn->runCommand( "admin", BSON( "isdbgrid" << 1 ), resIsMongos );
            newShardConn.done();
        }

        // should return ok=0, cmd not found if it's a normal mongod
        if ( ok ) {
            errMsg = "can't add a mongos process as a shard";
            return false;
        }

        if ( servers.type() == ConnectionString::SET ) {
            if (!addReplSetShardCheck( servers, &errMsg )) {
                return false;
            }

            // shard name defaults to the name of the replica set
            if ( name->empty() && !servers.getSetName().empty() ) {
                *name = servers.getSetName();
            }
        }

        // In order to be accepted as a new shard, that mongod must not have any database name
        // that exists already in any other shards. If that test passes, the new shard's
        // databases are going to be entered as non-sharded db's whose primary is the
        // newly added shard.

        BSONObj resListDB;
        {
            ScopedDbConnection newShardConn(servers.toString());
            ok = newShardConn->runCommand( "admin", BSON( "listDatabases" << 1 ), resListDB );
            newShardConn.done();
        }

        if ( !ok ) {
            errMsg = str::stream() << "failed listing " << servers.toString()
                        << "'s databases:" << resListDB;;
            return false;
        }

        BSONObjIterator i( resListDB["databases"].Obj() );
        while ( i.more() ) {
            BSONObj dbEntry = i.next().Obj();
            const string& dbName = dbEntry["name"].String();
            if ( _isSpecialLocalDB( dbName ) ) {
                // 'local', 'admin', and 'config' are system DBs and should be excluded here
                continue;
            }
            else {
                dbNames.push_back( dbName );
            }
        }

        if ( servers.type() == ConnectionString::SET ) {
            rsMonitor = ReplicaSetMonitor::get( servers.getSetName() );
        }
    }
    catch ( DBException& e ) {
        if ( servers.type() == ConnectionString::SET ) {
            // Drop any monitor created for the set we failed to reach.
            ReplicaSetMonitor::remove( servers.getSetName() );
        }

        errMsg = str::stream() << "couldn't connect to new shard " << causedBy(e);
        return false;
    }

    // check that none of the existing shard candidate's db's exist elsewhere
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , false );
        if ( config.get() != NULL ) {
            ostringstream ss;
            ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
            ss << "' exists in another " << config->getPrimary().toString();
            errMsg = ss.str();
            return false;
        }
    }

    // if a name for a shard wasn't provided, pick one.
    if ( name->empty() && ! _getNewShardName( name ) ) {
        errMsg = "error generating new shard name";
        return false;
    }

    // build the ConfigDB shard document
    BSONObjBuilder b;
    b.append(ShardType::name(), *name);
    // For replica sets, record the monitor's canonical address rather than
    // the raw seed list the caller supplied.
    b.append(ShardType::host(),
             rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
    if (maxSize > 0) {
        b.append(ShardType::maxSize(), maxSize);
    }
    BSONObj shardDoc = b.obj();

    {
        ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

        // check whether the set of hosts (or single host) is not an already a known shard
        BSONObj old = conn->findOne(ShardType::ConfigNS,
                                    BSON(ShardType::host(servers.toString())));

        if ( ! old.isEmpty() ) {
            errMsg = "host already used";
            conn.done();
            return false;
        }

        log() << "going to add shard: " << shardDoc << endl;

        conn->insert(ShardType::ConfigNS , shardDoc);
        errMsg = conn->getLastError();
        if ( ! errMsg.empty() ) {
            log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
            conn.done();
            return false;
        }

        conn.done();
    }

    Shard::reloadShardInfo();

    // add all databases of the new shard
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , true , *name );
        if ( ! config ) {
            log() << "adding shard " << servers << " even though could not add database " << *it << endl;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", *name);
    shardDetails.append("host", servers.toString());
    configServer.logChange("addShard", "", shardDetails.obj());

    return true;
}