bool Balancer::_checkOIDs() {
    vector<Shard> all;
    Shard::getAllShards( all );

    // Ensure each shard has a unique OID machine id; if two shards collide,
    // reset both so they regenerate distinct ids.
    map<int,Shard> oids;

    for ( vector<Shard>::iterator i = all.begin(); i != all.end(); ++i ) {
        Shard s = *i;
        BSONObj f = s.runCommand( "admin" , "features" );
        if ( f["oidMachine"].isNumber() ) {
            int x = f["oidMachine"].numberInt();
            if ( oids.count(x) == 0 ) {
                oids[x] = s;
            }
            else {
                log() << "error: 2 machines have " << x << " as oid machine piece: "
                      << s.toString() << " and " << oids[x].toString() << endl;
                s.runCommand( "admin" , BSON( "features" << 1 << "oidReset" << 1 ) );
                oids[x].runCommand( "admin" , BSON( "features" << 1 << "oidReset" << 1 ) );
                return false;
            }
        }
        else {
            log() << "warning: oidMachine not set on: " << s.toString() << endl;
        }
    }
    return true;
}
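A minimal standalone sketch (not from the MongoDB source) of the first-seen-map collision check _checkOIDs performs; the host names and machine ids below are illustrative assumptions standing in for the "features" command output.

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
    std::vector<std::pair<std::string,int> > hosts; // (host, oidMachine)
    hosts.push_back(std::make_pair("shardA:27017", 42));
    hosts.push_back(std::make_pair("shardB:27017", 7));
    hosts.push_back(std::make_pair("shardC:27017", 42)); // collides with shardA

    // First occurrence of an id claims it; a second occurrence is a collision.
    std::map<int,std::string> seen;
    for (size_t i = 0; i < hosts.size(); ++i) {
        int id = hosts[i].second;
        if (seen.count(id) == 0) {
            seen[id] = hosts[i].first;
        } else {
            std::cout << "collision: " << id << " used by " << seen[id]
                      << " and " << hosts[i].first << std::endl;
        }
    }
    return 0;
}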
Shard * Shard::split( const BSONObj& m ) {
    uassert( "can't split a shard that doesn't have a manager" , _manager );

    log(1) << " before split on: " << m << "\n"
           << "\t self : " << toString() << endl;

    uassert( "locking namespace on server failed" ,
             lockNamespaceOnServer( getServer() , _ns ) );

    // The new shard takes the upper half [m, _max); this shard shrinks to [_min, m).
    Shard * s = new Shard( _manager );
    s->_ns = _ns;
    s->_server = _server;
    s->_min = m.getOwned();
    s->_max = _max;

    s->_markModified();
    _markModified();

    _manager->_shards.push_back( s );

    _max = m.getOwned();

    log(1) << " after split:\n"
           << "\t left : " << toString() << "\n"
           << "\t right: " << s->toString() << endl;

    _manager->save();

    return s;
}
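The range arithmetic split() relies on is worth stating in isolation: a half-open interval [min, max) split at a middle key m yields [min, m) and [m, max) with no gap and no overlap. A standalone sketch with assumed integer keys (the real code splits on BSON shard keys):

#include <cassert>
#include <utility>

struct Range { int min; int max; }; // half-open [min, max)

std::pair<Range, Range> splitAt(const Range& r, int m) {
    assert(r.min < m && m < r.max); // split point must fall strictly inside
    Range left  = { r.min, m };     // original shard keeps the low half
    Range right = { m, r.max };     // new shard takes [m, max)
    return std::make_pair(left, right);
}

int main() {
    Range r = { 0, 100 };
    std::pair<Range, Range> halves = splitAt(r, 40);
    assert(halves.first.max == halves.second.min); // no gap, no overlap
    return 0;
}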
ShardManager::ShardManager( DBConfig * config , string ns , ShardKeyPattern pattern )
    : _config( config ) , _ns( ns ) , _key( pattern ) {

    Shard temp(0);

    // Load all existing shards for this namespace from the config database.
    ScopedDbConnection conn( temp.modelServer() );
    auto_ptr<DBClientCursor> cursor = conn->query( temp.getNS() , BSON( "ns" << ns ) );
    while ( cursor->more() ) {
        Shard * s = new Shard( this );
        BSONObj d = cursor->next();
        s->unserialize( d );
        _shards.push_back( s );
        s->_id = d["_id"].wrap().getOwned();
    }
    conn.done();

    // First time sharding this collection: create a single shard covering the
    // whole key range [globalMin, globalMax) on the primary server.
    if ( _shards.size() == 0 ) {
        Shard * s = new Shard( this );
        s->_ns = ns;
        s->_min = _key.globalMin();
        s->_max = _key.globalMax();
        s->_server = config->getPrimary();
        s->_markModified();

        _shards.push_back( s );

        log() << "no shards for: " << ns << " so creating first: " << s->toString() << endl;
    }

    _sequenceNumber = ++NextSequenceNumber;
}
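A standalone sketch of the load-or-bootstrap pattern this constructor follows, with a plain vector standing in for the config collection; the RangeDoc type and INT_MIN/INT_MAX bounds are illustrative assumptions for the real BSON documents and globalMin/globalMax keys.

#include <climits>
#include <iostream>
#include <string>
#include <vector>

struct RangeDoc { std::string ns; int min; int max; };

std::vector<RangeDoc> loadOrBootstrap(const std::vector<RangeDoc>& stored,
                                      const std::string& ns) {
    // Load everything persisted for this namespace.
    std::vector<RangeDoc> out;
    for (size_t i = 0; i < stored.size(); ++i)
        if (stored[i].ns == ns)
            out.push_back(stored[i]);

    if (out.empty()) {
        // Nothing persisted yet: create one range covering the whole key space.
        RangeDoc first = { ns, INT_MIN, INT_MAX };
        out.push_back(first);
        std::cout << "no shards for: " << ns << " so creating first" << std::endl;
    }
    return out;
}

int main() {
    std::vector<RangeDoc> stored; // empty config: first shard gets created
    std::vector<RangeDoc> ranges = loadOrBootstrap(stored, "test.foo");
    return ranges.size() == 1 ? 0 : 1;
}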
bool Shard::moveIfShould( Shard * newShard ) {
    Shard * toMove = 0;

    // Only a (nearly) empty side of the split is cheap enough to move automatically.
    if ( newShard->countObjects() <= 1 ) {
        toMove = newShard;
    }
    else if ( this->countObjects() <= 1 ) {
        toMove = this;
    }
    else {
        log(1) << "don't know how to decide if i should move inner shard" << endl;
    }

    if ( ! toMove )
        return false;

    string newLocation = grid.pickServerForNewDB();
    if ( newLocation == getServer() ) {
        // if this is the best server, then we shouldn't do anything!
        log(1) << "not moving shard: " << toString() << " b/c would move to same place "
               << newLocation << " -> " << getServer() << endl;
        return false;
    }

    log() << "moving shard (auto): " << toMove->toString() << " to: " << newLocation
          << " #objects: " << toMove->countObjects() << endl;

    string errmsg;
    // Run the move first so a failure's errmsg actually makes it into the message.
    bool worked = toMove->moveAndCommit( newLocation , errmsg );
    massert( (string)"moveAndCommit failed: " + errmsg , worked );

    return true;
}
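The decision rule itself is tiny. A standalone restatement with illustrative counts and names (not the mongo API):

#include <cassert>
#include <cstddef>

const char* pickSideToMove(std::size_t leftCount, std::size_t rightCount) {
    if (rightCount <= 1) return "right"; // newly split half is empty-ish
    if (leftCount <= 1)  return "left";
    return 0; // an interior shard with real data: leave it for the balancer
}

int main() {
    assert(pickSideToMove(500, 0));    // fresh top half: move it
    assert(!pickSideToMove(500, 500)); // both populated: do nothing
    return 0;
}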
bool Chunk::moveAndCommit(const Shard& to,
                          long long chunkSize /* bytes */,
                          const WriteConcernOptions* writeConcern,
                          bool waitForDelete,
                          int maxTimeMS,
                          BSONObj& res) const {
    uassert( 10167 , "can't move chunk to its current location!" , getShard() != to );

    log() << "moving chunk ns: " << _manager->getns() << " moving ( " << toString() << ") "
          << _shard.toString() << " -> " << to.toString();

    Shard from = _shard;
    ScopedDbConnection fromconn(from.getConnString());

    BSONObjBuilder builder;
    builder.append("moveChunk", _manager->getns());
    builder.append("from", from.getAddress().toString());
    builder.append("to", to.getAddress().toString());
    // NEEDED FOR 2.0 COMPATIBILITY
    builder.append("fromShard", from.getName());
    builder.append("toShard", to.getName());
    ///////////////////////////////
    builder.append("min", _min);
    builder.append("max", _max);
    builder.append("maxChunkSizeBytes", chunkSize);
    builder.append("shardId", genID());
    builder.append("configdb", configServer.modelServer());

    // For legacy secondary throttle setting.
    bool secondaryThrottle = true;
    if (writeConcern && writeConcern->wNumNodes <= 1 && writeConcern->wMode.empty()) {
        secondaryThrottle = false;
    }
    builder.append("secondaryThrottle", secondaryThrottle);

    if (secondaryThrottle && writeConcern) {
        builder.append("writeConcern", writeConcern->toBSON());
    }

    builder.append("waitForDelete", waitForDelete);
    builder.append(LiteParsedQuery::cmdOptionMaxTimeMS, maxTimeMS);
    builder.append("epoch", _manager->getVersion().epoch());

    bool worked = fromconn->runCommand("admin", builder.done(), res);
    fromconn.done();

    LOG( worked ? 1 : 0 ) << "moveChunk result: " << res;

    // if succeeded, needs to reload to pick up the new location
    // if failed, mongos may be stale
    // reload is excessive here as the failure could be simply because collection metadata is taken
    _manager->reload();

    return worked;
}
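The legacy secondaryThrottle derivation above is the subtle part: a write concern that waits for at most one node (and names no mode) disables throttling, everything else keeps it on. A standalone restatement, where the WC struct is an illustrative stand-in for the real WriteConcernOptions type:

#include <cassert>
#include <string>

struct WC { int wNumNodes; std::string wMode; };

bool deriveSecondaryThrottle(const WC* writeConcern) {
    if (writeConcern && writeConcern->wNumNodes <= 1 && writeConcern->wMode.empty())
        return false; // caller asked for w<=1: do not wait on secondaries
    return true;      // default: throttle migration against replication lag
}

int main() {
    WC wOne = { 1, "" };
    WC wMajority = { 0, "majority" };
    assert(!deriveSecondaryThrottle(&wOne));
    assert(deriveSecondaryThrottle(&wMajority));
    assert(deriveSecondaryThrottle(0)); // no write concern given
    return 0;
}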
/**
 * Updates the remote cached version on the remote shard host (primary, in the case of replica
 * sets) if needed with a fully-qualified shard version for the given namespace:
 *   config server(s) + shard name + shard version
 *
 * If no remote cached version has ever been set, an initial shard version is sent.
 *
 * If the namespace is empty and no version has ever been sent, the config server + shard name
 * is sent to the remote shard host to initialize the connection as coming from mongos.
 * NOTE: This initialization is *best-effort only*.  Operations which wish to correctly version
 * must send the namespace.
 *
 * Config servers are special and are not (unless otherwise a shard) kept up to date with this
 * protocol.  This is safe so long as config servers only contain unversioned collections.
 *
 * It is an error to call checkShardVersion with an unversionable connection (isVersionableCB).
 *
 * @return true if we contacted the remote host
 */
bool checkShardVersion( DBClientBase * conn_in,
                        const string& ns,
                        ChunkManagerPtr refManager,
                        bool authoritative,
                        int tryNumber ) {
    // TODO: cache, optimize, etc...

    // Empty namespaces are special - we require initialization but not versioning
    if (ns.size() == 0) {
        return initShardVersionEmptyNS(conn_in);
    }

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return false;

    DBClientBase* conn = getVersionable( conn_in );
    verify(conn); // errors thrown above

    unsigned long long officialSequenceNumber = 0;

    ShardPtr primary;
    ChunkManagerPtr manager;
    if (authoritative)
        conf->getChunkManagerIfExists(ns, true);

    conf->getChunkManagerOrPrimary(ns, manager, primary);

    if (manager)
        officialSequenceNumber = manager->getSequenceNumber();

    // Check this manager against the reference manager
    if ( manager ) {
        Shard shard = Shard::make( conn->getServerAddress() );
        if (refManager && !refManager->compatibleWith(*manager, shard.getName())) {
            const ChunkVersion refVersion(refManager->getVersion(shard.getName()));
            const ChunkVersion currentVersion(manager->getVersion(shard.getName()));

            string msg(str::stream() << "manager ("
                                     << currentVersion.toString() << " : "
                                     << manager->getSequenceNumber() << ") "
                                     << "not compatible with reference manager ("
                                     << refVersion.toString() << " : "
                                     << refManager->getSequenceNumber() << ") "
                                     << "on shard " << shard.getName()
                                     << " (" << shard.getAddress().toString() << ")");

            throw SendStaleConfigException(ns, msg, refVersion, currentVersion);
        }
    }
    else if ( refManager ) {
        Shard shard = Shard::make(conn->getServerAddress());
        string msg( str::stream() << "not sharded ("
                                  << ( (manager.get() == 0) ? string( "<none>" )
                                                            : str::stream() << manager->getSequenceNumber() )
                                  << ") but has reference manager ("
                                  << refManager->getSequenceNumber() << ") "
                                  << "on conn " << conn->getServerAddress()
                                  << " (" << conn_in->getServerAddress() << ")" );

        throw SendStaleConfigException(ns, msg,
                                       refManager->getVersion(shard.getName()),
                                       ChunkVersion::UNSHARDED());
    }

    // Do not send setShardVersion to collections on the config servers - this causes problems
    // when config servers are also shards and get SSV with conflicting names.
    // TODO: Make config servers regular shards
    if (primary && primary->getName() == "config") {
        return false;
    }

    // Has the ChunkManager been reloaded since the last time we updated the shard version over
    // this connection?  If we've never updated the shard version, do so now.
    unsigned long long sequenceNumber = 0;
    if (connectionShardStatus.getSequence(conn, ns, &sequenceNumber)) {
        if (sequenceNumber == officialSequenceNumber) {
            return false;
        }
    }

    // Now that we're sure we're sending SSV and not to a single config server, get the shard
    Shard shard = Shard::make(conn->getServerAddress());

    ChunkVersion version = ChunkVersion(0, 0, OID());
    if (manager)
        version = manager->getVersion(shard.getName());

    LOG(1) << "setting shard version of " << version << " for " << ns << " on shard "
           << shard.toString();

    LOG(3) << "last version sent with chunk manager iteration " << sequenceNumber
           << ", current chunk manager iteration is " << officialSequenceNumber;

    BSONObj result;
    if (setShardVersion(*conn, ns, configServer.modelServer(), version, manager.get(),
                        authoritative, result)) {
        LOG(1) << "      setShardVersion success: " << result;
        connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
        return true;
    }

    LOG(1) << "       setShardVersion failed!\n" << result << endl;

    if ( result["need_authoritative"].trueValue() )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ) {
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
        return true;
    }

    if ( result["reloadConfig"].trueValue() ) {
        if ( result["version"].timestampTime() == 0 ) {
            warning() << "reloading full configuration for " << conf->name()
                      << ", connection state indicates significant version changes";

            // reload db
            conf->reload();
        }
        else {
            // reload config
            conf->getChunkManager( ns , true );
        }
    }

    const int maxNumTries = 7;
    if ( tryNumber < maxNumTries ) {
        LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 )
            << "going to retry checkShardVersion shard: " << shard.toString() << " " << result;
        sleepmillis( 10 * tryNumber );
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
        return true;
    }

    string errmsg = str::stream() << "setShardVersion failed shard: "
                                  << shard.toString() << " " << result;
    log() << " " << errmsg << endl;
    massert( 10429 , errmsg , 0 );
    return true;
}
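The retry tail of checkShardVersion is easy to see in isolation: up to maxNumTries attempts, a linearly growing sleep of 10ms per try, and a fatal assertion after the last failure. A standalone sketch under assumed names, where attempt() is a hypothetical stand-in for one setShardVersion round trip:

#include <chrono>
#include <iostream>
#include <thread>

// Hypothetical stand-in for one versioning round trip; here it succeeds on
// the third attempt.
bool attempt(int tryNumber) { return tryNumber >= 2; }

bool retryWithBackoff() {
    const int maxNumTries = 7;
    for (int tryNumber = 0; tryNumber < maxNumTries; ++tryNumber) {
        if (attempt(tryNumber))
            return true;
        // Linear backoff, mirroring sleepmillis( 10 * tryNumber ) above.
        std::this_thread::sleep_for(std::chrono::milliseconds(10 * tryNumber));
    }
    return false; // checkShardVersion escalates this case to massert(10429, ...)
}

int main() {
    std::cout << (retryWithBackoff() ? "ok" : "gave up") << std::endl;
    return 0;
}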
/**
 * Special internal logic to run reduced version handshake for empty namespace operations to
 * shards.
 *
 * Eventually this should go completely away, but for now many commands rely on unversioned but
 * mongos-specific behavior on mongod (auditing and replication information in commands)
 */
static bool initShardVersionEmptyNS(DBClientBase * conn_in) {
    bool ok;
    BSONObj result;
    DBClientBase* conn = NULL;
    try {
        // May throw if replica set primary is down
        conn = getVersionable( conn_in );
        dassert( conn ); // errors thrown above

        // Check to see if we've already initialized this connection
        if (connectionShardStatus.hasAnySequenceSet(conn))
            return false;

        // Check to see if this is actually a shard and not a single config server
        // NOTE: Config servers are registered only by the name "config" in the shard cache, not
        // by host, so lookup by host will fail unless the host is also a shard.
        Shard shard = Shard::findIfExists(conn->getServerAddress());
        if (!shard.ok())
            return false;

        LOG(1) << "initializing shard connection to " << shard.toString() << endl;

        ok = setShardVersion(*conn, "", configServer.modelServer(), ChunkVersion(), NULL,
                             true, result);
    }
    catch ( const DBException& ) {
        // NOTE: Replica sets may fail to initShardVersion because future calls relying on
        // correct versioning must later call checkShardVersion on the primary.
        // Secondary queries and commands may not call checkShardVersion, but secondary ops
        // aren't versioned at all.
        if ( conn_in->type() != ConnectionString::SET ) {
            throw;
        }

        // NOTE: Only old-style cluster operations will talk via DBClientReplicaSets - using
        // checkShardVersion is required (which includes initShardVersion information) if these
        // connections are used.

        OCCASIONALLY {
            warning() << "failed to initialize new replica set connection version, "
                      << "will initialize on first use" << endl;
        }

        return false;
    }

    // Record the connection wire version if sent in the response, initShardVersion is a
    // handshake for mongos->mongod connections.
    if ( !result["minWireVersion"].eoo() ) {
        int minWireVersion = result["minWireVersion"].numberInt();
        int maxWireVersion = result["maxWireVersion"].numberInt();
        conn->setWireVersions( minWireVersion, maxWireVersion );
    }

    LOG(3) << "initial sharding result : " << result << endl;

    connectionShardStatus.setSequence(conn, "", 0);
    return ok;
}
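The handshake above is guarded by an initialize-once check (connectionShardStatus.hasAnySequenceSet). A standalone sketch of that guard, using a plain set keyed by a hypothetical connection id in place of the real per-connection sequence table:

#include <cassert>
#include <set>
#include <string>

std::set<std::string> initialized;

bool initOnce(const std::string& connId) {
    // insert().second is false when the id was already present.
    if (!initialized.insert(connId).second)
        return false; // already initialized: nothing to send
    // ... send the reduced handshake here ...
    return true;
}

int main() {
    assert(initOnce("shardA:27017"));  // first use: handshake runs
    assert(!initOnce("shardA:27017")); // second use: skipped
    return 0;
}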