void Chunk::serialize(BSONObjBuilder& to, ChunkVersion myLastMod) {
    to.append("_id", genID(_manager->getns(), _min));

    if (myLastMod.isSet()) {
        myLastMod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else if (_lastmod.isSet()) {
        _lastmod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else {
        verify(0);
    }

    to << ChunkType::ns(_manager->getns());
    to << ChunkType::min(_min);
    to << ChunkType::max(_max);
    to << ChunkType::shard(_shardId);
}
unique_ptr<CollectionMetadata> CollectionMetadata::clonePlusChunk(
    const BSONObj& minKey, const BSONObj& maxKey, const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion.isSet());
    invariant(minKey.woCompare(maxKey) < 0);
    invariant(!rangeMapOverlaps(_chunksMap, minKey, maxKey));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_chunksMap.insert(make_pair(minKey.getOwned(), maxKey.getOwned()));
    metadata->_shardVersion = newShardVersion;
    metadata->_collVersion = newShardVersion > _collVersion ? newShardVersion : _collVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return metadata;
}
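// Illustration only: a minimal, self-contained sketch (hypothetical types, not MongoDB's
// CollectionMetadata API) of the clone-plus-chunk pattern above -- copy the immutable chunk
// map, insert the new [min, max) range, and take the higher of the two collection versions.
#include <algorithm>
#include <cassert>
#include <map>
#include <memory>
#include <string>

namespace sketch {

struct Metadata {
    std::map<std::string, std::string> chunksMap;  // min key -> max key
    unsigned long long shardVersion = 0;
    unsigned long long collVersion = 0;
};

inline std::unique_ptr<Metadata> clonePlusChunk(const Metadata& base,
                                                const std::string& minKey,
                                                const std::string& maxKey,
                                                unsigned long long newShardVersion) {
    assert(minKey < maxKey);                            // analogous to the woCompare invariant
    auto metadata = std::make_unique<Metadata>(base);   // copy the existing state
    metadata->chunksMap.emplace(minKey, maxKey);        // add the new chunk
    metadata->shardVersion = newShardVersion;
    metadata->collVersion = std::max(newShardVersion, base.collVersion);
    return metadata;
}

}  // namespace sketch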
int ConfigDiffTracker<ValType, ShardType>::calculateConfigDiff(
    const std::vector<ChunkType>& chunks) {
    _assertAttached();

    // Apply the chunk changes to the ranges and versions
    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //    per-collection, and remove any conflicting chunks from the ranges.
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on
    //    the shard for mongod) add them to the ranges.

    std::vector<ChunkType> newTracked;

    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;

    for (const ChunkType& chunk : chunks) {
        ChunkVersion chunkVersion =
            ChunkVersion::fromBSON(chunk.toBSON(), ChunkType::DEPRECATED_lastmod());

        if (!chunkVersion.isSet() || !chunkVersion.hasEqualEpoch(currEpoch)) {
            warning() << "got invalid chunk version " << chunkVersion << " in document "
                      << chunk.toString()
                      << " when trying to load differing chunks at version "
                      << ChunkVersion(
                             _maxVersion->majorVersion(), _maxVersion->minorVersion(), currEpoch);

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if (chunkVersion > *_maxVersion) {
            *_maxVersion = chunkVersion;
        }

        // Chunk version changes
        ShardType shard = shardFor(chunk.getShard());

        typename MaxChunkVersionMap::const_iterator shardVersionIt = _maxShardVersions->find(shard);
        if (shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion) {
            (*_maxShardVersions)[shard] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking because of this
        // chunk's changes
        removeOverlapping(chunk.getMin(), chunk.getMax());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore
        // or unbuffer.
        if (isTracked(chunk)) {
            newTracked.push_back(chunk);
        }
    }

    LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns << " (tracking "
           << newTracked.size() << "), new version is " << *_maxVersion;

    for (const ChunkType& chunk : newTracked) {
        // Invariant enforced by sharding - it's possible to read inconsistent state due to
        // getMore and yielding, so we want to detect it as early as possible.
        //
        // TODO: This checks for overlap, we also should check for holes here iff we're
        //       tracking all chunks.
        if (isOverlapping(chunk.getMin(), chunk.getMax())) {
            return -1;
        }

        _currMap->insert(rangeFor(chunk));
    }

    return _validDiffs;
}
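// Illustration only: a minimal, self-contained model (hypothetical types, not the real
// ConfigDiffTracker) of the two-pass diff above -- pass 1 bumps the tracked max versions and
// evicts overlapping ranges, pass 2 re-inserts the chunks this node cares about.
#include <algorithm>
#include <iterator>
#include <map>
#include <string>
#include <vector>

namespace sketch {

struct ChunkDiff {
    std::string minKey, maxKey;
    std::string shard;
    unsigned long long version = 0;
};

// rangeMap: min key -> max key; versions are plain integers in this model.
inline int applyDiffs(const std::vector<ChunkDiff>& diffs,
                      const std::string& trackedShard,
                      std::map<std::string, std::string>& rangeMap,
                      unsigned long long& maxVersion,
                      std::map<std::string, unsigned long long>& maxShardVersions) {
    int validDiffs = 0;
    std::vector<ChunkDiff> newTracked;

    for (const ChunkDiff& diff : diffs) {
        if (diff.version == 0) {
            return -1;  // unset version: caller must fall back to a full reload
        }
        ++validDiffs;

        maxVersion = std::max(maxVersion, diff.version);
        auto& shardMax = maxShardVersions[diff.shard];
        shardMax = std::max(shardMax, diff.version);

        // Pass 1: drop any currently tracked range that overlaps the changed chunk.
        for (auto it = rangeMap.begin(); it != rangeMap.end();) {
            const bool overlaps = it->first < diff.maxKey && diff.minKey < it->second;
            it = overlaps ? rangeMap.erase(it) : std::next(it);
        }

        if (diff.shard == trackedShard) {
            newTracked.push_back(diff);
        }
    }

    // Pass 2: insert the chunks we track (the real tracker also re-checks for overlap here).
    for (const ChunkDiff& diff : newTracked) {
        rangeMap.emplace(diff.minKey, diff.maxKey);
    }
    return validDiffs;
}

}  // namespace sketch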
void ChunkType::setVersion(const ChunkVersion& version) {
    invariant(version.isSet());
    _version = version;
}
int ConfigDiffTracker<ValType,ShardType>::calculateConfigDiff( DBClientCursorInterface& diffCursor )
{
    verifyAttached();

    // Apply the chunk changes to the ranges and versions
    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //    per-collection, and remove any conflicting chunks from the ranges
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on the
    //    shard for mongod) add them to the ranges
    //

    vector<BSONObj> newTracked;

    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;
    while( diffCursor.more() ){

        BSONObj diffChunkDoc = diffCursor.next();

        ChunkVersion chunkVersion = ChunkVersion::fromBSON(diffChunkDoc, ChunkType::DEPRECATED_lastmod());

        if( diffChunkDoc[ChunkType::min()].type() != Object ||
            diffChunkDoc[ChunkType::max()].type() != Object ||
            diffChunkDoc[ChunkType::shard()].type() != String )
        {
            warning() << "got invalid chunk document " << diffChunkDoc
                      << " when trying to load differing chunks" << endl;
            continue;
        }

        if( ! chunkVersion.isSet() || ! chunkVersion.hasCompatibleEpoch( currEpoch ) ) {

            warning() << "got invalid chunk version " << chunkVersion << " in document " << diffChunkDoc
                      << " when trying to load differing chunks at version "
                      << ChunkVersion( _maxVersion->toLong(), currEpoch ) << endl;

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if( chunkVersion > *_maxVersion ) *_maxVersion = chunkVersion;

        // Chunk version changes
        ShardType shard = shardFor( diffChunkDoc[ChunkType::shard()].String() );
        typename map<ShardType, ChunkVersion>::iterator shardVersionIt = _maxShardVersions->find( shard );
        if( shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion ){
            (*_maxShardVersions)[ shard ] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking b/c of this chunk's changes
        removeOverlapping(diffChunkDoc[ChunkType::min()].Obj(),
                          diffChunkDoc[ChunkType::max()].Obj());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore or unbuffer
        if( isTracked( diffChunkDoc ) ) newTracked.push_back( diffChunkDoc.getOwned() );
    }

    LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns
           << " (tracking " << newTracked.size() << "), new version is " << *_maxVersion << endl;

    for( vector<BSONObj>::iterator it = newTracked.begin(); it != newTracked.end(); it++ ){

        BSONObj chunkDoc = *it;

        // Important - we need to make sure we actually own the min and max here
        BSONObj min = chunkDoc[ChunkType::min()].Obj().getOwned();
        BSONObj max = chunkDoc[ChunkType::max()].Obj().getOwned();

        // Invariant enforced by sharding
        // It's possible to read inconsistent state b/c of getMore() and yielding, so we want
        // to detect as early as possible.
        // TODO: This checks for overlap, we also should check for holes here iff we're tracking
        // all chunks
        if( isOverlapping( min, max ) ) return -1;

        _currMap->insert( rangeFor( chunkDoc, min, max ) );
    }

    return _validDiffs;
}
Status MetadataLoader::initChunks( const string& ns,
                                   const string& shard,
                                   const CollectionMetadata* oldMetadata,
                                   CollectionMetadata* metadata )
{
    map<string, ChunkVersion> versionMap;
    OID epoch = metadata->getCollVersion().epoch();

    // Check to see if we should use the old version or not.
    if ( oldMetadata ) {

        ChunkVersion oldVersion = oldMetadata->getShardVersion();

        if ( oldVersion.isSet() && oldVersion.hasCompatibleEpoch( epoch ) ) {

            // Our epoch for coll version and shard version should be the same.
            verify( oldMetadata->getCollVersion().hasCompatibleEpoch( epoch ) );

            versionMap[shard] = oldMetadata->_shardVersion;
            metadata->_collVersion = oldMetadata->_collVersion;

            // TODO: This could be made more efficient if copying not required, but
            // not as frequently reloaded as in mongos.
            metadata->_chunksMap = oldMetadata->_chunksMap;

            LOG(2) << "loading new chunks for collection " << ns
                   << " using old metadata w/ version " << oldMetadata->getShardVersion()
                   << " and " << metadata->_chunksMap.size() << " chunks" << endl;
        }
    }

    // Exposes the new metadata's range map and version to the "differ," which is
    // ultimately responsible for filling them in.
    SCMConfigDiffTracker differ( shard );
    differ.attach( ns, metadata->_chunksMap, metadata->_collVersion, versionMap );

    try {

        ScopedDbConnection conn( _configLoc.toString(), 30 );

        auto_ptr<DBClientCursor> cursor = conn->query( ChunkType::ConfigNS,
                                                       differ.configDiffQuery() );

        if ( !cursor.get() ) {
            metadata->_collVersion = ChunkVersion();
            metadata->_chunksMap.clear();
            conn.done();
            return Status( ErrorCodes::HostUnreachable,
                           "problem opening chunk metadata cursor" );
        }

        // Diff tracker should *always* find at least one chunk if this shard owns a chunk.
        int diffsApplied = differ.calculateConfigDiff( *cursor );

        if ( diffsApplied > 0 ) {
            LOG(2) << "loaded " << diffsApplied << " chunks into new metadata for " << ns
                   << " with version " << metadata->_collVersion << endl;

            metadata->_shardVersion = versionMap[shard];
            metadata->fillRanges();
            conn.done();
            return Status::OK();
        }
        else if ( diffsApplied == 0 ) {
            warning() << "no chunks found when reloading " << ns << ", previous version was "
                      << metadata->_collVersion.toString() << endl;

            metadata->_collVersion = ChunkVersion( 0, 0, OID() );
            metadata->_chunksMap.clear();
            conn.done();
            return Status::OK();
        }
        else {
            // TODO: make this impossible by making sure we don't migrate / split on this
            // shard during the reload. No chunks were found for the ns.
            string errMsg = str::stream() << "invalid chunks found when reloading " << ns
                                          << ", previous version was "
                                          << metadata->_collVersion.toString()
                                          << ", this should be rare";

            warning() << errMsg << endl;

            metadata->_collVersion = ChunkVersion( 0, 0, OID() );
            metadata->_chunksMap.clear();
            conn.done();
            return Status( ErrorCodes::RemoteChangeDetected, errMsg );
        }
    }
    catch ( const DBException& e ) {
        string errMsg = str::stream() << "problem querying chunks metadata" << causedBy( e );

        // We deliberately do not return connPtr to the pool, since it was involved
        // with the error here.
        return Status( ErrorCodes::HostUnreachable, errMsg );
    }
}
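// Illustration only: the three diff-tracker outcomes that initChunks() above maps onto
// statuses (hypothetical helper, not part of MetadataLoader).
namespace sketch {

enum class ReloadOutcome { Loaded, NoChunks, RemoteChangeDetected };

inline ReloadOutcome classifyDiffResult(int diffsApplied) {
    if (diffsApplied > 0) {
        return ReloadOutcome::Loaded;            // new chunks merged into the metadata
    }
    if (diffsApplied == 0) {
        return ReloadOutcome::NoChunks;          // shard owns nothing; version is reset to zero
    }
    return ReloadOutcome::RemoteChangeDetected;  // inconsistent chunks read mid-migration/split
}

}  // namespace sketch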
std::shared_ptr<ChunkManager> DBConfig::getChunkManager(OperationContext* txn,
                                                        const string& ns,
                                                        bool shouldReload,
                                                        bool forceReload) {
    BSONObj key;
    ChunkVersion oldVersion;
    std::shared_ptr<ChunkManager> oldManager;

    {
        stdx::lock_guard<stdx::mutex> lk(_lock);

        bool earlyReload = !_collections[ns].isSharded() && (shouldReload || forceReload);
        if (earlyReload) {
            // This is to catch cases where this is a new sharded collection.
            // Note: read the _reloadCount inside the _lock mutex, so _loadIfNeeded will always
            // be forced to perform a reload.
            const auto currentReloadIteration = _reloadCount.load();
            _loadIfNeeded(txn, currentReloadIteration);
        }

        CollectionInfo& ci = _collections[ns];
        uassert(10181, str::stream() << "not sharded:" << ns, ci.isSharded());

        invariant(!ci.key().isEmpty());

        if (!(shouldReload || forceReload) || earlyReload) {
            return ci.getCM();
        }

        key = ci.key().copy();

        if (ci.getCM()) {
            oldManager = ci.getCM();
            oldVersion = ci.getCM()->getVersion();
        }
    }

    invariant(!key.isEmpty());

    // TODO: We need to keep this first one-chunk check in until we have a more efficient way of
    // creating/reusing a chunk manager, as doing so requires copying the full set of chunks
    // currently
    vector<ChunkType> newestChunk;
    if (oldVersion.isSet() && !forceReload) {
        uassertStatusOK(
            grid.catalogClient(txn)->getChunks(txn,
                                               BSON(ChunkType::ns(ns)),
                                               BSON(ChunkType::DEPRECATED_lastmod() << -1),
                                               1,
                                               &newestChunk,
                                               nullptr));

        if (!newestChunk.empty()) {
            invariant(newestChunk.size() == 1);

            ChunkVersion v = newestChunk[0].getVersion();
            if (v.equals(oldVersion)) {
                stdx::lock_guard<stdx::mutex> lk(_lock);

                const CollectionInfo& ci = _collections[ns];
                uassert(15885,
                        str::stream() << "not sharded after reloading from chunks : " << ns,
                        ci.isSharded());
                return ci.getCM();
            }
        }
    } else if (!oldVersion.isSet()) {
        warning() << "version 0 found when " << (forceReload ? "reloading" : "checking")
                  << " chunk manager; collection '" << ns << "' initially detected as sharded";
    }

    // we are not locked now, and want to load a new ChunkManager

    unique_ptr<ChunkManager> tempChunkManager;

    {
        stdx::lock_guard<stdx::mutex> lll(_hitConfigServerLock);

        if (!newestChunk.empty() && !forceReload) {
            // If we have a target we're going for see if we've hit already
            stdx::lock_guard<stdx::mutex> lk(_lock);

            CollectionInfo& ci = _collections[ns];

            if (ci.isSharded() && ci.getCM()) {
                ChunkVersion currentVersion = newestChunk[0].getVersion();

                // Only reload if the version we found is newer than our own in the same epoch
                if (currentVersion <= ci.getCM()->getVersion() &&
                    ci.getCM()->getVersion().hasEqualEpoch(currentVersion)) {
                    return ci.getCM();
                }
            }
        }

        tempChunkManager.reset(new ChunkManager(oldManager->getns(),
                                                oldManager->getShardKeyPattern(),
                                                oldManager->getDefaultCollation(),
                                                oldManager->isUnique()));
        tempChunkManager->loadExistingRanges(txn, oldManager.get());

        if (tempChunkManager->numChunks() == 0) {
            // Maybe we're not sharded any more, so do a full reload
            reload(txn);

            return getChunkManager(txn, ns, false);
        }
    }

    stdx::lock_guard<stdx::mutex> lk(_lock);

    CollectionInfo& ci = _collections[ns];
    uassert(14822, (string) "state changed in the middle: " + ns, ci.isSharded());

    // Reset if our versions aren't the same
    bool shouldReset = !tempChunkManager->getVersion().equals(ci.getCM()->getVersion());

    // Also reset if we're forced to do so
    if (!shouldReset && forceReload) {
        shouldReset = true;
        warning() << "chunk manager reload forced for collection '" << ns
                  << "', config version is " << tempChunkManager->getVersion();
    }

    //
    // LEGACY BEHAVIOR
    //
    // It's possible to get into a state when dropping collections when our new version is
    // less than our prev version. Behave identically to legacy mongos, for now, and warn to
    // draw attention to the problem.
    //
    // TODO: Assert in next version, to allow smooth upgrades
    //

    if (shouldReset && tempChunkManager->getVersion() < ci.getCM()->getVersion()) {
        shouldReset = false;

        warning() << "not resetting chunk manager for collection '" << ns
                  << "', config version is " << tempChunkManager->getVersion() << " and "
                  << "old version is " << ci.getCM()->getVersion();
    }

    // end legacy behavior

    if (shouldReset) {
        const auto cmOpTime = tempChunkManager->getConfigOpTime();

        // The existing ChunkManager could have been updated since we last checked, so
        // replace the existing chunk manager only if it is strictly newer.
        // The condition should be (>) than instead of (>=), but use (>=) since legacy non-repl
        // config servers will always have an opTime of zero.
        if (cmOpTime >= ci.getCM()->getConfigOpTime()) {
            ci.resetCM(tempChunkManager.release());
        }
    }

    uassert(
        15883, str::stream() << "not sharded after chunk manager reset : " << ns, ci.isSharded());

    return ci.getCM();
}
bool MetadataLoader::initChunks(const CollectionType& collDoc,
                                const string& ns,
                                const string& shard,
                                const CollectionManager* oldManager,
                                CollectionManager* manager,
                                string* errMsg) {

    map<string,ChunkVersion> versionMap;
    manager->_maxCollVersion = ChunkVersion(0, 0, collDoc.getEpoch());

    // Check to see if we should use the old version or not.
    if (oldManager) {

        ChunkVersion oldVersion = oldManager->getMaxShardVersion();

        if (oldVersion.isSet() && oldVersion.hasCompatibleEpoch(collDoc.getEpoch())) {

            // Our epoch for coll version and shard version should be the same.
            verify(oldManager->getMaxCollVersion().hasCompatibleEpoch(collDoc.getEpoch()));

            versionMap[shard] = oldManager->_maxShardVersion;
            manager->_maxCollVersion = oldManager->_maxCollVersion;

            // TODO: This could be made more efficient if copying not required, but
            // not as frequently reloaded as in mongos.
            manager->_chunksMap = oldManager->_chunksMap;

            LOG(2) << "loading new chunks for collection " << ns
                   << " using old chunk manager w/ version "
                   << oldManager->getMaxShardVersion()
                   << " and " << manager->_chunksMap.size() << " chunks" << endl;
        }
    }

    // Exposes the new manager's range map and version to the "differ," which is
    // ultimately responsible for filling them in.
    SCMConfigDiffTracker differ(shard);
    differ.attach(ns, manager->_chunksMap, manager->_maxCollVersion, versionMap);

    try {

        scoped_ptr<ScopedDbConnection> connPtr(
            ScopedDbConnection::getInternalScopedDbConnection(_configLoc.toString(), 30));
        ScopedDbConnection& conn = *connPtr;

        auto_ptr<DBClientCursor> cursor = conn->query(ChunkType::ConfigNS,
                                                      differ.configDiffQuery());

        if (!cursor.get()) {
            // 'errMsg' was filled by the getChunkCursor() call.
            manager->_maxCollVersion = ChunkVersion();
            manager->_chunksMap.clear();
            connPtr->done();
            return false;
        }

        // Diff tracker should *always* find at least one chunk if collection exists.
        int diffsApplied = differ.calculateConfigDiff(*cursor);

        if (diffsApplied > 0) {
            LOG(2) << "loaded " << diffsApplied
                   << " chunks into new chunk manager for " << ns
                   << " with version " << manager->_maxCollVersion << endl;

            manager->_maxShardVersion = versionMap[shard];
            manager->fillRanges();
            connPtr->done();
            return true;
        }
        else if (diffsApplied == 0) {
            *errMsg = str::stream() << "no chunks found when reloading " << ns
                                    << ", previous version was "
                                    << manager->_maxCollVersion.toString();
            warning() << *errMsg << endl;

            manager->_maxCollVersion = ChunkVersion();
            manager->_chunksMap.clear();
            connPtr->done();
            return false;
        }
        else {
            // TODO: make this impossible by making sure we don't migrate / split on this
            // shard during the reload. No chunks were found for the ns.
            *errMsg = str::stream() << "invalid chunks found when reloading " << ns
                                    << ", previous version was "
                                    << manager->_maxCollVersion.toString()
                                    << ", this should be rare";
            warning() << *errMsg << endl;

            manager->_maxCollVersion = ChunkVersion();
            manager->_chunksMap.clear();
            connPtr->done();
            return false;
        }
    }
    catch (const DBException& e) {
        *errMsg = str::stream() << "caught exception accessing the config servers"
                                << causedBy(e);

        // We deliberately do not return connPtr to the pool, since it was involved
        // with the error here.
        return false;
    }
}
void WriteBackListener::run() {

    int secsToSleep = 0;
    scoped_ptr<ChunkVersion> lastNeededVersion;
    int lastNeededCount = 0;
    bool needsToReloadShardInfo = false;

    while ( ! inShutdown() ) {

        if ( ! Shard::isAShardNode( _addr ) ) {
            LOG(1) << _addr << " is not a shard node" << endl;
            sleepsecs( 60 );
            continue;
        }

        try {
            if (needsToReloadShardInfo) {
                // It's possible this shard was removed
                Shard::reloadShardInfo();
                needsToReloadShardInfo = false;
            }

            scoped_ptr<ScopedDbConnection> conn(
                    ScopedDbConnection::getInternalScopedDbConnection( _addr ) );

            BSONObj result;

            {
                BSONObjBuilder cmd;
                cmd.appendOID( "writebacklisten" , &serverID ); // Command will block for data
                if ( ! conn->get()->runCommand( "admin" , cmd.obj() , result ) ) {
                    result = result.getOwned();
                    log() << "writebacklisten command failed! " << result << endl;
                    conn->done();
                    continue;
                }
            }
            conn->done();

            LOG(1) << "writebacklisten result: " << result << endl;

            BSONObj data = result.getObjectField( "data" );
            if ( data.getBoolField( "writeBack" ) ) {
                string ns = data["ns"].valuestrsafe();

                ConnectionIdent cid( "" , 0 );
                OID wid;
                if ( data["connectionId"].isNumber() && data["id"].type() == jstOID ) {
                    string s = "";
                    if ( data["instanceIdent"].type() == String )
                        s = data["instanceIdent"].String();
                    cid = ConnectionIdent( s , data["connectionId"].numberLong() );
                    wid = data["id"].OID();
                }
                else {
                    warning() << "mongos/mongod version mismatch (1.7.5 is the split)" << endl;
                }

                int len; // not used, but needed for next call
                Message msg( (void*)data["msg"].binData( len ) , false );
                massert( 10427 , "invalid writeback message" , msg.header()->valid() );

                DBConfigPtr db = grid.getDBConfig( ns );
                ChunkVersion needVersion = ChunkVersion::fromBSON( data, "version" );

                //
                // TODO: Refactor the sharded strategy to correctly handle all sharding state changes itself,
                // we can't rely on WBL to do this for us b/c anything could reset our state in-between.
                // We should always reload here for efficiency when possible, but staleness is also caught in the
                // loop below.
                //

                ChunkManagerPtr manager;
                ShardPtr primary;
                db->getChunkManagerOrPrimary( ns, manager, primary );

                ChunkVersion currVersion;
                if( manager ) currVersion = manager->getVersion();

                LOG(1) << "connectionId: " << cid << " writebackId: " << wid << " needVersion : "
                       << needVersion.toString() << " mine : " << currVersion.toString() << endl;

                LOG(1) << msg.toString() << endl;

                //
                // We should reload only if we need to update our version to be compatible *and* we
                // haven't already done so. This avoids lots of reloading when we remove/add a sharded collection
                //

                bool alreadyReloaded = lastNeededVersion &&
                                       lastNeededVersion->isEquivalentTo( needVersion );

                if( alreadyReloaded ){
                    LOG(1) << "wbl already reloaded config information for version "
                           << needVersion << ", at version " << currVersion << endl;
                }
                else if( lastNeededVersion ) {
                    log() << "new version change detected to " << needVersion.toString()
                          << ", " << lastNeededCount << " writebacks processed at "
                          << lastNeededVersion->toString() << endl;
                    lastNeededCount = 0;
                }

                //
                // Set our lastNeededVersion for next time
                //

                lastNeededVersion.reset( new ChunkVersion( needVersion ) );
                lastNeededCount++;

                //
                // Determine if we should reload, if so, reload
                //

                bool shouldReload = ! needVersion.isWriteCompatibleWith( currVersion ) &&
                                    ! alreadyReloaded;

                if( shouldReload && currVersion.isSet()
                                 && needVersion.isSet()
                                 && currVersion.hasCompatibleEpoch( needVersion ) )
                {
                    //
                    // If we disagree about versions only, reload the chunk manager
                    //

                    db->getChunkManagerIfExists( ns, true );
                }
                else if( shouldReload ){
                    //
                    // If we disagree about anything else, reload the full db
                    //

                    warning() << "reloading config data for " << db->getName() << ", "
                              << "wanted version " << needVersion.toString()
                              << " but currently have version " << currVersion.toString() << endl;

                    db->reload();
                }

                // do request and then call getLastError
                // we have to call getLastError so we can return the right fields to the user if they decide to call getLastError

                BSONObj gle;
                int attempts = 0;
                while ( true ) {
                    attempts++;

                    try {

                        Request r( msg , 0 );
                        r.init();

                        r.d().reservedField() |= Reserved_FromWriteback;

                        ClientInfo * ci = r.getClientInfo();
                        if (!noauth) {
                            ci->getAuthorizationManager()->grantInternalAuthorization(
                                    "_writebackListener");
                        }
                        ci->noAutoSplit();

                        r.process( attempts );

                        ci->newRequest(); // this so we flip prev and cur shards

                        BSONObjBuilder b;
                        string errmsg;
                        if ( ! ci->getLastError( "admin",
                                                 BSON( "getLastError" << 1 ),
                                                 b,
                                                 errmsg,
                                                 true ) )
                        {
                            b.appendBool( "commandFailed" , true );

                            if( ! b.hasField( "errmsg" ) ){
                                b.append( "errmsg", errmsg );
                                gle = b.obj();
                            }
                            else if( errmsg.size() > 0 ){

                                // Rebuild GLE object with errmsg
                                // TODO: Make this less clumsy by improving GLE interface
                                gle = b.obj();

                                if( gle["errmsg"].type() == String ){

                                    BSONObj gleNoErrmsg =
                                            gle.filterFieldsUndotted( BSON( "errmsg" << 1 ), false );
                                    BSONObjBuilder bb;
                                    bb.appendElements( gleNoErrmsg );
                                    bb.append( "errmsg", gle["errmsg"].String() +
                                                         " ::and:: " +
                                                         errmsg );
                                    gle = bb.obj().getOwned();
                                }
                            }
                        }
                        else{
                            gle = b.obj();
                        }

                        if ( gle["code"].numberInt() == 9517 ) {

                            log() << "new version change detected, "
                                  << lastNeededCount << " writebacks processed previously" << endl;

                            lastNeededVersion.reset();
                            lastNeededCount = 1;

                            log() << "writeback failed because of stale config, retrying attempts: "
                                  << attempts << endl;
                            LOG(1) << "writeback error : " << gle << endl;

                            //
                            // Bringing this in line with the similar retry logic elsewhere
                            //
                            // TODO: Reloading the chunk manager may not help if we dropped a
                            // collection, but we don't actually have that info in the writeback
                            // error
                            //

                            if( attempts <= 2 ){
                                db->getChunkManagerIfExists( ns, true );
                            }
                            else{
                                versionManager.forceRemoteCheckShardVersionCB( ns );
                                sleepsecs( attempts - 1 );
                            }

                            uassert( 15884, str::stream()
                                         << "Could not reload chunk manager after " << attempts
                                         << " attempts.", attempts <= 4 );

                            continue;
                        }

                        ci->clearSinceLastGetError();
                    }
                    catch ( DBException& e ) {
                        error() << "error processing writeback: " << e << endl;
                        BSONObjBuilder b;
                        e.getInfo().append( b, "err", "code" );
                        gle = b.obj();
                    }

                    break;
                }

                {
                    scoped_lock lk( _seenWritebacksLock );
                    WBStatus& s = _seenWritebacks[cid];
                    s.id = wid;
                    s.gle = gle;
                }
            }
            else if ( result["noop"].trueValue() ) {
                // no-op
            }
            else {
                log() << "unknown writeBack result: " << result << endl;
            }

            secsToSleep = 0;
            continue;
        }
        catch ( std::exception& e ) {
            // Attention! Do not call any method that would throw an exception
            // (or assert) in this block.

            if ( inShutdown() ) {
                // we're shutting down, so just clean up
                return;
            }

            log() << "WriteBackListener exception : " << e.what() << endl;

            needsToReloadShardInfo = true;
        }
        catch ( ... ) {
            log() << "WriteBackListener uncaught exception!" << endl;
        }

        secsToSleep++;
        sleepsecs(secsToSleep);
        if ( secsToSleep > 10 )
            secsToSleep = 0;
    }

    log() << "WriteBackListener exiting : address no longer in cluster " << _addr;
}
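// Illustration only: a condensed sketch (hypothetical callbacks, not the real WriteBackListener)
// of the inner retry loop above -- re-run the request while getLastError reports a stale-config
// error, refreshing routing info between attempts and giving up after a bounded number of tries.
#include <functional>
#include <stdexcept>

namespace sketch {

// processOnce returns an error code (0 == OK); kStaleConfig mirrors the 9517 check above.
inline void processWritebackWithRetries(const std::function<int()>& processOnce,
                                        const std::function<void(int)>& refreshRouting,
                                        int kStaleConfig = 9517,
                                        int maxAttempts = 4) {
    for (int attempts = 1;; ++attempts) {
        const int code = processOnce();
        if (code != kStaleConfig) {
            return;  // success, or a non-retriable error surfaced to the caller via GLE
        }
        if (attempts > maxAttempts) {
            throw std::runtime_error("could not reload chunk manager after max attempts");
        }
        refreshRouting(attempts);  // cheap chunk-manager reload first, heavier checks later
    }
}

}  // namespace sketch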
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

    // Steps
    // 1. check basic config
    // 2. extract params from command
    // 3. fast check
    // 4. slow check (LOCKS)

    // step 1

    lastError.disableForCommand();
    ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

    // make sure we have the mongos id for writebacks
    if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) )
        return false;

    bool authoritative = cmdObj.getBoolField( "authoritative" );

    // check config server is ok or enable sharding
    if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
        return false;

    // check shard name/hosts are correct
    if ( cmdObj["shard"].type() == String ) {
        shardingState.gotShardName( cmdObj["shard"].String() );
    }

    // Handle initial shard connection
    if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){

        result.append( "initialized", true );

        // Send back wire version to let mongos know what protocol we can speak
        result.append( "minWireVersion", minWireVersion );
        result.append( "maxWireVersion", maxWireVersion );

        return true;
    }

    // we can run on a slave up to here
    if ( ! isMaster( "admin" ) ) {
        result.append( "errmsg" , "not master" );
        result.append( "note" , "from post init in setShardVersion" );
        return false;
    }

    // step 2
    string ns = cmdObj["setShardVersion"].valuestrsafe();
    if ( ns.size() == 0 ) {
        errmsg = "need to specify namespace";
        return false;
    }

    if( ! ChunkVersion::canParseBSON( cmdObj, "version" ) ){
        errmsg = "need to specify version";
        return false;
    }

    const ChunkVersion version = ChunkVersion::fromBSON( cmdObj, "version" );

    // step 3

    const ChunkVersion oldVersion = info->getVersion(ns);
    const ChunkVersion globalVersion = shardingState.getVersion(ns);

    oldVersion.addToBSON( result, "oldVersion" );

    if ( globalVersion.isSet() && version.isSet() ) {
        // this means there is no reset going on on either side
        // so it's safe to make some assumptions

        if ( version.isWriteCompatibleWith( globalVersion ) ) {
            // mongos and mongod agree!
            if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                if ( oldVersion < globalVersion &&
                     oldVersion.hasCompatibleEpoch(globalVersion) ) {
                    info->setVersion( ns , version );
                }
                else if ( authoritative ) {
                    // this means there was a drop and our version is reset
                    info->setVersion( ns , version );
                }
                else {
                    result.append( "ns" , ns );
                    result.appendBool( "need_authoritative" , true );
                    errmsg = "verifying drop on '" + ns + "'";
                    return false;
                }
            }

            return true;
        }
    }

    // step 4

    // this is because of a weird segfault I saw and I can't see why this should ever be set
    massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() ,
             cc().getContext() == 0 );

    if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
        // this had been reset
        info->setVersion( ns , ChunkVersion( 0, OID() ) );
    }

    if ( ! version.isSet() && ! globalVersion.isSet() ) {
        // this connection is cleaning itself
        info->setVersion( ns , ChunkVersion( 0, OID() ) );
        return true;
    }

    // Cases below all either return OR fall-through to remote metadata reload.
    if ( version.isSet() || !globalVersion.isSet() ) {

        // Not Dropping

        // TODO: Refactor all of this
        if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
            errmsg = "this connection already had a newer version of collection '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "newVersion" );
            globalVersion.addToBSON( result, "globalVersion" );
            return false;
        }

        // TODO: Refactor all of this
        if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
            while ( shardingState.inCriticalMigrateSection() ) {
                log() << "waiting till out of critical section" << endl;
                shardingState.waitTillNotInCriticalSection( 10 );
            }
            errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "version" );
            globalVersion.addToBSON( result, "globalVersion" );
            result.appendBool( "reloadConfig" , true );
            return false;
        }

        if ( ! globalVersion.isSet() && ! authoritative ) {
            // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
            // should require a reload.
            while ( shardingState.inCriticalMigrateSection() ) {
                log() << "waiting till out of critical section" << endl;
                shardingState.waitTillNotInCriticalSection( 10 );
            }

            // need authoritative for first look
            result.append( "ns" , ns );
            result.appendBool( "need_authoritative" , true );
            errmsg = "first time for collection '" + ns + "'";
            return false;
        }

        // Fall through to metadata reload below
    }
    else {

        // Dropping

        if ( ! authoritative ) {
            result.appendBool( "need_authoritative" , true );
            result.append( "ns" , ns );
            globalVersion.addToBSON( result, "globalVersion" );
            errmsg = "dropping needs to be authoritative";
            return false;
        }

        // Fall through to metadata reload below
    }

    ChunkVersion currVersion;
    Status status = shardingState.refreshMetadataIfNeeded( ns, version, &currVersion );

    if (!status.isOK()) {

        // The reload itself was interrupted or confused here

        errmsg = str::stream() << "could not refresh metadata for " << ns
                               << " with requested shard version " << version.toString()
                               << ", stored shard version is " << currVersion.toString()
                               << causedBy( status.reason() );

        warning() << errmsg << endl;

        result.append( "ns" , ns );
        version.addToBSON( result, "version" );
        currVersion.addToBSON( result, "globalVersion" );
        result.appendBool( "reloadConfig", true );

        return false;
    }
    else if ( !version.isWriteCompatibleWith( currVersion ) ) {

        // We reloaded a version that doesn't match the version mongos was trying to
        // set.

        errmsg = str::stream() << "requested shard version differs from"
                               << " config shard version for " << ns
                               << ", requested version is " << version.toString()
                               << " but found version " << currVersion.toString();

        OCCASIONALLY warning() << errmsg << endl;

        // WARNING: the exact fields below are important for compatibility with mongos
        // version reload.

        result.append( "ns" , ns );
        currVersion.addToBSON( result, "globalVersion" );

        // If this was a reset of a collection or the last chunk moved out, inform mongos to
        // do a full reload.
        if (currVersion.epoch() != version.epoch() || !currVersion.isSet() ) {

            result.appendBool( "reloadConfig", true );
            // Zero-version also needed to trigger full mongos reload, sadly
            // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
            ChunkVersion( 0, 0, OID() ).addToBSON( result, "version" );
            // For debugging
            version.addToBSON( result, "origVersion" );
        }
        else {
            version.addToBSON( result, "version" );
        }

        return false;
    }

    info->setVersion( ns , version );
    return true;
}
/**
 * @return true if not in sharded mode or if version for this client is ok
 */
bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) {

    if ( ! shardingState.enabled() )
        return true;

    if ( ! isMasterNs( ns.c_str() ) )  {
        // right now connections to secondaries aren't versioned at all
        return true;
    }

    ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

    if ( ! info ) {
        // this means the client has nothing sharded
        // so this allows direct connections to do whatever they want
        // which i think is the correct behavior
        return true;
    }

    if ( info->inForceVersionOkMode() ) {
        return true;
    }

    // TODO : all collections at some point, be sharded or not, will have a version
    // (and a CollectionMetadata)
    received = info->getVersion( ns );
    wanted = shardingState.getVersion( ns );

    if( received.isWriteCompatibleWith( wanted ) ) return true;

    //
    // Figure out exactly why not compatible, send appropriate error message
    // The versions themselves are returned in the error, so not needed in messages here
    //

    // Check epoch first, to send more meaningful message, since other parameters probably
    // won't match either
    if( ! wanted.hasCompatibleEpoch( received ) ){
        errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", "
                               << "the collection may have been dropped and recreated";
        return false;
    }

    if( ! wanted.isSet() && received.isSet() ){
        errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", "
                               << "the collection may have been dropped";
        return false;
    }

    if( wanted.isSet() && ! received.isSet() ){
        errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", "
                               << "but no version set in request";
        return false;
    }

    if( wanted.majorVersion() != received.majorVersion() ){

        //
        // Could be > or < - wanted is > if this is the source of a migration,
        // wanted < if this is the target of a migration
        //

        errmsg = str::stream() << "version mismatch detected for " << ns << ", "
                               << "stored major version " << wanted.majorVersion()
                               << " does not match received " << received.majorVersion();
        return false;
    }

    // Those are all the reasons the versions can mismatch
    verify( false );
    return false;
}
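// Illustration only: a self-contained model (hypothetical VersionInfo type, not MongoDB's
// ChunkVersion) of the compatibility checks above -- epoch first, then set/unset, then the
// major version, which is the component that changes when a migration commits.
#include <string>

namespace sketch {

struct VersionInfo {
    unsigned long long epoch = 0;  // stands in for the OID epoch
    unsigned int major = 0;
    unsigned int minor = 0;
    bool isSet() const { return major != 0 || minor != 0 || epoch != 0; }
};

inline bool versionsCompatible(const VersionInfo& received,
                               const VersionInfo& wanted,
                               std::string* why) {
    if (received.epoch != wanted.epoch) {
        *why = "epoch mismatch: collection may have been dropped and recreated";
        return false;
    }
    if (wanted.isSet() != received.isSet()) {
        *why = "one side has no version set for this namespace";
        return false;
    }
    if (wanted.major != received.major) {
        *why = "major version mismatch: a chunk migration has committed";
        return false;
    }
    return true;  // minor-version-only differences (splits) remain write compatible
}

}  // namespace sketch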
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

    // Steps
    // 1. check basic config
    // 2. extract params from command
    // 3. fast check
    // 4. slow check (LOCKS)

    // step 1

    lastError.disableForCommand();
    ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

    // make sure we have the mongos id for writebacks
    if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) )
        return false;

    bool authoritative = cmdObj.getBoolField( "authoritative" );

    // check config server is ok or enable sharding
    if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
        return false;

    // check shard name/hosts are correct
    if ( cmdObj["shard"].type() == String ) {
        shardingState.gotShardName( cmdObj["shard"].String() );
        shardingState.gotShardHost( cmdObj["shardHost"].String() );
    }

    // Handle initial shard connection
    if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){
        result.append( "initialized", true );
        return true;
    }

    // we can run on a slave up to here
    if ( ! isMaster( "admin" ) ) {
        result.append( "errmsg" , "not master" );
        result.append( "note" , "from post init in setShardVersion" );
        return false;
    }

    // step 2
    string ns = cmdObj["setShardVersion"].valuestrsafe();
    if ( ns.size() == 0 ) {
        errmsg = "need to specify namespace";
        return false;
    }

    if( ! ConfigVersion::canParseBSON( cmdObj, "version" ) ){
        errmsg = "need to specify version";
        return false;
    }

    const ConfigVersion version = ConfigVersion::fromBSON( cmdObj, "version" );

    // step 3

    const ConfigVersion oldVersion = info->getVersion(ns);
    const ConfigVersion globalVersion = shardingState.getVersion(ns);

    oldVersion.addToBSON( result, "oldVersion" );

    if ( globalVersion.isSet() && version.isSet() ) {
        // this means there is no reset going on on either side
        // so it's safe to make some assumptions

        if ( version.isWriteCompatibleWith( globalVersion ) ) {
            // mongos and mongod agree!
            if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                if ( oldVersion < globalVersion &&
                     oldVersion.hasCompatibleEpoch(globalVersion) ) {
                    info->setVersion( ns , version );
                }
                else if ( authoritative ) {
                    // this means there was a drop and our version is reset
                    info->setVersion( ns , version );
                }
                else {
                    result.append( "ns" , ns );
                    result.appendBool( "need_authoritative" , true );
                    errmsg = "verifying drop on '" + ns + "'";
                    return false;
                }
            }

            return true;
        }
    }

    // step 4

    // this is because of a weird segfault I saw and I can't see why this should ever be set
    massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() ,
             cc().getContext() == 0 );

    Lock::GlobalWrite setShardVersionLock; // TODO: can we get rid of this??

    if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
        // this had been reset
        info->setVersion( ns , ChunkVersion( 0, OID() ) );
    }

    if ( ! version.isSet() && ! globalVersion.isSet() ) {
        // this connection is cleaning itself
        info->setVersion( ns , ChunkVersion( 0, OID() ) );
        return true;
    }

    if ( ! version.isSet() && globalVersion.isSet() ) {
        if ( ! authoritative ) {
            result.appendBool( "need_authoritative" , true );
            result.append( "ns" , ns );
            globalVersion.addToBSON( result, "globalVersion" );
            errmsg = "dropping needs to be authoritative";
            return false;
        }
        log() << "wiping data for: " << ns << endl;
        globalVersion.addToBSON( result, "beforeDrop" );
        // only setting global version on purpose
        // need clients to re-find meta-data
        shardingState.resetVersion( ns );
        info->setVersion( ns , ChunkVersion( 0, OID() ) );
        return true;
    }

    // TODO: Refactor all of this
    if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
        errmsg = "this connection already had a newer version of collection '" + ns + "'";
        result.append( "ns" , ns );
        version.addToBSON( result, "newVersion" );
        globalVersion.addToBSON( result, "globalVersion" );
        return false;
    }

    // TODO: Refactor all of this
    if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
        while ( shardingState.inCriticalMigrateSection() ) {
            log() << "waiting till out of critical section" << endl;
            dbtemprelease r;
            shardingState.waitTillNotInCriticalSection( 10 );
        }
        errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
        result.append( "ns" , ns );
        version.addToBSON( result, "version" );
        globalVersion.addToBSON( result, "globalVersion" );
        result.appendBool( "reloadConfig" , true );
        return false;
    }

    if ( ! globalVersion.isSet() && ! authoritative ) {
        // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero, which
        // should require a reload.
        while ( shardingState.inCriticalMigrateSection() ) {
            log() << "waiting till out of critical section" << endl;
            dbtemprelease r;
            shardingState.waitTillNotInCriticalSection( 10 );
        }

        // need authoritative for first look
        result.append( "ns" , ns );
        result.appendBool( "need_authoritative" , true );
        errmsg = "first time for collection '" + ns + "'";
        return false;
    }

    Timer relockTime;
    {
        dbtemprelease unlock;

        ChunkVersion currVersion = version;
        if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
            errmsg = str::stream() << "client version differs from config's for collection '" << ns << "'";
            result.append( "ns" , ns );

            // If this was a reset of a collection, inform mongos to do a full reload
            if( ! currVersion.isSet() ){
                ConfigVersion( 0, OID() ).addToBSON( result, "version" );
                result.appendBool( "reloadConfig", true );
            }
            else{
                version.addToBSON( result, "version" );
            }

            globalVersion.addToBSON( result, "globalVersion" );
            return false;
        }
    }
    if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) {
        log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl;
    }

    info->setVersion( ns , version );
    return true;
}
/**
 * @return true if had to do something
 */
bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager,
                        bool authoritative , int tryNumber )
{
    // TODO: cache, optimize, etc...

    WriteBackListener::init( *conn_in );

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return false;

    DBClientBase* conn = getVersionable( conn_in );
    verify(conn); // errors thrown above

    unsigned long long officialSequenceNumber = 0;

    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ) {
        manager = conf->getChunkManagerIfExists( ns , authoritative );
        // It's possible the chunk manager was reset since we checked whether sharded was true,
        // so must check this here.
        if( manager ) officialSequenceNumber = manager->getSequenceNumber();
    }

    // Check this manager against the reference manager
    if( isSharded && manager ){

        Shard shard = Shard::make( conn->getServerAddress() );
        if( refManager && ! refManager->compatibleWith( manager, shard ) ){
            throw SendStaleConfigException( ns,
                    str::stream() << "manager (" << manager->getVersion( shard ).toString()
                                  << " : " << manager->getSequenceNumber() << ") "
                                  << "not compatible with reference manager ("
                                  << refManager->getVersion( shard ).toString()
                                  << " : " << refManager->getSequenceNumber() << ") "
                                  << "on shard " << shard.getName()
                                  << " (" << shard.getAddress().toString() << ")",
                    refManager->getVersion( shard ), manager->getVersion( shard ) );
        }
    }
    else if( refManager ){

        Shard shard = Shard::make( conn->getServerAddress() );
        string msg( str::stream() << "not sharded ("
                << ( (manager.get() == 0) ? string( "<none>" ) :
                        str::stream() << manager->getSequenceNumber() )
                << ") but has reference manager ("
                << refManager->getSequenceNumber() << ") "
                << "on conn " << conn->getServerAddress() << " ("
                << conn_in->getServerAddress() << ")" );

        throw SendStaleConfigException( ns, msg,
                refManager->getVersion( shard ), ChunkVersion( 0, OID() ));
    }

    // has the ChunkManager been reloaded since the last time we updated the connection-level version?
    // (i.e., last time we issued the setShardVersions below)
    unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
    if ( sequenceNumber == officialSequenceNumber ) {
        return false;
    }

    ChunkVersion version = ChunkVersion( 0, OID() );
    if ( isSharded && manager ) {
        version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
    }

    if( ! version.isSet() ){
        LOG(0) << "resetting shard version of " << ns << " on " << conn->getServerAddress() << ", "
               << ( ! isSharded ? "no longer sharded" :
                    ( ! manager ? "no chunk manager found" : "version is zero" ) ) << endl;
    }

    LOG(2) << " have to set shard version for conn: " << conn->getServerAddress() << " ns:" << ns
           << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber
           << " version: " << version << " manager: " << manager.get() << endl;

    const string versionableServerAddress(conn->getServerAddress());

    BSONObj result;
    if ( setShardVersion( *conn , ns , version , manager , authoritative , result ) ) {
        // success!
        LOG(1) << " setShardVersion success: " << result << endl;
        connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
        return true;
    }

    LOG(1) << " setShardVersion failed!\n" << result << endl;

    if ( result["need_authoritative"].trueValue() )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ) {
        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
        return true;
    }

    if ( result["reloadConfig"].trueValue() ) {
        if( result["version"].timestampTime() == 0 ){

            warning() << "reloading full configuration for " << conf->getName()
                      << ", connection state indicates significant version changes" << endl;

            // reload db
            conf->reload();
        }
        else {
            // reload config
            conf->getChunkManager( ns , true );
        }
    }

    const int maxNumTries = 7;
    if ( tryNumber < maxNumTries ) {
        LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 )
            << "going to retry checkShardVersion host: " << versionableServerAddress
            << " " << result << endl;

        sleepmillis( 10 * tryNumber );

        // use the original connection and get a fresh versionable connection
        // since conn can be invalidated (or worse, freed) after the failure
        checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1);
        return true;
    }

    string errmsg = str::stream() << "setShardVersion failed host: "
                                  << versionableServerAddress << " " << result;
    log() << " " << errmsg << endl;
    massert( 10429 , errmsg , 0 );
    return true;
}
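// Illustration only: a generic sketch (not MongoDB code) of the bounded retry-with-backoff
// pattern checkShardVersion() uses -- linear backoff of 10ms * attempt, capped at a fixed
// number of tries, then give up loudly.
#include <chrono>
#include <functional>
#include <stdexcept>
#include <thread>

namespace sketch {

// Calls `attempt` until it reports success or `maxNumTries` is exhausted.
inline void retryWithBackoff(const std::function<bool()>& attempt, int maxNumTries = 7) {
    for (int tryNumber = 1; tryNumber <= maxNumTries; ++tryNumber) {
        if (attempt()) {
            return;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(10 * tryNumber));
    }
    throw std::runtime_error("operation failed after maximum number of retries");
}

}  // namespace sketch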