int ConfigDiffTracker<ValType,ShardType>:: calculateConfigDiff( DBClientCursorInterface& diffCursor ) { verifyAttached(); // Apply the chunk changes to the ranges and versions // // Overall idea here is to work in two steps : // 1. For all the new chunks we find, increment the maximum version per-shard and // per-collection, and remove any conflicting chunks from the ranges // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on the // shard for mongod) add them to the ranges // vector<BSONObj> newTracked; // Store epoch now so it doesn't change when we change max OID currEpoch = _maxVersion->epoch(); _validDiffs = 0; while( diffCursor.more() ){ BSONObj diffChunkDoc = diffCursor.next(); ShardChunkVersion chunkVersion = ShardChunkVersion::fromBSON( diffChunkDoc, "lastmod" ); if( diffChunkDoc[ "min" ].type() != Object || diffChunkDoc[ "max" ].type() != Object || diffChunkDoc[ "shard" ].type() != String ) { warning() << "got invalid chunk document " << diffChunkDoc << " when trying to load differing chunks" << endl; continue; } if( ! chunkVersion.isSet() || ! chunkVersion.hasCompatibleEpoch( currEpoch ) ){ warning() << "got invalid chunk version " << chunkVersion << " in document " << diffChunkDoc << " when trying to load differing chunks at version " << ShardChunkVersion( _maxVersion->toLong(), currEpoch ) << endl; // Don't keep loading, since we know we'll be broken here return -1; } _validDiffs++; // Get max changed version and chunk version if( chunkVersion > *_maxVersion ) *_maxVersion = chunkVersion; // Chunk version changes ShardType shard = shardFor( diffChunkDoc[ "shard" ].String() ); typename map<ShardType, ShardChunkVersion>::iterator shardVersionIt = _maxShardVersions->find( shard ); if( shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion ){ (*_maxShardVersions)[ shard ] = chunkVersion; } // See if we need to remove any chunks we are currently tracking b/c of this chunk's changes removeOverlapping( diffChunkDoc[ "min" ].Obj(), diffChunkDoc[ "max" ].Obj() ); // Figure out which of the new chunks we need to track // Important - we need to actually own this doc, in case the cursor decides to getMore or unbuffer if( isTracked( diffChunkDoc ) ) newTracked.push_back( diffChunkDoc.getOwned() ); } LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns << " (tracking " << newTracked.size() << "), new version is " << _maxVersion << endl; for( vector<BSONObj>::iterator it = newTracked.begin(); it != newTracked.end(); it++ ){ BSONObj chunkDoc = *it; // Important - we need to make sure we actually own the min and max here BSONObj min = chunkDoc[ "min" ].Obj().getOwned(); BSONObj max = chunkDoc[ "max" ].Obj().getOwned(); // Invariant enforced by sharding // It's possible to read inconsistent state b/c of getMore() and yielding, so we want // to detect as early as possible. // TODO: This checks for overlap, we also should check for holes here iff we're tracking // all chunks if( isOverlapping( min, max ) ) return -1; _currMap->insert( rangeFor( chunkDoc, min, max ) ); } return _validDiffs; }
bool MetadataLoader::initChunks(const CollectionType& collDoc, const string& ns, const string& shard, const CollectionManager* oldManager, CollectionManager* manager, string* errMsg) { map<string,ShardChunkVersion> versionMap; manager->_maxCollVersion = ShardChunkVersion(0, 0, collDoc.getEpoch()); // Check to see if we should use the old version or not. if (oldManager) { ShardChunkVersion oldVersion = oldManager->getMaxShardVersion(); if (oldVersion.isSet() && oldVersion.hasCompatibleEpoch(collDoc.getEpoch())) { // Our epoch for coll version and shard version should be the same. verify(oldManager->getMaxCollVersion().hasCompatibleEpoch(collDoc.getEpoch())); versionMap[shard] = oldManager->_maxShardVersion; manager->_maxCollVersion = oldManager->_maxCollVersion; // TODO: This could be made more efficient if copying not required, but // not as frequently reloaded as in mongos. manager->_chunksMap = oldManager->_chunksMap; LOG(2) << "loading new chunks for collection " << ns << " using old chunk manager w/ version " << oldManager->getMaxShardVersion() << " and " << manager->_chunksMap.size() << " chunks" << endl; } } // Exposes the new 'manager's range map and version to the "differ," who // would ultimately be responsible of filling them up. SCMConfigDiffTracker differ(shard); differ.attach(ns, manager->_chunksMap, manager->_maxCollVersion, versionMap); try { scoped_ptr<ScopedDbConnection> connPtr( ScopedDbConnection::getInternalScopedDbConnection(_configLoc.toString(), 30)); ScopedDbConnection& conn = *connPtr; auto_ptr<DBClientCursor> cursor = conn->query(ChunkType::ConfigNS, differ.configDiffQuery()); if (!cursor.get()) { // 'errMsg' was filled by the getChunkCursor() call. manager->_maxCollVersion = ShardChunkVersion(); manager->_chunksMap.clear(); connPtr->done(); return false; } // Diff tracker should *always* find at least one chunk if collection exists. int diffsApplied = differ.calculateConfigDiff(*cursor); if (diffsApplied > 0) { LOG(2) << "loaded " << diffsApplied << " chunks into new chunk manager for " << ns << " with version " << manager->_maxCollVersion << endl; manager->_maxShardVersion = versionMap[shard]; manager->fillRanges(); connPtr->done(); return true; } else if(diffsApplied == 0) { *errMsg = str::stream() << "no chunks found when reloading " << ns << ", previous version was " << manager->_maxCollVersion.toString(); warning() << *errMsg << endl; manager->_maxCollVersion = ShardChunkVersion(); manager->_chunksMap.clear(); connPtr->done(); return false; } else{ // TODO: make this impossible by making sure we don't migrate / split on this // shard during the reload. No chunks were found for the ns. *errMsg = str::stream() << "invalid chunks found when reloading " << ns << ", previous version was " << manager->_maxCollVersion.toString() << ", this should be rare"; warning() << errMsg << endl; manager->_maxCollVersion = ShardChunkVersion(); manager->_chunksMap.clear(); connPtr->done(); return false; } } catch (const DBException& e) { *errMsg = str::stream() << "caught exception accessing the config servers" << causedBy(e); // We deliberately do not return connPtr to the pool, since it was involved // with the error here. return false; } }
void WriteBackListener::run() { int secsToSleep = 0; scoped_ptr<ShardChunkVersion> lastNeededVersion; int lastNeededCount = 0; while ( ! inShutdown() ) { if ( ! Shard::isAShardNode( _addr ) ) { LOG(1) << _addr << " is not a shard node" << endl; sleepsecs( 60 ); continue; } try { scoped_ptr<ScopedDbConnection> conn( ScopedDbConnection::getInternalScopedDbConnection( _addr ) ); BSONObj result; { BSONObjBuilder cmd; cmd.appendOID( "writebacklisten" , &serverID ); // Command will block for data if ( ! conn->get()->runCommand( "admin" , cmd.obj() , result ) ) { result = result.getOwned(); log() << "writebacklisten command failed! " << result << endl; conn->done(); continue; } } conn->done(); LOG(1) << "writebacklisten result: " << result << endl; BSONObj data = result.getObjectField( "data" ); if ( data.getBoolField( "writeBack" ) ) { string ns = data["ns"].valuestrsafe(); ConnectionIdent cid( "" , 0 ); OID wid; if ( data["connectionId"].isNumber() && data["id"].type() == jstOID ) { string s = ""; if ( data["instanceIdent"].type() == String ) s = data["instanceIdent"].String(); cid = ConnectionIdent( s , data["connectionId"].numberLong() ); wid = data["id"].OID(); } else { warning() << "mongos/mongod version mismatch (1.7.5 is the split)" << endl; } int len; // not used, but needed for next call Message msg( (void*)data["msg"].binData( len ) , false ); massert( 10427 , "invalid writeback message" , msg.header()->valid() ); DBConfigPtr db = grid.getDBConfig( ns ); ShardChunkVersion needVersion = ShardChunkVersion::fromBSON( data, "version" ); // // TODO: Refactor the sharded strategy to correctly handle all sharding state changes itself, // we can't rely on WBL to do this for us b/c anything could reset our state in-between. // We should always reload here for efficiency when possible, but staleness is also caught in the // loop below. // ChunkManagerPtr manager; ShardPtr primary; db->getChunkManagerOrPrimary( ns, manager, primary ); ShardChunkVersion currVersion; if( manager ) currVersion = manager->getVersion(); LOG(1) << "connectionId: " << cid << " writebackId: " << wid << " needVersion : " << needVersion.toString() << " mine : " << currVersion.toString() << endl; LOG(1) << msg.toString() << endl; // // We should reload only if we need to update our version to be compatible *and* we // haven't already done so. This avoids lots of reloading when we remove/add a sharded collection // bool alreadyReloaded = lastNeededVersion && lastNeededVersion->isEquivalentTo( needVersion ); if( alreadyReloaded ){ LOG(1) << "wbl already reloaded config information for version " << needVersion << ", at version " << currVersion << endl; } else if( lastNeededVersion ) { log() << "new version change detected to " << needVersion.toString() << ", " << lastNeededCount << " writebacks processed at " << lastNeededVersion->toString() << endl; lastNeededCount = 0; } // // Set our lastNeededVersion for next time // lastNeededVersion.reset( new ShardChunkVersion( needVersion ) ); lastNeededCount++; // // Determine if we should reload, if so, reload // bool shouldReload = ! needVersion.isWriteCompatibleWith( currVersion ) && ! alreadyReloaded; if( shouldReload && currVersion.isSet() && needVersion.isSet() && currVersion.hasCompatibleEpoch( needVersion ) ) { // // If we disagree about versions only, reload the chunk manager // db->getChunkManagerIfExists( ns, true ); } else if( shouldReload ){ // // If we disagree about anything else, reload the full db // warning() << "reloading config data for " << db->getName() << ", " << "wanted version " << needVersion.toString() << " but currently have version " << currVersion.toString() << endl; db->reload(); } // do request and then call getLastError // we have to call getLastError so we can return the right fields to the user if they decide to call getLastError BSONObj gle; int attempts = 0; while ( true ) { attempts++; try { Request r( msg , 0 ); r.init(); r.d().reservedField() |= Reserved_FromWriteback; ClientInfo * ci = r.getClientInfo(); if (!noauth) { // TODO: Figure out why this is 'admin' instead of 'local'. ci->getAuthenticationInfo()->authorize("admin", internalSecurity.user); } ci->noAutoSplit(); r.process( attempts ); ci->newRequest(); // this so we flip prev and cur shards BSONObjBuilder b; string errmsg; if ( ! ci->getLastError( "admin", BSON( "getLastError" << 1 ), b, errmsg, true ) ) { b.appendBool( "commandFailed" , true ); if( ! b.hasField( "errmsg" ) ){ b.append( "errmsg", errmsg ); gle = b.obj(); } else if( errmsg.size() > 0 ){ // Rebuild GLE object with errmsg // TODO: Make this less clumsy by improving GLE interface gle = b.obj(); if( gle["errmsg"].type() == String ){ BSONObj gleNoErrmsg = gle.filterFieldsUndotted( BSON( "errmsg" << 1 ), false ); BSONObjBuilder bb; bb.appendElements( gleNoErrmsg ); bb.append( "errmsg", gle["errmsg"].String() + " ::and:: " + errmsg ); gle = bb.obj().getOwned(); } } } else{ gle = b.obj(); } log() << "GLE is " << gle << endl; if ( gle["code"].numberInt() == 9517 ) { log() << "new version change detected, " << lastNeededCount << " writebacks processed previously" << endl; lastNeededVersion.reset(); lastNeededCount = 1; log() << "writeback failed because of stale config, retrying attempts: " << attempts << endl; LOG(1) << "writeback error : " << gle << endl; // // Bringing this in line with the similar retry logic elsewhere // // TODO: Reloading the chunk manager may not help if we dropped a // collection, but we don't actually have that info in the writeback // error // if( attempts <= 2 ){ db->getChunkManagerIfExists( ns, true ); } else{ versionManager.forceRemoteCheckShardVersionCB( ns ); sleepsecs( attempts - 1 ); } uassert( 15884, str::stream() << "Could not reload chunk manager after " << attempts << " attempts.", attempts <= 4 ); continue; } ci->clearSinceLastGetError(); } catch ( DBException& e ) { error() << "error processing writeback: " << e << endl; BSONObjBuilder b; e.getInfo().append( b, "err", "code" ); gle = b.obj(); } break; } { scoped_lock lk( _seenWritebacksLock ); WBStatus& s = _seenWritebacks[cid]; s.id = wid; s.gle = gle; } } else if ( result["noop"].trueValue() ) { // no-op } else { log() << "unknown writeBack result: " << result << endl; } secsToSleep = 0; continue; } catch ( std::exception& e ) { if ( inShutdown() ) { // we're shutting down, so just clean up return; } log() << "WriteBackListener exception : " << e.what() << endl; // It's possible this shard was removed Shard::reloadShardInfo(); } catch ( ... ) { log() << "WriteBackListener uncaught exception!" << endl; } secsToSleep++; sleepsecs(secsToSleep); if ( secsToSleep > 10 ) secsToSleep = 0; } log() << "WriteBackListener exiting : address no longer in cluster " << _addr; }