/**
 * Executes the chunk migrations chosen for this balancing round and returns the
 * number of chunks that were actually moved.
 *
 * @param candidateChunks  non-owning pointer to the list of migrations to attempt
 * @param secondaryThrottle passed through to Chunk::moveAndCommit (throttling
 *                          semantics live in that call — not visible here)
 * @return number of successful moves; jumbo-marked chunks are also counted so
 *         the caller starts another round right away (see comment below)
 */
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks , bool secondaryThrottle ) {
    int movedCount = 0;

    for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ) {
        const CandidateChunk& chunkInfo = *it->get();

        DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns );
        verify( cfg );

        ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns );
        verify( cm );

        // Re-resolve the chunk from the (possibly stale) cached manager and make
        // sure its bounds still match what the policy decided to move.
        ChunkPtr c = cm->findChunk( chunkInfo.chunk.min );
        if ( c->getMin().woCompare( chunkInfo.chunk.min ) || c->getMax().woCompare( chunkInfo.chunk.max ) ) {
            // likely a split happened somewhere
            cm = cfg->getChunkManager( chunkInfo.ns , true /* reload */);
            verify( cm );

            c = cm->findChunk( chunkInfo.chunk.min );
            if ( c->getMin().woCompare( chunkInfo.chunk.min ) || c->getMax().woCompare( chunkInfo.chunk.max ) ) {
                // Still mismatched after a forced reload — skip this migration;
                // the next balancing round will recompute candidates.
                log() << "chunk mismatch after reload, ignoring will retry issue " << chunkInfo.chunk.toString() << endl;
                continue;
            }
        }

        BSONObj res;
        if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , Chunk::MaxChunkSize , secondaryThrottle , res ) ) {
            movedCount++;
            continue;
        }

        // the move requires acquiring the collection metadata's lock, which can fail
        log() << "balancer move failed: " << res << " from: " << chunkInfo.from << " to: " << chunkInfo.to << " chunk: " << chunkInfo.chunk << endl;

        if ( res["chunkTooBig"].trueValue() ) {
            // reload just to be safe
            cm = cfg->getChunkManager( chunkInfo.ns );
            verify( cm );
            c = cm->findChunk( chunkInfo.chunk.min );

            log() << "forcing a split because migrate failed for size reasons" << endl;

            res = BSONObj();
            c->singleSplit( true , res );
            log() << "forced split results: " << res << endl;

            if ( ! res["ok"].trueValue() ) {
                // Split also failed: flag the chunk as jumbo so it is skipped in
                // future rounds instead of repeatedly failing to migrate.
                log() << "marking chunk as jumbo: " << c->toString() << endl;
                c->markAsJumbo();
                // we increment moveCount so we do another round right away
                movedCount++;
            }
        }
    }

    return movedCount;
}
bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ ShardConnection::sync(); Timer t; string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ){ errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ){ errmsg = "ns not sharded. have to shard before can move a chunk"; return false; } BSONObj find = cmdObj.getObjectField( "find" ); if ( find.isEmpty() ){ errmsg = "need to specify find. see help"; return false; } string toString = cmdObj["to"].valuestrsafe(); if ( ! toString.size() ){ errmsg = "you have to specify where you want to move the chunk"; return false; } Shard to = Shard::make( toString ); // so far, chunk size serves test purposes; it may or may not become a supported parameter long long maxChunkSizeBytes = cmdObj["maxChunkSizeBytes"].numberLong(); if ( maxChunkSizeBytes == 0 ) { maxChunkSizeBytes = Chunk::MaxChunkSize; } tlog() << "CMD: movechunk: " << cmdObj << endl; ChunkManagerPtr info = config->getChunkManager( ns ); ChunkPtr c = info->findChunk( find ); const Shard& from = c->getShard(); if ( from == to ){ errmsg = "that chunk is already on that shard"; return false; } BSONObj res; if ( ! c->moveAndCommit( to , maxChunkSizeBytes , res ) ){ errmsg = "move failed"; result.append( "cause" , res ); return false; } result.append( "millis" , t.millis() ); return true; }
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ) { int movedCount = 0; for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ) { const CandidateChunk& chunkInfo = *it->get(); DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns ); assert( cfg ); ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns ); assert( cm ); const BSONObj& chunkToMove = chunkInfo.chunk; ChunkPtr c = cm->findChunk( chunkToMove["min"].Obj() ); if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) || c->getMax().woCompare( chunkToMove["max"].Obj() ) ) { // likely a split happened somewhere cm = cfg->getChunkManager( chunkInfo.ns , true /* reload */); assert( cm ); c = cm->findChunk( chunkToMove["min"].Obj() ); if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) || c->getMax().woCompare( chunkToMove["max"].Obj() ) ) { log() << "chunk mismatch after reload, ignoring will retry issue cm: " << c->getMin() << " min: " << chunkToMove["min"].Obj() << endl; continue; } } BSONObj res; if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , Chunk::MaxChunkSize , res ) ) { movedCount++; continue; } // the move requires acquiring the collection metadata's lock, which can fail log() << "balacer move failed: " << res << " from: " << chunkInfo.from << " to: " << chunkInfo.to << " chunk: " << chunkToMove << endl; if ( res["chunkTooBig"].trueValue() ) { // reload just to be safe cm = cfg->getChunkManager( chunkInfo.ns ); assert( cm ); c = cm->findChunk( chunkToMove["min"].Obj() ); log() << "forcing a split because migrate failed for size reasons" << endl; res = BSONObj(); c->singleSplit( true , res ); log() << "forced split results: " << res << endl; // TODO: if the split fails, mark as jumbo SERVER-2571 } } return movedCount; }
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ) { int movedCount = 0; for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ){ const CandidateChunk& chunkInfo = *it->get(); DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns ); assert( cfg ); ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns ); assert( cm ); const BSONObj& chunkToMove = chunkInfo.chunk; ChunkPtr c = cm->findChunk( chunkToMove["min"].Obj() ); if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) ){ // likely a split happened somewhere cm = cfg->getChunkManager( chunkInfo.ns , true ); assert( cm ); c = cm->findChunk( chunkToMove["min"].Obj() ); if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) ){ log() << "balancer: chunk mismatch after reload, ignoring will retry issue cm: " << c->getMin() << " min: " << chunkToMove["min"].Obj() << endl; continue; } } string errmsg; if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , errmsg ) ){ movedCount++; continue; } log() << "balancer: MOVE FAILED **** " << errmsg << "\n" << " from: " << chunkInfo.from << " to: " << " chunk: " << chunkToMove << endl; } return movedCount; }
/**
 * Executes the chunk migrations chosen for this balancing round and returns the
 * number of chunks that were actually moved.
 *
 * @param candidateChunks  non-owning pointer to the migrations to attempt
 * @param secondaryThrottle forwarded to Chunk::moveAndCommit
 * @param waitForDelete     forwarded to Chunk::moveAndCommit
 * @return number of successful moves; a chunk marked jumbo after a failed
 *         forced split is also counted so the caller re-runs right away
 */
int Balancer::_moveChunks(const vector<CandidateChunkPtr>* candidateChunks, bool secondaryThrottle, bool waitForDelete) {
    int movedCount = 0;

    for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ) {
        const CandidateChunk& chunkInfo = *it->get();

        // Changes to metadata, borked metadata, and connectivity problems should cause us to
        // abort this chunk move, but shouldn't cause us to abort the entire round of chunks.
        // TODO: Handle all these things more cleanly, since they're expected problems
        try {
            DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns );
            verify( cfg );

            // NOTE: We purposely do not reload metadata here, since _doBalanceRound already
            // tried to do so once.
            ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns );
            verify( cm );

            // Confirm the chunk's bounds still match the candidate; a mismatch
            // means the metadata changed after the candidates were computed.
            ChunkPtr c = cm->findIntersectingChunk( chunkInfo.chunk.min );
            if ( c->getMin().woCompare( chunkInfo.chunk.min ) || c->getMax().woCompare( chunkInfo.chunk.max ) ) {
                // likely a split happened somewhere
                cm = cfg->getChunkManager( chunkInfo.ns , true /* reload */);
                verify( cm );

                c = cm->findIntersectingChunk( chunkInfo.chunk.min );
                if ( c->getMin().woCompare( chunkInfo.chunk.min ) || c->getMax().woCompare( chunkInfo.chunk.max ) ) {
                    // Still mismatched after a forced reload — skip this move.
                    log() << "chunk mismatch after reload, ignoring will retry issue " << chunkInfo.chunk.toString() << endl;
                    continue;
                }
            }

            BSONObj res;
            if (c->moveAndCommit(Shard::make(chunkInfo.to),
                                 Chunk::MaxChunkSize,
                                 secondaryThrottle,
                                 waitForDelete,
                                 0, /* maxTimeMS */
                                 res)) {
                movedCount++;
                continue;
            }

            // the move requires acquiring the collection metadata's lock, which can fail
            log() << "balancer move failed: " << res << " from: " << chunkInfo.from << " to: " << chunkInfo.to << " chunk: " << chunkInfo.chunk << endl;

            if ( res["chunkTooBig"].trueValue() ) {
                // reload just to be safe
                cm = cfg->getChunkManager( chunkInfo.ns );
                verify( cm );
                c = cm->findIntersectingChunk( chunkInfo.chunk.min );

                log() << "forcing a split because migrate failed for size reasons" << endl;

                res = BSONObj();
                c->singleSplit( true , res );
                log() << "forced split results: " << res << endl;

                if ( ! res["ok"].trueValue() ) {
                    // Split also failed: mark jumbo so future rounds skip the chunk.
                    log() << "marking chunk as jumbo: " << c->toString() << endl;
                    c->markAsJumbo();
                    // we increment moveCount so we do another round right away
                    movedCount++;
                }
            }
        }
        catch( const DBException& ex ) {
            // Per-chunk failures must not abort the whole round.
            warning() << "could not move chunk " << chunkInfo.chunk.toString() << ", continuing balancing round" << causedBy( ex ) << endl;
        }
    }

    return movedCount;
}
/**
 * Executes the chunk migrations chosen for this balancing round and returns the
 * number of chunks that were actually moved.
 *
 * Before each migration, rechecks the balancer settings so a round stops early
 * if balancing was disabled (or the skipBalanceRound fail point is active).
 *
 * @param txn               operation context, threaded through all catalog calls
 * @param candidateChunks   migrations to attempt
 * @param secondaryThrottle forwarded to Chunk::moveAndCommit
 * @param waitForDelete     forwarded to Chunk::moveAndCommit
 * @return number of successful moves; a chunk marked jumbo after a failed split
 *         is also counted so the caller re-runs right away
 */
int Balancer::_moveChunks(OperationContext* txn,
                          const vector<MigrateInfo>& candidateChunks,
                          const MigrationSecondaryThrottleOptions& secondaryThrottle,
                          bool waitForDelete) {
    int movedCount = 0;

    for (const auto& migrateInfo : candidateChunks) {
        // If the balancer was disabled since we started this round, don't start new chunks
        // moves.
        const auto balSettingsResult =
            grid.catalogManager(txn)->getGlobalSettings(txn, SettingsType::BalancerDocKey);

        // An absent settings document is not an error — it means "use defaults".
        const bool isBalSettingsAbsent =
            balSettingsResult.getStatus() == ErrorCodes::NoMatchingDocument;

        if (!balSettingsResult.isOK() && !isBalSettingsAbsent) {
            warning() << balSettingsResult.getStatus();
            return movedCount;
        }

        const SettingsType& balancerConfig =
            isBalSettingsAbsent ? SettingsType{} : balSettingsResult.getValue();

        if ((!isBalSettingsAbsent && !Chunk::shouldBalance(balancerConfig)) ||
            MONGO_FAIL_POINT(skipBalanceRound)) {
            LOG(1) << "Stopping balancing round early as balancing was disabled";
            return movedCount;
        }

        // Changes to metadata, borked metadata, and connectivity problems between shards
        // should cause us to abort this chunk move, but shouldn't cause us to abort the entire
        // round of chunks.
        //
        // TODO(spencer): We probably *should* abort the whole round on issues communicating
        // with the config servers, but its impossible to distinguish those types of failures
        // at the moment.
        //
        // TODO: Handle all these things more cleanly, since they're expected problems
        const NamespaceString nss(migrateInfo.ns);

        try {
            shared_ptr<DBConfig> cfg =
                uassertStatusOK(grid.catalogCache()->getDatabase(txn, nss.db().toString()));

            // NOTE: We purposely do not reload metadata here, since _getCandidateChunks already
            // tried to do so once
            shared_ptr<ChunkManager> cm = cfg->getChunkManager(txn, migrateInfo.ns);
            uassert(28628,
                    str::stream()
                        << "Collection " << migrateInfo.ns
                        << " was deleted while balancing was active. Aborting balancing round.",
                    cm);

            // Confirm the chunk's bounds still match the candidate computed earlier.
            ChunkPtr c = cm->findIntersectingChunk(txn, migrateInfo.chunk.min);
            if (c->getMin().woCompare(migrateInfo.chunk.min) ||
                c->getMax().woCompare(migrateInfo.chunk.max)) {
                // Likely a split happened somewhere, so force reload the chunk manager
                cm = cfg->getChunkManager(txn, migrateInfo.ns, true);
                invariant(cm);

                c = cm->findIntersectingChunk(txn, migrateInfo.chunk.min);
                if (c->getMin().woCompare(migrateInfo.chunk.min) ||
                    c->getMax().woCompare(migrateInfo.chunk.max)) {
                    // Still mismatched after a forced reload — skip this move.
                    log() << "chunk mismatch after reload, ignoring will retry issue "
                          << migrateInfo.chunk.toString();
                    continue;
                }
            }

            BSONObj res;
            if (c->moveAndCommit(txn,
                                 migrateInfo.to,
                                 Chunk::MaxChunkSize,
                                 secondaryThrottle,
                                 waitForDelete,
                                 0, /* maxTimeMS */
                                 res)) {
                movedCount++;
                continue;
            }

            // The move requires acquiring the collection metadata's lock, which can fail.
            log() << "balancer move failed: " << res << " from: " << migrateInfo.from
                  << " to: " << migrateInfo.to << " chunk: " << migrateInfo.chunk;

            Status moveStatus = getStatusFromCommandResult(res);

            if (moveStatus == ErrorCodes::ChunkTooBig || res["chunkTooBig"].trueValue()) {
                // Reload just to be safe
                cm = cfg->getChunkManager(txn, migrateInfo.ns);
                invariant(cm);
                c = cm->findIntersectingChunk(txn, migrateInfo.chunk.min);

                log() << "performing a split because migrate failed for size reasons";

                Status status = c->split(txn, Chunk::normal, NULL, NULL);
                log() << "split results: " << status;

                if (!status.isOK()) {
                    // Split also failed: mark jumbo so future rounds skip the chunk.
                    log() << "marking chunk as jumbo: " << c->toString();
                    c->markAsJumbo(txn);
                    // We increment moveCount so we do another round right away
                    movedCount++;
                }
            }
        } catch (const DBException& ex) {
            // Per-chunk failures must not abort the whole round.
            warning() << "could not move chunk " << migrateInfo.chunk.toString()
                      << ", continuing balancing round" << causedBy(ex);
        }
    }

    return movedCount;
}
/**
 * Executes the chunk migrations chosen for this balancing round and returns the
 * number of chunks that were actually moved.
 *
 * Before each migration, rechecks the balancer settings so the round stops
 * early if balancing was disabled (or the skipBalanceRound fail point is on).
 *
 * @param candidateChunks non-owning pointer to the migrations to attempt
 * @param writeConcern    forwarded to Chunk::moveAndCommit
 * @param waitForDelete   forwarded to Chunk::moveAndCommit
 * @return number of successful moves; a chunk marked jumbo after a failed split
 *         is also counted so the caller re-runs right away
 */
int Balancer::_moveChunks(const vector<CandidateChunkPtr>* candidateChunks,
                          const WriteConcernOptions* writeConcern,
                          bool waitForDelete) {
    int movedCount = 0;

    for (vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin();
         it != candidateChunks->end();
         ++it) {
        // If the balancer was disabled since we started this round, don't start new
        // chunks moves.
        SettingsType balancerConfig;
        std::string errMsg;

        if (!grid.getBalancerSettings(&balancerConfig, &errMsg)) {
            warning() << errMsg;
            // No point in continuing the round if the config servers are unreachable.
            return movedCount;
        }

        if ((balancerConfig.isKeySet() &&  // balancer config doc exists
             !grid.shouldBalance(balancerConfig)) ||
            MONGO_FAIL_POINT(skipBalanceRound)) {
            LOG(1) << "Stopping balancing round early as balancing was disabled";
            return movedCount;
        }

        // Changes to metadata, borked metadata, and connectivity problems between shards should
        // cause us to abort this chunk move, but shouldn't cause us to abort the entire round
        // of chunks.
        // TODO(spencer): We probably *should* abort the whole round on issues communicating
        // with the config servers, but its impossible to distinguish those types of failures
        // at the moment.
        // TODO: Handle all these things more cleanly, since they're expected problems
        const CandidateChunk& chunkInfo = *it->get();

        try {
            DBConfigPtr cfg = grid.getDBConfig(chunkInfo.ns);
            verify(cfg);

            // NOTE: We purposely do not reload metadata here, since _doBalanceRound already
            // tried to do so once.
            ChunkManagerPtr cm = cfg->getChunkManager(chunkInfo.ns);
            verify(cm);

            // Confirm the chunk's bounds still match the candidate computed earlier.
            ChunkPtr c = cm->findIntersectingChunk(chunkInfo.chunk.min);
            if (c->getMin().woCompare(chunkInfo.chunk.min) ||
                c->getMax().woCompare(chunkInfo.chunk.max)) {
                // likely a split happened somewhere
                cm = cfg->getChunkManager(chunkInfo.ns, true /* reload */);
                verify(cm);

                c = cm->findIntersectingChunk(chunkInfo.chunk.min);
                if (c->getMin().woCompare(chunkInfo.chunk.min) ||
                    c->getMax().woCompare(chunkInfo.chunk.max)) {
                    // Still mismatched after a forced reload — skip this move.
                    log() << "chunk mismatch after reload, ignoring will retry issue "
                          << chunkInfo.chunk.toString() << endl;
                    continue;
                }
            }

            BSONObj res;
            if (c->moveAndCommit(Shard::make(chunkInfo.to),
                                 Chunk::MaxChunkSize,
                                 writeConcern,
                                 waitForDelete,
                                 0, /* maxTimeMS */
                                 res)) {
                movedCount++;
                continue;
            }

            // the move requires acquiring the collection metadata's lock, which can fail
            log() << "balancer move failed: " << res << " from: " << chunkInfo.from
                  << " to: " << chunkInfo.to << " chunk: " << chunkInfo.chunk << endl;

            if (res["chunkTooBig"].trueValue()) {
                // reload just to be safe
                cm = cfg->getChunkManager(chunkInfo.ns);
                verify(cm);
                c = cm->findIntersectingChunk(chunkInfo.chunk.min);

                log() << "performing a split because migrate failed for size reasons";

                Status status = c->split(Chunk::normal, NULL, NULL);
                log() << "split results: " << status << endl;

                if (!status.isOK()) {
                    // Split also failed: mark jumbo so future rounds skip the chunk.
                    log() << "marking chunk as jumbo: " << c->toString() << endl;
                    c->markAsJumbo();
                    // we increment moveCount so we do another round right away
                    movedCount++;
                }
            }
        } catch (const DBException& ex) {
            // Per-chunk failures must not abort the whole round.
            warning() << "could not move chunk " << chunkInfo.chunk.toString()
                      << ", continuing balancing round" << causedBy(ex) << endl;
        }
    }

    return movedCount;
}