bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ ShardConnection::sync(); string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ){ errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ){ errmsg = "ns not sharded. have to shard before can split"; return false; } BSONObj find = cmdObj.getObjectField( "find" ); if ( find.isEmpty() ){ find = cmdObj.getObjectField( "middle" ); if ( find.isEmpty() ){ errmsg = "need to specify find or middle"; return false; } } ChunkManagerPtr info = config->getChunkManager( ns ); ChunkPtr old = info->findChunk( find ); return _split( result , errmsg , ns , info , old , cmdObj.getObjectField( "middle" ) ); }
ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ) {
    uassert( 8042 , "db doesn't have sharding enabled" , _shardingEnabled );

    scoped_lock lk( _lock );

    CollectionInfo& ci = _collections[ns];
    uassert( 8043 , "collection already sharded" , ! ci.isSharded() );

    log() << "enable sharding on: " << ns << " with shard key: " << fieldsAndOrder << endl;

    // From this point on, 'ns' is going to be treated as a sharded collection. We assume this is the first
    // time it is seen by the sharded system and thus create the first chunk for the collection. All the remaining
    // chunks will be created as a by-product of splitting.
    ci.shard( ns , fieldsAndOrder , unique );
    ChunkManagerPtr cm = ci.getCM();
    uassert( 13449 , "collection already sharded" , (cm->numChunks() == 0) );

    cm->createFirstChunk( getPrimary() );
    _save();

    try {
        cm->maybeChunkCollection();
    }
    catch ( UserException& e ) {
        // failure to chunk is not critical enough to abort the command (and undo the _save()'d configDB state)
        log() << "couldn't chunk recently created collection: " << ns << " " << e << endl;
    }

    return cm;
}
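// Hedged usage sketch (not part of the source above): DBConfig::shardCollection is reached through the
// mongos admin commands, typically "enableSharding" on the database followed by "shardCollection" on the
// namespace. The database "test", namespace "test.foo", key { x : 1 } and the use of the legacy C++
// driver's DBClientConnection are illustrative assumptions.
void exampleShardCollection( DBClientConnection& mongos ) {
    BSONObj res;
    mongos.runCommand( "admin" , BSON( "enableSharding" << "test" ) , res );
    mongos.runCommand( "admin" , BSON( "shardCollection" << "test.foo" << "key" << BSON( "x" << 1 ) ) , res );
}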
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){ while ( d.moreJSObjs() ){ BSONObj o = d.nextJsObj(); if ( ! manager->hasShardKey( o ) ){ bool bad = true; if ( manager->getShardKey().partOfShardKey( "_id" ) ){ BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ){ log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl; throw UserException( 8011 , "tried to insert object without shard key" ); } } ChunkPtr c = manager->findChunk( o ); log(4) << " server:" << c->getShard().toString() << " " << o << endl; insert( c->getShard() , r.getns() , o ); r.gotInsert(); c->splitIfShould( o.objsize() ); } }
void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ) { int flags = d.pullInt(); BSONObj query = d.nextJsObj(); uassert( 13506 , "$atomic not supported sharded" , query["$atomic"].eoo() ); uassert( 10201 , "invalid update" , d.moreJSObjs() ); BSONObj toupdate = d.nextJsObj(); BSONObj chunkFinder = query; bool upsert = flags & UpdateOption_Upsert; bool multi = flags & UpdateOption_Multi; uassert( 10202 , "can't mix multi and upsert and sharding" , ! ( upsert && multi ) ); if (upsert) { uassert(8012, "can't upsert something without shard key", (manager->hasShardKey(toupdate) || (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query)))); BSONObj key = manager->getShardKey().extractKey(query); BSONForEach(e, key) { uassert(13465, "shard key in upsert query must be an exact match", getGtLtOp(e) == BSONObj::Equality); } }
/** * Returns true if request is a query for sharded indexes. */ static bool doShardedIndexQuery(OperationContext* txn, Request& r, const QuerySpec& qSpec) { // Extract the ns field from the query, which may be embedded within the "query" or // "$query" field. auto nsField = qSpec.filter()["ns"]; if (nsField.eoo()) { return false; } const NamespaceString indexNSSQuery(nsField.str()); auto status = grid.catalogCache()->getDatabase(txn, indexNSSQuery.db().toString()); if (!status.isOK()) { return false; } shared_ptr<DBConfig> config = status.getValue(); if (!config->isSharded(indexNSSQuery.ns())) { return false; } // if you are querying on system.indexes, we need to make sure we go to a shard // that actually has chunks. This is not a perfect solution (what if you just // look at all indexes), but better than doing nothing. ShardPtr shard; ChunkManagerPtr cm; config->getChunkManagerOrPrimary(indexNSSQuery.ns(), cm, shard); if (cm) { set<ShardId> shardIds; cm->getAllShardIds(&shardIds); verify(shardIds.size() > 0); shard = grid.shardRegistry()->getShard(*shardIds.begin()); } ShardConnection dbcon(shard->getConnString(), r.getns()); DBClientBase& c = dbcon.conn(); string actualServer; Message response; bool ok = c.call(r.m(), response, true, &actualServer); uassert(10200, "mongos: error calling db", ok); { QueryResult::View qr = response.singleData().view2ptr(); if (qr.getResultFlags() & ResultFlag_ShardConfigStale) { dbcon.done(); // Version is zero b/c this is deprecated codepath throw RecvStaleConfigException(r.getns(), "Strategy::doQuery", ChunkVersion(0, 0, OID()), ChunkVersion(0, 0, OID())); } } r.reply(response, actualServer.size() ? actualServer : c.getServerAddress()); dbcon.done(); return true; }
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks , bool secondaryThrottle ) { int movedCount = 0; for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ) { const CandidateChunk& chunkInfo = *it->get(); DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns ); verify( cfg ); ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns ); verify( cm ); ChunkPtr c = cm->findChunk( chunkInfo.chunk.min ); if ( c->getMin().woCompare( chunkInfo.chunk.min ) || c->getMax().woCompare( chunkInfo.chunk.max ) ) { // likely a split happened somewhere cm = cfg->getChunkManager( chunkInfo.ns , true /* reload */); verify( cm ); c = cm->findChunk( chunkInfo.chunk.min ); if ( c->getMin().woCompare( chunkInfo.chunk.min ) || c->getMax().woCompare( chunkInfo.chunk.max ) ) { log() << "chunk mismatch after reload, ignoring will retry issue " << chunkInfo.chunk.toString() << endl; continue; } } BSONObj res; if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , Chunk::MaxChunkSize , secondaryThrottle , res ) ) { movedCount++; continue; } // the move requires acquiring the collection metadata's lock, which can fail log() << "balancer move failed: " << res << " from: " << chunkInfo.from << " to: " << chunkInfo.to << " chunk: " << chunkInfo.chunk << endl; if ( res["chunkTooBig"].trueValue() ) { // reload just to be safe cm = cfg->getChunkManager( chunkInfo.ns ); verify( cm ); c = cm->findChunk( chunkInfo.chunk.min ); log() << "forcing a split because migrate failed for size reasons" << endl; res = BSONObj(); c->singleSplit( true , res ); log() << "forced split results: " << res << endl; if ( ! res["ok"].trueValue() ) { log() << "marking chunk as jumbo: " << c->toString() << endl; c->markAsJumbo(); // we increment moveCount so we do another round right away movedCount++; } } } return movedCount; }
bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { ShardConnection::sync(); string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ) { errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ) { errmsg = "ns not sharded. have to shard before can split"; return false; } BSONObj find = cmdObj.getObjectField( "find" ); if ( find.isEmpty() ) { find = cmdObj.getObjectField( "middle" ); if ( find.isEmpty() ) { errmsg = "need to specify find or middle"; return false; } } ChunkManagerPtr info = config->getChunkManager( ns ); ChunkPtr chunk = info->findChunk( find ); BSONObj middle = cmdObj.getObjectField( "middle" ); assert( chunk.get() ); log() << "splitting: " << ns << " shard: " << chunk << endl; BSONObj res; ChunkPtr p; if ( middle.isEmpty() ) { p = chunk->singleSplit( true /* force a split even if not enough data */ , res ); } else { // sanity check if the key provided is a valid split point if ( ( middle == chunk->getMin() ) || ( middle == chunk->getMax() ) ) { errmsg = "cannot split on initial or final chunk's key"; return false; } vector<BSONObj> splitPoints; splitPoints.push_back( middle ); p = chunk->multiSplit( splitPoints , res ); } if ( p.get() == NULL ) { errmsg = "split failed"; result.append( "cause" , res ); return false; } return true; }
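// Hedged usage sketch for the command above, assuming it is registered as "split" and that "test.foo"
// is sharded on { x : 1 }; the namespace, key values, and DBClientConnection to mongos are assumptions.
void exampleSplitCommand( DBClientConnection& mongos ) {
    BSONObj res;
    // split exactly at { x : 5 } (the "middle" branch above)
    mongos.runCommand( "admin" , BSON( "split" << "test.foo" << "middle" << BSON( "x" << 5 ) ) , res );
    // or let mongos choose a split point for the chunk containing { x : 5 } (the "find" branch)
    mongos.runCommand( "admin" , BSON( "split" << "test.foo" << "find" << BSON( "x" << 5 ) ) , res );
}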
bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ ShardConnection::sync(); Timer t; string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ){ errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ){ errmsg = "ns not sharded. have to shard before can move a chunk"; return false; } BSONObj find = cmdObj.getObjectField( "find" ); if ( find.isEmpty() ){ errmsg = "need to specify find. see help"; return false; } string toString = cmdObj["to"].valuestrsafe(); if ( ! toString.size() ){ errmsg = "you have to specify where you want to move the chunk"; return false; } Shard to = Shard::make( toString ); // so far, chunk size serves test purposes; it may or may not become a supported parameter long long maxChunkSizeBytes = cmdObj["maxChunkSizeBytes"].numberLong(); if ( maxChunkSizeBytes == 0 ) { maxChunkSizeBytes = Chunk::MaxChunkSize; } tlog() << "CMD: movechunk: " << cmdObj << endl; ChunkManagerPtr info = config->getChunkManager( ns ); ChunkPtr c = info->findChunk( find ); const Shard& from = c->getShard(); if ( from == to ){ errmsg = "that chunk is already on that shard"; return false; } BSONObj res; if ( ! c->moveAndCommit( to , maxChunkSizeBytes , res ) ){ errmsg = "move failed"; result.append( "cause" , res ); return false; } result.append( "millis" , t.millis() ); return true; }
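// Hedged usage sketch for the command above, assuming it is registered as "moveChunk"; the namespace
// "test.foo", the key value { x : 5 } and the destination shard name "shard0001" are assumptions.
void exampleMoveChunk( DBClientConnection& mongos ) {
    BSONObj res;
    bool ok = mongos.runCommand( "admin" ,
                                 BSON( "moveChunk" << "test.foo"
                                       << "find" << BSON( "x" << 5 )
                                       << "to" << "shard0001" ) ,
                                 res );
    if ( ok )
        cout << "moveChunk took " << res["millis"].numberLong() << " ms" << endl;
    else
        cout << "moveChunk failed, cause: " << res["cause"] << endl;
}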
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ) {
    int movedCount = 0;

    for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ) {
        const CandidateChunk& chunkInfo = *it->get();

        DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns );
        assert( cfg );

        ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns );
        assert( cm );

        const BSONObj& chunkToMove = chunkInfo.chunk;
        ChunkPtr c = cm->findChunk( chunkToMove["min"].Obj() );
        if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) || c->getMax().woCompare( chunkToMove["max"].Obj() ) ) {
            // likely a split happened somewhere
            cm = cfg->getChunkManager( chunkInfo.ns , true /* reload */);
            assert( cm );

            c = cm->findChunk( chunkToMove["min"].Obj() );
            if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) || c->getMax().woCompare( chunkToMove["max"].Obj() ) ) {
                log() << "chunk mismatch after reload, ignoring will retry issue cm: "
                      << c->getMin() << " min: " << chunkToMove["min"].Obj() << endl;
                continue;
            }
        }

        BSONObj res;
        if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , Chunk::MaxChunkSize , res ) ) {
            movedCount++;
            continue;
        }

        // the move requires acquiring the collection metadata's lock, which can fail
        log() << "balancer move failed: " << res << " from: " << chunkInfo.from << " to: " << chunkInfo.to
              << " chunk: " << chunkToMove << endl;

        if ( res["chunkTooBig"].trueValue() ) {
            // reload just to be safe
            cm = cfg->getChunkManager( chunkInfo.ns );
            assert( cm );
            c = cm->findChunk( chunkToMove["min"].Obj() );

            log() << "forcing a split because migrate failed for size reasons" << endl;

            res = BSONObj();
            c->singleSplit( true , res );
            log() << "forced split results: " << res << endl;

            // TODO: if the split fails, mark as jumbo SERVER-2571
        }
    }

    return movedCount;
}
void checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative ){
    // TODO: cache, optimize, etc...

    WriteBackListener::init( conn );

    DBConfigPtr conf = grid.getDBConfig( ns );
    if ( ! conf )
        return;

    ShardChunkVersion version = 0;
    unsigned long long officialSequenceNumber = 0;

    ChunkManagerPtr manager;
    const bool isSharded = conf->isSharded( ns );
    if ( isSharded ){
        manager = conf->getChunkManager( ns , authoritative );
        officialSequenceNumber = manager->getSequenceNumber();
    }

    unsigned long long & sequenceNumber = checkShardVersionLastSequence[ make_pair(&conn,ns) ];
    if ( sequenceNumber == officialSequenceNumber )
        return;

    if ( isSharded ){
        version = manager->getVersion( Shard::make( conn.getServerAddress() ) );
    }

    log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns
           << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber
           << " version: " << version << " manager: " << manager.get() << endl;

    BSONObj result;
    if ( setShardVersion( conn , ns , version , authoritative , result ) ){
        // success!
        log(1) << " setShardVersion success!" << endl;
        sequenceNumber = officialSequenceNumber;
        dassert( sequenceNumber == checkShardVersionLastSequence[ make_pair(&conn,ns) ] );
        return;
    }

    log(1) << " setShardVersion failed!\n" << result << endl;

    if ( result.getBoolField( "need_authoritative" ) )
        massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative );

    if ( ! authoritative ){
        checkShardVersion( conn , ns , 1 );
        return;
    }

    log() << " setShardVersion failed: " << result << endl;
    massert( 10429 , (string)"setShardVersion failed! " + result.jsonString() , 0 );
}
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) { while ( d.moreJSObjs() ) { BSONObj o = d.nextJsObj(); if ( ! manager->hasShardKey( o ) ) { bool bad = true; if ( manager->getShardKey().partOfShardKey( "_id" ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ) { log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl; throw UserException( 8011 , "tried to insert object without shard key" ); } } // Many operations benefit from having the shard key early in the object o = manager->getShardKey().moveToFront(o); const int maxTries = 10; bool gotThrough = false; for ( int i=0; i<maxTries; i++ ) { try { ChunkPtr c = manager->findChunk( o ); log(4) << " server:" << c->getShard().toString() << " " << o << endl; insert( c->getShard() , r.getns() , o ); r.gotInsert(); if ( r.getClientInfo()->autoSplitOk() ) c->splitIfShould( o.objsize() ); gotThrough = true; break; } catch ( StaleConfigException& e ) { log( i < ( maxTries / 2 ) ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl; r.reset(); manager = r.getChunkManager(); uassert(14804, "collection no longer sharded", manager); } sleepmillis( i * 200 ); } assert( inShutdown() || gotThrough ); } }
/** * Returns true if request is a query for sharded indexes. */ static bool doShardedIndexQuery( Request& r, const QuerySpec& qSpec ) { // Extract the ns field from the query, which may be embedded within the "query" or // "$query" field. string indexNSQuery(qSpec.filter()["ns"].str()); DBConfigPtr config = grid.getDBConfig( r.getns() ); if ( !config->isSharded( indexNSQuery )) { return false; } // if you are querying on system.indexes, we need to make sure we go to a shard // that actually has chunks. This is not a perfect solution (what if you just // look at all indexes), but better than doing nothing. ShardPtr shard; ChunkManagerPtr cm; config->getChunkManagerOrPrimary( indexNSQuery, cm, shard ); if ( cm ) { set<Shard> shards; cm->getAllShards( shards ); verify( shards.size() > 0 ); shard.reset( new Shard( *shards.begin() ) ); } ShardConnection dbcon( *shard , r.getns() ); DBClientBase &c = dbcon.conn(); string actualServer; Message response; bool ok = c.call( r.m(), response, true , &actualServer ); uassert( 10200 , "mongos: error calling db", ok ); { QueryResult *qr = (QueryResult *) response.singleData(); if ( qr->resultFlags() & ResultFlag_ShardConfigStale ) { dbcon.done(); // Version is zero b/c this is deprecated codepath throw RecvStaleConfigException( r.getns(), "Strategy::doQuery", ChunkVersion( 0, 0, OID() ), ChunkVersion( 0, 0, OID() )); } } r.reply( response , actualServer.size() ? actualServer : c.getServerAddress() ); dbcon.done(); return true; }
ChunkManagerPtr DBConfig::getChunkManager( const string& ns , bool reload ){ scoped_lock lk( _lock ); ChunkManagerPtr m = _shards[ns]; if ( m && ! reload ) return m; uassert( 10181 , (string)"not sharded:" + ns , _isSharded( ns ) ); if ( m && reload ) log() << "reloading shard info for: " << ns << endl; m.reset( new ChunkManager( this , ns , _sharded[ ns ].key , _sharded[ns].unique ) ); _shards[ns] = m; return m; }
// TODO: Same limitations as other mongos metadata commands, sometimes we'll be stale here
// and fail. Need to better integrate targeting with commands.
ShardPtr guessMergeShard( const NamespaceString& nss, const BSONObj& minKey ) {

    DBConfigPtr config = grid.getDBConfig( nss.ns() );
    if ( !config->isSharded( nss ) ) {
        config->reload();
        if ( !config->isSharded( nss ) ) {
            return ShardPtr();
        }
    }

    ChunkManagerPtr manager = config->getChunkManager( nss );
    if ( !manager )
        return ShardPtr();

    ChunkPtr chunk = manager->findChunkForDoc( minKey );
    if ( !chunk )
        return ShardPtr();

    return ShardPtr( new Shard( chunk->getShard() ) );
}
void _delete( Request& r , DbMessage& d, ChunkManagerPtr manager ){ int flags = d.pullInt(); bool justOne = flags & 1; uassert( 10203 , "bad delete message" , d.moreJSObjs() ); BSONObj pattern = d.nextJsObj(); vector<shared_ptr<ChunkRange> > chunks; manager->getChunksForQuery( chunks , pattern ); log(2) << "delete : " << pattern << " \t " << chunks.size() << " justOne: " << justOne << endl; if ( chunks.size() == 1 ){ doWrite( dbDelete , r , chunks[0]->getShard() ); return; } if ( justOne && ! pattern.hasField( "_id" ) ) throw UserException( 8015 , "can only delete with a non-shard key pattern if can delete as many as we find" ); set<Shard> seen; for ( vector<shared_ptr<ChunkRange> >::iterator i=chunks.begin(); i!=chunks.end(); i++){ shared_ptr<ChunkRange> c = *i; if ( seen.count( c->getShard() ) ) continue; seen.insert( c->getShard() ); doWrite( dbDelete , r , c->getShard() ); } }
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){ while ( d.moreJSObjs() ){ BSONObj o = d.nextJsObj(); if ( ! manager->hasShardKey( o ) ){ bool bad = true; if ( manager->getShardKey().partOfShardKey( "_id" ) ){ BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ){ log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl; throw UserException( 8011 , "tried to insert object without shard key" ); } } bool gotThrough = false; for ( int i=0; i<10; i++ ){ try { ChunkPtr c = manager->findChunk( o ); log(4) << " server:" << c->getShard().toString() << " " << o << endl; insert( c->getShard() , r.getns() , o ); r.gotInsert(); c->splitIfShould( o.objsize() ); gotThrough = true; break; } catch ( StaleConfigException& ){ log(1) << "retrying insert because of StaleConfigException: " << o << endl; r.reset(); manager = r.getChunkManager(); } sleepmillis( i * 200 ); } assert( gotThrough ); } }
void handleIndexWrite( int op , Request& r ) {
    DbMessage& d = r.d();

    if ( op == dbInsert ) {
        while( d.moreJSObjs() ) {
            BSONObj o = d.nextJsObj();
            const char * ns = o["ns"].valuestr();
            if ( r.getConfig()->isSharded( ns ) ) {
                BSONObj newIndexKey = o["key"].embeddedObjectUserCheck();

                uassert( 10205 , (string)"can't use unique indexes with sharding ns:" + ns +
                         " key: " + o["key"].embeddedObjectUserCheck().toString() ,
                         IndexDetails::isIdIndexPattern( newIndexKey ) ||
                         ! o["unique"].trueValue() ||
                         r.getConfig()->getChunkManager( ns )->getShardKey().isPrefixOf( newIndexKey ) );

                ChunkManagerPtr cm = r.getConfig()->getChunkManager( ns );
                assert( cm );

                set<Shard> shards;
                cm->getAllShards(shards);
                for (set<Shard>::const_iterator it=shards.begin(), end=shards.end(); it != end; ++it)
                    doWrite( op , r , *it );
            }
            else {
                doWrite( op , r , r.primaryShard() );
            }
            r.gotInsert();
        }
    }
    else if ( op == dbUpdate ) {
        throw UserException( 8050 , "can't update system.indexes" );
    }
    else if ( op == dbDelete ) {
        // TODO
        throw UserException( 8051 , "can't delete indexes on sharded collection yet" );
    }
    else {
        log() << "handleIndexWrite invalid write op: " << op << endl;
        throw UserException( 8052 , "handleIndexWrite invalid write op" );
    }
}
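// Hedged illustration of the unique-index rule enforced above (uassert 10205): with the legacy write
// protocol an index build arrives as an insert into <db>.system.indexes, and mongos only allows a unique
// index whose key is prefixed by the shard key. The collection "test.foo" sharded on { x : 1 } and the
// index names below are illustrative assumptions.
void exampleIndexWrites( DBClientConnection& mongos ) {
    // accepted: unique index whose key starts with the shard key { x : 1 }
    mongos.insert( "test.system.indexes" ,
                   BSON( "ns" << "test.foo" << "key" << BSON( "x" << 1 << "y" << 1 )
                              << "name" << "x_1_y_1" << "unique" << true ) );
    // rejected by the uassert above: unique index that does not start with the shard key
    mongos.insert( "test.system.indexes" ,
                   BSON( "ns" << "test.foo" << "key" << BSON( "y" << 1 )
                              << "name" << "y_1" << "unique" << true ) );
}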
ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ){ if ( ! _shardingEnabled ) throw UserException( 8042 , "db doesn't have sharding enabled" ); scoped_lock lk( _lock ); ChunkManagerPtr info = _shards[ns]; if ( info ) return info; if ( _isSharded( ns ) ) throw UserException( 8043 , "already sharded" ); log() << "enable sharding on: " << ns << " with shard key: " << fieldsAndOrder << endl; _sharded[ns] = CollectionInfo( fieldsAndOrder , unique ); info.reset( new ChunkManager( this , ns , fieldsAndOrder , unique ) ); _shards[ns] = info; return info; }
/** * Splits the chunks touched based from the targeter stats if needed. */ static void splitIfNeeded( const string& ns, const TargeterStats& stats ) { if ( !Chunk::ShouldAutoSplit ) { return; } DBConfigPtr config; try { config = grid.getDBConfig( ns ); } catch ( const DBException& ex ) { warning() << "failed to get database config for " << ns << " while checking for auto-split: " << causedBy( ex ) << endl; return; } ChunkManagerPtr chunkManager; ShardPtr dummyShard; config->getChunkManagerOrPrimary( ns, chunkManager, dummyShard ); if ( !chunkManager ) { return; } for ( map<BSONObj, int>::const_iterator it = stats.chunkSizeDelta.begin(); it != stats.chunkSizeDelta.end(); ++it ) { ChunkPtr chunk; try { chunk = chunkManager->findIntersectingChunk( it->first ); } catch ( const AssertionException& ex ) { warning() << "could not find chunk while checking for auto-split: " << causedBy( ex ) << endl; return; } chunk->splitIfShould( it->second ); } }
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ) {
    int movedCount = 0;

    for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ){
        const CandidateChunk& chunkInfo = *it->get();

        DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns );
        assert( cfg );

        ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns );
        assert( cm );

        const BSONObj& chunkToMove = chunkInfo.chunk;
        ChunkPtr c = cm->findChunk( chunkToMove["min"].Obj() );
        if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) ){
            // likely a split happened somewhere
            cm = cfg->getChunkManager( chunkInfo.ns , true );
            assert( cm );

            c = cm->findChunk( chunkToMove["min"].Obj() );
            if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) ){
                log() << "balancer: chunk mismatch after reload, ignoring will retry issue cm: "
                      << c->getMin() << " min: " << chunkToMove["min"].Obj() << endl;
                continue;
            }
        }

        string errmsg;
        if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , errmsg ) ){
            movedCount++;
            continue;
        }

        log() << "balancer: MOVE FAILED **** " << errmsg << "\n"
              << " from: " << chunkInfo.from << " to: " << chunkInfo.to << " chunk: " << chunkToMove << endl;
    }

    return movedCount;
}
void insertSharded( DBConfigPtr conf, const char* ns, BSONObj& o, int flags ) {
    ChunkManagerPtr manager = conf->getChunkManager(ns);
    if ( ! manager->hasShardKey( o ) ) {
        bool bad = true;

        if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
            BSONObjBuilder b;
            b.appendOID( "_id" , 0 , true );
            b.appendElements( o );
            o = b.obj();
            bad = ! manager->hasShardKey( o );
        }

        if ( bad ) {
            log() << "tried to insert object without shard key: " << ns << " " << o << endl;
            uasserted( 14842 , "tried to insert object without shard key" );
        }
    }

    // Many operations benefit from having the shard key early in the object
    o = manager->getShardKey().moveToFront(o);

    const int maxTries = 30;

    for ( int i=0; i<maxTries; i++ ) {
        try {
            ChunkPtr c = manager->findChunk( o );
            log(4) << " server:" << c->getShard().toString() << " " << o << endl;
            insert( c->getShard() , ns , o , flags);

            // r.gotInsert();
            // if ( r.getClientInfo()->autoSplitOk() )
            c->splitIfShould( o.objsize() );
            break;
        }
        catch ( StaleConfigException& e ) {
            int logLevel = i < ( maxTries / 2 );
            LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl;

            // r.reset();

            unsigned long long old = manager->getSequenceNumber();
            manager = conf->getChunkManager(ns);

            LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl;

            if (!manager) {
                uasserted(14843, "collection no longer sharded");
            }
        }
        sleepmillis( i * 20 );
    }
}
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
    string ns = cmdObj.firstElement().valuestrsafe();
    if ( ns.size() == 0 ) {
        errmsg = "need to specify full namespace";
        return false;
    }

    DBConfigPtr config = grid.getDBConfig( ns );
    if ( ! config->isSharded( ns ) ) {
        errmsg = "ns not sharded.";
        return false;
    }

    ChunkManagerPtr cm = config->getChunkManagerIfExists( ns );
    if ( ! cm ) {
        errmsg = "no chunk manager?";
        return false;
    }

    cm->_printChunks();
    result.appendTimestamp( "version" , cm->getVersion().toLong() );

    return true;
}
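// Hedged usage sketch, assuming the command above is registered as "getShardVersion"; the namespace
// "test.foo" and the DBClientConnection to mongos are illustrative assumptions.
void exampleGetShardVersion( DBClientConnection& mongos ) {
    BSONObj res;
    if ( mongos.runCommand( "admin" , BSON( "getShardVersion" << "test.foo" ) , res ) )
        cout << "collection version: " << res["version"] << endl;
}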
void _delete( Request& r , DbMessage& d, ChunkManagerPtr manager ){ int flags = d.pullInt(); bool justOne = flags & 1; uassert( 10203 , "bad delete message" , d.moreJSObjs() ); BSONObj pattern = d.nextJsObj(); set<Shard> shards; int left = 5; while ( true ){ try { manager->getShardsForQuery( shards , pattern ); log(2) << "delete : " << pattern << " \t " << shards.size() << " justOne: " << justOne << endl; if ( shards.size() == 1 ){ doWrite( dbDelete , r , *shards.begin() ); return; } break; } catch ( StaleConfigException& e ){ if ( left <= 0 ) throw e; left--; log() << "delete failed b/c of StaleConfigException, retrying " << " left:" << left << " ns: " << r.getns() << " patt: " << pattern << endl; r.reset( false ); shards.clear(); manager = r.getChunkManager(); } } if ( justOne && ! pattern.hasField( "_id" ) ) throw UserException( 8015 , "can only delete with a non-shard key pattern if can delete as many as we find" ); for ( set<Shard>::iterator i=shards.begin(); i!=shards.end(); i++){ int * x = (int*)(r.d().afterNS()); x[0] |= RemoveOption_Broadcast; doWrite( dbDelete , r , *i , false ); } }
void _groupInserts( ChunkManagerPtr manager, vector<BSONObj>& inserts, map<ChunkPtr,vector<BSONObj> >& insertsForChunks ){ // Redo all inserts for chunks which have changed map<ChunkPtr,vector<BSONObj> >::iterator i = insertsForChunks.begin(); while( ! insertsForChunks.empty() && i != insertsForChunks.end() ){ if( ! manager->compatibleWith( i->first ) ){ inserts.insert( inserts.end(), i->second.begin(), i->second.end() ); insertsForChunks.erase( i++ ); } else ++i; } // Figure out inserts we haven't chunked yet for( vector<BSONObj>::iterator i = inserts.begin(); i != inserts.end(); ++i ){ BSONObj o = *i; if ( ! manager->hasShardKey( o ) ) { bool bad = true; // Add autogenerated _id to item and see if we now have a shard key if ( manager->getShardKey().partOfShardKey( "_id" ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ) { // TODO: log() << "tried to insert object with no valid shard key for " << manager->getShardKey() << " : " << o << endl; uassert( 8011, str::stream() << "tried to insert object with no valid shard key for " << manager->getShardKey().toString() << " : " << o.toString(), false ); } } // Many operations benefit from having the shard key early in the object o = manager->getShardKey().moveToFront(o); insertsForChunks[manager->findChunk(o)].push_back(o); } inserts.clear(); }
/** * @return true if had to do something */ bool checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative , int tryNumber ) { // TODO: cache, optimize, etc... WriteBackListener::init( conn ); DBConfigPtr conf = grid.getDBConfig( ns ); if ( ! conf ) return false; unsigned long long officialSequenceNumber = 0; ChunkManagerPtr manager; const bool isSharded = conf->isSharded( ns ); if ( isSharded ) { manager = conf->getChunkManager( ns , authoritative ); officialSequenceNumber = manager->getSequenceNumber(); } // has the ChunkManager been reloaded since the last time we updated the connection-level version? // (ie, last time we issued the setShardVersions below) unsigned long long sequenceNumber = connectionShardStatus.getSequence(&conn,ns); if ( sequenceNumber == officialSequenceNumber ) { return false; } ShardChunkVersion version = 0; if ( isSharded ) { version = manager->getVersion( Shard::make( conn.getServerAddress() ) ); } log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber << " version: " << version << " manager: " << manager.get() << endl; BSONObj result; if ( setShardVersion( conn , ns , version , authoritative , result ) ) { // success! LOG(1) << " setShardVersion success: " << result << endl; connectionShardStatus.setSequence( &conn , ns , officialSequenceNumber ); return true; } log(1) << " setShardVersion failed!\n" << result << endl; if ( result.getBoolField( "need_authoritative" ) ) massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative ); if ( ! authoritative ) { checkShardVersion( conn , ns , 1 , tryNumber + 1 ); return true; } if ( tryNumber < 4 ) { log(1) << "going to retry checkShardVersion" << endl; sleepmillis( 10 ); checkShardVersion( conn , ns , 1 , tryNumber + 1 ); return true; } log() << " setShardVersion failed: " << result << endl; massert( 10429 , (string)"setShardVersion failed! " + result.jsonString() , 0 ); return true; }
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) { const int flags = d.reservedField() | InsertOption_ContinueOnError; // ContinueOnError is always on when using sharding. map<ChunkPtr, vector<BSONObj> > insertsForChunk; // Group bulk insert for appropriate shards try { while ( d.moreJSObjs() ) { BSONObj o = d.nextJsObj(); if ( ! manager->hasShardKey( o ) ) { bool bad = true; if ( manager->getShardKey().partOfShardKey( "_id" ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ) { log() << "tried to insert object with no valid shard key: " << r.getns() << " " << o << endl; uasserted( 8011 , "tried to insert object with no valid shard key" ); } } // Many operations benefit from having the shard key early in the object o = manager->getShardKey().moveToFront(o); insertsForChunk[manager->findChunk(o)].push_back(o); } for (map<ChunkPtr, vector<BSONObj> >::iterator it = insertsForChunk.begin(); it != insertsForChunk.end(); ++it) { ChunkPtr c = it->first; vector<BSONObj> objs = it->second; const int maxTries = 30; bool gotThrough = false; for ( int i=0; i<maxTries; i++ ) { try { LOG(4) << " server:" << c->getShard().toString() << " bulk insert " << objs.size() << " documents" << endl; insert( c->getShard() , r.getns() , objs , flags); int bytesWritten = 0; for (vector<BSONObj>::iterator vecIt = objs.begin(); vecIt != objs.end(); ++vecIt) { r.gotInsert(); // Record the correct number of individual inserts bytesWritten += (*vecIt).objsize(); } if ( r.getClientInfo()->autoSplitOk() ) c->splitIfShould( bytesWritten ); gotThrough = true; break; } catch ( StaleConfigException& e ) { int logLevel = i < ( maxTries / 2 ); LOG( logLevel ) << "retrying bulk insert of " << objs.size() << " documents because of StaleConfigException: " << e << endl; r.reset(); manager = r.getChunkManager(); if( ! manager ) { uasserted(14804, "collection no longer sharded"); } unsigned long long old = manager->getSequenceNumber(); LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl; } sleepmillis( i * 20 ); } assert( inShutdown() || gotThrough ); // not caught below } } catch (const UserException&){ if (!d.moreJSObjs()){ throw; } // Ignore and keep going. ContinueOnError is implied with sharding. } }
/** * @return true if had to do something */ bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager, bool authoritative , int tryNumber ) { // TODO: cache, optimize, etc... WriteBackListener::init( *conn_in ); DBConfigPtr conf = grid.getDBConfig( ns ); if ( ! conf ) return false; DBClientBase* conn = getVersionable( conn_in ); verify(conn); // errors thrown above unsigned long long officialSequenceNumber = 0; ChunkManagerPtr manager; const bool isSharded = conf->isSharded( ns ); if ( isSharded ) { manager = conf->getChunkManagerIfExists( ns , authoritative ); // It's possible the chunk manager was reset since we checked whether sharded was true, // so must check this here. if( manager ) officialSequenceNumber = manager->getSequenceNumber(); } // Check this manager against the reference manager if( isSharded && manager ){ Shard shard = Shard::make( conn->getServerAddress() ); if( refManager && ! refManager->compatibleWith( manager, shard ) ){ throw SendStaleConfigException( ns, str::stream() << "manager (" << manager->getVersion( shard ).toString() << " : " << manager->getSequenceNumber() << ") " << "not compatible with reference manager (" << refManager->getVersion( shard ).toString() << " : " << refManager->getSequenceNumber() << ") " << "on shard " << shard.getName() << " (" << shard.getAddress().toString() << ")", refManager->getVersion( shard ), manager->getVersion( shard ) ); } } else if( refManager ){ Shard shard = Shard::make( conn->getServerAddress() ); string msg( str::stream() << "not sharded (" << ( (manager.get() == 0) ? string( "<none>" ) : str::stream() << manager->getSequenceNumber() ) << ") but has reference manager (" << refManager->getSequenceNumber() << ") " << "on conn " << conn->getServerAddress() << " (" << conn_in->getServerAddress() << ")" ); throw SendStaleConfigException( ns, msg, refManager->getVersion( shard ), ShardChunkVersion( 0, OID() )); } // has the ChunkManager been reloaded since the last time we updated the connection-level version? // (ie., last time we issued the setShardVersions below) unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns); if ( sequenceNumber == officialSequenceNumber ) { return false; } ShardChunkVersion version = ShardChunkVersion( 0, OID() ); if ( isSharded && manager ) { version = manager->getVersion( Shard::make( conn->getServerAddress() ) ); } if( ! version.isSet() ){ LOG(0) << "resetting shard version of " << ns << " on " << conn->getServerAddress() << ", " << ( ! isSharded ? "no longer sharded" : ( ! manager ? "no chunk manager found" : "version is zero" ) ) << endl; } LOG(2) << " have to set shard version for conn: " << conn->getServerAddress() << " ns:" << ns << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber << " version: " << version << " manager: " << manager.get() << endl; const string versionableServerAddress(conn->getServerAddress()); BSONObj result; if ( setShardVersion( *conn , ns , version , authoritative , result ) ) { // success! LOG(1) << " setShardVersion success: " << result << endl; connectionShardStatus.setSequence( conn , ns , officialSequenceNumber ); return true; } LOG(1) << " setShardVersion failed!\n" << result << endl; if ( result["need_authoritative"].trueValue() ) massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative ); if ( ! 
authoritative ) { // use the original connection and get a fresh versionable connection // since conn can be invalidated (or worse, freed) after the failure checkShardVersion(conn_in, ns, refManager, 1, tryNumber + 1); return true; } if ( result["reloadConfig"].trueValue() ) { if( result["version"].timestampTime() == 0 ){ warning() << "reloading full configuration for " << conf->getName() << ", connection state indicates significant version changes" << endl; // reload db conf->reload(); } else { // reload config conf->getChunkManager( ns , true ); } } const int maxNumTries = 7; if ( tryNumber < maxNumTries ) { LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 ) << "going to retry checkShardVersion host: " << versionableServerAddress << " " << result << endl; sleepmillis( 10 * tryNumber ); // use the original connection and get a fresh versionable connection // since conn can be invalidated (or worse, freed) after the failure checkShardVersion(conn_in, ns, refManager, true, tryNumber + 1); return true; } string errmsg = str::stream() << "setShardVersion failed host: " << versionableServerAddress << " " << result; log() << " " << errmsg << endl; massert( 10429 , errmsg , 0 ); return true; }
virtual void queryOp( Request& r ){ QueryMessage q( r.d() ); log(3) << "shard query: " << q.ns << " " << q.query << endl; if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") ) throw UserException( 8010 , "something is wrong, shouldn't see a command here" ); ChunkManagerPtr info = r.getChunkManager(); assert( info ); Query query( q.query ); set<Shard> shards; info->getShardsForQuery( shards , query.getFilter() ); set<ServerAndQuery> servers; for ( set<Shard>::iterator i = shards.begin(); i != shards.end(); i++ ){ servers.insert( ServerAndQuery( i->getConnString() , BSONObj() ) ); } if ( logLevel > 4 ){ StringBuilder ss; ss << " shard query servers: " << servers.size() << '\n'; for ( set<ServerAndQuery>::iterator i = servers.begin(); i!=servers.end(); i++ ){ const ServerAndQuery& s = *i; ss << " " << s.toString() << '\n'; } log() << ss.str(); } ClusteredCursor * cursor = 0; BSONObj sort = query.getSort(); if ( sort.isEmpty() ){ cursor = new SerialServerClusteredCursor( servers , q ); } else { cursor = new ParallelSortClusteredCursor( servers , q , sort ); } assert( cursor ); try { cursor->init(); log(5) << " cursor type: " << cursor->type() << endl; shardedCursorTypes.hit( cursor->type() ); if ( query.isExplain() ){ BSONObj explain = cursor->explain(); replyToQuery( 0 , r.p() , r.m() , explain ); delete( cursor ); return; } } catch(...) { delete cursor; throw; } ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor )); if ( ! cc->sendNextBatch( r ) ){ return; } log(6) << "storing cursor : " << cc->getId() << endl; cursorCache.store( cc ); }
void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ){ int flags = d.pullInt(); BSONObj query = d.nextJsObj(); uassert( 10201 , "invalid update" , d.moreJSObjs() ); BSONObj toupdate = d.nextJsObj(); BSONObj chunkFinder = query; bool upsert = flags & UpdateOption_Upsert; bool multi = flags & UpdateOption_Multi; uassert( 10202 , "can't mix multi and upsert and sharding" , ! ( upsert && multi ) ); if ( upsert && !(manager->hasShardKey(toupdate) || (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query)))) { throw UserException( 8012 , "can't upsert something without shard key" ); } bool save = false; if ( ! manager->hasShardKey( query ) ){ if ( multi ){ } else if ( strcmp( query.firstElement().fieldName() , "_id" ) || query.nFields() != 1 ){ throw UserException( 8013 , "can't do non-multi update with query that doesn't have the shard key" ); } else { save = true; chunkFinder = toupdate; } } if ( ! save ){ if ( toupdate.firstElement().fieldName()[0] == '$' ){ BSONObjIterator ops(toupdate); while(ops.more()){ BSONElement op(ops.next()); if (op.type() != Object) continue; BSONObjIterator fields(op.embeddedObject()); while(fields.more()){ const string field = fields.next().fieldName(); uassert(13123, "Can't modify shard key's value", ! manager->getShardKey().partOfShardKey(field)); } } } else if ( manager->hasShardKey( toupdate ) ){ uassert( 8014, "change would move shards!", manager->getShardKey().compare( query , toupdate ) == 0 ); } else { uasserted(12376, "shard key must be in update object"); } } if ( multi ){ set<Shard> shards; manager->getShardsForQuery( shards , chunkFinder ); int * x = (int*)(r.d().afterNS()); x[0] |= UpdateOption_Broadcast; for ( set<Shard>::iterator i=shards.begin(); i!=shards.end(); i++){ doWrite( dbUpdate , r , *i , false ); } } else { int left = 5; while ( true ){ try { ChunkPtr c = manager->findChunk( chunkFinder ); doWrite( dbUpdate , r , c->getShard() ); c->splitIfShould( d.msg().header()->dataLen() ); break; } catch ( StaleConfigException& e ){ if ( left <= 0 ) throw e; left--; log() << "update failed b/c of StaleConfigException, retrying " << " left:" << left << " ns: " << r.getns() << " query: " << query << endl; r.reset( false ); manager = r.getChunkManager(); } } } }
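// Hedged illustration of the routing rules enforced above (exceptions 8012 and 13123): assuming
// "test.foo" is sharded on { x : 1 } and a legacy C++ driver DBClientConnection to mongos, the first two
// updates can be routed while the last two are rejected. All names and values are illustrative.
void exampleShardedUpdates( DBClientConnection& mongos ) {
    // ok: upsert whose query carries the shard key, using a $-operator update
    mongos.update( "test.foo" , BSON( "x" << 5 ) , BSON( "$inc" << BSON( "n" << 1 ) ) , true /*upsert*/ );
    // ok: non-multi replacement update located by _id, with the shard key in the new document
    mongos.update( "test.foo" , BSON( "_id" << 1 ) , BSON( "x" << 5 << "n" << 1 ) );
    // rejected (8012): upsert with the shard key in neither the query nor the update document
    mongos.update( "test.foo" , BSON( "tag" << "a" ) , BSON( "$inc" << BSON( "n" << 1 ) ) , true );
    // rejected (13123): $-operator update that would modify the shard key's value
    mongos.update( "test.foo" , BSON( "x" << 5 ) , BSON( "$set" << BSON( "x" << 6 ) ) );
}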
/** * @return true if had to do something */ bool checkShardVersion( DBClientBase& conn_in , const string& ns , bool authoritative , int tryNumber ) { // TODO: cache, optimize, etc... WriteBackListener::init( conn_in ); DBConfigPtr conf = grid.getDBConfig( ns ); if ( ! conf ) return false; DBClientBase* conn = 0; switch ( conn_in.type() ) { case ConnectionString::INVALID: assert(0); break; case ConnectionString::MASTER: // great conn = &conn_in; break; case ConnectionString::PAIR: assert( ! "pair not support for sharding" ); break; case ConnectionString::SYNC: // TODO: we should check later that we aren't actually sharded on this conn = &conn_in; break; case ConnectionString::SET: DBClientReplicaSet* set = (DBClientReplicaSet*)&conn_in; conn = &(set->masterConn()); break; } assert(conn); unsigned long long officialSequenceNumber = 0; ChunkManagerPtr manager; const bool isSharded = conf->isSharded( ns ); if ( isSharded ) { manager = conf->getChunkManagerIfExists( ns , authoritative ); // It's possible the chunk manager was reset since we checked whether sharded was true, // so must check this here. if( manager ) officialSequenceNumber = manager->getSequenceNumber(); } // has the ChunkManager been reloaded since the last time we updated the connection-level version? // (ie., last time we issued the setShardVersions below) unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns); if ( sequenceNumber == officialSequenceNumber ) { return false; } ShardChunkVersion version = 0; if ( isSharded && manager ) { version = manager->getVersion( Shard::make( conn->getServerAddress() ) ); } LOG(2) << " have to set shard version for conn: " << conn << " ns:" << ns << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber << " version: " << version << " manager: " << manager.get() << endl; BSONObj result; if ( setShardVersion( *conn , ns , version , authoritative , result ) ) { // success! LOG(1) << " setShardVersion success: " << result << endl; connectionShardStatus.setSequence( conn , ns , officialSequenceNumber ); return true; } LOG(1) << " setShardVersion failed!\n" << result << endl; if ( result["need_authoritative"].trueValue() ) massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative ); if ( ! authoritative ) { checkShardVersion( *conn , ns , 1 , tryNumber + 1 ); return true; } if ( result["reloadConfig"].trueValue() ) { if( result["version"].timestampTime() == 0 ){ // reload db conf->reload(); } else { // reload config conf->getChunkManager( ns , true ); } } const int maxNumTries = 7; if ( tryNumber < maxNumTries ) { LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 ) << "going to retry checkShardVersion host: " << conn->getServerAddress() << " " << result << endl; sleepmillis( 10 * tryNumber ); checkShardVersion( *conn , ns , true , tryNumber + 1 ); return true; } string errmsg = str::stream() << "setShardVersion failed host: " << conn->getServerAddress() << " " << result; log() << " " << errmsg << endl; massert( 10429 , errmsg , 0 ); return true; }