/**
 * Inserts one document into the sharded collection `ns`, sending it to the shard that owns the
 * chunk found for the document.
 *
 * @param conf   database config used to obtain (and, after a stale-config error, re-obtain)
 *               the chunk manager for `ns`
 * @param ns     namespace being inserted into
 * @param o      document to insert; may be replaced in place (an "_id" element is prepended
 *               when needed, and the shard key fields are moved to the front)
 * @param flags  passed through unchanged to insert()
 *
 * Raises user assertion 14842 when the document has no shard key, and 14843 when the chunk
 * manager can no longer be obtained after a StaleConfigException.
 *
 * NOTE(review): if all 30 attempts end in StaleConfigException the loop simply ends; nothing is
 * inserted and no error is raised. Confirm this is intended.
 */
void insertSharded( DBConfigPtr conf, const char* ns, BSONObj& o, int flags ) {
    ChunkManagerPtr manager = conf->getChunkManager(ns);

    if ( ! manager->hasShardKey( o ) ) {

        bool bad = true;

        // If "_id" is part of the shard key, prepend an "_id" element (presumably a freshly
        // generated OID - confirm against BSONObjBuilder::appendOID) and check again.
        if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
            BSONObjBuilder b;
            b.appendOID( "_id" , 0 , true );
            b.appendElements( o );
            o = b.obj();
            bad = ! manager->hasShardKey( o );
        }

        if ( bad ) {
            log() << "tried to insert object without shard key: " << ns << " " << o << endl;
            uasserted( 14842 , "tried to insert object without shard key" );
        }

    }

    // Many operations benefit from having the shard key early in the object
    o = manager->getShardKey().moveToFront(o);

    const int maxTries = 30;

    for ( int i=0; i<maxTries; i++ ) {
        try {
            ChunkPtr c = manager->findChunk( o );
            log(4) << " server:" << c->getShard().toString() << " " << o << endl;
            insert( c->getShard() , ns , o , flags);

            // r.gotInsert();
            // if ( r.getClientInfo()->autoSplitOk() )
            c->splitIfShould( o.objsize() );
            break;
        }
        catch ( StaleConfigException& e ) {
            // Level 1 for the first half of the attempts, level 0 afterwards.
            int logLevel = i < ( maxTries / 2 );
            LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl;
            // r.reset();

            unsigned long long old = manager->getSequenceNumber();
            manager = conf->getChunkManager(ns);

            // Must be checked before the log line below, which dereferences the new manager.
            if (!manager) {
                uasserted(14843, "collection no longer sharded");
            }

            LOG( logLevel ) << " sequenece number - old: " << old << " new: " << manager->getSequenceNumber() << endl;
        }
        // Only reached after a stale-config failure (success breaks out above).
        sleepmillis( i * 20 );
    }
}
/**
 * Sends every document carried by `d` to the shard owning its chunk, one document at a time.
 *
 * A document lacking the shard key gets an "_id" element prepended when "_id" is part of the
 * shard key; if the key is still missing, UserException 8011 is thrown. Each document is tried
 * up to 10 times: on StaleConfigException the request is reset, the chunk manager is re-read
 * from the request (uassert 14804 if it is gone), and the thread sleeps attempt * 200 ms.
 */
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) {

    while ( d.moreJSObjs() ) {
        BSONObj doc = d.nextJsObj();

        if ( ! manager->hasShardKey( doc ) ) {

            bool keyMissing = true;

            if ( manager->getShardKey().partOfShardKey( "_id" ) ) {
                BSONObjBuilder withId;
                withId.appendOID( "_id" , 0 , true );
                withId.appendElements( doc );
                doc = withId.obj();
                keyMissing = ! manager->hasShardKey( doc );
            }

            if ( keyMissing ) {
                log() << "tried to insert object without shard key: " << r.getns() << " " << doc << endl;
                throw UserException( 8011 , "tried to insert object without shard key" );
            }

        }

        // Many operations benefit from having the shard key early in the object
        doc = manager->getShardKey().moveToFront(doc);

        const int maxTries = 10;

        bool delivered = false;
        for ( int attempt = 0; attempt < maxTries; attempt++ ) {
            try {
                ChunkPtr chunk = manager->findChunk( doc );
                log(4) << " server:" << chunk->getShard().toString() << " " << doc << endl;
                insert( chunk->getShard() , r.getns() , doc );

                r.gotInsert();
                if ( r.getClientInfo()->autoSplitOk() )
                    chunk->splitIfShould( doc.objsize() );

                delivered = true;
                break;
            }
            catch ( StaleConfigException& e ) {
                // Log level 1 during the first half of the attempts, 0 afterwards.
                log( attempt < ( maxTries / 2 ) ) << "retrying insert because of StaleConfigException: " << e << " object: " << doc << endl;

                r.reset();
                manager = r.getChunkManager();
                uassert(14804, "collection no longer sharded", manager);

                // Back off before the next attempt.
                sleepmillis( attempt * 200 );
            }
        }

        assert( inShutdown() || delivered );
    }
}
void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ) { int flags = d.pullInt(); BSONObj query = d.nextJsObj(); uassert( 13506 , "$atomic not supported sharded" , query["$atomic"].eoo() ); uassert( 10201 , "invalid update" , d.moreJSObjs() ); BSONObj toupdate = d.nextJsObj(); BSONObj chunkFinder = query; bool upsert = flags & UpdateOption_Upsert; bool multi = flags & UpdateOption_Multi; uassert( 10202 , "can't mix multi and upsert and sharding" , ! ( upsert && multi ) ); if (upsert) { uassert(8012, "can't upsert something without shard key", (manager->hasShardKey(toupdate) || (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query)))); BSONObj key = manager->getShardKey().extractKey(query); BSONForEach(e, key) { uassert(13465, "shard key in upsert query must be an exact match", getGtLtOp(e) == BSONObj::Equality); } }
/**
 * Sends every document carried by `d` to the shard owning its chunk, with no retry.
 *
 * A document lacking the shard key gets an "_id" element prepended when "_id" is part of the
 * shard key; if the key is still missing, UserException 8011 is thrown. After each insert the
 * request is told about it and the chunk is asked whether it should split, given the
 * document's size.
 */
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){

    while ( d.moreJSObjs() ){
        BSONObj doc = d.nextJsObj();

        if ( ! manager->hasShardKey( doc ) ){

            bool keyMissing = true;

            if ( manager->getShardKey().partOfShardKey( "_id" ) ){
                BSONObjBuilder withId;
                withId.appendOID( "_id" , 0 , true );
                withId.appendElements( doc );
                doc = withId.obj();
                keyMissing = ! manager->hasShardKey( doc );
            }

            if ( keyMissing ){
                log() << "tried to insert object without shard key: " << r.getns() << " " << doc << endl;
                throw UserException( 8011 , "tried to insert object without shard key" );
            }

        }

        ChunkPtr chunk = manager->findChunk( doc );
        log(4) << " server:" << chunk->getShard().toString() << " " << doc << endl;
        insert( chunk->getShard() , r.getns() , doc );

        r.gotInsert();
        chunk->splitIfShould( doc.objsize() );
    }
}
void _groupInserts( ChunkManagerPtr manager, vector<BSONObj>& inserts, map<ChunkPtr,vector<BSONObj> >& insertsForChunks ){ // Redo all inserts for chunks which have changed map<ChunkPtr,vector<BSONObj> >::iterator i = insertsForChunks.begin(); while( ! insertsForChunks.empty() && i != insertsForChunks.end() ){ if( ! manager->compatibleWith( i->first ) ){ inserts.insert( inserts.end(), i->second.begin(), i->second.end() ); insertsForChunks.erase( i++ ); } else ++i; } // Figure out inserts we haven't chunked yet for( vector<BSONObj>::iterator i = inserts.begin(); i != inserts.end(); ++i ){ BSONObj o = *i; if ( ! manager->hasShardKey( o ) ) { bool bad = true; // Add autogenerated _id to item and see if we now have a shard key if ( manager->getShardKey().partOfShardKey( "_id" ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ) { // TODO: log() << "tried to insert object with no valid shard key for " << manager->getShardKey() << " : " << o << endl; uassert( 8011, str::stream() << "tried to insert object with no valid shard key for " << manager->getShardKey().toString() << " : " << o.toString(), false ); } } // Many operations benefit from having the shard key early in the object o = manager->getShardKey().moveToFront(o); insertsForChunks[manager->findChunk(o)].push_back(o); } inserts.clear(); }
/**
 * Sends every document carried by `d` to the shard owning its chunk, one document at a time.
 *
 * A document lacking the shard key gets an "_id" element prepended when "_id" is part of the
 * shard key; if the key is still missing, UserException 8011 is thrown. Each document is tried
 * up to 10 times: on StaleConfigException the request is reset, the chunk manager is re-read
 * from the request, and the thread sleeps i * 200 ms before the next attempt.
 *
 * Raises user assertion 14804 when the chunk manager is no longer available after a reset.
 */
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){

    while ( d.moreJSObjs() ){
        BSONObj o = d.nextJsObj();

        if ( ! manager->hasShardKey( o ) ){

            bool bad = true;

            if ( manager->getShardKey().partOfShardKey( "_id" ) ){
                BSONObjBuilder b;
                b.appendOID( "_id" , 0 , true );
                b.appendElements( o );
                o = b.obj();
                bad = ! manager->hasShardKey( o );
            }

            if ( bad ){
                log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl;
                throw UserException( 8011 , "tried to insert object without shard key" );
            }

        }

        bool gotThrough = false;
        for ( int i=0; i<10; i++ ){
            try {
                ChunkPtr c = manager->findChunk( o );
                log(4) << " server:" << c->getShard().toString() << " " << o << endl;
                insert( c->getShard() , r.getns() , o );

                r.gotInsert();
                c->splitIfShould( o.objsize() );
                gotThrough = true;
                break;
            }
            catch ( StaleConfigException& ){
                log(1) << "retrying insert because of StaleConfigException: " << o << endl;
                r.reset();
                manager = r.getChunkManager();
                // Without this check the next attempt would dereference a null manager.
                uassert(14804, "collection no longer sharded", manager);
            }
            // Only reached after a stale-config failure (success breaks out above).
            sleepmillis( i * 200 );
        }

        assert( gotThrough );
    }
}
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { if ( ! okForConfigChanges( errmsg ) ) return false; ShardConnection::sync(); string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ) { errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ) { config->reload(); if ( ! config->isSharded( ns ) ) { errmsg = "ns not sharded. have to shard before can split"; return false; } } BSONObj find = cmdObj.getObjectField( "find" ); if ( find.isEmpty() ) { find = cmdObj.getObjectField( "middle" ); if ( find.isEmpty() ) { errmsg = "need to specify find or middle"; return false; } } ChunkManagerPtr info = config->getChunkManager( ns ); ChunkPtr chunk = info->findChunk( find ); BSONObj middle = cmdObj.getObjectField( "middle" ); assert( chunk.get() ); log() << "splitting: " << ns << " shard: " << chunk << endl; BSONObj res; bool worked; if ( middle.isEmpty() ) { BSONObj ret = chunk->singleSplit( true /* force a split even if not enough data */ , res ); worked = !ret.isEmpty(); } else { // sanity check if the key provided is a valid split point if ( ( middle == chunk->getMin() ) || ( middle == chunk->getMax() ) ) { errmsg = "cannot split on initial or final chunk's key"; return false; } if (!fieldsMatch(middle, info->getShardKey().key())){ errmsg = "middle has different fields (or different order) than shard key"; return false; } vector<BSONObj> splitPoints; splitPoints.push_back( middle ); worked = chunk->multiSplit( splitPoints , res ); } if ( !worked ) { errmsg = "split failed"; result.append( "cause" , res ); return false; } config->getChunkManager( ns , true ); return true; }
/**
 * Validates an update against the collection's shard key and forwards it to the right shard(s).
 *
 * Reads flags, query and update object from `d`. Rejects: a missing update object (10201),
 * multi combined with upsert (10202), an upsert with no shard key available (8012), a
 * non-multi update whose query lacks the shard key and is not exactly {_id: ...} (8013),
 * modifier updates touching shard key fields (13123), replacement objects whose shard key
 * differs from the query's (8014) or that lack the shard key (12376).
 *
 * Multi updates are written to every shard returned for the query, with the broadcast flag set
 * in the message. Single updates go to the shard of the matching chunk and are retried on
 * StaleConfigException up to 5 more times, after which the exception is rethrown.
 */
void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ){
    int flags = d.pullInt();

    BSONObj query = d.nextJsObj();
    uassert( 10201 , "invalid update" , d.moreJSObjs() );
    BSONObj toupdate = d.nextJsObj();

    BSONObj chunkFinder = query;

    bool upsert = flags & UpdateOption_Upsert;
    bool multi = flags & UpdateOption_Multi;

    uassert( 10202 , "can't mix multi and upsert and sharding" , ! ( upsert && multi ) );

    // An upsert needs the shard key either in the replacement object or, for a modifier-style
    // update (first field starts with '$'), in the query.
    if ( upsert && !(manager->hasShardKey(toupdate) ||
                     (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query)))) {
        throw UserException( 8012 , "can't upsert something without shard key" );
    }

    bool save = false;
    if ( ! manager->hasShardKey( query ) ){
        if ( multi ){
            // multi updates may omit the shard key; they are sent to several shards below
        }
        else if ( strcmp( query.firstElement().fieldName() , "_id" ) || query.nFields() != 1 ){
            throw UserException( 8013 , "can't do non-multi update with query that doesn't have the shard key" );
        }
        else {
            // Query is exactly {_id: ...}: locate the chunk using the update object instead.
            save = true;
            chunkFinder = toupdate;
        }
    }

    if ( ! save ){
        if ( toupdate.firstElement().fieldName()[0] == '$' ){
            // Modifier update: no operator may name a shard key field.
            BSONObjIterator ops(toupdate);
            while(ops.more()){
                BSONElement op(ops.next());
                if (op.type() != Object)
                    continue;
                BSONObjIterator fields(op.embeddedObject());
                while(fields.more()){
                    const string field = fields.next().fieldName();
                    uassert(13123, "Can't modify shard key's value", ! manager->getShardKey().partOfShardKey(field));
                }
            }
        }
        else if ( manager->hasShardKey( toupdate ) ){
            uassert( 8014, "change would move shards!", manager->getShardKey().compare( query , toupdate ) == 0 );
        }
        else {
            uasserted(12376, "shard key must be in update object");
        }
    }

    if ( multi ){
        set<Shard> shards;
        manager->getShardsForQuery( shards , chunkFinder );

        // Set the broadcast bit in the int located right after the namespace in the message.
        int * x = (int*)(r.d().afterNS());
        x[0] |= UpdateOption_Broadcast;

        for ( set<Shard>::iterator i=shards.begin(); i!=shards.end(); ++i ){
            doWrite( dbUpdate , r , *i , false );
        }
    }
    else {
        int left = 5;
        while ( true ){
            try {
                ChunkPtr c = manager->findChunk( chunkFinder );
                doWrite( dbUpdate , r , c->getShard() );
                c->splitIfShould( d.msg().header()->dataLen() );
                break;
            }
            catch ( StaleConfigException& e ){
                // Rethrow the in-flight exception itself; "throw e" would copy it and slice
                // any derived exception type.
                if ( left <= 0 )
                    throw;
                left--;
                log() << "update failed b/c of StaleConfigException, retrying "
                      << " left:" << left << " ns: " << r.getns() << " query: " << query << endl;
                r.reset( false );
                // NOTE(review): the reloaded manager is not checked for null before the next
                // findChunk() call; the sibling _insert raises "collection no longer sharded".
                manager = r.getChunkManager();
            }
        }
    }
}
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) { const int flags = d.reservedField() | InsertOption_ContinueOnError; // ContinueOnError is always on when using sharding. map<ChunkPtr, vector<BSONObj> > insertsForChunk; // Group bulk insert for appropriate shards try { while ( d.moreJSObjs() ) { BSONObj o = d.nextJsObj(); if ( ! manager->hasShardKey( o ) ) { bool bad = true; if ( manager->getShardKey().partOfShardKey( "_id" ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ) { log() << "tried to insert object with no valid shard key: " << r.getns() << " " << o << endl; uasserted( 8011 , "tried to insert object with no valid shard key" ); } } // Many operations benefit from having the shard key early in the object o = manager->getShardKey().moveToFront(o); insertsForChunk[manager->findChunk(o)].push_back(o); } for (map<ChunkPtr, vector<BSONObj> >::iterator it = insertsForChunk.begin(); it != insertsForChunk.end(); ++it) { ChunkPtr c = it->first; vector<BSONObj> objs = it->second; const int maxTries = 30; bool gotThrough = false; for ( int i=0; i<maxTries; i++ ) { try { LOG(4) << " server:" << c->getShard().toString() << " bulk insert " << objs.size() << " documents" << endl; insert( c->getShard() , r.getns() , objs , flags); int bytesWritten = 0; for (vector<BSONObj>::iterator vecIt = objs.begin(); vecIt != objs.end(); ++vecIt) { r.gotInsert(); // Record the correct number of individual inserts bytesWritten += (*vecIt).objsize(); } if ( r.getClientInfo()->autoSplitOk() ) c->splitIfShould( bytesWritten ); gotThrough = true; break; } catch ( StaleConfigException& e ) { int logLevel = i < ( maxTries / 2 ); LOG( logLevel ) << "retrying bulk insert of " << objs.size() << " documents because of StaleConfigException: " << e << endl; r.reset(); manager = r.getChunkManager(); if( ! 
manager ) { uasserted(14804, "collection no longer sharded"); } unsigned long long old = manager->getSequenceNumber(); LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl; } sleepmillis( i * 20 ); } assert( inShutdown() || gotThrough ); // not caught below } } catch (const UserException&){ if (!d.moreJSObjs()){ throw; } // Ignore and keep going. ContinueOnError is implied with sharding. } }
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) { verify( candidateChunks ); // // 1. Check whether there is any sharded collection to be balanced by querying // the ShardsNS::collections collection // auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj()); if ( NULL == cursor.get() ) { warning() << "could not query " << CollectionType::ConfigNS << " while trying to balance" << endl; return; } vector< string > collections; while ( cursor->more() ) { BSONObj col = cursor->nextSafe(); // sharded collections will have a shard "key". if ( ! col[CollectionType::keyPattern()].eoo() && ! col[CollectionType::noBalance()].trueValue() ){ collections.push_back( col[CollectionType::ns()].String() ); } else if( col[CollectionType::noBalance()].trueValue() ){ LOG(1) << "not balancing collection " << col[CollectionType::ns()].String() << ", explicitly disabled" << endl; } } cursor.reset(); if ( collections.empty() ) { LOG(1) << "no collections to balance" << endl; return; } // // 2. Get a list of all the shards that are participating in this balance round // along with any maximum allowed quotas and current utilization. We get the // latter by issuing db.serverStatus() (mem.mapped) to all shards. // // TODO: skip unresponsive shards and mark information as stale. // ShardInfoMap shardInfo; Status loadStatus = DistributionStatus::populateShardInfoMap(&shardInfo); if (!loadStatus.isOK()) { warning() << "failed to load shard metadata" << causedBy(loadStatus) << endl; return; } if (shardInfo.size() < 2) { LOG(1) << "can't balance without more active shards" << endl; return; } OCCASIONALLY warnOnMultiVersion( shardInfo ); // // 3. For each collection, check if the balancing policy recommends moving anything around. 
// for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) { const string& ns = *it; OwnedPointerMap<string, OwnedPointerVector<ChunkType> > shardToChunksMap; cursor = conn.query(ChunkType::ConfigNS, QUERY(ChunkType::ns(ns)).sort(ChunkType::min())); set<BSONObj> allChunkMinimums; while ( cursor->more() ) { BSONObj chunkDoc = cursor->nextSafe().getOwned(); auto_ptr<ChunkType> chunk(new ChunkType()); string errmsg; if (!chunk->parseBSON(chunkDoc, &errmsg)) { error() << "bad chunk format for " << chunkDoc << ": " << errmsg << endl; return; } allChunkMinimums.insert(chunk->getMin().getOwned()); OwnedPointerVector<ChunkType>*& chunkList = shardToChunksMap.mutableMap()[chunk->getShard()]; if (chunkList == NULL) { chunkList = new OwnedPointerVector<ChunkType>(); } chunkList->mutableVector().push_back(chunk.release()); } cursor.reset(); if (shardToChunksMap.map().empty()) { LOG(1) << "skipping empty collection (" << ns << ")"; continue; } for (ShardInfoMap::const_iterator i = shardInfo.begin(); i != shardInfo.end(); ++i) { // this just makes sure there is an entry in shardToChunksMap for every shard OwnedPointerVector<ChunkType>*& chunkList = shardToChunksMap.mutableMap()[i->first]; if (chunkList == NULL) { chunkList = new OwnedPointerVector<ChunkType>(); } } DistributionStatus status(shardInfo, shardToChunksMap.map()); // load tags Status result = clusterCreateIndex(TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::min() << 1), true, // unique WriteConcernOptions::AllConfigs, NULL); if ( !result.isOK() ) { warning() << "could not create index tags_1_min_1: " << result.reason() << endl; continue; } cursor = conn.query(TagsType::ConfigNS, QUERY(TagsType::ns(ns)).sort(TagsType::min())); vector<TagRange> ranges; while ( cursor->more() ) { BSONObj tag = cursor->nextSafe(); TagRange tr(tag[TagsType::min()].Obj().getOwned(), tag[TagsType::max()].Obj().getOwned(), tag[TagsType::tag()].String()); ranges.push_back(tr); uassert(16356, 
str::stream() << "tag ranges not valid for: " << ns, status.addTagRange(tr) ); } cursor.reset(); DBConfigPtr cfg = grid.getDBConfig( ns ); if ( !cfg ) { warning() << "could not load db config to balance " << ns << " collection" << endl; continue; } // This line reloads the chunk manager once if this process doesn't know the collection // is sharded yet. ChunkManagerPtr cm = cfg->getChunkManagerIfExists( ns, true ); if ( !cm ) { warning() << "could not load chunks to balance " << ns << " collection" << endl; continue; } // loop through tags to make sure no chunk spans tags; splits on tag min. for all chunks bool didAnySplits = false; for ( unsigned i = 0; i < ranges.size(); i++ ) { BSONObj min = ranges[i].min; min = cm->getShardKey().extendRangeBound( min, false ); if ( allChunkMinimums.count( min ) > 0 ) continue; didAnySplits = true; log() << "ns: " << ns << " need to split on " << min << " because there is a range there" << endl; ChunkPtr c = cm->findIntersectingChunk( min ); vector<BSONObj> splitPoints; splitPoints.push_back( min ); BSONObj res; if ( !c->multiSplit( splitPoints, res ) ) { error() << "split failed: " << res << endl; } else { LOG(1) << "split worked: " << res << endl; } break; } if ( didAnySplits ) { // state change, just wait till next round continue; } CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime ); if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) ); } }
void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) { verify( candidateChunks ); // // 1. Check whether there is any sharded collection to be balanced by querying // the ShardsNS::collections collection // auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj()); vector< string > collections; while ( cursor->more() ) { BSONObj col = cursor->nextSafe(); // sharded collections will have a shard "key". if ( ! col[CollectionType::keyPattern()].eoo() && ! col[CollectionType::noBalance()].trueValue() ){ collections.push_back( col[CollectionType::ns()].String() ); } else if( col[CollectionType::noBalance()].trueValue() ){ LOG(1) << "not balancing collection " << col[CollectionType::ns()].String() << ", explicitly disabled" << endl; } } cursor.reset(); if ( collections.empty() ) { LOG(1) << "no collections to balance" << endl; return; } // // 2. Get a list of all the shards that are participating in this balance round // along with any maximum allowed quotas and current utilization. We get the // latter by issuing db.serverStatus() (mem.mapped) to all shards. // // TODO: skip unresponsive shards and mark information as stale. // vector<Shard> allShards; Shard::getAllShards( allShards ); if ( allShards.size() < 2) { LOG(1) << "can't balance without more active shards" << endl; return; } ShardInfoMap shardInfo; for ( vector<Shard>::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) { const Shard& s = *it; ShardStatus status = s.getStatus(); shardInfo[ s.getName() ] = ShardInfo( s.getMaxSize(), status.mapped(), s.isDraining(), status.hasOpsQueued(), s.tags() ); } // // 3. For each collection, check if the balancing policy recommends moving anything around. 
// for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) { const string& ns = *it; map< string,vector<BSONObj> > shardToChunksMap; cursor = conn.query(ChunkType::ConfigNS, QUERY(ChunkType::ns(ns)).sort(ChunkType::min())); set<BSONObj> allChunkMinimums; while ( cursor->more() ) { BSONObj chunk = cursor->nextSafe().getOwned(); vector<BSONObj>& chunks = shardToChunksMap[chunk[ChunkType::shard()].String()]; allChunkMinimums.insert( chunk[ChunkType::min()].Obj() ); chunks.push_back( chunk ); } cursor.reset(); if (shardToChunksMap.empty()) { LOG(1) << "skipping empty collection (" << ns << ")"; continue; } for ( vector<Shard>::iterator i=allShards.begin(); i!=allShards.end(); ++i ) { // this just makes sure there is an entry in shardToChunksMap for every shard Shard s = *i; shardToChunksMap[s.getName()].size(); } DistributionStatus status( shardInfo, shardToChunksMap ); // load tags conn.ensureIndex(TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::min() << 1), true); cursor = conn.query(TagsType::ConfigNS, QUERY(TagsType::ns(ns)).sort(TagsType::min())); vector<TagRange> ranges; while ( cursor->more() ) { BSONObj tag = cursor->nextSafe(); TagRange tr(tag[TagsType::min()].Obj().getOwned(), tag[TagsType::max()].Obj().getOwned(), tag[TagsType::tag()].String()); ranges.push_back(tr); uassert(16356, str::stream() << "tag ranges not valid for: " << ns, status.addTagRange(tr) ); } cursor.reset(); DBConfigPtr cfg = grid.getDBConfig( ns ); verify( cfg ); ChunkManagerPtr cm = cfg->getChunkManager( ns ); verify( cm ); // loop through tags to make sure no chunk spans tags; splits on tag min. 
for all chunks bool didAnySplits = false; for ( unsigned i = 0; i < ranges.size(); i++ ) { BSONObj min = ranges[i].min; min = cm->getShardKey().extendRangeBound( min, false ); if ( allChunkMinimums.count( min ) > 0 ) continue; didAnySplits = true; log() << "ns: " << ns << " need to split on " << min << " because there is a range there" << endl; ChunkPtr c = cm->findIntersectingChunk( min ); vector<BSONObj> splitPoints; splitPoints.push_back( min ); BSONObj res; if ( !c->multiSplit( splitPoints, res ) ) { error() << "split failed: " << res << endl; } else { LOG(1) << "split worked: " << res << endl; } break; } if ( didAnySplits ) { // state change, just wait till next round continue; } CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime ); if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) ); } }
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) { const int flags = d.reservedField(); bool keepGoing = flags & InsertOption_KeepGoing; // modified before assertion if should abort while ( d.moreJSObjs() ) { try { BSONObj o = d.nextJsObj(); if ( ! manager->hasShardKey( o ) ) { bool bad = true; if ( manager->getShardKey().partOfShardKey( "_id" ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } if ( bad ) { log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl; uasserted( 8011 , "tried to insert object without shard key" ); } } // Many operations benefit from having the shard key early in the object o = manager->getShardKey().moveToFront(o); const int maxTries = 30; bool gotThrough = false; for ( int i=0; i<maxTries; i++ ) { try { ChunkPtr c = manager->findChunk( o ); log(4) << " server:" << c->getShard().toString() << " " << o << endl; insert( c->getShard() , r.getns() , o , flags); r.gotInsert(); if ( r.getClientInfo()->autoSplitOk() ) c->splitIfShould( o.objsize() ); gotThrough = true; break; } catch ( StaleConfigException& e ) { int logLevel = i < ( maxTries / 2 ); LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl; r.reset(); unsigned long long old = manager->getSequenceNumber(); manager = r.getChunkManager(); LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl; if (!manager) { keepGoing = false; uasserted(14804, "collection no longer sharded"); } } sleepmillis( i * 20 ); } assert( inShutdown() || gotThrough ); // not caught below } catch (const UserException&){ if (!keepGoing || !d.moreJSObjs()){ throw; } // otherwise ignore and keep going } } }
/**
 * Handles a query against a sharded collection.
 *
 * Collects the chunk ranges matching the query filter, derives the set of servers (each paired
 * with its range filter) and picks a cursor implementation:
 *   1. no sort                -> SerialServerClusteredCursor over the servers
 *   2. sort the shard key can order -> SerialServerClusteredCursor over per-range buckets
 *   3. any other sort         -> ParallelSortClusteredCursor
 * An explain query is answered directly from the cursor's explain output. Otherwise the first
 * batch is sent and, when sendNextBatch() returns true, the cursor is stored in cursorCache.
 *
 * Throws UserException 8010 when the message looks like a command (ntoreturn == 1 and the
 * namespace contains ".$cmd").
 */
virtual void queryOp( Request& r ){
    QueryMessage q( r.d() );

    log(3) << "shard query: " << q.ns << " " << q.query << endl;

    if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") )
        throw UserException( 8010 , "something is wrong, shouldn't see a command here" );

    ChunkManagerPtr info = r.getChunkManager();
    assert( info );

    Query query( q.query );

    typedef vector<shared_ptr<ChunkRange> > RangeList;

    RangeList ranges;
    info->getChunksForQuery( ranges , query.getFilter() );

    // One entry per distinct (server, filter) pair.
    set<ServerAndQuery> servers;
    for ( RangeList::iterator it = ranges.begin(); it != ranges.end(); it++ ){
        const shared_ptr<ChunkRange>& range = *it;
        // Previously tried with an empty BSONObj() filter instead of the range filter ("ERH ERH ERH").
        servers.insert( ServerAndQuery( range->getShard().getConnString() , range->getFilter() ) );
    }

    if ( logLevel > 4 ){
        StringBuilder report;
        report << " shard query servers: " << servers.size() << '\n';
        for ( set<ServerAndQuery>::iterator it = servers.begin(); it != servers.end(); it++ ){
            const ServerAndQuery& target = *it;
            report << " " << target.toString() << '\n';
        }
        log() << report.str();
    }

    ClusteredCursor * cursor = 0;

    BSONObj sort = query.getSort();

    if ( ! sort.isEmpty() ){
        int shardKeyOrder = info->getShardKey().canOrder( sort );
        if ( ! shardKeyOrder ){
            // 3. sort on non-sharded key, pull back a portion from each server and iterate slowly
            cursor = new ParallelSortClusteredCursor( servers , q , sort );
        }
        else {
            // 2. sort on shard key, can do in serial intelligently
            set<ServerAndQuery> buckets;
            for ( RangeList::iterator it = ranges.begin(); it != ranges.end(); it++ ){
                const shared_ptr<ChunkRange>& range = *it;
                buckets.insert( ServerAndQuery( range->getShard().getConnString() , range->getFilter() , range->getMin() ) );
            }
            cursor = new SerialServerClusteredCursor( buckets , q , shardKeyOrder );
        }
    }
    else {
        // 1. no sort, can just hit them in serial
        cursor = new SerialServerClusteredCursor( servers , q );
    }

    assert( cursor );

    log(5) << " cursor type: " << cursor->type() << endl;
    shardedCursorTypes.hit( cursor->type() );

    if ( query.isExplain() ){
        // Explain is answered immediately; the cursor is not handed to anyone else.
        BSONObj explain = cursor->explain();
        replyToQuery( 0 , r.p() , r.m() , explain );
        delete( cursor );
        return;
    }

    // From here on the raw cursor is passed to the ShardedClientCursor.
    ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor ));
    if ( ! cc->sendNextBatch( r ) ){
        return;
    }

    log(6) << "storing cursor : " << cc->getId() << endl;
    cursorCache.store( cc );
}
/**
 * Validates an update against the collection's shard key and forwards it to the right shard(s).
 *
 * Reads flags, query and update object from `d`. Rejects: a missing update object (10201),
 * multi combined with upsert (10202), an upsert with no shard key available (8012), a
 * non-multi update whose query lacks the shard key and is not exactly {_id: ...} (8013),
 * modifier updates touching shard key fields (13123), replacement objects whose shard key
 * differs from the query's (8014) or that lack the shard key (12376).
 *
 * Multi updates are written once to each distinct shard among the chunk ranges matching the
 * query; single updates go to the shard of the matching chunk, which is then asked whether it
 * should split.
 */
void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ){
    int flags = d.pullInt();

    BSONObj query = d.nextJsObj();
    uassert( 10201 , "invalid update" , d.moreJSObjs() );
    BSONObj updateSpec = d.nextJsObj();

    // Object used to locate the target chunk(s); the query unless decided otherwise below.
    BSONObj routingObj = query;

    bool upsert = flags & UpdateOption_Upsert;
    bool multi = flags & UpdateOption_Multi;

    if ( multi )
        uassert( 10202 , "can't mix multi and upsert and sharding" , ! upsert );

    if ( upsert ) {
        // The shard key must come from the replacement object or, for a modifier-style update
        // (first field starts with '$'), from the query.
        bool keyAvailable =
            manager->hasShardKey(updateSpec) ||
            (updateSpec.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query));
        if ( ! keyAvailable ) {
            throw UserException( 8012 , "can't upsert something without shard key" );
        }
    }

    bool save = false;
    if ( ! manager->hasShardKey( query ) && ! multi ){
        bool idOnlyQuery = query.nFields() == 1 && strcmp( query.firstElement().fieldName() , "_id" ) == 0;
        if ( ! idOnlyQuery ){
            throw UserException( 8013 , "can't do update with query that doesn't have the shard key" );
        }
        // Query is exactly {_id: ...}: locate the chunk using the update object instead.
        save = true;
        routingObj = updateSpec;
    }

    if ( ! save ){
        if ( updateSpec.firstElement().fieldName()[0] == '$' ){
            // Modifier update: no operator may name a shard key field.
            BSONObjIterator opIter(updateSpec);
            while( opIter.more() ){
                BSONElement op(opIter.next());
                if (op.type() != Object)
                    continue;
                BSONObjIterator fieldIter(op.embeddedObject());
                while( fieldIter.more() ){
                    const string fieldName = fieldIter.next().fieldName();
                    uassert(13123, "Can't modify shard key's value", ! manager->getShardKey().partOfShardKey(fieldName));
                }
            }
        }
        else if ( manager->hasShardKey( updateSpec ) ){
            uassert( 8014, "change would move shards!", manager->getShardKey().compare( query , updateSpec ) == 0 );
        }
        else {
            uasserted(12376, "shard key must be in update object");
        }
    }

    if ( ! multi ){
        ChunkPtr chunk = manager->findChunk( routingObj );
        doWrite( dbUpdate , r , chunk->getShard() );
        chunk->splitIfShould( d.msg().header()->dataLen() );
        return;
    }

    vector<shared_ptr<ChunkRange> > ranges;
    manager->getChunksForQuery( ranges , routingObj );

    // Several ranges can live on the same shard; write to each shard only once.
    set<Shard> visited;
    for ( vector<shared_ptr<ChunkRange> >::iterator it = ranges.begin(); it != ranges.end(); it++ ){
        shared_ptr<ChunkRange> range = *it;
        if ( visited.count( range->getShard() ) )
            continue;
        doWrite( dbUpdate , r , range->getShard() );
        visited.insert( range->getShard() );
    }
}