void CursorCache::store( ShardedClientCursorPtr cursor ) { LOG(_myLogLevel) << "CursorCache::store cursor " << " id: " << cursor->getId() << endl; verify( cursor->getId() ); scoped_lock lk( _mutex ); _cursors[cursor->getId()] = cursor; _shardedTotal++; }
void CursorCache::store(ShardedClientCursorPtr cursor, int maxTimeMS) { LOG(_myLogLevel) << "CursorCache::store cursor " << " id: " << cursor->getId() << (maxTimeMS != kMaxTimeCursorNoTimeLimit ? str::stream() << "maxTimeMS: " << maxTimeMS : string("")) << endl; verify(cursor->getId()); verify(maxTimeMS == kMaxTimeCursorTimeLimitExpired || maxTimeMS == kMaxTimeCursorNoTimeLimit || maxTimeMS > 0); stdx::lock_guard<stdx::mutex> lk(_mutex); _cursorsMaxTimeMS[cursor->getId()] = maxTimeMS; _cursors[cursor->getId()] = cursor; _shardedTotal++; }
virtual void queryOp( Request& r ){ QueryMessage q( r.d() ); log(3) << "shard query: " << q.ns << " " << q.query << endl; if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") ) throw UserException( 8010 , "something is wrong, shouldn't see a command here" ); ChunkManagerPtr info = r.getChunkManager(); assert( info ); Query query( q.query ); set<Shard> shards; info->getShardsForQuery( shards , query.getFilter() ); set<ServerAndQuery> servers; for ( set<Shard>::iterator i = shards.begin(); i != shards.end(); i++ ){ servers.insert( ServerAndQuery( i->getConnString() , BSONObj() ) ); } if ( logLevel > 4 ){ StringBuilder ss; ss << " shard query servers: " << servers.size() << '\n'; for ( set<ServerAndQuery>::iterator i = servers.begin(); i!=servers.end(); i++ ){ const ServerAndQuery& s = *i; ss << " " << s.toString() << '\n'; } log() << ss.str(); } ClusteredCursor * cursor = 0; BSONObj sort = query.getSort(); if ( sort.isEmpty() ){ cursor = new SerialServerClusteredCursor( servers , q ); } else { cursor = new ParallelSortClusteredCursor( servers , q , sort ); } assert( cursor ); try { cursor->init(); log(5) << " cursor type: " << cursor->type() << endl; shardedCursorTypes.hit( cursor->type() ); if ( query.isExplain() ){ BSONObj explain = cursor->explain(); replyToQuery( 0 , r.p() , r.m() , explain ); delete( cursor ); return; } } catch(...) { delete cursor; throw; } ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor )); if ( ! cc->sendNextBatch( r ) ){ return; } log(6) << "storing cursor : " << cc->getId() << endl; cursorCache.store( cc ); }
void Strategy::getMore(OperationContext* txn, Request& request) { Timer getMoreTimer; const char* ns = request.getns(); const int ntoreturn = request.d().pullInt(); const long long id = request.d().pullInt64(); // TODO: Handle stale config exceptions here from coll being dropped or sharded during op // for now has same semantics as legacy request const NamespaceString nss(ns); auto statusGetDb = grid.catalogCache()->getDatabase(txn, nss.db().toString()); if (statusGetDb == ErrorCodes::DatabaseNotFound) { cursorCache.remove(id); replyToQuery(ResultFlag_CursorNotFound, request.p(), request.m(), 0, 0, 0); return; } uassertStatusOK(statusGetDb); // Spigot which controls whether OP_QUERY style find on mongos uses the new ClusterClientCursor // code path. // // TODO: Delete the spigot and always use the new code. if (useClusterClientCursor) { boost::optional<long long> batchSize; if (ntoreturn) { batchSize = ntoreturn; } GetMoreRequest getMoreRequest(NamespaceString(ns), id, batchSize, boost::none); auto cursorResponse = ClusterFind::runGetMore(txn, getMoreRequest); if (cursorResponse == ErrorCodes::CursorNotFound) { replyToQuery(ResultFlag_CursorNotFound, request.p(), request.m(), 0, 0, 0); return; } uassertStatusOK(cursorResponse.getStatus()); // Build the response document. // // TODO: this constant should be shared between mongos and mongod, and should not be inside // ShardedClientCursor. BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE); int numResults = 0; for (const auto& obj : cursorResponse.getValue().batch) { buffer.appendBuf((void*)obj.objdata(), obj.objsize()); ++numResults; } replyToQuery(0, request.p(), request.m(), buffer.buf(), buffer.len(), numResults, cursorResponse.getValue().numReturnedSoFar.value_or(0), cursorResponse.getValue().cursorId); return; } shared_ptr<DBConfig> config = statusGetDb.getValue(); ShardPtr primary; ChunkManagerPtr info; config->getChunkManagerOrPrimary(txn, ns, info, primary); // // TODO: Cleanup cursor cache, consolidate into single codepath // const string host = cursorCache.getRef(id); ShardedClientCursorPtr cursor = cursorCache.get(id); int cursorMaxTimeMS = cursorCache.getMaxTimeMS(id); // Cursor ids should not overlap between sharded and unsharded cursors massert(17012, str::stream() << "duplicate sharded and unsharded cursor id " << id << " detected for " << ns << ", duplicated on host " << host, NULL == cursorCache.get(id).get() || host.empty()); ClientBasic* client = ClientBasic::getCurrent(); NamespaceString nsString(ns); AuthorizationSession* authSession = AuthorizationSession::get(client); Status status = authSession->checkAuthForGetMore(nsString, id, false); audit::logGetMoreAuthzCheck(client, nsString, id, status.code()); uassertStatusOK(status); if (!host.empty()) { LOG(3) << "single getmore: " << ns; // we used ScopedDbConnection because we don't get about config versions // not deleting data is handled elsewhere // and we don't want to call setShardVersion ScopedDbConnection conn(host); Message response; bool ok = conn->callRead(request.m(), response); uassert(10204, "dbgrid: getmore: error calling db", ok); bool hasMore = (response.singleData().getCursor() != 0); if (!hasMore) { cursorCache.removeRef(id); } request.reply(response, "" /*conn->getServerAddress() */); conn.done(); return; } else if (cursor) { if (cursorMaxTimeMS == kMaxTimeCursorTimeLimitExpired) { cursorCache.remove(id); uasserted(ErrorCodes::ExceededTimeLimit, "operation exceeded time limit"); } // TODO: Try to match logic of mongod, where on subsequent getMore() we pull lots more data? BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE); int docCount = 0; const int startFrom = cursor->getTotalSent(); bool hasMore = cursor->sendNextBatch(ntoreturn, buffer, docCount); if (hasMore) { // still more data cursor->accessed(); if (cursorMaxTimeMS != kMaxTimeCursorNoTimeLimit) { // Update remaining amount of time in cursor cache. int cursorLeftoverMillis = cursorMaxTimeMS - getMoreTimer.millis(); if (cursorLeftoverMillis <= 0) { cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired; } cursorCache.updateMaxTimeMS(id, cursorLeftoverMillis); } } else { // we've exhausted the cursor cursorCache.remove(id); } replyToQuery(0, request.p(), request.m(), buffer.buf(), buffer.len(), docCount, startFrom, hasMore ? cursor->getId() : 0); return; } else { LOG(3) << "could not find cursor " << id << " in cache for " << ns; replyToQuery(ResultFlag_CursorNotFound, request.p(), request.m(), 0, 0, 0); return; } }
void Strategy::getMore( Request& r ) { Timer getMoreTimer; const char* ns = r.getns(); const int ntoreturn = r.d().pullInt(); const long long id = r.d().pullInt64(); // TODO: Handle stale config exceptions here from coll being dropped or sharded during op // for now has same semantics as legacy request const NamespaceString nss(ns); auto statusGetDb = grid.catalogCache()->getDatabase(nss.db().toString()); if (statusGetDb == ErrorCodes::DatabaseNotFound) { cursorCache.remove(id); replyToQuery(ResultFlag_CursorNotFound, r.p(), r.m(), 0, 0, 0); return; } uassertStatusOK(statusGetDb); shared_ptr<DBConfig> config = statusGetDb.getValue(); ShardPtr primary; ChunkManagerPtr info; config->getChunkManagerOrPrimary( ns, info, primary ); // // TODO: Cleanup cursor cache, consolidate into single codepath // const string host = cursorCache.getRef(id); ShardedClientCursorPtr cursor = cursorCache.get( id ); int cursorMaxTimeMS = cursorCache.getMaxTimeMS( id ); // Cursor ids should not overlap between sharded and unsharded cursors massert( 17012, str::stream() << "duplicate sharded and unsharded cursor id " << id << " detected for " << ns << ", duplicated on host " << host, NULL == cursorCache.get( id ).get() || host.empty() ); ClientBasic* client = ClientBasic::getCurrent(); NamespaceString nsString(ns); AuthorizationSession* authSession = client->getAuthorizationSession(); Status status = authSession->checkAuthForGetMore( nsString, id ); audit::logGetMoreAuthzCheck( client, nsString, id, status.code() ); uassertStatusOK(status); if( !host.empty() ){ LOG(3) << "single getmore: " << ns << endl; // we used ScopedDbConnection because we don't get about config versions // not deleting data is handled elsewhere // and we don't want to call setShardVersion ScopedDbConnection conn(host); Message response; bool ok = conn->callRead( r.m() , response); uassert( 10204 , "dbgrid: getmore: error calling db", ok); bool hasMore = (response.singleData().getCursor() != 0); if ( !hasMore ) { cursorCache.removeRef( id ); } r.reply( response , "" /*conn->getServerAddress() */ ); conn.done(); return; } else if ( cursor ) { if ( cursorMaxTimeMS == kMaxTimeCursorTimeLimitExpired ) { cursorCache.remove( id ); uasserted( ErrorCodes::ExceededTimeLimit, "operation exceeded time limit" ); } // TODO: Try to match logic of mongod, where on subsequent getMore() we pull lots more data? BufBuilder buffer( ShardedClientCursor::INIT_REPLY_BUFFER_SIZE ); int docCount = 0; const int startFrom = cursor->getTotalSent(); bool hasMore = cursor->sendNextBatch(ntoreturn, buffer, docCount); if ( hasMore ) { // still more data cursor->accessed(); if ( cursorMaxTimeMS != kMaxTimeCursorNoTimeLimit ) { // Update remaining amount of time in cursor cache. int cursorLeftoverMillis = cursorMaxTimeMS - getMoreTimer.millis(); if ( cursorLeftoverMillis <= 0 ) { cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired; } cursorCache.updateMaxTimeMS( id, cursorLeftoverMillis ); } } else { // we've exhausted the cursor cursorCache.remove( id ); } replyToQuery( 0, r.p(), r.m(), buffer.buf(), buffer.len(), docCount, startFrom, hasMore ? cursor->getId() : 0 ); return; } else { LOG( 3 ) << "could not find cursor " << id << " in cache for " << ns << endl; replyToQuery( ResultFlag_CursorNotFound , r.p() , r.m() , 0 , 0 , 0 ); return; } }
void Strategy::queryOp( Request& r ) { verify( !NamespaceString( r.getns() ).isCommand() ); Timer queryTimer; QueryMessage q( r.d() ); NamespaceString ns(q.ns); ClientBasic* client = ClientBasic::getCurrent(); AuthorizationSession* authSession = client->getAuthorizationSession(); Status status = authSession->checkAuthForQuery(ns, q.query); audit::logQueryAuthzCheck(client, ns, q.query, status.code()); uassertStatusOK(status); LOG(3) << "query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn << " options: " << q.queryOptions << endl; if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") ) throw UserException( 8010 , "something is wrong, shouldn't see a command here" ); if (q.queryOptions & QueryOption_Exhaust) { uasserted(18526, string("the 'exhaust' query option is invalid for mongos queries: ") + q.ns + " " + q.query.toString()); } QuerySpec qSpec( (string)q.ns, q.query, q.fields, q.ntoskip, q.ntoreturn, q.queryOptions ); // Parse "$maxTimeMS". StatusWith<int> maxTimeMS = LiteParsedQuery::parseMaxTimeMSQuery( q.query ); uassert( 17233, maxTimeMS.getStatus().reason(), maxTimeMS.isOK() ); if ( _isSystemIndexes( q.ns ) && doShardedIndexQuery( r, qSpec )) { return; } ParallelSortClusteredCursor * cursor = new ParallelSortClusteredCursor( qSpec, CommandInfo() ); verify( cursor ); // TODO: Move out to Request itself, not strategy based try { cursor->init(); if ( qSpec.isExplain() ) { BSONObjBuilder explain_builder; cursor->explain( explain_builder ); explain_builder.appendNumber( "executionTimeMillis", static_cast<long long>(queryTimer.millis()) ); BSONObj b = explain_builder.obj(); replyToQuery( 0 , r.p() , r.m() , b ); delete( cursor ); return; } } catch(...) { delete cursor; throw; } // TODO: Revisit all of this when we revisit the sharded cursor cache if (cursor->getNumQueryShards() != 1) { // More than one shard (or zero), manage with a ShardedClientCursor // NOTE: We may also have *zero* shards here when the returnPartial flag is set. // Currently the code in ShardedClientCursor handles this. ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor )); BufBuilder buffer( ShardedClientCursor::INIT_REPLY_BUFFER_SIZE ); int docCount = 0; const int startFrom = cc->getTotalSent(); bool hasMore = cc->sendNextBatch(q.ntoreturn, buffer, docCount); if ( hasMore ) { LOG(5) << "storing cursor : " << cc->getId() << endl; int cursorLeftoverMillis = maxTimeMS.getValue() - queryTimer.millis(); if ( maxTimeMS.getValue() == 0 ) { // 0 represents "no limit". cursorLeftoverMillis = kMaxTimeCursorNoTimeLimit; } else if ( cursorLeftoverMillis <= 0 ) { cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired; } cursorCache.store( cc, cursorLeftoverMillis ); } replyToQuery( 0, r.p(), r.m(), buffer.buf(), buffer.len(), docCount, startFrom, hasMore ? cc->getId() : 0 ); } else{ // Only one shard is used // Remote cursors are stored remotely, we shouldn't need this around. scoped_ptr<ParallelSortClusteredCursor> cursorDeleter( cursor ); ShardPtr shard = cursor->getQueryShard(); verify( shard.get() ); DBClientCursorPtr shardCursor = cursor->getShardCursor(*shard); // Implicitly stores the cursor in the cache r.reply( *(shardCursor->getMessage()) , shardCursor->originalHost() ); // We don't want to kill the cursor remotely if there's still data left shardCursor->decouple(); } }
virtual void queryOp( Request& r ){ QueryMessage q( r.d() ); log(3) << "shard query: " << q.ns << " " << q.query << endl; if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") ) throw UserException( 8010 , "something is wrong, shouldn't see a command here" ); ChunkManagerPtr info = r.getChunkManager(); assert( info ); Query query( q.query ); vector<shared_ptr<ChunkRange> > shards; info->getChunksForQuery( shards , query.getFilter() ); set<ServerAndQuery> servers; for ( vector<shared_ptr<ChunkRange> >::iterator i = shards.begin(); i != shards.end(); i++ ){ shared_ptr<ChunkRange> c = *i; //servers.insert( ServerAndQuery( c->getShard().getConnString() , BSONObj() ) ); // ERH ERH ERH servers.insert( ServerAndQuery( c->getShard().getConnString() , c->getFilter() ) ); } if ( logLevel > 4 ){ StringBuilder ss; ss << " shard query servers: " << servers.size() << '\n'; for ( set<ServerAndQuery>::iterator i = servers.begin(); i!=servers.end(); i++ ){ const ServerAndQuery& s = *i; ss << " " << s.toString() << '\n'; } log() << ss.str(); } ClusteredCursor * cursor = 0; BSONObj sort = query.getSort(); if ( sort.isEmpty() ){ // 1. no sort, can just hit them in serial cursor = new SerialServerClusteredCursor( servers , q ); } else { int shardKeyOrder = info->getShardKey().canOrder( sort ); if ( shardKeyOrder ){ // 2. sort on shard key, can do in serial intelligently set<ServerAndQuery> buckets; for ( vector<shared_ptr<ChunkRange> >::iterator i = shards.begin(); i != shards.end(); i++ ){ shared_ptr<ChunkRange> s = *i; buckets.insert( ServerAndQuery( s->getShard().getConnString() , s->getFilter() , s->getMin() ) ); } cursor = new SerialServerClusteredCursor( buckets , q , shardKeyOrder ); } else { // 3. sort on non-sharded key, pull back a portion from each server and iterate slowly cursor = new ParallelSortClusteredCursor( servers , q , sort ); } } assert( cursor ); log(5) << " cursor type: " << cursor->type() << endl; shardedCursorTypes.hit( cursor->type() ); if ( query.isExplain() ){ BSONObj explain = cursor->explain(); replyToQuery( 0 , r.p() , r.m() , explain ); delete( cursor ); return; } ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor )); if ( ! cc->sendNextBatch( r ) ){ return; } log(6) << "storing cursor : " << cc->getId() << endl; cursorCache.store( cc ); }