/**
 * Serializes this stage for explain output.
 *
 * @param explain  when false nothing is serialized and an empty Value is returned.
 * @return a document keyed by getSourceName() whose sub-document carries the query, sort,
 *         limit, projection spec, whether a covered index can be used and the cursor's
 *         string description.
 *
 * Raises uassert 17135 if the underlying ClientCursor can no longer be pinned.
 */
Value DocumentSourceCursor::serialize(bool explain) const {
    // A DocumentSourceCursor is never parsed, so the only consumer of this output is explain.
    if (!explain) {
        return Value();
    }

    Lock::DBRead readLock(ns);
    Client::Context context(ns, storageGlobalParams.dbpath, /*doVersion=*/false);

    ClientCursorPin cursorPin(_cursorId);
    ClientCursor* clientCursor = cursorPin.c();
    uassert(17135, "Cursor deleted. Was the collection or database dropped?", clientCursor);

    clientCursor->c()->recoverFromYield();

    // Optional pieces are emitted as a default-constructed Value when they are not set.
    const Value sortField = _sort.isEmpty() ? Value() : Value(_sort);
    const Value limitField = _limit ? Value(_limit->getLimit()) : Value();
    const Value projectionField = _projection ? Value(_projection->getSpec()) : Value();

    // TODO get more plan information
    return Value(DOC(getSourceName() <<
        DOC("query" << Value(_query)
         << "sort" << sortField
         << "limit" << limitField
         << "fields" << projectionField
         << "indexOnly" << canUseCoveredIndex(clientCursor)
         << "cursorType" << clientCursor->c()->toString()
        )));
}
static void handleCursorCommand(CursorId id, BSONObj& cmdObj, BSONObjBuilder& result) { BSONElement batchSizeElem = cmdObj.getFieldDotted("cursor.batchSize"); const long long batchSize = batchSizeElem.isNumber() ? batchSizeElem.numberLong() : 101; // same as query // Using limited cursor API that ignores many edge cases. Should be sufficient for commands. ClientCursor::Pin pin(id); ClientCursor* cursor = pin.c(); massert(16958, "Cursor shouldn't have been deleted", cursor); // Make sure this cursor won't disappear on us fassert(16959, !cursor->c()->shouldDestroyOnNSDeletion()); fassert(16960, !cursor->c()->requiresLock()); try { // can't use result BSONObjBuilder directly since it won't handle exceptions correctly. BSONArrayBuilder resultsArray; const int byteLimit = MaxBytesToReturnToClientAtOnce; for (int objs = 0; objs < batchSize && cursor->ok() && resultsArray.len() <= byteLimit; objs++) { // TODO may need special logic if cursor->current() would cause results to be > 16MB resultsArray.append(cursor->current()); cursor->advance(); } // The initial ok() on a cursor may be very expensive so we don't do it when batchSize // is 0 since that indicates a desire for a fast return. if (batchSize != 0 && !cursor->ok()) { // There is no more data. Kill the cursor. pin.release(); ClientCursor::erase(id); id = 0; } BSONObjBuilder cursorObj(result.subobjStart("cursor")); cursorObj.append("id", id); cursorObj.append("ns", cursor->ns()); cursorObj.append("firstBatch", resultsArray.arr()); cursorObj.done(); } catch (...) { // Clean up cursor on way out of scope. pin.release(); ClientCursor::erase(id); throw; } }
// ns is either a full namespace or "dbname." when invalidating for a whole db void ClientCursor::invalidate(const StringData &ns) { Lock::assertWriteLocked(ns); size_t dotpos = ns.find('.'); verify(dotpos != string::npos); bool isDB = (dotpos + 1) == ns.size(); // first (and only) dot is the last char { //cout << "\nTEMP invalidate " << ns << endl; Database *db = cc().database(); verify(db); verify( ns.startsWith(db->name()) ); for( LockedIterator i; i.ok(); ) { ClientCursor *cc = i.current(); bool shouldDelete = false; if (cc->c()->shouldDestroyOnNSDeletion() && cc->_db == db) { if (isDB) { // already checked that db matched above dassert( StringData(cc->_ns).startsWith(ns) ); shouldDelete = true; } else { if ( ns == cc->_ns ) shouldDelete = true; } } if ( shouldDelete ) { i.deleteAndAdvance(); } else { i.advance(); } } } }
/**
 * Services a getMore request against an existing cursor and builds the reply message.
 *
 * @param ns                  namespace named by the request; must equal the cursor's own.
 * @param ntoreturn           maximum number of documents to return; 0 disables the count cap.
 * @param cursorid            id of the cursor to continue.
 * @param curop               current operation; receives the cursor's query and time limit.
 * @param pass                retry counter supplied by the caller (used for await-data).
 * @param exhaust             out: set from the cursor's QueryOption_Exhaust flag.
 * @param isCursorAuthorized  out: set to true once the namespace check has passed.
 * @return the reply buffer (decoupled from its builder), or 0 when a tailable await-data
 *         cursor has produced nothing yet and pass < 1000.
 */
QueryResult* processGetMore(const char* ns,
                            int ntoreturn,
                            long long cursorid,
                            CurOp& curop,
                            int pass,
                            bool& exhaust,
                            bool* isCursorAuthorized ) {

    // See if there's a runner. We do this until agg. is behind a Runner instead of a CC.
    // The probing pin lives in its own scope so that it is gone before anything else runs.
    bool runnerBacked = false;
    {
        ClientCursorPin probePin(cursorid);
        ClientCursor* probed = probePin.c();
        runnerBacked = (NULL != probed) && (NULL != probed->getRunner());
    }

    if (runnerBacked) {
        return newGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust, isCursorAuthorized);
    }

    exhaust = false;

    int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
    BufBuilder reply( bufSize );
    reply.skip(sizeof(QueryResult));

    int resultFlags = ResultFlag_AwaitCapable;
    int startingFrom = 0;
    int numReturned = 0;

    scoped_ptr<Client::ReadContext> readCtx(new Client::ReadContext(ns));
    // call this readlocked so state can't change
    replVerifyReadsOk();

    ClientCursorPin pin(cursorid);
    ClientCursor* clientCursor = pin.c();

    if ( unlikely(!clientCursor) ) {
        LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
        cursorid = 0;
        resultFlags = ResultFlag_CursorNotFound;
    }
    else {
        // Some internal users create a ClientCursor with a Runner. Don't crash if this
        // happens. Instead, hand them off to the new framework.
        if (NULL != clientCursor->getRunner()) {
            pin.release();
            return newGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust,
                              isCursorAuthorized);
        }

        // Guard against a spoofed ns that differs from the one the cursor was opened on.
        uassert(14833, "auth error", str::equals(ns, clientCursor->ns().c_str()));
        *isCursorAuthorized = true;

        // This must be done after auth check to ensure proper cleanup.
        uassert(16951, "failing getmore due to set failpoint",
                !MONGO_FAIL_POINT(getMoreError));

        // If the operation that spawned this cursor had a time limit set, apply leftover
        // time to this getmore.
        curop.setMaxTimeMicros( clientCursor->getLeftoverMaxTimeMicros() );
        killCurrentOp.checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

        if ( pass == 0 ) {
            clientCursor->updateSlaveLocation( curop );
        }

        int queryOptions = clientCursor->queryOptions();

        curop.debug().query = clientCursor->query();
        curop.setQuery( clientCursor->query() );

        startingFrom = clientCursor->pos();
        Cursor* cursor = clientCursor->c();

        if (!cursor->requiresLock()) {
            // make sure it won't be destroyed under us
            fassert(16952, !cursor->shouldDestroyOnNSDeletion());
            fassert(16953, !cursor->supportYields());
            readCtx.reset(); // unlocks
        }

        cursor->recoverFromYield();
        DiskLoc lastLoc;

        // This metadata may be stale, but it's the state of chunking when the cursor was
        // created.
        CollectionMetadataPtr metadata = clientCursor->getCollMetadata();
        KeyPattern keyPattern( metadata ? metadata->getKeyPattern() : BSONObj() );

        for (;;) {
            if ( !cursor->ok() ) {
                if ( !cursor->tailable() ) {
                    // A non-tailable cursor that is no longer ok() is finished: drop it.
                    pin.release();
                    bool erased = ClientCursor::erase(cursorid);
                    verify(erased);
                    cursorid = 0;
                    clientCursor = 0;
                    break;
                }

                // When a tailable cursor hits "EOF", ok() goes false and current() is null.
                // advance() can still be retried as a reactivation attempt; it returns
                // true once there is new data.
                if ( cursor->advance() ) {
                    continue;
                }

                if ( numReturned == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
                    return 0;
                }

                break;
            }

            MatchDetails details;
            if ( clientCursor->fields &&
                 clientCursor->fields->getArrayOpType() == Projection::ARRAY_OP_POSITIONAL ) {
                // field projection specified, and contains an array operator
                details.requestElemMatchKey();
            }

            // in some cases (clone collection) there won't be a matcher
            if ( cursor->currentMatches( &details ) ) {
                if ( metadata && !metadata->keyBelongsToMe( extractKey(cursor, keyPattern ) ) ) {
                    LOG(2) << "cursor skipping document in un-owned chunk: " << cursor->current() << endl;
                }
                else if ( !cursor->getsetdup(cursor->currLoc()) ) {
                    // Not a duplicate: add it to the reply.
                    lastLoc = cursor->currLoc();
                    numReturned++;

                    // Fill out the fields requested by the query.
                    const Projection::KeyOnly* keyFieldsOnly = cursor->keyFieldsOnly();
                    if ( keyFieldsOnly ) {
                        fillQueryResultFromObj( reply, 0, keyFieldsOnly->hydrate(
                        cursor->currKey() ), &details );
                    }
                    else {
                        DiskLoc loc = cursor->currLoc();
                        fillQueryResultFromObj( reply, clientCursor->fields.get(), cursor->current(), &details,
                                ( ( clientCursor->pq.get() && clientCursor->pq->showDiskLoc() ) ? &loc : 0 ) );
                    }

                    if ( ( ntoreturn && numReturned >= ntoreturn ) ||
                         reply.len() > MaxBytesToReturnToClientAtOnce ) {
                        // The batch is full: step past the returned document and stop.
                        cursor->advance();
                        clientCursor->incPos( numReturned );
                        break;
                    }
                }
            }

            cursor->advance();

            if ( ! clientCursor->yieldSometimes( ( cursor->ok() && cursor->keyFieldsOnly() ) ?
                                                 ClientCursor::DontNeed : ClientCursor::WillNeed ) ) {
                ClientCursor::erase(cursorid);
                cursorid = 0;
                clientCursor = 0;
                break;
            }
        }

        if ( clientCursor ) {
            if ( cursor->supportYields() ) {
                ClientCursor::YieldData data;
                verify( clientCursor->prepareToYield( data ) );
            }
            else {
                clientCursor->c()->noteLocation();
            }
            clientCursor->storeOpForSlave( lastLoc );
            exhaust = clientCursor->queryOptions() & QueryOption_Exhaust;

            // If the getmore had a time limit, remaining time is "rolled over" back to the
            // cursor (for use by future getmore ops).
            clientCursor->setLeftoverMaxTimeMicros( curop.getRemainingMaxTimeMicros() );
        }
    }

    // Fill in the reply header that was skipped over when the buffer was created.
    QueryResult* qr = (QueryResult *) reply.buf();
    qr->len = reply.len();
    qr->setOperation(opReply);
    qr->_resultFlags() = resultFlags;
    qr->cursorId = cursorid;
    qr->startingFrom = startingFrom;
    qr->nReturned = numReturned;
    reply.decouple();

    return qr;
}
static void handleCursorCommand(CursorId id, BSONObj& cmdObj, BSONObjBuilder& result) { BSONElement batchSizeElem = cmdObj.getFieldDotted("cursor.batchSize"); const long long batchSize = batchSizeElem.isNumber() ? batchSizeElem.numberLong() : 101; // same as query // Using limited cursor API that ignores many edge cases. Should be sufficient for commands. ClientCursorPin pin(id); ClientCursor* cursor = pin.c(); massert(16958, "Cursor shouldn't have been deleted", cursor); // Make sure this cursor won't disappear on us fassert(16959, !cursor->c()->shouldDestroyOnNSDeletion()); fassert(16960, !cursor->c()->requiresLock()); try { const string cursorNs = cursor->ns(); // we need this after cursor may have been deleted // can't use result BSONObjBuilder directly since it won't handle exceptions correctly. BSONArrayBuilder resultsArray; const int byteLimit = MaxBytesToReturnToClientAtOnce; for (int objCount = 0; objCount < batchSize && cursor->ok(); objCount++) { BSONObj current = cursor->current(); if (resultsArray.len() + current.objsize() > byteLimit) break; // too big. current will be the first doc in the second batch resultsArray.append(current); cursor->advance(); } // The initial ok() on a cursor may be very expensive so we don't do it when batchSize // is 0 since that indicates a desire for a fast return. if (batchSize != 0 && !cursor->ok()) { // There is no more data. Kill the cursor. pin.release(); ClientCursor::erase(id); id = 0; cursor = NULL; // make it an obvious error to use cursor after this point } if (cursor) { // If a time limit was set on the pipeline, remaining time is "rolled over" to the // cursor (for use by future getmore ops). cursor->setLeftoverMaxTimeMicros( cc().curop()->getRemainingMaxTimeMicros() ); } BSONObjBuilder cursorObj(result.subobjStart("cursor")); cursorObj.append("id", id); cursorObj.append("ns", cursorNs); cursorObj.append("firstBatch", resultsArray.arr()); cursorObj.done(); } catch (...) { // Clean up cursor on way out of scope. 
pin.release(); ClientCursor::erase(id); throw; } }
void DocumentSourceCursor::loadBatch() { if (!_cursorId) { dispose(); return; } // We have already validated the sharding version when we constructed the cursor // so we shouldn't check it again. Lock::DBRead lk(ns); Client::Context ctx(ns, storageGlobalParams.dbpath, /*doVersion=*/false); ClientCursorPin pin(_cursorId); ClientCursor* cursor = pin.c(); uassert(16950, "Cursor deleted. Was the collection or database dropped?", cursor); cursor->c()->recoverFromYield(); int memUsageBytes = 0; for( ; cursor->ok(); cursor->advance() ) { yieldSometimes(cursor); if ( !cursor->ok() ) { // The cursor was exhausted during the yield. break; } if ( !cursor->currentMatches() || cursor->currentIsDup() ) continue; // grab the matching document if (canUseCoveredIndex(cursor)) { // Can't have collection metadata if we are here BSONObj indexKey = cursor->currKey(); _currentBatch.push_back(Document(cursor->c()->keyFieldsOnly()->hydrate(indexKey))); } else { BSONObj next = cursor->current(); // check to see if this is a new object we don't own yet // because of a chunk migration if (_collMetadata) { KeyPattern kp( _collMetadata->getKeyPattern() ); if ( !_collMetadata->keyBelongsToMe( kp.extractSingleKey( next ) ) ) continue; } _currentBatch.push_back(_projection ? documentFromBsonWithDeps(next, _dependencies) : Document(next)); } if (_limit) { if (++_docsAddedToBatches == _limit->getLimit()) { break; } verify(_docsAddedToBatches < _limit->getLimit()); } memUsageBytes += _currentBatch.back().getApproximateSize(); if (memUsageBytes > MaxBytesToReturnToClientAtOnce) { // End this batch and prepare cursor for yielding. cursor->advance(); if (cursor->c()->supportYields()) { ClientCursor::YieldData data; cursor->prepareToYield(data); } else { cursor->c()->noteLocation(); } return; } } // If we got here, there aren't any more documents. // The Cursor must be released, see SERVER-6123. pin.release(); ClientCursor::erase(_cursorId); _cursorId = 0; _collMetadata.reset(); }
void ClientCursor::invalidate(const StringData& ns) { Lock::assertWriteLocked(ns); size_t dot = ns.find( '.' ); verify( dot != string::npos ); // first (and only) dot is the last char bool isDB = dot == ns.size() - 1; Database *db = cc().database(); verify(db); verify(ns.startsWith(db->name())); recursive_scoped_lock cclock(ccmutex); // Look at all active non-cached Runners. These are the runners that are in auto-yield mode // that are not attached to the the client cursor. For example, all internal runners don't // need to be cached -- there will be no getMore. for (set<Runner*>::iterator it = nonCachedRunners.begin(); it != nonCachedRunners.end(); ++it) { Runner* runner = *it; const string& runnerNS = runner->ns(); if ( ( isDB && StringData(runnerNS).startsWith(ns) ) || ns == runnerNS ) { runner->kill(); } } // Look at all cached ClientCursor(s). The CC may have a Runner, a Cursor, or nothing (see // sharding_block.h). CCById::const_iterator it = clientCursorsById.begin(); while (it != clientCursorsById.end()) { ClientCursor* cc = it->second; // We're only interested in cursors over one db. if (cc->_db != db) { ++it; continue; } // Note that a valid ClientCursor state is "no cursor no runner." This is because // the set of active cursor IDs in ClientCursor is used as representation of query // state. See sharding_block.h. TODO(greg,hk): Move this out. if (NULL == cc->c() && NULL == cc->_runner.get()) { ++it; continue; } bool shouldDelete = false; // We will only delete CCs with runners that are not actively in use. The runners that // are actively in use are instead kill()-ed. if (NULL != cc->_runner.get()) { verify(NULL == cc->c()); if (isDB || cc->_runner->ns() == ns) { // If there is a pinValue >= 100, somebody is actively using the CC and we do // not delete it. Instead we notify the holder that we killed it. The holder // will then delete the CC. 
if (cc->_pinValue >= 100) { cc->_runner->kill(); } else { // pinvalue is <100, so there is nobody actively holding the CC. We can // safely delete it as nobody is holding the CC. shouldDelete = true; } } } // Begin cursor-only DEPRECATED else if (cc->c()->shouldDestroyOnNSDeletion()) { verify(NULL == cc->_runner.get()); if (isDB) { // already checked that db matched above dassert( StringData(cc->_ns).startsWith( ns ) ); shouldDelete = true; } else { if ( ns == cc->_ns ) { shouldDelete = true; } } } // End cursor-only DEPRECATED if (shouldDelete) { ClientCursor* toDelete = it->second; CursorId id = toDelete->cursorid(); delete toDelete; // We're not following the usual paradigm of saving it, ++it, and deleting the saved // 'it' because deleting 'it' might invalidate the next thing in clientCursorsById. // TODO: Why? it = clientCursorsById.upper_bound(id); } else { ++it; } } }
/**
 * Services a getMore request against an existing cursor and builds the reply message.
 *
 * @param ns         namespace named by the request; must equal the cursor's own namespace.
 * @param ntoreturn  maximum number of documents to return; 0 disables the count cap.
 * @param cursorid   id of the cursor to continue.
 * @param curop      current operation; its debug query is set from the cursor.
 * @param pass       retry counter supplied by the caller (used for await-data).
 * @param exhaust    out: set from the cursor's QueryOption_Exhaust flag.
 * @return the reply buffer (decoupled from its builder), or 0 when a tailable await-data
 *         cursor has produced nothing yet and pass < 1000.
 */
QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
    exhaust = false;

    ClientCursor::Pointer pin(cursorid);
    ClientCursor* clientCursor = pin.c();

    int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
    BufBuilder reply( bufSize );
    reply.skip(sizeof(QueryResult));

    int resultFlags = ResultFlag_AwaitCapable;
    int startingFrom = 0;
    int numReturned = 0;

    if ( unlikely(!clientCursor) ) {
        LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
        cursorid = 0;
        resultFlags = ResultFlag_CursorNotFound;
    }
    else {
        // Guard against a spoofed ns that differs from the one the cursor was opened on.
        uassert(14833, "auth error", str::equals(ns, clientCursor->ns().c_str()));

        if ( pass == 0 ) {
            clientCursor->updateSlaveLocation( curop );
        }

        int queryOptions = clientCursor->queryOptions();

        curop.debug().query = clientCursor->query();

        startingFrom = clientCursor->pos();
        Cursor* cursor = clientCursor->c();
        cursor->recoverFromYield();
        DiskLoc lastLoc;

        // Set only when keys are unmodified, the index is not multikey and a projection
        // exists; in that case results are rebuilt from the index key.
        scoped_ptr<Projection::KeyOnly> keyFieldsOnly;
        if ( !clientCursor->modifiedKeys() && !clientCursor->isMultiKey() && clientCursor->fields ) {
            keyFieldsOnly.reset( clientCursor->fields->checkKey( clientCursor->indexKeyPattern() ) );
        }

        // This manager may be stale, but it's the state of chunking when the cursor was created.
        ShardChunkManagerPtr manager = clientCursor->getChunkManager();

        for (;;) {
            if ( !cursor->ok() ) {
                if ( !cursor->tailable() ) {
                    // A non-tailable cursor that is no longer ok() is finished: drop it.
                    pin.release();
                    bool erased = ClientCursor::erase(cursorid);
                    verify(erased);
                    cursorid = 0;
                    clientCursor = 0;
                    break;
                }

                // When a tailable cursor hits "EOF", ok() goes false and current() is null.
                // advance() can still be retried as a reactivation attempt; it returns
                // true once there is new data.
                if ( cursor->advance() ) {
                    continue;
                }

                if ( numReturned == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
                    return 0;
                }

                break;
            }

            // in some cases (clone collection) there won't be a matcher
            if ( cursor->currentMatches() ) {
                if ( manager && ! manager->belongsToMe( clientCursor ) ) {
                    LOG(2) << "cursor skipping document in un-owned chunk: " << cursor->current() << endl;
                }
                else if ( !cursor->getsetdup(cursor->currLoc()) ) {
                    // Not a duplicate: add it to the reply.
                    lastLoc = cursor->currLoc();
                    numReturned++;

                    if ( keyFieldsOnly ) {
                        fillQueryResultFromObj(reply, 0, keyFieldsOnly->hydrate( cursor->currKey() ) );
                    }
                    else {
                        BSONObj js = cursor->current();
                        // show disk loc should be part of the main query, not in an $or clause, so this should be ok
                        fillQueryResultFromObj(reply, clientCursor->fields.get(), js,
                                               ( clientCursor->pq.get() && clientCursor->pq->showDiskLoc() ? &lastLoc : 0));
                    }

                    if ( ( ntoreturn && numReturned >= ntoreturn ) || reply.len() > MaxBytesToReturnToClientAtOnce ) {
                        // The batch is full: step past the returned document and stop.
                        cursor->advance();
                        clientCursor->incPos( numReturned );
                        break;
                    }
                }
            }

            cursor->advance();

            if ( ! clientCursor->yieldSometimes( ClientCursor::MaybeCovered ) ) {
                ClientCursor::erase(cursorid);
                cursorid = 0;
                clientCursor = 0;
                pin.deleted();
                break;
            }
        }

        if ( clientCursor ) {
            if ( cursor->supportYields() ) {
                ClientCursor::YieldData data;
                verify( clientCursor->prepareToYield( data ) );
            }
            else {
                clientCursor->c()->noteLocation();
            }
            clientCursor->mayUpgradeStorage();
            clientCursor->storeOpForSlave( lastLoc );
            exhaust = clientCursor->queryOptions() & QueryOption_Exhaust;
        }
    }

    // Fill in the reply header that was skipped over when the buffer was created.
    QueryResult* qr = (QueryResult *) reply.buf();
    qr->len = reply.len();
    qr->setOperation(opReply);
    qr->_resultFlags() = resultFlags;
    qr->cursorId = cursorid;
    qr->startingFrom = startingFrom;
    qr->nReturned = numReturned;
    reply.decouple();

    return qr;
}