Value DocumentSourceCursor::serialize(bool explain) const {
        // we never parse a documentSourceCursor, so we only serialize for explain
        if (!explain)
            return Value();

        Lock::DBRead lk(ns);
        Client::Context ctx(ns, storageGlobalParams.dbpath, /*doVersion=*/false);

        ClientCursorPin pin(_cursorId);
        ClientCursor* cursor = pin.c();

        uassert(17135, "Cursor deleted. Was the collection or database dropped?",
                cursor);

        cursor->c()->recoverFromYield();

        return Value(DOC(getSourceName() <<
            DOC("query" << Value(_query)
             << "sort" << (!_sort.isEmpty() ? Value(_sort) : Value())
             << "limit" << (_limit ? Value(_limit->getLimit()) : Value())
             << "fields" << (_projection ? Value(_projection->getSpec()) : Value())
             << "indexOnly" << canUseCoveredIndex(cursor)
             << "cursorType" << cursor->c()->toString()
        ))); // TODO get more plan information
    }
Beispiel #2
0
    static void handleCursorCommand(CursorId id, BSONObj& cmdObj, BSONObjBuilder& result) {
        BSONElement batchSizeElem = cmdObj.getFieldDotted("cursor.batchSize");
        const long long batchSize = batchSizeElem.isNumber()
                                    ? batchSizeElem.numberLong()
                                    : 101; // same as query

        // Using limited cursor API that ignores many edge cases. Should be sufficient for commands.
        ClientCursor::Pin pin(id);
        ClientCursor* cursor = pin.c();

        massert(16958, "Cursor shouldn't have been deleted",
                cursor);

        // Make sure this cursor won't disappear on us
        fassert(16959, !cursor->c()->shouldDestroyOnNSDeletion());
        fassert(16960, !cursor->c()->requiresLock());

        try {
            // can't use result BSONObjBuilder directly since it won't handle exceptions correctly.
            BSONArrayBuilder resultsArray;
            const int byteLimit = MaxBytesToReturnToClientAtOnce;
            for (int objs = 0;
                    objs < batchSize && cursor->ok() && resultsArray.len() <= byteLimit;
                    objs++) {
                // TODO may need special logic if cursor->current() would cause results to be > 16MB
                resultsArray.append(cursor->current());
                cursor->advance();
            }

            // The initial ok() on a cursor may be very expensive so we don't do it when batchSize
            // is 0 since that indicates a desire for a fast return.
            if (batchSize != 0 && !cursor->ok()) {
                // There is no more data. Kill the cursor.
                pin.release();
                ClientCursor::erase(id);
                id = 0;
            }

            BSONObjBuilder cursorObj(result.subobjStart("cursor"));
            cursorObj.append("id", id);
            cursorObj.append("ns", cursor->ns());
            cursorObj.append("firstBatch", resultsArray.arr());
            cursorObj.done();
        }
        catch (...) {
            // Clean up cursor on way out of scope.
            pin.release();
            ClientCursor::erase(id);
            throw;
        }
    }
Beispiel #3
0
    // ns is either a full namespace or "dbname." when invalidating for a whole db
    void ClientCursor::invalidate(const StringData &ns) {
        Lock::assertWriteLocked(ns);
        size_t dotpos = ns.find('.');
        verify(dotpos != string::npos);
        bool isDB = (dotpos + 1) == ns.size(); // first (and only) dot is the last char

        {
            //cout << "\nTEMP invalidate " << ns << endl;
            Database *db = cc().database();
            verify(db);
            verify( ns.startsWith(db->name()) );

            for( LockedIterator i; i.ok(); ) {
                ClientCursor *cc = i.current();

                bool shouldDelete = false;
                if (cc->c()->shouldDestroyOnNSDeletion() && cc->_db == db) {
                    if (isDB) {
                        // already checked that db matched above
                        dassert( StringData(cc->_ns).startsWith(ns) );
                        shouldDelete = true;
                    }
                    else {
                        if ( ns == cc->_ns )
                            shouldDelete = true;
                    }
                }

                if ( shouldDelete ) {
                    i.deleteAndAdvance();
                }
                else {
                    i.advance();
                }
            }
        }
    }
Beispiel #4
0
    QueryResult* processGetMore(const char* ns,
                                int ntoreturn,
                                long long cursorid,
                                CurOp& curop,
                                int pass,
                                bool& exhaust,
                                bool* isCursorAuthorized ) {

        bool hasRunner = false;

        // Scoped to kill the pin after seeing if the runner's there.
        {
            // See if there's a runner.  We do this until agg. is behind a Runner instead of a CC.
            ClientCursorPin p(cursorid);
            ClientCursor *cc = p.c();
            if (NULL != cc && NULL != cc->getRunner()) {
                hasRunner = true;
            }
        }

        if (hasRunner) {
            return newGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust,
                              isCursorAuthorized);
        }

        exhaust = false;

        int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;

        BufBuilder b( bufSize );
        b.skip(sizeof(QueryResult));
        int resultFlags = ResultFlag_AwaitCapable;
        int start = 0;
        int n = 0;

        scoped_ptr<Client::ReadContext> ctx(new Client::ReadContext(ns));
        // call this readlocked so state can't change
        replVerifyReadsOk();

        ClientCursorPin p(cursorid);
        ClientCursor *cc = p.c();

        if ( unlikely(!cc) ) {
            LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
            cursorid = 0;
            resultFlags = ResultFlag_CursorNotFound;
        }
        else {
            // Some internal users create a ClientCursor with a Runner.  Don't crash if this
            // happens.  Instead, hand them off to the new framework.
            if (NULL != cc->getRunner()) {
                p.release();
                return newGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust, isCursorAuthorized);
            }

            // check for spoofing of the ns such that it does not match the one originally there for the cursor
            uassert(14833, "auth error", str::equals(ns, cc->ns().c_str()));

            *isCursorAuthorized = true;

            // This must be done after auth check to ensure proper cleanup.
            uassert(16951, "failing getmore due to set failpoint",
                    !MONGO_FAIL_POINT(getMoreError));

            // If the operation that spawned this cursor had a time limit set, apply leftover
            // time to this getmore.
            curop.setMaxTimeMicros( cc->getLeftoverMaxTimeMicros() );
            killCurrentOp.checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

            if ( pass == 0 )
                cc->updateSlaveLocation( curop );

            int queryOptions = cc->queryOptions();
            
            curop.debug().query = cc->query();
            curop.setQuery( cc->query() );

            start = cc->pos();
            Cursor *c = cc->c();

            if (!c->requiresLock()) {
                // make sure it won't be destroyed under us
                fassert(16952, !c->shouldDestroyOnNSDeletion());
                fassert(16953, !c->supportYields());
                ctx.reset(); // unlocks
            }

            c->recoverFromYield();
            DiskLoc last;

            // This metadata may be stale, but it's the state of chunking when the cursor was
            // created.
            CollectionMetadataPtr metadata = cc->getCollMetadata();
            KeyPattern keyPattern( metadata ? metadata->getKeyPattern() : BSONObj() );

            while ( 1 ) {
                if ( !c->ok() ) {
                    if ( c->tailable() ) {
                        // when a tailable cursor hits "EOF", ok() goes false, and current() is
                        // null.  however advance() can still be retries as a reactivation attempt.
                        // when there is new data, it will return true.  that's what we are doing
                        // here.
                        if ( c->advance() )
                            continue;

                        if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
                            return 0;
                        }

                        break;
                    }
                    p.release();
                    bool ok = ClientCursor::erase(cursorid);
                    verify(ok);
                    cursorid = 0;
                    cc = 0;
                    break;
                }

                MatchDetails details;
                if ( cc->fields && cc->fields->getArrayOpType() == Projection::ARRAY_OP_POSITIONAL ) {
                    // field projection specified, and contains an array operator
                    details.requestElemMatchKey();
                }

                // in some cases (clone collection) there won't be a matcher
                if ( !c->currentMatches( &details ) ) {
                }
                else if ( metadata && !metadata->keyBelongsToMe( extractKey(c, keyPattern ) ) ) {
                    LOG(2) << "cursor skipping document in un-owned chunk: " << c->current()
                               << endl;
                }
                else {
                    if( c->getsetdup(c->currLoc()) ) {
                        //out() << "  but it's a dup \n";
                    }
                    else {
                        last = c->currLoc();
                        n++;

                        // Fill out the fields requested by the query.
                        const Projection::KeyOnly *keyFieldsOnly = c->keyFieldsOnly();
                        if ( keyFieldsOnly ) {
                            fillQueryResultFromObj( b, 0, keyFieldsOnly->hydrate(
                            c->currKey() ), &details );
                        }
                        else {
                            DiskLoc loc = c->currLoc();
                            fillQueryResultFromObj( b, cc->fields.get(), c->current(), &details,
                                    ( ( cc->pq.get() && cc->pq->showDiskLoc() ) ? &loc : 0 ) );
                        }

                        if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) {
                            c->advance();
                            cc->incPos( n );
                            break;
                        }
                    }
                }
                c->advance();

                if ( ! cc->yieldSometimes( ( c->ok() && c->keyFieldsOnly() ) ?
                                          ClientCursor::DontNeed : ClientCursor::WillNeed ) ) {
                    ClientCursor::erase(cursorid);
                    cursorid = 0;
                    cc = 0;
                    break;
                }
            }
            
            if ( cc ) {
                if ( c->supportYields() ) {
                    ClientCursor::YieldData data;
                    verify( cc->prepareToYield( data ) );
                }
                else {
                    cc->c()->noteLocation();
                }
                cc->storeOpForSlave( last );
                exhaust = cc->queryOptions() & QueryOption_Exhaust;

                // If the getmore had a time limit, remaining time is "rolled over" back to the
                // cursor (for use by future getmore ops).
                cc->setLeftoverMaxTimeMicros( curop.getRemainingMaxTimeMicros() );
            }
        }

        QueryResult *qr = (QueryResult *) b.buf();
        qr->len = b.len();
        qr->setOperation(opReply);
        qr->_resultFlags() = resultFlags;
        qr->cursorId = cursorid;
        qr->startingFrom = start;
        qr->nReturned = n;
        b.decouple();

        return qr;
    }
Beispiel #5
0
    static void handleCursorCommand(CursorId id, BSONObj& cmdObj, BSONObjBuilder& result) {
        BSONElement batchSizeElem = cmdObj.getFieldDotted("cursor.batchSize");
        const long long batchSize = batchSizeElem.isNumber()
                                    ? batchSizeElem.numberLong()
                                    : 101; // same as query

        // Using limited cursor API that ignores many edge cases. Should be sufficient for commands.
        ClientCursorPin pin(id);
        ClientCursor* cursor = pin.c();

        massert(16958, "Cursor shouldn't have been deleted",
                cursor);

        // Make sure this cursor won't disappear on us
        fassert(16959, !cursor->c()->shouldDestroyOnNSDeletion());
        fassert(16960, !cursor->c()->requiresLock());

        try {
            const string cursorNs = cursor->ns(); // we need this after cursor may have been deleted

            // can't use result BSONObjBuilder directly since it won't handle exceptions correctly.
            BSONArrayBuilder resultsArray;
            const int byteLimit = MaxBytesToReturnToClientAtOnce;
            for (int objCount = 0; objCount < batchSize && cursor->ok(); objCount++) {
                BSONObj current = cursor->current();
                if (resultsArray.len() + current.objsize() > byteLimit)
                    break; // too big. current will be the first doc in the second batch

                resultsArray.append(current);
                cursor->advance();
            }

            // The initial ok() on a cursor may be very expensive so we don't do it when batchSize
            // is 0 since that indicates a desire for a fast return.
            if (batchSize != 0 && !cursor->ok()) {
                // There is no more data. Kill the cursor.
                pin.release();
                ClientCursor::erase(id);
                id = 0;
                cursor = NULL; // make it an obvious error to use cursor after this point
            }

            if (cursor) {
                // If a time limit was set on the pipeline, remaining time is "rolled over" to the
                // cursor (for use by future getmore ops).
                cursor->setLeftoverMaxTimeMicros( cc().curop()->getRemainingMaxTimeMicros() );
            }

            BSONObjBuilder cursorObj(result.subobjStart("cursor"));
            cursorObj.append("id", id);
            cursorObj.append("ns", cursorNs);
            cursorObj.append("firstBatch", resultsArray.arr());
            cursorObj.done();
        }
        catch (...) {
            // Clean up cursor on way out of scope.
            pin.release();
            ClientCursor::erase(id);
            throw;
        }
    }
    void DocumentSourceCursor::loadBatch() {
        if (!_cursorId) {
            dispose();
            return;
        }

        // We have already validated the sharding version when we constructed the cursor
        // so we shouldn't check it again.
        Lock::DBRead lk(ns);
        Client::Context ctx(ns, storageGlobalParams.dbpath, /*doVersion=*/false);

        ClientCursorPin pin(_cursorId);
        ClientCursor* cursor = pin.c();

        uassert(16950, "Cursor deleted. Was the collection or database dropped?",
                cursor);

        cursor->c()->recoverFromYield();

        int memUsageBytes = 0;
        for( ; cursor->ok(); cursor->advance() ) {

            yieldSometimes(cursor);
            if ( !cursor->ok() ) {
                // The cursor was exhausted during the yield.
                break;
            }

            if ( !cursor->currentMatches() || cursor->currentIsDup() )
                continue;

            // grab the matching document
            if (canUseCoveredIndex(cursor)) {
                // Can't have collection metadata if we are here
                BSONObj indexKey = cursor->currKey();
                _currentBatch.push_back(Document(cursor->c()->keyFieldsOnly()->hydrate(indexKey)));
            }
            else {
                BSONObj next = cursor->current();

                // check to see if this is a new object we don't own yet
                // because of a chunk migration
                if (_collMetadata) {
                    KeyPattern kp( _collMetadata->getKeyPattern() );
                    if ( !_collMetadata->keyBelongsToMe( kp.extractSingleKey( next ) ) ) continue;
                }

                _currentBatch.push_back(_projection
                                            ? documentFromBsonWithDeps(next, _dependencies)
                                            : Document(next));
            }

            if (_limit) {
                if (++_docsAddedToBatches == _limit->getLimit()) {
                    break;
                }
                verify(_docsAddedToBatches < _limit->getLimit());
            }

            memUsageBytes += _currentBatch.back().getApproximateSize();

            if (memUsageBytes > MaxBytesToReturnToClientAtOnce) {
                // End this batch and prepare cursor for yielding.
                cursor->advance();

                if (cursor->c()->supportYields()) {
                    ClientCursor::YieldData data;
                    cursor->prepareToYield(data);
                } else {
                    cursor->c()->noteLocation();
                }

                return;
            }
        }

        // If we got here, there aren't any more documents.
        // The Cursor must be released, see SERVER-6123.
        pin.release();
        ClientCursor::erase(_cursorId);
        _cursorId = 0;
        _collMetadata.reset();
    }
Beispiel #7
0
    void ClientCursor::invalidate(const StringData& ns) {
        Lock::assertWriteLocked(ns);

        size_t dot = ns.find( '.' );
        verify( dot != string::npos );

        // first (and only) dot is the last char
        bool isDB = dot == ns.size() - 1;

        Database *db = cc().database();
        verify(db);
        verify(ns.startsWith(db->name()));

        recursive_scoped_lock cclock(ccmutex);
        // Look at all active non-cached Runners.  These are the runners that are in auto-yield mode
        // that are not attached to the the client cursor. For example, all internal runners don't
        // need to be cached -- there will be no getMore.
        for (set<Runner*>::iterator it = nonCachedRunners.begin(); it != nonCachedRunners.end();
             ++it) {

            Runner* runner = *it;
            const string& runnerNS = runner->ns();
            if ( ( isDB && StringData(runnerNS).startsWith(ns) ) || ns == runnerNS ) {
                runner->kill();
            }
        }

        // Look at all cached ClientCursor(s).  The CC may have a Runner, a Cursor, or nothing (see
        // sharding_block.h).
        CCById::const_iterator it = clientCursorsById.begin();
        while (it != clientCursorsById.end()) {
            ClientCursor* cc = it->second;

            // We're only interested in cursors over one db.
            if (cc->_db != db) {
                ++it;
                continue;
            }

            // Note that a valid ClientCursor state is "no cursor no runner."  This is because
            // the set of active cursor IDs in ClientCursor is used as representation of query
            // state.  See sharding_block.h.  TODO(greg,hk): Move this out.
            if (NULL == cc->c() && NULL == cc->_runner.get()) {
                ++it;
                continue;
            }

            bool shouldDelete = false;

            // We will only delete CCs with runners that are not actively in use.  The runners that
            // are actively in use are instead kill()-ed.
            if (NULL != cc->_runner.get()) {
                verify(NULL == cc->c());

                if (isDB || cc->_runner->ns() == ns) {
                    // If there is a pinValue >= 100, somebody is actively using the CC and we do
                    // not delete it.  Instead we notify the holder that we killed it.  The holder
                    // will then delete the CC.
                    if (cc->_pinValue >= 100) {
                        cc->_runner->kill();
                    }
                    else {
                        // pinvalue is <100, so there is nobody actively holding the CC.  We can
                        // safely delete it as nobody is holding the CC.
                        shouldDelete = true;
                    }
                }
            }
            // Begin cursor-only DEPRECATED
            else if (cc->c()->shouldDestroyOnNSDeletion()) {
                verify(NULL == cc->_runner.get());

                if (isDB) {
                    // already checked that db matched above
                    dassert( StringData(cc->_ns).startsWith( ns ) );
                    shouldDelete = true;
                }
                else {
                    if ( ns == cc->_ns ) {
                        shouldDelete = true;
                    }
                }
            }
            // End cursor-only DEPRECATED

            if (shouldDelete) {
                ClientCursor* toDelete = it->second;
                CursorId id = toDelete->cursorid();
                delete toDelete;
                // We're not following the usual paradigm of saving it, ++it, and deleting the saved
                // 'it' because deleting 'it' might invalidate the next thing in clientCursorsById.
                // TODO: Why?
                it = clientCursorsById.upper_bound(id);
            }
            else {
                ++it;
            }
        }
    }
Beispiel #8
0
    QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
        exhaust = false;
        ClientCursor::Pointer p(cursorid);
        ClientCursor *cc = p.c();

        int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;

        BufBuilder b( bufSize );
        b.skip(sizeof(QueryResult));
        int resultFlags = ResultFlag_AwaitCapable;
        int start = 0;
        int n = 0;

        if ( unlikely(!cc) ) {
            LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
            cursorid = 0;
            resultFlags = ResultFlag_CursorNotFound;
        }
        else {
            // check for spoofing of the ns such that it does not match the one originally there for the cursor
            uassert(14833, "auth error", str::equals(ns, cc->ns().c_str()));

            if ( pass == 0 )
                cc->updateSlaveLocation( curop );

            int queryOptions = cc->queryOptions();
            
            curop.debug().query = cc->query();

            start = cc->pos();
            Cursor *c = cc->c();
            c->recoverFromYield();
            DiskLoc last;

            scoped_ptr<Projection::KeyOnly> keyFieldsOnly;
            if ( cc->modifiedKeys() == false && cc->isMultiKey() == false && cc->fields )
                keyFieldsOnly.reset( cc->fields->checkKey( cc->indexKeyPattern() ) );

            // This manager may be stale, but it's the state of chunking when the cursor was created.
            ShardChunkManagerPtr manager = cc->getChunkManager();

            while ( 1 ) {
                if ( !c->ok() ) {
                    if ( c->tailable() ) {
                        /* when a tailable cursor hits "EOF", ok() goes false, and current() is null.  however
                           advance() can still be retries as a reactivation attempt.  when there is new data, it will
                           return true.  that's what we are doing here.
                           */
                        if ( c->advance() )
                            continue;

                        if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
                            return 0;
                        }

                        break;
                    }
                    p.release();
                    bool ok = ClientCursor::erase(cursorid);
                    verify(ok);
                    cursorid = 0;
                    cc = 0;
                    break;
                }

                // in some cases (clone collection) there won't be a matcher
                if ( !c->currentMatches() ) {
                }
                else if ( manager && ! manager->belongsToMe( cc ) ){
                    LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl;
                }
                else {
                    if( c->getsetdup(c->currLoc()) ) {
                        //out() << "  but it's a dup \n";
                    }
                    else {
                        last = c->currLoc();
                        n++;

                        if ( keyFieldsOnly ) {
                            fillQueryResultFromObj(b, 0, keyFieldsOnly->hydrate( c->currKey() ) );
                        }
                        else {
                            BSONObj js = c->current();
                            // show disk loc should be part of the main query, not in an $or clause, so this should be ok
                            fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0));
                        }

                        if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) {
                            c->advance();
                            cc->incPos( n );
                            break;
                        }
                    }
                }
                c->advance();

                if ( ! cc->yieldSometimes( ClientCursor::MaybeCovered ) ) {
                    ClientCursor::erase(cursorid);
                    cursorid = 0;
                    cc = 0;
                    p.deleted();
                    break;
                }
            }
            
            if ( cc ) {
                if ( c->supportYields() ) {
                    ClientCursor::YieldData data;
                    verify( cc->prepareToYield( data ) );
                }
                else {
                    cc->c()->noteLocation();
                }
                cc->mayUpgradeStorage();
                cc->storeOpForSlave( last );
                exhaust = cc->queryOptions() & QueryOption_Exhaust;
            }
        }

        QueryResult *qr = (QueryResult *) b.buf();
        qr->len = b.len();
        qr->setOperation(opReply);
        qr->_resultFlags() = resultFlags;
        qr->cursorId = cursorid;
        qr->startingFrom = start;
        qr->nReturned = n;
        b.decouple();

        return qr;
    }