C++ (Cpp) Timestamp::isNull Examples

Programming Language: C++ (Cpp)

Class/Type: Timestamp

Method/Function: isNull

Examples at hotexamples.com: 9

C++ (Cpp) Timestamp::isNull - 9 examples found. These are the top rated real world C++ (Cpp) examples of Timestamp::isNull extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

toString(18)

timestamp(10)

isNull(9)

toFormattedString(7)

epochTime(5)

epochMicroseconds(5)

valid(4)

utcTime(4)

isElapsed(4)

elapsed(4)

timebase(4)

toBSON(3)

GetSeconds(3)

before(3)

asULL(3)

msecs(3)

GetSecondFraction(2)

stamp(2)

setToCurrentTime(2)

posix(2)

getSecs(2)

setTime(2)

setnow(2)

getFloatTime(2)

toStringLong(2)

count(2)

ToString(2)

update(2)

subUsec(1)

toFileTimeNP(1)

subSec(1)

seconds(1)

toFormatString(1)

toSec(1)

setToNow(1)

unixTime(1)

setTimeNow(1)

usec(1)

secondsSinceEpoch(1)

GetDate(1)

sec(1)

getInc(1)

GetTicks(1)

GetTime(1)

GetTimeUs(1)

add(1)

after(1)

convertToFramerate(1)

formattedPrint(1)

fseconds(1)

Example #1

Show file

File: replication_recovery.cpp Project: i80and/mongo

void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx,
                                                 Timestamp oplogApplicationStartPoint,
                                                 Timestamp topOfOplog) {
    invariant(!oplogApplicationStartPoint.isNull());
    invariant(!topOfOplog.isNull());

    // Check if we have any unapplied ops in our oplog. It is important that this is done after
    // deleting the ragged end of the oplog.
    if (oplogApplicationStartPoint == topOfOplog) {
        log()
            << "No oplog entries to apply for recovery. appliedThrough is at the top of the oplog.";
        return;  // We've applied all the valid oplog we have.
    } else if (oplogApplicationStartPoint > topOfOplog) {
        severe() << "Applied op " << oplogApplicationStartPoint.toBSON()
                 << " not found. Top of oplog is " << topOfOplog.toBSON() << '.';
        fassertFailedNoTrace(40313);
    }

    log() << "Replaying stored operations from " << oplogApplicationStartPoint.toBSON()
          << " (exclusive) to " << topOfOplog.toBSON() << " (inclusive).";

    DBDirectClient db(opCtx);
    auto cursor = db.query(NamespaceString::kRsOplogNamespace.ns(),
                           QUERY("ts" << BSON("$gte" << oplogApplicationStartPoint)),
                           /*batchSize*/ 0,
                           /*skip*/ 0,
                           /*projection*/ nullptr,
                           QueryOption_OplogReplay);

    // Check that the first document matches our appliedThrough point then skip it since it's
    // already been applied.
    if (!cursor->more()) {
        // This should really be impossible because we check above that the top of the oplog is
        // strictly > appliedThrough. If this fails it represents a serious bug in either the
        // storage engine or query's implementation of OplogReplay.
        severe() << "Couldn't find any entries in the oplog >= "
                 << oplogApplicationStartPoint.toBSON() << " which should be impossible.";
        fassertFailedNoTrace(40293);
    }

    auto firstTimestampFound =
        fassertStatusOK(40291, OpTime::parseFromOplogEntry(cursor->nextSafe())).getTimestamp();
    if (firstTimestampFound != oplogApplicationStartPoint) {
        severe() << "Oplog entry at " << oplogApplicationStartPoint.toBSON()
                 << " is missing; actual entry found is " << firstTimestampFound.toBSON();
        fassertFailedNoTrace(40292);
    }

    // Apply remaining ops one at at time, but don't log them because they are already logged.
    UnreplicatedWritesBlock uwb(opCtx);

    while (cursor->more()) {
        auto entry = cursor->nextSafe();
        fassertStatusOK(40294,
                        SyncTail::syncApply(opCtx, entry, OplogApplication::Mode::kRecovering));
        _consistencyMarkers->setAppliedThrough(
            opCtx, fassertStatusOK(40295, OpTime::parseFromOplogEntry(entry)));
    }
}

Example #2

Show file

File: oplog_buffer_collection.cpp Project: judahschvimer/mongo

void OplogBufferCollection::pushAllNonBlocking(OperationContext* txn,
        Batch::const_iterator begin,
        Batch::const_iterator end) {
    if (begin == end) {
        return;
    }
    size_t numDocs = std::distance(begin, end);
    Batch docsToInsert(numDocs);
    Timestamp ts;
    std::transform(begin, end, docsToInsert.begin(), [&ts](const Value& value) {
        auto pair = addIdToDocument(value);
        invariant(ts.isNull() || pair.second > ts);
        ts = pair.second;
        return pair.first;
    });

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    auto status = _storageInterface->insertDocuments(txn, _nss, docsToInsert);
    fassertStatusOK(40161, status);

    _lastPushedTimestamp = ts;
    _count += numDocs;
    _size += std::accumulate(begin, end, 0U, [](const size_t& docSize, const Value& value) {
        return docSize + size_t(value.objsize());
    });
    _cvNoLongerEmpty.notify_all();
}

Example #3

Show file

File: oplog_buffer_collection.cpp Project: judahschvimer/mongo

std::pair<BSONObj, Timestamp> OplogBufferCollection::addIdToDocument(const BSONObj& orig) {
    invariant(!orig.isEmpty());
    BSONObjBuilder bob;
    Timestamp ts = orig["ts"].timestamp();
    invariant(!ts.isNull());
    bob.append("_id", ts);
    bob.append(kOplogEntryFieldName, orig);
    return std::pair<BSONObj, Timestamp> {bob.obj(), ts};
}

Example #4

Show file

File: replication_recovery.cpp Project: acmorrow/mongo

void ReplicationRecoveryImpl::_recoverFromStableTimestamp(OperationContext* opCtx,
                                                          Timestamp stableTimestamp,
                                                          OpTime appliedThrough,
                                                          OpTime topOfOplog) {
    invariant(!stableTimestamp.isNull());
    invariant(!topOfOplog.isNull());
    const auto truncateAfterPoint = _consistencyMarkers->getOplogTruncateAfterPoint(opCtx);
    log() << "Recovering from stable timestamp: " << stableTimestamp
          << " (top of oplog: " << topOfOplog << ", appliedThrough: " << appliedThrough
          << ", TruncateAfter: " << truncateAfterPoint << ")";

    log() << "Starting recovery oplog application at the stable timestamp: " << stableTimestamp;
    _applyToEndOfOplog(opCtx, stableTimestamp, topOfOplog.getTimestamp());
}

Example #5

Show file

File: find.cpp Project: ShashankRC/mongo

    std::string runQuery(OperationContext* txn,
                         QueryMessage& q,
                         const NamespaceString& nss,
                         CurOp& curop,
                         Message &result) {
        // Validate the namespace.
        uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid());
        invariant(!nss.isCommand());

        // Set curop information.
        beginQueryOp(nss, q.query, q.ntoreturn, q.ntoskip, &curop);

        // Parse the qm into a CanonicalQuery.
        std::auto_ptr<CanonicalQuery> cq;
        {
            CanonicalQuery* cqRaw;
            Status canonStatus = CanonicalQuery::canonicalize(q,
                                                              &cqRaw,
                                                              WhereCallbackReal(txn, nss.db()));
            if (!canonStatus.isOK()) {
                uasserted(17287, str::stream() << "Can't canonicalize query: "
                                               << canonStatus.toString());
            }
            cq.reset(cqRaw);
        }
        invariant(cq.get());

        LOG(5) << "Running query:\n" << cq->toString();
        LOG(2) << "Running query: " << cq->toStringShort();

        // Parse, canonicalize, plan, transcribe, and get a plan executor.
        AutoGetCollectionForRead ctx(txn, nss);
        Collection* collection = ctx.getCollection();

        const int dbProfilingLevel = ctx.getDb() ? ctx.getDb()->getProfilingLevel() :
                                                   serverGlobalParams.defaultProfile;

        // We have a parsed query. Time to get the execution plan for it.
        std::unique_ptr<PlanExecutor> exec;
        {
            PlanExecutor* rawExec;
            Status execStatus = getExecutorFind(txn,
                                                collection,
                                                nss,
                                                cq.release(),
                                                PlanExecutor::YIELD_AUTO,
                                                &rawExec);
            uassertStatusOK(execStatus);
            exec.reset(rawExec);
        }
        const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

        // If it's actually an explain, do the explain and return rather than falling through
        // to the normal query execution loop.
        if (pq.isExplain()) {
            BufBuilder bb;
            bb.skip(sizeof(QueryResult::Value));

            BSONObjBuilder explainBob;
            Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

            // Add the resulting object to the return buffer.
            BSONObj explainObj = explainBob.obj();
            bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

            // TODO: Does this get overwritten/do we really need to set this twice?
            curop.debug().query = q.query;

            // Set query result fields.
            QueryResult::View qr = bb.buf();
            bb.decouple();
            qr.setResultFlagsToOk();
            qr.msgdata().setLen(bb.len());
            curop.debug().responseLength = bb.len();
            qr.msgdata().setOperation(opReply);
            qr.setCursorId(0);
            qr.setStartingFrom(0);
            qr.setNReturned(1);
            result.setData(qr.view2ptr(), true);
            return "";
        }

        // We freak out later if this changes before we're done with the query.
        const ChunkVersion shardingVersionAtStart = shardingState.getVersion(nss.ns());

        // Handle query option $maxTimeMS (not used with commands).
        curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
        txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

        // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
        bool slaveOK = pq.isSlaveOk() || pq.hasReadPref();
        Status serveReadsStatus = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(
                txn,
                nss,
                slaveOK);
        uassertStatusOK(serveReadsStatus);

        // Run the query.
        // bb is used to hold query results
        // this buffer should contain either requested documents per query or
        // explain information, but not both
        BufBuilder bb(32768);
        bb.skip(sizeof(QueryResult::Value));

        // How many results have we obtained from the executor?
        int numResults = 0;

        // If we're replaying the oplog, we save the last time that we read.
        Timestamp slaveReadTill;

        BSONObj obj;
        PlanExecutor::ExecState state;
        // uint64_t numMisplacedDocs = 0;

        // Get summary info about which plan the executor is using.
        curop.debug().planSummary = Explain::getPlanSummary(exec.get());

        while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
            // Add result to output buffer.
            bb.appendBuf((void*)obj.objdata(), obj.objsize());

            // Count the result.
            ++numResults;

            // Possibly note slave's position in the oplog.
            if (pq.isOplogReplay()) {
                BSONElement e = obj["ts"];
                if (Date == e.type() || bsonTimestamp == e.type()) {
                    slaveReadTill = e.timestamp();
                }
            }

            if (enoughForFirstBatch(pq, numResults, bb.len())) {
                LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore()
                       << " numToReturn=" << pq.getNumToReturn()
                       << " numResults=" << numResults
                       << endl;
                break;
            }
        }

        // If we cache the executor later, we want to deregister it as it receives notifications
        // anyway by virtue of being cached.
        //
        // If we don't cache the executor later, we are deleting it, so it must be deregistered.
        //
        // So, no matter what, deregister the executor.
        exec->deregisterExec();

        // Caller expects exceptions thrown in certain cases.
        if (PlanExecutor::FAILURE == state) {
            scoped_ptr<PlanStageStats> stats(exec->getStats());
            error() << "Plan executor error, stats: "
                    << Explain::statsToBSON(*stats);
            uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
        }

        // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the
        // chunk belonged on this node are gone. Separating chunk lifetime management from
        // ClientCursor should allow this check to go away.
        if (!shardingState.getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
            // if the version changed during the query we might be missing some data and its safe to
            // send this as mongos can resend at this point
            throw SendStaleConfigException(nss.ns(), "version changed during initial query",
                                           shardingVersionAtStart,
                                           shardingState.getVersion(nss.ns()));
        }

        // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
        // this cursorid later.
        long long ccId = 0;

        if (shouldSaveCursor(txn, collection, state, exec.get())) {
            // We won't use the executor until it's getMore'd.
            exec->saveState();

            // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
            // inserted into a global map by its ctor.
            ClientCursor* cc = new ClientCursor(collection->getCursorManager(),
                                                exec.release(),
                                                nss.ns(),
                                                pq.getOptions(),
                                                pq.getFilter());
            ccId = cc->cursorid();

            if (txn->getClient()->isInDirectClient()) {
                cc->setUnownedRecoveryUnit(txn->recoveryUnit());
            }
            else if (state == PlanExecutor::IS_EOF && pq.isTailable()) {
                // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their
                // next getMore.
            }
            else {
                // We stash away the RecoveryUnit in the ClientCursor.  It's used for subsequent
                // getMore requests.  The calling OpCtx gets a fresh RecoveryUnit.
                txn->recoveryUnit()->abandonSnapshot();
                cc->setOwnedRecoveryUnit(txn->releaseRecoveryUnit());
                StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
                invariant(txn->setRecoveryUnit(storageEngine->newRecoveryUnit(),
                                               OperationContext::kNotInUnitOfWork)
                          == OperationContext::kNotInUnitOfWork);
            }

            LOG(5) << "caching executor with cursorid " << ccId
                   << " after returning " << numResults << " results" << endl;

            // TODO document
            if (pq.isOplogReplay() && !slaveReadTill.isNull()) {
                cc->slaveReadTill(slaveReadTill);
            }

            // TODO document
            if (pq.isExhaust()) {
                curop.debug().exhaust = true;
            }

            cc->setPos(numResults);

            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).
            cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());

            endQueryOp(cc->getExecutor(), dbProfilingLevel, numResults, ccId, &curop);
        }
        else {
            LOG(5) << "Not caching executor but returning " << numResults << " results.\n";
            endQueryOp(exec.get(), dbProfilingLevel, numResults, ccId, &curop);
        }

        // Add the results from the query into the output buffer.
        result.appendData(bb.buf(), bb.len());
        bb.decouple();

        // Fill out the output buffer's header.
        QueryResult::View qr = result.header().view2ptr();
        qr.setCursorId(ccId);
        qr.setResultFlagsToOk();
        qr.msgdata().setOperation(opReply);
        qr.setStartingFrom(0);
        qr.setNReturned(numResults);

        // curop.debug().exhaust is set above.
        return curop.debug().exhaust ? nss.ns() : "";
    }

Example #6

Show file

File: find.cpp Project: ShashankRC/mongo

    /**
     * Called by db/instance.cpp.  This is the getMore entry point.
     *
     * pass - when QueryOption_AwaitData is in use, the caller will make repeated calls 
     *        when this method returns an empty result, incrementing pass on each call.  
     *        Thus, pass == 0 indicates this is the first "attempt" before any 'awaiting'.
     */
    QueryResult::View getMore(OperationContext* txn,
                              const char* ns,
                              int ntoreturn,
                              long long cursorid,
                              CurOp& curop,
                              int pass,
                              bool& exhaust,
                              bool* isCursorAuthorized) {

        // For testing, we may want to fail if we receive a getmore.
        if (MONGO_FAIL_POINT(failReceivedGetmore)) {
            invariant(0);
        }

        exhaust = false;

        const NamespaceString nss(ns);

        // Depending on the type of cursor being operated on, we hold locks for the whole getMore,
        // or none of the getMore, or part of the getMore.  The three cases in detail:
        //
        // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore.
        // 2) Cursor owned by global cursor manager: we don't lock anything.  These cursors don't
        //    own any collection state.
        // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and
        //    "unpinCollLock".  This is because agg cursors handle locking internally (hence the
        //    release), but the pin and unpin of the cursor must occur under the collection lock.
        //    We don't use our AutoGetCollectionForRead "ctx" to relock, because
        //    AutoGetCollectionForRead checks the sharding version (and we want the relock for the
        //    unpin to succeed even if the sharding version has changed).
        //
        // Note that we declare our locks before our ClientCursorPin, in order to ensure that the
        // pin's destructor is called before the lock destructors (so that the unpin occurs under
        // the lock).
        boost::scoped_ptr<AutoGetCollectionForRead> ctx;
        boost::scoped_ptr<Lock::DBLock> unpinDBLock;
        boost::scoped_ptr<Lock::CollectionLock> unpinCollLock;

        CursorManager* cursorManager;
        CursorManager* globalCursorManager = CursorManager::getGlobalCursorManager();
        if (globalCursorManager->ownsCursorId(cursorid)) {
            cursorManager = globalCursorManager;
        }
        else {
            ctx.reset(new AutoGetCollectionForRead(txn, nss));
            Collection* collection = ctx->getCollection();
            uassert( 17356, "collection dropped between getMore calls", collection );
            cursorManager = collection->getCursorManager();
        }

        LOG(5) << "Running getMore, cursorid: " << cursorid << endl;

        // This checks to make sure the operation is allowed on a replicated node.  Since we are not
        // passing in a query object (necessary to check SlaveOK query option), the only state where
        // reads are allowed is PRIMARY (or master in master/slave).  This function uasserts if
        // reads are not okay.
        Status status = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(
                txn,
                nss,
                true);
        uassertStatusOK(status);

        // A pin performs a CC lookup and if there is a CC, increments the CC's pin value so it
        // doesn't time out.  Also informs ClientCursor that there is somebody actively holding the
        // CC, so don't delete it.
        ClientCursorPin ccPin(cursorManager, cursorid);
        ClientCursor* cc = ccPin.c();

        // If we're not being called from DBDirectClient we want to associate the RecoveryUnit
        // used to create the execution machinery inside the cursor with our OperationContext.
        // If we throw or otherwise exit this method in a disorderly fashion, we must ensure
        // that further calls to getMore won't fail, and that the provided OperationContext
        // has a valid RecoveryUnit.  As such, we use RAII to accomplish this.
        //
        // This must be destroyed before the ClientCursor is destroyed.
        std::auto_ptr<ScopedRecoveryUnitSwapper> ruSwapper;

        // These are set in the QueryResult msg we return.
        int resultFlags = ResultFlag_AwaitCapable;

        int numResults = 0;
        int startingResult = 0;

        const int InitialBufSize =
            512 + sizeof(QueryResult::Value) + MaxBytesToReturnToClientAtOnce;

        BufBuilder bb(InitialBufSize);
        bb.skip(sizeof(QueryResult::Value));

        if (NULL == cc) {
            cursorid = 0;
            resultFlags = ResultFlag_CursorNotFound;
        }
        else {
            // Check for spoofing of the ns such that it does not match the one originally
            // there for the cursor.
            uassert(ErrorCodes::Unauthorized,
                    str::stream() << "Requested getMore on namespace " << ns << ", but cursor "
                                  << cursorid << " belongs to namespace " << cc->ns(),
                    ns == cc->ns());
            *isCursorAuthorized = true;

            // Restore the RecoveryUnit if we need to.
            if (txn->getClient()->isInDirectClient()) {
                if (cc->hasRecoveryUnit())
                    invariant(txn->recoveryUnit() == cc->getUnownedRecoveryUnit());
            }
            else {
                if (!cc->hasRecoveryUnit()) {
                    // Start using a new RecoveryUnit
                    cc->setOwnedRecoveryUnit(
                        getGlobalServiceContext()->getGlobalStorageEngine()->newRecoveryUnit());

                }
                // Swap RecoveryUnit(s) between the ClientCursor and OperationContext.
                ruSwapper.reset(new ScopedRecoveryUnitSwapper(cc, txn));
            }

            // Reset timeout timer on the cursor since the cursor is still in use.
            cc->setIdleTime(0);

            // If the operation that spawned this cursor had a time limit set, apply leftover
            // time to this getmore.
            curop.setMaxTimeMicros(cc->getLeftoverMaxTimeMicros());
            txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

            if (0 == pass) { 
                cc->updateSlaveLocation(txn); 
            }

            if (cc->isAggCursor()) {
                // Agg cursors handle their own locking internally.
                ctx.reset(); // unlocks
            }

            // If we're replaying the oplog, we save the last time that we read.
            Timestamp slaveReadTill;

            // What number result are we starting at?  Used to fill out the reply.
            startingResult = cc->pos();

            // What gives us results.
            PlanExecutor* exec = cc->getExecutor();
            const int queryOptions = cc->queryOptions();

            // Get results out of the executor.
            exec->restoreState(txn);

            BSONObj obj;
            PlanExecutor::ExecState state;
            while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
                // Add result to output buffer.
                bb.appendBuf((void*)obj.objdata(), obj.objsize());

                // Count the result.
                ++numResults;

                // Possibly note slave's position in the oplog.
                if (queryOptions & QueryOption_OplogReplay) {
                    BSONElement e = obj["ts"];
                    if (Date == e.type() || bsonTimestamp == e.type()) {
                        slaveReadTill = e.timestamp();
                    }
                }

                if (enoughForGetMore(ntoreturn, numResults, bb.len())) {
                    break;
                }
            }

            if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) {
                // Propagate this error to caller.
                if (PlanExecutor::FAILURE == state) {
                    scoped_ptr<PlanStageStats> stats(exec->getStats());
                    error() << "Plan executor error, stats: "
                            << Explain::statsToBSON(*stats);
                    uasserted(17406, "getMore executor error: " +
                              WorkingSetCommon::toStatusString(obj));
                }

                // In the old system tailable capped cursors would be killed off at the
                // cursorid level.  If a tailable capped cursor is nuked the cursorid
                // would vanish.
                //
                // In the new system they die and are cleaned up later (or time out).
                // So this is where we get to remove the cursorid.
                if (0 == numResults) {
                    resultFlags = ResultFlag_CursorNotFound;
                }
            }

            const bool shouldSaveCursor =
                    shouldSaveCursorGetMore(state, exec, isCursorTailable(cc));

            // In order to deregister a cursor, we need to be holding the DB + collection lock and
            // if the cursor is aggregation, we release these locks.
            if (cc->isAggCursor()) {
                invariant(NULL == ctx.get());
                unpinDBLock.reset(new Lock::DBLock(txn->lockState(), nss.db(), MODE_IS));
                unpinCollLock.reset(new Lock::CollectionLock(txn->lockState(), nss.ns(), MODE_IS));
            }

            // Our two possible ClientCursorPin cleanup paths are:
            // 1) If the cursor is not going to be saved, we call deleteUnderlying() on the pin.
            // 2) If the cursor is going to be saved, we simply let the pin go out of scope.  In
            //    this case, the pin's destructor will be invoked, which will call release() on the
            //    pin.  Because our ClientCursorPin is declared after our lock is declared, this
            //    will happen under the lock.
            if (!shouldSaveCursor) {
                ruSwapper.reset();
                ccPin.deleteUnderlying();

                // cc is now invalid, as is the executor
                cursorid = 0;
                cc = NULL;
                curop.debug().cursorExhausted = true;

                LOG(5) << "getMore NOT saving client cursor, ended with state "
                       << PlanExecutor::statestr(state)
                       << endl;
            }
            else {
                // Continue caching the ClientCursor.
                cc->incPos(numResults);
                exec->saveState();
                LOG(5) << "getMore saving client cursor ended with state "
                       << PlanExecutor::statestr(state)
                       << endl;

                if (PlanExecutor::IS_EOF == state && (queryOptions & QueryOption_CursorTailable)) {
                    if (!txn->getClient()->isInDirectClient()) {
                        // Don't stash the RU. Get a new one on the next getMore.
                        ruSwapper->dismiss();
                    }

                    if ((queryOptions & QueryOption_AwaitData)
                            && (numResults == 0)
                            && (pass < 1000)) {
                        // Bubble up to the AwaitData handling code in receivedGetMore which will
                        // try again.
                        return NULL;
                    }
                }

                // Possibly note slave's position in the oplog.
                if ((queryOptions & QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
                    cc->slaveReadTill(slaveReadTill);
                }

                exhaust = (queryOptions & QueryOption_Exhaust);

                // If the getmore had a time limit, remaining time is "rolled over" back to the
                // cursor (for use by future getmore ops).
                cc->setLeftoverMaxTimeMicros( curop.getRemainingMaxTimeMicros() );
            }
        }

        QueryResult::View qr = bb.buf();
        qr.msgdata().setLen(bb.len());
        qr.msgdata().setOperation(opReply);
        qr.setResultFlags(resultFlags);
        qr.setCursorId(cursorid);
        qr.setStartingFrom(startingResult);
        qr.setNReturned(numResults);
        bb.decouple();
        LOG(5) << "getMore returned " << numResults << " results\n";
        return qr;
    }

Example #7

Show file

File: find.cpp Project: sbrinkerhoff/mongo

/**
 * Called by db/instance.cpp.  This is the getMore entry point.
 */
QueryResult::View getMore(OperationContext* txn,
                          const char* ns,
                          int ntoreturn,
                          long long cursorid,
                          bool* exhaust,
                          bool* isCursorAuthorized) {
    invariant(ntoreturn >= 0);

    CurOp& curop = *CurOp::get(txn);

    // For testing, we may want to fail if we receive a getmore.
    if (MONGO_FAIL_POINT(failReceivedGetmore)) {
        invariant(0);
    }

    *exhaust = false;

    const NamespaceString nss(ns);

    // Depending on the type of cursor being operated on, we hold locks for the whole getMore,
    // or none of the getMore, or part of the getMore.  The three cases in detail:
    //
    // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore.
    // 2) Cursor owned by global cursor manager: we don't lock anything.  These cursors don't own
    //    any collection state. These cursors are generated either by the listCollections or
    //    listIndexes commands, as these special cursor-generating commands operate over catalog
    //    data rather than targeting the data within a collection.
    // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and
    //    "unpinCollLock".  This is because agg cursors handle locking internally (hence the
    //    release), but the pin and unpin of the cursor must occur under the collection lock.
    //    We don't use our AutoGetCollectionForRead "ctx" to relock, because
    //    AutoGetCollectionForRead checks the sharding version (and we want the relock for the
    //    unpin to succeed even if the sharding version has changed).
    //
    // Note that we declare our locks before our ClientCursorPin, in order to ensure that the
    // pin's destructor is called before the lock destructors (so that the unpin occurs under
    // the lock).
    unique_ptr<AutoGetCollectionForRead> ctx;
    unique_ptr<Lock::DBLock> unpinDBLock;
    unique_ptr<Lock::CollectionLock> unpinCollLock;

    CursorManager* cursorManager;
    if (nss.isListIndexesCursorNS() || nss.isListCollectionsCursorNS()) {
        // List collections and list indexes are special cursor-generating commands whose
        // cursors are managed globally, as they operate over catalog data rather than targeting
        // the data within a collection.
        cursorManager = CursorManager::getGlobalCursorManager();
    } else {
        ctx = stdx::make_unique<AutoGetCollectionForRead>(txn, nss);
        Collection* collection = ctx->getCollection();
        uassert(17356, "collection dropped between getMore calls", collection);
        cursorManager = collection->getCursorManager();
    }

    LOG(5) << "Running getMore, cursorid: " << cursorid << endl;

    // This checks to make sure the operation is allowed on a replicated node.  Since we are not
    // passing in a query object (necessary to check SlaveOK query option), the only state where
    // reads are allowed is PRIMARY (or master in master/slave).  This function uasserts if
    // reads are not okay.
    Status status = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(txn, nss, true);
    uassertStatusOK(status);

    // A pin performs a CC lookup and if there is a CC, increments the CC's pin value so it
    // doesn't time out.  Also informs ClientCursor that there is somebody actively holding the
    // CC, so don't delete it.
    ClientCursorPin ccPin(cursorManager, cursorid);
    ClientCursor* cc = ccPin.c();
    // These are set in the QueryResult msg we return.
    int resultFlags = ResultFlag_AwaitCapable;

    int numResults = 0;
    int startingResult = 0;

    const int InitialBufSize =
        512 + sizeof(QueryResult::Value) + FindCommon::kMaxBytesToReturnToClientAtOnce;

    BufBuilder bb(InitialBufSize);
    bb.skip(sizeof(QueryResult::Value));

    if (NULL == cc) {
        cursorid = 0;
        resultFlags = ResultFlag_CursorNotFound;
    } else {
        // Check for spoofing of the ns such that it does not match the one originally
        // there for the cursor.
        uassert(ErrorCodes::Unauthorized,
                str::stream() << "Requested getMore on namespace " << ns << ", but cursor "
                              << cursorid << " belongs to namespace " << cc->ns(),
                ns == cc->ns());
        *isCursorAuthorized = true;

        if (cc->isReadCommitted())
            uassertStatusOK(txn->recoveryUnit()->setReadFromMajorityCommittedSnapshot());

        // Reset timeout timer on the cursor since the cursor is still in use.
        cc->setIdleTime(0);

        // If the operation that spawned this cursor had a time limit set, apply leftover
        // time to this getmore.
        curop.setMaxTimeMicros(cc->getLeftoverMaxTimeMicros());
        txn->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

        // Ensure that the original query or command object is available in the slow query log,
        // profiler, and currentOp.
        curop.debug().query = cc->getQuery();
        {
            stdx::lock_guard<Client> lk(*txn->getClient());
            curop.setQuery_inlock(cc->getQuery());
        }

        cc->updateSlaveLocation(txn);

        if (cc->isAggCursor()) {
            // Agg cursors handle their own locking internally.
            ctx.reset();  // unlocks
        }

        // If we're replaying the oplog, we save the last time that we read.
        Timestamp slaveReadTill;

        // What number result are we starting at?  Used to fill out the reply.
        startingResult = cc->pos();

        uint64_t notifierVersion = 0;
        std::shared_ptr<CappedInsertNotifier> notifier;
        if (isCursorAwaitData(cc)) {
            invariant(ctx->getCollection()->isCapped());
            // Retrieve the notifier which we will wait on until new data arrives. We make sure
            // to do this in the lock because once we drop the lock it is possible for the
            // collection to become invalid. The notifier itself will outlive the collection if
            // the collection is dropped, as we keep a shared_ptr to it.
            notifier = ctx->getCollection()->getCappedInsertNotifier();

            // Must get the version before we call generateBatch in case a write comes in after
            // that call and before we call wait on the notifier.
            notifierVersion = notifier->getVersion();
        }

        PlanExecutor* exec = cc->getExecutor();
        exec->reattachToOperationContext(txn);
        exec->restoreState();
        PlanExecutor::ExecState state;

        generateBatch(ntoreturn, cc, &bb, &numResults, &slaveReadTill, &state);

        // If this is an await data cursor, and we hit EOF without generating any results, then
        // we block waiting for new data to arrive.
        if (isCursorAwaitData(cc) && state == PlanExecutor::IS_EOF && numResults == 0) {
            // Save the PlanExecutor and drop our locks.
            exec->saveState();
            ctx.reset();

            // Block waiting for data for up to 1 second.
            Seconds timeout(1);
            notifier->wait(notifierVersion, timeout);
            notifier.reset();

            // Set expected latency to match wait time. This makes sure the logs aren't spammed
            // by awaitData queries that exceed slowms due to blocking on the CappedInsertNotifier.
            curop.setExpectedLatencyMs(durationCount<Milliseconds>(timeout));

            // Reacquiring locks.
            ctx = make_unique<AutoGetCollectionForRead>(txn, nss);
            exec->restoreState();

            // We woke up because either the timed_wait expired, or there was more data. Either
            // way, attempt to generate another batch of results.
            generateBatch(ntoreturn, cc, &bb, &numResults, &slaveReadTill, &state);
        }

        // We have to do this before re-acquiring locks in the agg case because
        // shouldSaveCursorGetMore() can make a network call for agg cursors.
        //
        // TODO: Getting rid of PlanExecutor::isEOF() in favor of PlanExecutor::IS_EOF would mean
        // that this network operation is no longer necessary.
        const bool shouldSaveCursor = shouldSaveCursorGetMore(state, exec, isCursorTailable(cc));

        // In order to deregister a cursor, we need to be holding the DB + collection lock and
        // if the cursor is aggregation, we release these locks.
        if (cc->isAggCursor()) {
            invariant(NULL == ctx.get());
            unpinDBLock = make_unique<Lock::DBLock>(txn->lockState(), nss.db(), MODE_IS);
            unpinCollLock = make_unique<Lock::CollectionLock>(txn->lockState(), nss.ns(), MODE_IS);
        }

        // Our two possible ClientCursorPin cleanup paths are:
        // 1) If the cursor is not going to be saved, we call deleteUnderlying() on the pin.
        // 2) If the cursor is going to be saved, we simply let the pin go out of scope.  In
        //    this case, the pin's destructor will be invoked, which will call release() on the
        //    pin.  Because our ClientCursorPin is declared after our lock is declared, this
        //    will happen under the lock.
        if (!shouldSaveCursor) {
            ccPin.deleteUnderlying();

            // cc is now invalid, as is the executor
            cursorid = 0;
            cc = NULL;
            curop.debug().cursorExhausted = true;

            LOG(5) << "getMore NOT saving client cursor, ended with state "
                   << PlanExecutor::statestr(state) << endl;
        } else {
            // Continue caching the ClientCursor.
            cc->incPos(numResults);
            exec->saveState();
            exec->detachFromOperationContext();
            LOG(5) << "getMore saving client cursor ended with state "
                   << PlanExecutor::statestr(state) << endl;

            // Possibly note slave's position in the oplog.
            if ((cc->queryOptions() & QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
                cc->slaveReadTill(slaveReadTill);
            }

            *exhaust = cc->queryOptions() & QueryOption_Exhaust;

            // If the getmore had a time limit, remaining time is "rolled over" back to the
            // cursor (for use by future getmore ops).
            cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());
        }
    }

    QueryResult::View qr = bb.buf();
    qr.msgdata().setLen(bb.len());
    qr.msgdata().setOperation(opReply);
    qr.setResultFlags(resultFlags);
    qr.setCursorId(cursorid);
    qr.setStartingFrom(startingResult);
    qr.setNReturned(numResults);
    bb.decouple();
    LOG(5) << "getMore returned " << numResults << " results\n";
    return qr;
}

Example #8

Show file

File: find.cpp Project: AnkyrinRepeat/mongo

std::string runQuery(OperationContext* txn,
                     QueryMessage& q,
                     const NamespaceString& nss,
                     Message& result) {
    CurOp& curop = *CurOp::get(txn);

    uassert(ErrorCodes::InvalidNamespace,
            str::stream() << "Invalid ns [" << nss.ns() << "]",
            nss.isValid());
    invariant(!nss.isCommand());

    // Set curop information.
    beginQueryOp(txn, nss, q.query, q.ntoreturn, q.ntoskip);

    // Parse the qm into a CanonicalQuery.

    auto statusWithCQ = CanonicalQuery::canonicalize(q, ExtensionsCallbackReal(txn, &nss));
    if (!statusWithCQ.isOK()) {
        uasserted(
            17287,
            str::stream() << "Can't canonicalize query: " << statusWithCQ.getStatus().toString());
    }
    unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());
    invariant(cq.get());

    LOG(5) << "Running query:\n" << cq->toString();
    LOG(2) << "Running query: " << cq->toStringShort();

    // Parse, canonicalize, plan, transcribe, and get a plan executor.
    AutoGetCollectionForRead ctx(txn, nss);
    Collection* collection = ctx.getCollection();

    const int dbProfilingLevel =
        ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile;

    // We have a parsed query. Time to get the execution plan for it.
    std::unique_ptr<PlanExecutor> exec = uassertStatusOK(
        getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO));

    const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

    // If it's actually an explain, do the explain and return rather than falling through
    // to the normal query execution loop.
    if (pq.isExplain()) {
        BufBuilder bb;
        bb.skip(sizeof(QueryResult::Value));

        BSONObjBuilder explainBob;
        Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

        // Add the resulting object to the return buffer.
        BSONObj explainObj = explainBob.obj();
        bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

        // TODO: Does this get overwritten/do we really need to set this twice?
        curop.debug().query = q.query;

        // Set query result fields.
        QueryResult::View qr = bb.buf();
        bb.decouple();
        qr.setResultFlagsToOk();
        qr.msgdata().setLen(bb.len());
        curop.debug().responseLength = bb.len();
        qr.msgdata().setOperation(opReply);
        qr.setCursorId(0);
        qr.setStartingFrom(0);
        qr.setNReturned(1);
        result.setData(qr.view2ptr(), true);
        return "";
    }

    // Handle query option $maxTimeMS (not used with commands).
    curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
    txn->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

    // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
    bool slaveOK = pq.isSlaveOk() || pq.hasReadPref();
    Status serveReadsStatus =
        repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(txn, nss, slaveOK);
    uassertStatusOK(serveReadsStatus);

    // Run the query.
    // bb is used to hold query results
    // this buffer should contain either requested documents per query or
    // explain information, but not both
    BufBuilder bb(FindCommon::kInitReplyBufferSize);
    bb.skip(sizeof(QueryResult::Value));

    // How many results have we obtained from the executor?
    int numResults = 0;

    // If we're replaying the oplog, we save the last time that we read.
    Timestamp slaveReadTill;

    BSONObj obj;
    PlanExecutor::ExecState state;

    // Get summary info about which plan the executor is using.
    {
        stdx::lock_guard<Client> lk(*txn->getClient());
        curop.setPlanSummary_inlock(Explain::getPlanSummary(exec.get()));
    }

    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // If we can't fit this result inside the current batch, then we stash it for later.
        if (!FindCommon::haveSpaceForNext(obj, numResults, bb.len())) {
            exec->enqueue(obj);
            break;
        }

        // Add result to output buffer.
        bb.appendBuf((void*)obj.objdata(), obj.objsize());

        // Count the result.
        ++numResults;

        // Possibly note slave's position in the oplog.
        if (pq.isOplogReplay()) {
            BSONElement e = obj["ts"];
            if (Date == e.type() || bsonTimestamp == e.type()) {
                slaveReadTill = e.timestamp();
            }
        }

        if (FindCommon::enoughForFirstBatch(pq, numResults)) {
            LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore()
                   << " ntoreturn=" << pq.getNToReturn().value_or(0) << " numResults=" << numResults
                   << endl;
            break;
        }
    }

    // If we cache the executor later, we want to deregister it as it receives notifications
    // anyway by virtue of being cached.
    //
    // If we don't cache the executor later, we are deleting it, so it must be deregistered.
    //
    // So, no matter what, deregister the executor.
    exec->deregisterExec();

    // Caller expects exceptions thrown in certain cases.
    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        error() << "Plan executor error during find: " << PlanExecutor::statestr(state)
                << ", stats: " << Explain::getWinningPlanStats(exec.get());
        uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
    }

    // Before saving the cursor, ensure that whatever plan we established happened with the expected
    // collection version
    auto css = CollectionShardingState::get(txn, nss);
    css->checkShardVersionOrThrow(txn);

    // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
    // this cursorid later.
    long long ccId = 0;

    if (shouldSaveCursor(txn, collection, state, exec.get())) {
        // We won't use the executor until it's getMore'd.
        exec->saveState();
        exec->detachFromOperationContext();

        // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
        // inserted into a global map by its ctor.
        ClientCursor* cc =
            new ClientCursor(collection->getCursorManager(),
                             exec.release(),
                             nss.ns(),
                             txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                             pq.getOptions(),
                             pq.getFilter());
        ccId = cc->cursorid();

        LOG(5) << "caching executor with cursorid " << ccId << " after returning " << numResults
               << " results" << endl;

        // TODO document
        if (pq.isOplogReplay() && !slaveReadTill.isNull()) {
            cc->slaveReadTill(slaveReadTill);
        }

        // TODO document
        if (pq.isExhaust()) {
            curop.debug().exhaust = true;
        }

        cc->setPos(numResults);

        // If the query had a time limit, remaining time is "rolled over" to the cursor (for
        // use by future getmore ops).
        cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());

        endQueryOp(txn, collection, *cc->getExecutor(), dbProfilingLevel, numResults, ccId);
    } else {
        LOG(5) << "Not caching executor but returning " << numResults << " results.\n";
        endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, ccId);
    }

    // Add the results from the query into the output buffer.
    result.appendData(bb.buf(), bb.len());
    bb.decouple();

    // Fill out the output buffer's header.
    QueryResult::View qr = result.header().view2ptr();
    qr.setCursorId(ccId);
    qr.setResultFlagsToOk();
    qr.msgdata().setOperation(opReply);
    qr.setStartingFrom(0);
    qr.setNReturned(numResults);

    // curop.debug().exhaust is set above.
    return curop.debug().exhaust ? nss.ns() : "";
}

Example #9

Show file

File: replication_recovery.cpp Project: acmorrow/mongo

void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx,
                                                 const Timestamp& oplogApplicationStartPoint,
                                                 const Timestamp& topOfOplog) {
    invariant(!oplogApplicationStartPoint.isNull());
    invariant(!topOfOplog.isNull());

    // Check if we have any unapplied ops in our oplog. It is important that this is done after
    // deleting the ragged end of the oplog.
    if (oplogApplicationStartPoint == topOfOplog) {
        log() << "No oplog entries to apply for recovery. Start point is at the top of the oplog.";
        return;  // We've applied all the valid oplog we have.
    } else if (oplogApplicationStartPoint > topOfOplog) {
        severe() << "Applied op " << oplogApplicationStartPoint.toBSON()
                 << " not found. Top of oplog is " << topOfOplog.toBSON() << '.';
        fassertFailedNoTrace(40313);
    }

    log() << "Replaying stored operations from " << oplogApplicationStartPoint.toBSON()
          << " (exclusive) to " << topOfOplog.toBSON() << " (inclusive).";

    OplogBufferLocalOplog oplogBuffer(oplogApplicationStartPoint);
    oplogBuffer.startup(opCtx);

    RecoveryOplogApplierStats stats;

    auto writerPool = OplogApplier::makeWriterPool();
    OplogApplier::Options options;
    options.allowNamespaceNotFoundErrorsOnCrudOps = true;
    options.skipWritesToOplog = true;
    // During replication recovery, the stableTimestampForRecovery refers to the stable timestamp
    // from which we replay the oplog.
    // For startup recovery, this will be the recovery timestamp, which is the stable timestamp that
    // the storage engine recovered to on startup. For rollback recovery, this will be the last
    // stable timestamp, returned when we call recoverToStableTimestamp.
    // We keep track of this for prepared transactions so that when we apply a commitTransaction
    // oplog entry, we can check if it occurs before or after the stable timestamp and decide
    // whether the operations would have already been reflected in the data.
    options.stableTimestampForRecovery = oplogApplicationStartPoint;
    OplogApplierImpl oplogApplier(nullptr,
                                  &oplogBuffer,
                                  &stats,
                                  nullptr,
                                  _consistencyMarkers,
                                  _storageInterface,
                                  options,
                                  writerPool.get());

    OplogApplier::BatchLimits batchLimits;
    batchLimits.bytes = OplogApplier::calculateBatchLimitBytes(opCtx, _storageInterface);
    batchLimits.ops = OplogApplier::getBatchLimitOperations();

    OpTime applyThroughOpTime;
    OplogApplier::Operations batch;
    while (
        !(batch = fassert(50763, oplogApplier.getNextApplierBatch(opCtx, batchLimits))).empty()) {
        applyThroughOpTime = uassertStatusOK(oplogApplier.multiApply(opCtx, std::move(batch)));
    }
    stats.complete(applyThroughOpTime);
    invariant(oplogBuffer.isEmpty(),
              str::stream() << "Oplog buffer not empty after applying operations. Last operation "
                               "applied with optime: "
                            << applyThroughOpTime.toBSON());
    invariant(applyThroughOpTime.getTimestamp() == topOfOplog,
              str::stream() << "Did not apply to top of oplog. Applied through: "
                            << applyThroughOpTime.toString()
                            << ". Top of oplog: "
                            << topOfOplog.toString());
    oplogBuffer.shutdown(opCtx);

    // We may crash before setting appliedThrough. If we have a stable checkpoint, we will recover
    // to that checkpoint at a replication consistent point, and applying the oplog is safe.
    // If we don't have a stable checkpoint, then we must be in startup recovery, and not rollback
    // recovery, because we only roll back to a stable timestamp when we have a stable checkpoint.
    // Startup recovery from an unstable checkpoint only ever applies a single batch and it is safe
    // to replay the batch from any point.
    _consistencyMarkers->setAppliedThrough(opCtx, applyThroughOpTime);
}