Example #1
    static void _logOpRS(OperationContext* txn,
                         const char *opstr,
                         const char *ns,
                         const char *logNS,
                         const BSONObj& obj,
                         BSONObj *o2,
                         bool *bb,
                         bool fromMigrate ) {
        Lock::DBWrite lk1(txn->lockState(), "local");
        WriteUnitOfWork wunit(txn);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        long long hashNew;
        if( theReplSet ) {
            if (!theReplSet->box.getState().primary()) {
                log() << "replSet error : logOp() but not primary";
                fassertFailed(17405);
            }
            hashNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId();
        }
        else {
            // must be initiation
            verify( *ns == 0 );
            hashNew = 0;
        }

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("v", OPLOG_VERSION);
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();

        DEV verify( logNS == 0 ); // check this was never a master/slave master

        if ( localOplogRSCollection == 0 ) {
            Client::Context ctx(txn, rsoplog);
            localDB = ctx.db();
            verify( localDB );
            localOplogRSCollection = localDB->getCollection( txn, rsoplog );
            massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
        }

        Client::Context ctx(txn, rsoplog, localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

        /* todo: now() has code to handle clock skew, but if the server-to-server skew is
           large it will get unhappy. this code (or code in now() maybe) should be improved.
        */
        if( theReplSet ) {
            if( !(theReplSet->lastOpTimeWritten<ts) ) {
                log() << "replication oplog stream went back in time. previous timestamp: "
                      << theReplSet->lastOpTimeWritten << " newest timestamp: " << ts
                      << ". attempting to sync directly from primary." << endl;
                BSONObjBuilder result;
                Status status = theReplSet->forceSyncFrom(theReplSet->box.getPrimary()->fullName(),
                                                          &result);
                if (!status.isOK()) {
                    log() << "Can't sync from primary: " << status;
                }
            }
            theReplSet->lastOpTimeWritten = ts;
            theReplSet->lastH = hashNew;
            ctx.getClient()->setLastOp( ts );
        }
        wunit.commit();

    }
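
A note on the single-copy trick flagged in the comment above: the final oplog entry is the "partial" document plus one extra "o" field holding obj, assembled once at the destination (Example #7 below does exactly this via append_O_Obj, sizing the record as posz + obj.objsize() + 1 + 2). A minimal sketch of that assembly, with a hypothetical name and signature:

    // Sketch only: assumes a little-endian host (BSON is little-endian) and a
    // destination buffer of at least partialSize + objSize + 3 bytes.
    #include <cstdint>
    #include <cstring>

    static void appendOObj(char* dst, const char* partial, int partialSize,
                           const char* obj, int objSize) {
        int32_t total = partialSize + objSize + 3;       // + type byte + "o\0" + EOO
        std::memcpy(dst, partial, partialSize - 1);      // partial minus its trailing EOO
        char* p = dst + partialSize - 1;
        *p++ = 3;                                        // BSON element type: embedded document
        std::memcpy(p, "o", 2); p += 2;                  // field name "o" plus NUL
        std::memcpy(p, obj, objSize); p += objSize;      // the document payload
        *p = 0;                                          // terminating EOO byte
        std::memcpy(dst, &total, sizeof(total));         // patch the total document length
    }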
Example #2
std::string runQuery(OperationContext* txn,
                     QueryMessage& q,
                     const NamespaceString& nss,
                     Message& result) {
    CurOp& curop = *CurOp::get(txn);

    uassert(ErrorCodes::InvalidNamespace,
            str::stream() << "Invalid ns [" << nss.ns() << "]",
            nss.isValid());
    invariant(!nss.isCommand());

    // Set curop information.
    beginQueryOp(txn, nss, q.query, q.ntoreturn, q.ntoskip);

    // Parse the qm into a CanonicalQuery.

    auto statusWithCQ = CanonicalQuery::canonicalize(q, ExtensionsCallbackReal(txn, &nss));
    if (!statusWithCQ.isOK()) {
        uasserted(
            17287,
            str::stream() << "Can't canonicalize query: " << statusWithCQ.getStatus().toString());
    }
    unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());
    invariant(cq.get());

    LOG(5) << "Running query:\n" << cq->toString();
    LOG(2) << "Running query: " << cq->toStringShort();

    // Parse, canonicalize, plan, transcribe, and get a plan executor.
    AutoGetCollectionForRead ctx(txn, nss);
    Collection* collection = ctx.getCollection();

    const int dbProfilingLevel =
        ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile;

    // We have a parsed query. Time to get the execution plan for it.
    std::unique_ptr<PlanExecutor> exec = uassertStatusOK(
        getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO));

    const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

    // If it's actually an explain, do the explain and return rather than falling through
    // to the normal query execution loop.
    if (pq.isExplain()) {
        BufBuilder bb;
        bb.skip(sizeof(QueryResult::Value));

        BSONObjBuilder explainBob;
        Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

        // Add the resulting object to the return buffer.
        BSONObj explainObj = explainBob.obj();
        bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

        // TODO: Does this get overwritten/do we really need to set this twice?
        curop.debug().query = q.query;

        // Set query result fields.
        QueryResult::View qr = bb.buf();
        bb.decouple();
        qr.setResultFlagsToOk();
        qr.msgdata().setLen(bb.len());
        curop.debug().responseLength = bb.len();
        qr.msgdata().setOperation(opReply);
        qr.setCursorId(0);
        qr.setStartingFrom(0);
        qr.setNReturned(1);
        result.setData(qr.view2ptr(), true);
        return "";
    }

    // Handle query option $maxTimeMS (not used with commands).
    curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
    txn->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

    // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
    bool slaveOK = pq.isSlaveOk() || pq.hasReadPref();
    Status serveReadsStatus =
        repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(txn, nss, slaveOK);
    uassertStatusOK(serveReadsStatus);

    // Run the query.
    // bb is used to hold query results
    // this buffer should contain either requested documents per query or
    // explain information, but not both
    BufBuilder bb(FindCommon::kInitReplyBufferSize);
    bb.skip(sizeof(QueryResult::Value));

    // How many results have we obtained from the executor?
    int numResults = 0;

    // If we're replaying the oplog, we save the last time that we read.
    Timestamp slaveReadTill;

    BSONObj obj;
    PlanExecutor::ExecState state;

    // Get summary info about which plan the executor is using.
    {
        stdx::lock_guard<Client> lk(*txn->getClient());
        curop.setPlanSummary_inlock(Explain::getPlanSummary(exec.get()));
    }

    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // If we can't fit this result inside the current batch, then we stash it for later.
        if (!FindCommon::haveSpaceForNext(obj, numResults, bb.len())) {
            exec->enqueue(obj);
            break;
        }

        // Add result to output buffer.
        bb.appendBuf((void*)obj.objdata(), obj.objsize());

        // Count the result.
        ++numResults;

        // Possibly note slave's position in the oplog.
        if (pq.isOplogReplay()) {
            BSONElement e = obj["ts"];
            if (Date == e.type() || bsonTimestamp == e.type()) {
                slaveReadTill = e.timestamp();
            }
        }

        if (FindCommon::enoughForFirstBatch(pq, numResults)) {
            LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore()
                   << " ntoreturn=" << pq.getNToReturn().value_or(0) << " numResults=" << numResults
                   << endl;
            break;
        }
    }

    // If we cache the executor later, we want to deregister it as it receives notifications
    // anyway by virtue of being cached.
    //
    // If we don't cache the executor later, we are deleting it, so it must be deregistered.
    //
    // So, no matter what, deregister the executor.
    exec->deregisterExec();

    // Caller expects exceptions thrown in certain cases.
    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        error() << "Plan executor error during find: " << PlanExecutor::statestr(state)
                << ", stats: " << Explain::getWinningPlanStats(exec.get());
        uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
    }

    // Before saving the cursor, ensure that the plan we established was built against the
    // expected collection version.
    auto css = CollectionShardingState::get(txn, nss);
    css->checkShardVersionOrThrow(txn);

    // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
    // this cursorid later.
    long long ccId = 0;

    if (shouldSaveCursor(txn, collection, state, exec.get())) {
        // We won't use the executor until it's getMore'd.
        exec->saveState();
        exec->detachFromOperationContext();

        // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
        // inserted into a global map by its ctor.
        ClientCursor* cc =
            new ClientCursor(collection->getCursorManager(),
                             exec.release(),
                             nss.ns(),
                             txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                             pq.getOptions(),
                             pq.getFilter());
        ccId = cc->cursorid();

        LOG(5) << "caching executor with cursorid " << ccId << " after returning " << numResults
               << " results" << endl;

        // TODO document
        if (pq.isOplogReplay() && !slaveReadTill.isNull()) {
            cc->slaveReadTill(slaveReadTill);
        }

        // TODO document
        if (pq.isExhaust()) {
            curop.debug().exhaust = true;
        }

        cc->setPos(numResults);

        // If the query had a time limit, remaining time is "rolled over" to the cursor (for
        // use by future getmore ops).
        cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());

        endQueryOp(txn, collection, *cc->getExecutor(), dbProfilingLevel, numResults, ccId);
    } else {
        LOG(5) << "Not caching executor but returning " << numResults << " results.\n";
        endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, ccId);
    }

    // Add the results from the query into the output buffer.
    result.appendData(bb.buf(), bb.len());
    bb.decouple();

    // Fill out the output buffer's header.
    QueryResult::View qr = result.header().view2ptr();
    qr.setCursorId(ccId);
    qr.setResultFlagsToOk();
    qr.msgdata().setOperation(opReply);
    qr.setStartingFrom(0);
    qr.setNReturned(numResults);

    // curop.debug().exhaust is set above.
    return curop.debug().exhaust ? nss.ns() : "";
}
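
The getNext loop above defers to FindCommon::haveSpaceForNext before appending each document. A hedged sketch of what such a check amounts to (the real limits live in the server's find_common code; the constant here is illustrative):

    // Always admit the first document so a batch is never empty; afterwards, stop
    // once appending the next document would push the reply past a soft byte cap.
    bool haveSpaceForNext(int nextObjSize, int numDocs, int bytesBuffered) {
        const int kMaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024;  // illustrative
        return numDocs == 0 ||
               bytesBuffered + nextObjSize <= kMaxBytesToReturnToClientAtOnce;
    }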
Example #3
bool ShardedClientCursor::sendNextBatch(int batchSize, BufBuilder& buffer, int& docCount) {
    uassert(10191, "cursor already done", !_done);

    int maxSize = 1024 * 1024;
    if (_totalSent > 0)
        maxSize *= 3;

    docCount = 0;

    // If batchSize is negative, it means that we should send up to -batchSize results
    // back to the client, and that we should only send a *single batch*. A batchSize of
    // 1 is also a special case which means "return up to 1 result in a single batch" (so
    // that +1 actually has the same meaning as -1). For all other values of batchSize, we
    // may have to return multiple batches.
    const bool sendMoreBatches = batchSize == 0 || batchSize > 1;
    batchSize = abs(batchSize);

    // Set the initial batch size to 101, just like mongod.
    if (batchSize == 0 && _totalSent == 0)
        batchSize = 101;

    // Set batch size to the batchSize requested by the current operation unconditionally.  This
    // is necessary because if the loop exited due to docCount == batchSize then setBatchSize(0)
    // was called, so the next _cursor->more() will be called with a batch size of 0 if the
    // cursor buffer was drained the previous run.  Unconditionally setting the batch size
    // ensures that we don't ask for a batch size of zero as a side effect.
    _cursor->setBatchSize(batchSize);

    bool cursorHasMore = true;
    while ((cursorHasMore = _cursor->more())) {
        BSONObj o = _cursor->next();
        buffer.appendBuf((void*)o.objdata(), o.objsize());
        ++docCount;

        // Ensure that the next batch will never wind up requesting more docs from the shard
        // than are remaining to satisfy the initial batchSize.
        if (batchSize != 0) {
            if (docCount == batchSize)
                break;
            _cursor->setBatchSize(batchSize - docCount);
        }

        if (buffer.len() > maxSize) {
            break;
        }
    }

    // We need to request another batch if the following two conditions hold:
    //
    //  1. batchSize is zero or greater than 1 (see the comment above). This condition
    //  is stored in 'sendMoreBatches'.
    //
    //  2. The last call to _cursor->more() was true (i.e. we never explicitly got a false
    //  value from _cursor->more()). This condition is stored in 'cursorHasMore'. If the server
    //  hits EOF while executing a query or a getmore, it will pass a cursorId of 0 in the
    //  query response to indicate that there are no more results. In this case, _cursor->more()
    //  will be explicitly false, and we know for sure that we do not have to send more batches.
    //
    //  On the other hand, if _cursor->more() is true there may or may not be more results.
    //  Suppose that the mongod generates enough results to fill this batch. In this case it
    //  does not know whether or not there are more, because finding out would require
    //  requesting an extra result and seeing whether we get EOF. The mongod sends a valid
    //  cursorId to indicate that there may be more. We do the same here: we indicate that
    //  there may be more results to retrieve by setting 'hasMoreBatches' to true.
    bool hasMoreBatches = sendMoreBatches && cursorHasMore;

    LOG(5) << "\t hasMoreBatches: " << hasMoreBatches << " sendMoreBatches: " << sendMoreBatches
           << " cursorHasMore: " << cursorHasMore << " batchSize: " << batchSize
           << " num: " << docCount << " id:" << getId() << " totalSent: " << _totalSent << endl;

    _totalSent += docCount;
    _done = !hasMoreBatches;

    return hasMoreBatches;
}
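
The batchSize conventions spelled out in the comments above condense into a small helper. This is a toy rendering for clarity, not the router's API:

    #include <cstdlib>

    struct BatchPlan {
        int size;          // how many documents to request from the shard
        bool singleBatch;  // negative or 1 means one batch only
    };

    BatchPlan planBatch(int requested, long long totalSent) {
        BatchPlan p;
        p.singleBatch = !(requested == 0 || requested > 1);  // mirrors 'sendMoreBatches'
        p.size = std::abs(requested);
        if (p.size == 0 && totalSent == 0)
            p.size = 101;                                    // initial batch, like mongod
        return p;
    }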
Example #4
    BSONObj ShardKeyPattern::moveToFront(const BSONObj& obj) const {
        vector<const char*> keysToMove;
        keysToMove.push_back("_id");
        BSONForEach(e, pattern) {
            if (strchr(e.fieldName(), '.') == NULL && strcmp(e.fieldName(), "_id") != 0)
                keysToMove.push_back(e.fieldName());
        }

        if (keysToMove.size() == 1) {
            return obj;
        }
        else {
            BufBuilder buf (obj.objsize() + 5);
            buf.appendNum((unsigned)0); // refcount
            buf.appendNum(obj.objsize());

            vector<pair<const char*, size_t> > copies;
            pair<const char*, size_t> toCopy ((const char*)NULL, 0); // C++ NULL isn't a pointer type yet

            BSONForEach(e, obj) {
                bool moveToFront = false;
                for (vector<const char*>::const_iterator it(keysToMove.begin()), end(keysToMove.end()); it!=end; ++it) {
                    if (strcmp(e.fieldName(), *it) == 0) {
                        moveToFront = true;
                        break;
                    }
                }

                if (moveToFront) {
                    buf.appendBuf(e.fieldName()-1, e.size());
                    if (toCopy.first) {
                        copies.push_back(toCopy);
                        toCopy.first = NULL;
                    }
                }
                else {
                    if (!toCopy.first) {
                        toCopy.first = e.fieldName()-1;
                        toCopy.second = e.size();
                    }
                    else {
                        toCopy.second += e.size();
                    }
                }
            }

            for (vector<pair<const char*, size_t> >::const_iterator it(copies.begin()), end(copies.end()); it!=end; ++it) {
                buf.appendBuf(it->first, it->second);
            }

            if (toCopy.first) {
                buf.appendBuf(toCopy.first, toCopy.second);
            }

            buf.appendChar('\0');

            BSONObj out ((BSONObj::Holder*)buf.buf());
            buf.decouple();
            return out;
        }
    }
Example #5
void WireProtocolWriter::write(const StringData& ns,
                               const std::vector<WriteOperation*>& write_operations,
                               bool ordered,
                               bool bypassDocumentValidation,
                               const WriteConcern* writeConcern,
                               WriteResult* writeResult) {
    if (_client->getMaxWireVersion() >= 4) {
        // Per DRIVERS-250:
        // If your driver sends unacknowledged writes using op codes (OP_INSERT, OP_UPDATE,
        // OP_DELETE), you MUST raise an error when bypassDocumentValidation is explicitly set by a
        // user on >= 3.2 servers.
        //
        uassert(0,
                "bypassDocumentValidation is not supported for unacknowledged writes with MongoDB "
                "3.2 and later.",
                !bypassDocumentValidation);
    }

    // Effectively a map of batch relative indexes to WriteOperations
    std::vector<WriteOperation*> batchOps;

    BufBuilder builder;

    std::vector<WriteOperation*>::const_iterator batch_begin = write_operations.begin();
    const std::vector<WriteOperation*>::const_iterator end = write_operations.end();

    while (batch_begin != end) {
        std::vector<WriteOperation*>::const_iterator batch_iter = batch_begin;

        // We must be able to fit the first item of the batch. Otherwise, the calling code
        // passed an oversized write operation in violation of our contract.
        invariant(_fits(&builder, *batch_iter));

        // Set the current operation type for this batch
        const WriteOpType batchOpType = (*batch_iter)->operationType();

        // Begin the command for this batch.
        (*batch_iter)->startRequest(ns.toString(), ordered, &builder);

        while (true) {
            // Always safe to append here: either we just entered the loop, or all the
            // below checks passed.
            (*batch_iter)->appendSelfToRequest(&builder);

            // Associate batch index with WriteOperation
            batchOps.push_back(*batch_iter);

            // If the operation we just queued isn't batchable, issue what we have.
            if (!_batchableRequest(batchOpType, writeResult))
                break;

            // Peek at the next operation.
            const std::vector<WriteOperation*>::const_iterator next = boost::next(batch_iter);

            // If we are out of operations, issue what we have.
            if (next == end)
                break;

            // If the next operation is of a different type, issue what we have.
            if ((*next)->operationType() != batchOpType)
                break;

            // If adding the next op would put us over the limit of ops in a batch, issue
            // what we have.
            if (std::distance(batch_begin, next) >= _client->getMaxWriteBatchSize())
                break;

            // If we can't put the next item into the current batch, issue what we have.
            if (!_fits(&builder, *next))
                break;

            // OK to proceed to next op
            batch_iter = next;
        }

        // Issue the complete command.
        BSONObj batchResult = _send(batchOpType, builder, writeConcern, ns);

        // Merge this batch's result into the result for all batches written.
        writeResult->_mergeGleResult(batchOps, batchResult);
        batchOps.clear();

        // Check the write result for errors if we are doing ordered processing, or if this
        // is the last op.
        bool lastOp = *batch_iter == write_operations.back();
        if (ordered || lastOp)
            writeResult->_check(lastOp);

        // Reset the builder so we can build the next request.
        builder.reset();

        // The next batch begins with the op after the last one in the just issued batch.
        batch_begin = ++batch_iter;
    }
}
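
The inner loop above is an instance of a general greedy batching pattern: keep extending the current batch while the next item is of the same kind, the op-count cap is not reached, and the size check passes. A generic sketch with illustrative names:

    #include <cstddef>

    // Returns one past the last element of the batch starting at 'begin'. As in the
    // code above, the caller must verify that fits(*begin) holds before calling.
    template <typename It, typename SameKind, typename Fits>
    It takeBatch(It begin, It end, std::size_t maxOps, SameKind sameKind, Fits fits) {
        It it = begin;
        std::size_t n = 0;
        while (it != end && n < maxOps && sameKind(*begin, *it) && fits(*it)) {
            ++it;
            ++n;
        }
        return it;
    }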
Example #6
    std::string runQuery(OperationContext* txn,
                         QueryMessage& q,
                         const NamespaceString& nss,
                         CurOp& curop,
                         Message &result) {
        // Validate the namespace.
        uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid());
        invariant(!nss.isCommand());

        // Set curop information.
        beginQueryOp(nss, q.query, q.ntoreturn, q.ntoskip, &curop);

        // Parse the qm into a CanonicalQuery.
        std::auto_ptr<CanonicalQuery> cq;
        {
            CanonicalQuery* cqRaw;
            Status canonStatus = CanonicalQuery::canonicalize(q,
                                                              &cqRaw,
                                                              WhereCallbackReal(txn, nss.db()));
            if (!canonStatus.isOK()) {
                uasserted(17287, str::stream() << "Can't canonicalize query: "
                                               << canonStatus.toString());
            }
            cq.reset(cqRaw);
        }
        invariant(cq.get());

        LOG(5) << "Running query:\n" << cq->toString();
        LOG(2) << "Running query: " << cq->toStringShort();

        // Parse, canonicalize, plan, transcribe, and get a plan executor.
        AutoGetCollectionForRead ctx(txn, nss);
        Collection* collection = ctx.getCollection();

        const int dbProfilingLevel = ctx.getDb() ? ctx.getDb()->getProfilingLevel() :
                                                   serverGlobalParams.defaultProfile;

        // We have a parsed query. Time to get the execution plan for it.
        std::unique_ptr<PlanExecutor> exec;
        {
            PlanExecutor* rawExec;
            Status execStatus = getExecutorFind(txn,
                                                collection,
                                                nss,
                                                cq.release(),
                                                PlanExecutor::YIELD_AUTO,
                                                &rawExec);
            uassertStatusOK(execStatus);
            exec.reset(rawExec);
        }
        const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

        // If it's actually an explain, do the explain and return rather than falling through
        // to the normal query execution loop.
        if (pq.isExplain()) {
            BufBuilder bb;
            bb.skip(sizeof(QueryResult::Value));

            BSONObjBuilder explainBob;
            Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

            // Add the resulting object to the return buffer.
            BSONObj explainObj = explainBob.obj();
            bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

            // TODO: Does this get overwritten/do we really need to set this twice?
            curop.debug().query = q.query;

            // Set query result fields.
            QueryResult::View qr = bb.buf();
            bb.decouple();
            qr.setResultFlagsToOk();
            qr.msgdata().setLen(bb.len());
            curop.debug().responseLength = bb.len();
            qr.msgdata().setOperation(opReply);
            qr.setCursorId(0);
            qr.setStartingFrom(0);
            qr.setNReturned(1);
            result.setData(qr.view2ptr(), true);
            return "";
        }

        // We freak out later if this changes before we're done with the query.
        const ChunkVersion shardingVersionAtStart = shardingState.getVersion(nss.ns());

        // Handle query option $maxTimeMS (not used with commands).
        curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
        txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

        // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
        bool slaveOK = pq.isSlaveOk() || pq.hasReadPref();
        Status serveReadsStatus = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(
                txn,
                nss,
                slaveOK);
        uassertStatusOK(serveReadsStatus);

        // Run the query.
        // bb is used to hold query results
        // this buffer should contain either requested documents per query or
        // explain information, but not both
        BufBuilder bb(32768);
        bb.skip(sizeof(QueryResult::Value));

        // How many results have we obtained from the executor?
        int numResults = 0;

        // If we're replaying the oplog, we save the last time that we read.
        Timestamp slaveReadTill;

        BSONObj obj;
        PlanExecutor::ExecState state;
        // uint64_t numMisplacedDocs = 0;

        // Get summary info about which plan the executor is using.
        curop.debug().planSummary = Explain::getPlanSummary(exec.get());

        while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
            // Add result to output buffer.
            bb.appendBuf((void*)obj.objdata(), obj.objsize());

            // Count the result.
            ++numResults;

            // Possibly note slave's position in the oplog.
            if (pq.isOplogReplay()) {
                BSONElement e = obj["ts"];
                if (Date == e.type() || bsonTimestamp == e.type()) {
                    slaveReadTill = e.timestamp();
                }
            }

            if (enoughForFirstBatch(pq, numResults, bb.len())) {
                LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore()
                       << " numToReturn=" << pq.getNumToReturn()
                       << " numResults=" << numResults
                       << endl;
                break;
            }
        }

        // If we cache the executor later, we want to deregister it as it receives notifications
        // anyway by virtue of being cached.
        //
        // If we don't cache the executor later, we are deleting it, so it must be deregistered.
        //
        // So, no matter what, deregister the executor.
        exec->deregisterExec();

        // Caller expects exceptions thrown in certain cases.
        if (PlanExecutor::FAILURE == state) {
            scoped_ptr<PlanStageStats> stats(exec->getStats());
            error() << "Plan executor error, stats: "
                    << Explain::statsToBSON(*stats);
            uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
        }

        // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the
        // chunk belonged on this node are gone. Separating chunk lifetime management from
        // ClientCursor should allow this check to go away.
        if (!shardingState.getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
            // if the version changed during the query we might be missing some data and it's
            // safe to send this as mongos can resend at this point
            throw SendStaleConfigException(nss.ns(), "version changed during initial query",
                                           shardingVersionAtStart,
                                           shardingState.getVersion(nss.ns()));
        }

        // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
        // this cursorid later.
        long long ccId = 0;

        if (shouldSaveCursor(txn, collection, state, exec.get())) {
            // We won't use the executor until it's getMore'd.
            exec->saveState();

            // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
            // inserted into a global map by its ctor.
            ClientCursor* cc = new ClientCursor(collection->getCursorManager(),
                                                exec.release(),
                                                nss.ns(),
                                                pq.getOptions(),
                                                pq.getFilter());
            ccId = cc->cursorid();

            if (txn->getClient()->isInDirectClient()) {
                cc->setUnownedRecoveryUnit(txn->recoveryUnit());
            }
            else if (state == PlanExecutor::IS_EOF && pq.isTailable()) {
                // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their
                // next getMore.
            }
            else {
                // We stash away the RecoveryUnit in the ClientCursor.  It's used for subsequent
                // getMore requests.  The calling OpCtx gets a fresh RecoveryUnit.
                txn->recoveryUnit()->abandonSnapshot();
                cc->setOwnedRecoveryUnit(txn->releaseRecoveryUnit());
                StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
                invariant(txn->setRecoveryUnit(storageEngine->newRecoveryUnit(),
                                               OperationContext::kNotInUnitOfWork)
                          == OperationContext::kNotInUnitOfWork);
            }

            LOG(5) << "caching executor with cursorid " << ccId
                   << " after returning " << numResults << " results" << endl;

            // TODO document
            if (pq.isOplogReplay() && !slaveReadTill.isNull()) {
                cc->slaveReadTill(slaveReadTill);
            }

            // TODO document
            if (pq.isExhaust()) {
                curop.debug().exhaust = true;
            }

            cc->setPos(numResults);

            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).
            cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());

            endQueryOp(cc->getExecutor(), dbProfilingLevel, numResults, ccId, &curop);
        }
        else {
            LOG(5) << "Not caching executor but returning " << numResults << " results.\n";
            endQueryOp(exec.get(), dbProfilingLevel, numResults, ccId, &curop);
        }

        // Add the results from the query into the output buffer.
        result.appendData(bb.buf(), bb.len());
        bb.decouple();

        // Fill out the output buffer's header.
        QueryResult::View qr = result.header().view2ptr();
        qr.setCursorId(ccId);
        qr.setResultFlagsToOk();
        qr.msgdata().setOperation(opReply);
        qr.setStartingFrom(0);
        qr.setNReturned(numResults);

        // curop.debug().exhaust is set above.
        return curop.debug().exhaust ? nss.ns() : "";
    }
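
The cutoff at the bottom of the loop above, enoughForFirstBatch, combines a document count with a byte budget. A hedged approximation of its logic for this era of the code (constants illustrative):

    bool enoughForFirstBatch(int numToReturn, int numDocs, int bytesBuffered) {
        if (numToReturn == 0) {
            // No explicit limit: default first batch is roughly 101 documents
            // or about 1MB of data, whichever comes first.
            return numDocs >= 101 || bytesBuffered > 1024 * 1024;
        }
        return numDocs >= numToReturn;
    }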
Example #7
    static void _logOpRS(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) {
        DEV assertInWriteLock();

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }

        const OpTime ts = OpTime::now();
        long long hashNew;
        if( theReplSet ) {
            massert(13312, "replSet error : logOp() but not primary?", theReplSet->box.getState().primary());
            hashNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId();
        }
        else {
            // must be initiation
            assert( *ns == 0 );
            hashNew = 0;
        }

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("op", opstr);
        b.append("ns", ns);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();
        int posz = partial.objsize();
        int len = posz + obj.objsize() + 1 + 2 /*o:*/;

        Record *r;
        DEV assert( logNS == 0 );
        {
            const char *logns = rsoplog;
            if ( rsOplogDetails == 0 ) {
                Client::Context ctx( logns , dbpath, 0, false);
                localDB = ctx.db();
                assert( localDB );
                rsOplogDetails = nsdetails(logns);
                massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", rsOplogDetails);
            }
            Client::Context ctx( logns , localDB, false );
            r = theDataFileMgr.fast_oplog_insert(rsOplogDetails, logns, len);
            /* todo: now() has code to handle clock skew, but if the server-to-server skew is
                     large it will get unhappy. this code (or code in now() maybe) should be improved.
                     */
            if( theReplSet ) {
                if( !(theReplSet->lastOpTimeWritten<ts) ) {
                    log() << "replSet ERROR possible failover clock skew issue? " << theReplSet->lastOpTimeWritten << ' ' << ts << rsLog;
                    log() << "replSet " << theReplSet->isPrimary() << rsLog;
                }
                theReplSet->lastOpTimeWritten = ts;
                theReplSet->lastH = hashNew;
                ctx.getClient()->setLastOp( ts.asDate() );
            }
        }

        append_O_Obj(r->data, partial, obj);

        if ( logLevel >= 6 ) {
            BSONObj temp(r);
            log( 6 ) << "logOp:" << temp << endl;
        }
    }
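
Both _logOpRS variants derive the entry hash by chaining the previous hash with the new optime and the member id, so the same operation logged at a different time or by a different member hashes differently. The formula, restated as a standalone function:

    #include <cstdint>

    int64_t chainOplogHash(int64_t lastH, int64_t optimeAsLL, int selfId) {
        return (lastH * 131 + optimeAsLL) * 17 + selfId;  // constants as in the code above
    }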
Example #8
int main( int argc, const char **argv ) {

    const char *port = "27017";
    if ( argc != 1 ) {
        if ( argc != 3 ) {
            std::cout << "need to pass port as second param" << endl;
            return EXIT_FAILURE;
        }
        port = argv[ 2 ];
    }

    DBClientConnection conn;
    string errmsg;
    if ( ! conn.connect( string( "127.0.0.1:" ) + port , errmsg ) ) {
        cout << "couldn't connect : " << errmsg << endl;
        return EXIT_FAILURE;
    }

    const char * ns = "test.test1";

    conn.dropCollection(ns);

    // clean up old data from any previous tests
    conn.remove( ns, BSONObj() );
    verify( conn.findOne( ns , BSONObj() ).isEmpty() );

    // test insert
    conn.insert( ns ,BSON( "name" << "eliot" << "num" << 1 ) );
    verify( ! conn.findOne( ns , BSONObj() ).isEmpty() );

    // test remove
    conn.remove( ns, BSONObj() );
    verify( conn.findOne( ns , BSONObj() ).isEmpty() );


    // insert, findOne testing
    conn.insert( ns , BSON( "name" << "eliot" << "num" << 1 ) );
    {
        BSONObj res = conn.findOne( ns , BSONObj() );
        verify( strstr( res.getStringField( "name" ) , "eliot" ) );
        verify( ! strstr( res.getStringField( "name2" ) , "eliot" ) );
        verify( 1 == res.getIntField( "num" ) );
    }


    // cursor
    conn.insert( ns ,BSON( "name" << "sara" << "num" << 2 ) );
    {
        auto_ptr<DBClientCursor> cursor = conn.query( ns , BSONObj() );
        int count = 0;
        while ( cursor->more() ) {
            count++;
            BSONObj obj = cursor->next();
        }
        verify( count == 2 );
    }

    {
        auto_ptr<DBClientCursor> cursor = conn.query( ns , BSON( "num" << 1 ) );
        int count = 0;
        while ( cursor->more() ) {
            count++;
            BSONObj obj = cursor->next();
        }
        verify( count == 1 );
    }

    {
        auto_ptr<DBClientCursor> cursor = conn.query( ns , BSON( "num" << 3 ) );
        int count = 0;
        while ( cursor->more() ) {
            count++;
            BSONObj obj = cursor->next();
        }
        verify( count == 0 );
    }

    // update
    {
        BSONObj res = conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() );
        verify( ! strstr( res.getStringField( "name2" ) , "eliot" ) );

        BSONObj after = BSONObjBuilder().appendElements( res ).append( "name2" , "h" ).obj();

        conn.update( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() , after );
        res = conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() );
        verify( ! strstr( res.getStringField( "name2" ) , "eliot" ) );
        verify( conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() ).isEmpty() );

        conn.update( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() , after );
        res = conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() );
        verify( strstr( res.getStringField( "name" ) , "eliot" ) );
        verify( strstr( res.getStringField( "name2" ) , "h" ) );
        verify( conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() ).isEmpty() );

        // upsert
        conn.update( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() , after , 1 );
        verify( ! conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() ).isEmpty() );

    }

    {
        // ensure index
        verify( conn.ensureIndex( ns , BSON( "name" << 1 ) ) );
        verify( ! conn.ensureIndex( ns , BSON( "name" << 1 ) ) );
    }

    {
        // 5 second TTL index
        const char * ttlns = "test.ttltest1";
        conn.dropCollection( ttlns );

        {
            mongo::BSONObjBuilder b;
            b.appendTimeT("ttltime", time(0));
            b.append("name", "foo");
            conn.insert(ttlns, b.obj());
        }
        conn.ensureIndex(ttlns, BSON("ttltime" << 1), false, false, "", true, false, -1, 5);
        verify(!conn.findOne(ttlns, BSONObjBuilder().append("name", "foo").obj()).isEmpty());
        // Sleep 66 seconds: 60 for the TTL monitor loop, 5 for the TTL itself, and 1 as margin.
        sleepsecs(66);
        verify(conn.findOne(ttlns, BSONObjBuilder().append("name", "foo").obj()).isEmpty());
    }

    {
        // hint related tests
        // tokumx doesn't reorder documents just because you updated one, what even is that
        verify( conn.findOne(ns, "{}")["name"].str() == "eliot" );

        verify( conn.findOne(ns, "{ name : 'sara' }")["name"].str() == "sara" );
        verify( conn.getLastError() == "" );

        // nonexistent index test
        bool asserted = false;
        try {
            conn.findOne(ns, Query("{name:\"eliot\"}").hint("{foo:1}"));
        }
        catch ( ... ) {
            asserted = true;
        }
        verify( asserted );

        //existing index
        verify( conn.findOne(ns, Query("{name:'eliot'}").hint("{name:1}")).hasElement("name") );

        // run validate
        verify( conn.validate( ns ) );
    }

    {
        // timestamp test

        const char * tsns = "test.tstest1";
        conn.dropCollection( tsns );

        {
            mongo::BSONObjBuilder b;
            b.appendTimestamp( "ts" );
            conn.insert( tsns , b.obj() );
        }

        mongo::BSONObj out = conn.findOne( tsns , mongo::BSONObj() );
        Date_t oldTime = out["ts"].timestampTime();
        unsigned int oldInc = out["ts"].timestampInc();

        {
            mongo::BSONObjBuilder b1;
            b1.append( out["_id"] );

            mongo::BSONObjBuilder b2;
            b2.append( out["_id"] );
            b2.appendTimestamp( "ts" );

            conn.update( tsns , b1.obj() , b2.obj() );
        }

        BSONObj found = conn.findOne( tsns , mongo::BSONObj() );
        cout << "old: " << out << "\nnew: " << found << endl;
        verify( ( oldTime < found["ts"].timestampTime() ) ||
                ( oldTime == found["ts"].timestampTime() && oldInc < found["ts"].timestampInc() ) );

    }

    {
        // check that killcursors doesn't affect last error
        verify( conn.getLastError().empty() );

        BufBuilder b;
        b.appendNum( (int)0 ); // reserved
        b.appendNum( (int)-1 ); // invalid # of cursors triggers exception
        b.appendNum( (int)-1 ); // bogus cursor id

        Message m;
        m.setData( dbKillCursors, b.buf(), b.len() );

        // say() is protected in DBClientConnection, so get superclass
        static_cast< DBConnector* >( &conn )->say( m );

        verify( conn.getLastError().empty() );
    }

    {
        list<string> l = conn.getDatabaseNames();
        for ( list<string>::iterator i = l.begin(); i != l.end(); i++ ) {
            cout << "db name : " << *i << endl;
        }

        l = conn.getCollectionNames( "test" );
        for ( list<string>::iterator i = l.begin(); i != l.end(); i++ ) {
            cout << "coll name : " << *i << endl;
        }
    }

    {
        //Map Reduce (this mostly just tests that it compiles with all output types)
        const string ns = "test.mr";
        conn.insert(ns, BSON("a" << 1));
        conn.insert(ns, BSON("a" << 1));

        const char* map = "function() { emit(this.a, 1); }";
        const char* reduce = "function(key, values) { return Array.sum(values); }";

        const string outcoll = ns + ".out";

        BSONObj out;
        out = conn.mapreduce(ns, map, reduce, BSONObj()); // default to inline
        //MONGO_PRINT(out);
        out = conn.mapreduce(ns, map, reduce, BSONObj(), outcoll);
        //MONGO_PRINT(out);
        out = conn.mapreduce(ns, map, reduce, BSONObj(), outcoll.c_str());
        //MONGO_PRINT(out);
        out = conn.mapreduce(ns, map, reduce, BSONObj(), BSON("reduce" << outcoll));
        //MONGO_PRINT(out);
    }

    { 
        // test timeouts

        DBClientConnection conn( true , 0 , 2 );
        if ( ! conn.connect( string( "127.0.0.1:" ) + port , errmsg ) ) {
            cout << "couldn't connect : " << errmsg << endl;
            throw -11;
        }
        conn.insert( "test.totest" , BSON( "x" << 1 ) );
        BSONObj res;
        
        bool gotError = false;
        verify( conn.eval( "test" , "return db.totest.findOne().x" , res ) );
        try {
            conn.eval( "test" , "sleep(5000); return db.totest.findOne().x" , res );
        }
        catch ( std::exception& e ) {
            gotError = true;
            log() << e.what() << endl;
        }
        verify( gotError );
        // sleep so the server isn't locked anymore
        sleepsecs( 4 );
        
        verify( conn.eval( "test" , "return db.totest.findOne().x" , res ) );
        
        
    }

    cout << "client test finished!" << endl;
    return EXIT_SUCCESS;
}
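
The killcursors block above deliberately sends a malformed body to show it leaves getLastError untouched. For contrast, a well-formed OP_KILL_CURSORS body is an int32 zero (reserved), an int32 cursor count, then that many int64 cursor ids; in the same BufBuilder idiom (cursorId is a placeholder):

    BufBuilder b;
    b.appendNum( (int)0 );              // reserved, must be zero
    b.appendNum( (int)1 );              // numberOfCursorIDs
    long long cursorId = 0;             // placeholder: a real id comes from a query reply
    b.appendNum( cursorId );            // the int64 cursor id to kill

    Message m;
    m.setData( dbKillCursors, b.buf(), b.len() );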
Example #9
    std::string newRunQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
        // Validate the namespace.
        const char *ns = q.ns;
        uassert(16332, "can't have an empty ns", ns[0]);

        const NamespaceString nsString(ns);
        uassert(16256, str::stream() << "Invalid ns [" << ns << "]", nsString.isValid());

        // Set curop information.
        curop.debug().ns = ns;
        curop.debug().ntoreturn = q.ntoreturn;
        curop.debug().query = q.query;
        curop.setQuery(q.query);

        // If the query is really a command, run it.
        if (nsString.isCommand()) {
            int nToReturn = q.ntoreturn;
            uassert(16979, str::stream() << "bad numberToReturn (" << nToReturn
                                         << ") for $cmd type ns - can only be 1 or -1",
                    nToReturn == 1 || nToReturn == -1);

            curop.markCommand();

            BufBuilder bb;
            bb.skip(sizeof(QueryResult));

            BSONObjBuilder cmdResBuf;
            if (!runCommands(ns, q.query, curop, bb, cmdResBuf, false, q.queryOptions)) {
                uasserted(13530, "bad or malformed command request?");
            }

            curop.debug().iscommand = true;
            // TODO: Does this get overwritten/do we really need to set this twice?
            curop.debug().query = q.query;

            QueryResult* qr = reinterpret_cast<QueryResult*>(bb.buf());
            bb.decouple();
            qr->setResultFlagsToOk();
            qr->len = bb.len();
            curop.debug().responseLength = bb.len();
            qr->setOperation(opReply);
            qr->cursorId = 0;
            qr->startingFrom = 0;
            qr->nReturned = 1;
            result.setData(qr, true);
            return "";
        }

        // This is a read lock.  We require this because if we're parsing a $where, the
        // where-specific parsing code assumes we have a lock and creates execution machinery that
        // requires it.
        Client::ReadContext ctx(q.ns);
        Collection* collection = ctx.ctx().db()->getCollection( ns );

        // Parse the qm into a CanonicalQuery.
        CanonicalQuery* cq;
        Status canonStatus = CanonicalQuery::canonicalize(q, &cq);
        if (!canonStatus.isOK()) {
            uasserted(17287, str::stream() << "Can't canonicalize query: " << canonStatus.toString());
        }
        verify(cq);

        QLOG() << "Running query:\n" << cq->toString();
        LOG(2) << "Running query: " << cq->toStringShort();

        // Parse, canonicalize, plan, transcribe, and get a runner.
        Runner* rawRunner = NULL;

        // We use this a lot below.
        const LiteParsedQuery& pq = cq->getParsed();

        // We'll now try to get the query runner that will execute this query for us. There
        // are a few cases in which we know upfront which runner we should get and, therefore,
        // we shortcut the selection process here.
        //
        // (a) If the query is over a collection that doesn't exist, we get a special runner
        // (an EOFRunner) which doesn't return any results.
        //
        // (b) if the query is a replication initial-sync one, we get a SingleSolutionRunner
        // that uses a specifically designed stage that skips extents faster (see details in
        // exec/oplogstart.h)
        //
        // Otherwise we go through the selection of which runner is most suited to the
        // query + run-time context at hand.
        Status status = Status::OK();
        if (collection == NULL) {
            rawRunner = new EOFRunner(cq, cq->ns());
        }
        else if (pq.hasOption(QueryOption_OplogReplay)) {
            status = getOplogStartHack(collection, cq, &rawRunner);
        }
        else {
            // Takes ownership of cq.
            size_t options = QueryPlannerParams::DEFAULT;
            if (shardingState.needCollectionMetadata(pq.ns())) {
                options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
            }
            status = getRunner(cq, &rawRunner, options);
        }

        if (!status.isOK()) {
            // NOTE: Do not access cq as getRunner has deleted it.
            uasserted(17007, "Unable to execute query: " + status.reason());
        }

        verify(NULL != rawRunner);
        auto_ptr<Runner> runner(rawRunner);

        // We freak out later if this changes before we're done with the query.
        const ChunkVersion shardingVersionAtStart = shardingState.getVersion(cq->ns());

        // Handle query option $maxTimeMS (not used with commands).
        curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
        killCurrentOp.checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

        // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
        replVerifyReadsOk(&pq);

        // If this exists, the collection is sharded.
        // If it doesn't exist, we can assume we're not sharded.
        // If we're sharded, we might encounter data that is not consistent with our sharding state.
        // We must ignore this data.
        CollectionMetadataPtr collMetadata;
        if (!shardingState.needCollectionMetadata(pq.ns())) {
            collMetadata = CollectionMetadataPtr();
        }
        else {
            collMetadata = shardingState.getCollectionMetadata(pq.ns());
        }

        // Run the query.
        // bb is used to hold query results
        // this buffer should contain either requested documents per query or
        // explain information, but not both
        BufBuilder bb(32768);
        bb.skip(sizeof(QueryResult));

        // How many results have we obtained from the runner?
        int numResults = 0;

        // If we're replaying the oplog, we save the last time that we read.
        OpTime slaveReadTill;

        // Do we save the Runner in a ClientCursor for getMore calls later?
        bool saveClientCursor = false;

        // We turn on auto-yielding for the runner here.  The runner registers itself with the
        // active runners list in ClientCursor.
        auto_ptr<ScopedRunnerRegistration> safety(new ScopedRunnerRegistration(runner.get()));
        runner->setYieldPolicy(Runner::YIELD_AUTO);

        BSONObj obj;
        Runner::RunnerState state;
        // uint64_t numMisplacedDocs = 0;

        // Set this outside the loop; we need it both within the loop and when deciding
        // whether to fill in explain information.
        const bool isExplain = pq.isExplain();

        // Have we retrieved info about which plan the runner will
        // use to execute the query yet?
        bool gotPlanInfo = false;
        PlanInfo* rawInfo;
        boost::scoped_ptr<PlanInfo> planInfo;

        while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
            // Add result to output buffer. This is unnecessary if explain info is requested
            if (!isExplain) {
                bb.appendBuf((void*)obj.objdata(), obj.objsize());
            }

            // Count the result.
            ++numResults;

            // In the case of the multi plan runner, we may not be able to
            // successfully retrieve plan info until after the query starts
            // to run. This is because the multi plan runner doesn't know what
            // plan it will end up using until it runs candidates and selects
            // the best.
            //
            // TODO: Do we ever want to output what the MPR is comparing?
            if (!gotPlanInfo) {
                Status infoStatus = runner->getInfo(NULL, &rawInfo);
                if (infoStatus.isOK()) {
                    gotPlanInfo = true;
                    planInfo.reset(rawInfo);
                    // planSummary is really a ThreadSafeString which copies the data from
                    // the provided pointer.
                    curop.debug().planSummary = planInfo->planSummary.c_str();
                }
            }

            // Possibly note slave's position in the oplog.
            if (pq.hasOption(QueryOption_OplogReplay)) {
                BSONElement e = obj["ts"];
                if (Date == e.type() || Timestamp == e.type()) {
                    slaveReadTill = e._opTime();
                }
            }

            // TODO: only one type of 2d search doesn't support this.  We need a way to pull it out
            // of CanonicalQuery. :(
            const bool supportsGetMore = true;
            if (isExplain) {
                if (enoughForExplain(pq, numResults)) {
                    break;
                }
            }
            else if (!supportsGetMore && (enough(pq, numResults)
                                          || bb.len() >= MaxBytesToReturnToClientAtOnce)) {
                break;
            }
            else if (enoughForFirstBatch(pq, numResults, bb.len())) {
                QLOG() << "Enough for first batch, wantMore=" << pq.wantMore()
                       << " numToReturn=" << pq.getNumToReturn()
                       << " numResults=" << numResults
                       << endl;
                // If only one result requested assume it's a findOne() and don't save the cursor.
                if (pq.wantMore() && 1 != pq.getNumToReturn()) {
                    QLOG() << " runner EOF=" << runner->isEOF() << endl;
                    saveClientCursor = !runner->isEOF();
                }
                break;
            }
        }

        // Try to get information about the plan which the runner
        // will use to execute the query, if we don't have it already.
        if (!gotPlanInfo) {
            Status infoStatus = runner->getInfo(NULL, &rawInfo);
            if (infoStatus.isOK()) {
                gotPlanInfo = true;
                planInfo.reset(rawInfo);
                // planSummary is really a ThreadSafeString which copies the data from
                // the provided pointer.
                curop.debug().planSummary = planInfo->planSummary.c_str();
            }
        }

        // If we cache the runner later, we want to deregister it as it receives notifications
        // anyway by virtue of being cached.
        //
        // If we don't cache the runner later, we are deleting it, so it must be deregistered.
        //
        // So, no matter what, deregister the runner.
        safety.reset();

        // Caller expects exceptions thrown in certain cases.
        if (Runner::RUNNER_ERROR == state) {
            TypeExplain* bareExplain;
            Status res = runner->getInfo(&bareExplain, NULL);
            if (res.isOK()) {
                boost::scoped_ptr<TypeExplain> errorExplain(bareExplain);
                error() << "Runner error, stats:\n"
                        << errorExplain->stats.jsonString(Strict, true);
            }
            uasserted(17144, "Runner error: " + WorkingSetCommon::toStatusString(obj));
        }

        // Why save a dead runner?
        if (Runner::RUNNER_DEAD == state) {
            saveClientCursor = false;
        }
        else if (pq.hasOption(QueryOption_CursorTailable)) {
            // If we're tailing a capped collection, we don't bother saving the cursor if the
            // collection is empty. Otherwise, the semantics of the tailable cursor is that the
            // client will keep trying to read from it. So we'll keep it around.
            Collection* collection = ctx.ctx().db()->getCollection(cq->ns());
            if (collection && collection->numRecords() != 0 && pq.getNumToReturn() != 1) {
                saveClientCursor = true;
            }
        }

        // TODO(greg): This will go away soon.
        if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
            // if the version changed during the query we might be missing some data and its safe to
            // send this as mongos can resend at this point
            throw SendStaleConfigException(pq.ns(), "version changed during initial query",
                                           shardingVersionAtStart,
                                           shardingState.getVersion(pq.ns()));
        }

        // Used to fill in explain and to determine if the query is slow enough to be logged.
        int elapsedMillis = curop.elapsedMillis();

        // Get explain information if:
        // 1) it is needed by an explain query;
        // 2) profiling is enabled; or
        // 3) profiling is disabled but we still need explain details to log a "slow" query.
        // Producing explain information is expensive and should be done only if we are certain
        // the information will be used.
        boost::scoped_ptr<TypeExplain> explain(NULL);
        if (isExplain ||
            ctx.ctx().db()->getProfilingLevel() > 0 ||
            elapsedMillis > serverGlobalParams.slowMS) {
            // Ask the runner to produce explain information.
            TypeExplain* bareExplain;
            Status res = runner->getInfo(&bareExplain, NULL);
            if (res.isOK()) {
                explain.reset(bareExplain);
            }
            else if (isExplain) {
                error() << "could not produce explain of query '" << pq.getFilter()
                        << "', error: " << res.reason();
                // If numResults and the data in bb don't correspond, we'll crash later when rooting
                // through the reply msg.
                BSONObj emptyObj;
                bb.appendBuf((void*)emptyObj.objdata(), emptyObj.objsize());
                // The explain output is actually a result.
                numResults = 1;
                // TODO: we can fill out millis etc. here just fine even if the plan screwed up.
            }
        }

        // Fill in the missing run-time fields in explain, starting with properties of
        // the process running the query.
        if (isExplain && NULL != explain.get()) {
            std::string server = mongoutils::str::stream()
                << getHostNameCached() << ":" << serverGlobalParams.port;
            explain->setServer(server);

            // We might have skipped some results due to chunk migration etc., but numResults
            // counts only the documents actually returned, so the count is correct.
            explain->setN(numResults);

            // Clock the whole operation.
            explain->setMillis(elapsedMillis);

            BSONObj explainObj = explain->toBSON();
            bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

            // The explain output is actually a result.
            numResults = 1;
        }

        long long ccId = 0;
        if (saveClientCursor) {
            // We won't use the runner until it's getMore'd.
            runner->saveState();

            // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
            // inserted into a global map by its ctor.
            ClientCursor* cc = new ClientCursor(collection, runner.get(),
                                                cq->getParsed().getOptions(),
                                                cq->getParsed().getFilter());
            ccId = cc->cursorid();

            QLOG() << "caching runner with cursorid " << ccId
                   << " after returning " << numResults << " results" << endl;

            // ClientCursor takes ownership of runner.  Release to make sure it's not deleted.
            runner.release();

            // TODO document
            if (pq.hasOption(QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
                cc->slaveReadTill(slaveReadTill);
            }

            // TODO document
            if (pq.hasOption(QueryOption_Exhaust)) {
                curop.debug().exhaust = true;
            }

            // Set attributes for getMore.
            cc->setCollMetadata(collMetadata);
            cc->setPos(numResults);

            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).
            cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());
        }
        else {
            QLOG() << "Not caching runner but returning " << numResults << " results.\n";
        }

        // Add the results from the query into the output buffer.
        result.appendData(bb.buf(), bb.len());
        bb.decouple();

        // Fill out the output buffer's header.
        QueryResult* qr = static_cast<QueryResult*>(result.header());
        qr->cursorId = ccId;
        curop.debug().cursorid = (0 == ccId ? -1 : ccId);
        qr->setResultFlagsToOk();
        qr->setOperation(opReply);
        qr->startingFrom = 0;
        qr->nReturned = numResults;

        // Set debug information for consumption by the profiler.
        curop.debug().ntoskip = pq.getSkip();
        curop.debug().nreturned = numResults;
        if (NULL != explain.get()) {
            if (explain->isScanAndOrderSet()) {
                curop.debug().scanAndOrder = explain->getScanAndOrder();
            }
            else {
                curop.debug().scanAndOrder = false;
            }

            if (explain->isNScannedSet()) {
                curop.debug().nscanned = explain->getNScanned();
            }

            if (explain->isNScannedObjectsSet()) {
                curop.debug().nscannedObjects = explain->getNScannedObjects();
            }

            if (explain->isIDHackSet()) {
                curop.debug().idhack = explain->getIDHack();
            }

            if (!explain->stats.isEmpty()) {
                // execStats is a CachedBSONObj because it lives in the race-prone
                // curop.
                curop.debug().execStats.set(explain->stats);

                // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj.
                if (curop.debug().execStats.tooBig() && !curop.debug().planSummary.empty()) {
                    BSONObjBuilder bob;
                    bob.append("summary", curop.debug().planSummary.toString());
                    curop.debug().execStats.set(bob.done());
                }

            }
        }

        // curop.debug().exhaust is set above.
        return curop.debug().exhaust ? pq.ns() : "";
    }
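
The explain gating above is the interesting branch: producing explain output is expensive, so it is paid for only when it will definitely be consumed. Below is a minimal sketch of that predicate in isolation; the helper name and signature are illustrative, not from the MongoDB tree (the original inlines the condition).

    // Sketch only: the three cases in which the code above pays for explain.
    // Names and signature are illustrative, not from the MongoDB tree.
    static bool shouldProduceExplain(bool isExplain,       // client ran an explain query
                                     int profilingLevel,   // db profiling level (0 = off)
                                     int elapsedMillis,    // time spent so far
                                     int slowMS) {         // serverGlobalParams.slowMS
        // 1) needed by an explain query; 2) profiling is enabled; or
        // 3) the query is slow enough that its log line wants explain details.
        return isExplain || profilingLevel > 0 || elapsedMillis > slowMS;
    }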
Exemple #10
0
        BSONObj KeyV1::toBson(BufBuilder &bb) const { 
            verify( _keyData != 0 );
            if( !isCompactFormat() )
                return bson();

            BSONObjBuilder b(bb);
            const unsigned char *p = _keyData;
            while( 1 ) { 
                unsigned bits = *p++;

                switch( bits & cFULLTYPEMASK ) {
                    case cminkey: b.appendMinKey(""); break;
                    case cnull:   b.appendNull(""); break;
                    case cfalse:  b.appendBool("", false); break;
                    case ctrue:   b.appendBool("", true); break;
                    case cmaxkey: 
                        b.appendMaxKey(""); 
                        break;
                    case cstring:
                        {
                            unsigned sz = *p++;
                            // we build the element ourselves as we have to null terminate it
                            BufBuilder &bb = b.bb();
                            bb.appendNum((char) String);
                            bb.appendUChar(0); // fieldname ""
                            bb.appendNum(sz+1);
                            bb.appendBuf(p, sz);
                            bb.appendUChar(0); // null char at end of string
                            p += sz;
                            break;
                        }
                    case coid:
                        b.appendOID("", (OID *) p);
                        p += sizeof(OID);
                        break;
                    case cbindata:
                        {
                            int len = binDataCodeToLength(*p);
                            int subtype = (*p) & BinDataTypeMask;
                            if( subtype & 0x8 ) { 
                                subtype = (subtype & 0x7) | 0x80;
                            }
                            b.appendBinData("", len, (BinDataType) subtype, ++p);
                            p += len;
                            break;
                        }
                    case cdate:
                        b.appendDate("", (Date_t&) *p);
                        p += 8;
                        break;
                    case cdouble:
                        b.append("", (double&) *p);
                        p += sizeof(double);
                        break;
                    case cint:
                        b.append("", static_cast< int >((reinterpret_cast< const PackedDouble& >(*p)).d));
                        p += sizeof(double);
                        break;
                    case clong:
                        b.append("", static_cast< long long>((reinterpret_cast< const PackedDouble& >(*p)).d));
                        p += sizeof(double);
                        break;
                    case cint64:
                        b.append("", *reinterpret_cast<const long long *>(p));
                        p += sizeof(long long);
                        break;
                    default:
                        verify(false);
                }

                if( (bits & cHASMORE) == 0 )
                    break;
            }
            return b.done();
        }
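
One detail worth isolating from the example above is the cbindata branch: the compact key format has only a few bits for the BinData subtype, so the 0x80 "user defined" range is folded into bit 0x8 when packed and expanded on the way out. The following is a self-contained sketch of that expansion; the mask value 0x0F is an assumption standing in for BinDataTypeMask.

    #include <cassert>

    // Sketch of the subtype expansion in the cbindata case above. The mask
    // value 0x0F is an assumption standing in for BinDataTypeMask.
    static int expandBinDataSubtype(unsigned char codeByte) {
        int subtype = codeByte & 0x0F;
        if (subtype & 0x8) {
            // Fold bit 0x8 back out into the user-defined range 0x80..0x87.
            subtype = (subtype & 0x7) | 0x80;
        }
        return subtype;
    }

    int main() {
        assert(expandBinDataSubtype(0x02) == 0x02);  // ordinary subtype passes through
        assert(expandBinDataSubtype(0x08) == 0x80);  // packed user-defined subtype expands
        assert(expandBinDataSubtype(0x0B) == 0x83);  // 0x8 | 0x3 -> 0x83
        return 0;
    }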
Exemple #11
0
        /** caller handles locking */
        static bool PREPLOGBUFFER(BufBuilder& bb) { 
            if( writes.empty() )
                return false;

            bb.reset();

            unsigned *lenInBlockHeader;
            {
                // JSectHeader
                bb.appendStr("\nHH\n", false);
                lenInBlockHeader = (unsigned *) bb.skip(4);
            }

            string lastFilePath;

            {
                scoped_lock lk(privateViews._mutex());
                for( vector<WriteIntent>::iterator i = writes.begin(); i != writes.end(); i++ ) {
                    size_t ofs;
                    MongoMMF *mmf = privateViews._find(i->p, ofs);
                    if( mmf == 0 ) {
                        journalingFailure("view pointer cannot be resolved");
                    }
                    else {
                        if( mmf->filePath() != lastFilePath ) { 
                            lastFilePath = mmf->filePath();
                            JDbContext c;
                            bb.appendStruct(c);
                            bb.appendStr(lastFilePath);
                        }
                        JEntry e;
                        e.len = i->len;
                        e.fileNo = mmf->fileSuffixNo();
                        bb.appendStruct(e);
                        bb.appendBuf(i->p, i->len);
                    }
                }
            }

            {
                JSectFooter f;
                f.hash = 0;
                bb.appendStruct(f);
            }

            {
                unsigned L = (bb.len() + 8191) & 0xffffe000; // fill to alignment
                dassert( L >= (unsigned) bb.len() );
                *lenInBlockHeader = L;
                unsigned padding = L - bb.len();
                bb.skip(padding);
                dassert( bb.len() % 8192 == 0 );
            }

            writes.clear();
            alreadyNoted.clear();
            return true;
        }
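
The last block of PREPLOGBUFFER pads the section to an 8192-byte boundary with (bb.len() + 8191) & 0xffffe000, where 0xffffe000 is simply ~8191 for a 32-bit unsigned value. The same round-up in isolation, as a compilable sketch:

    #include <cassert>

    // Round len up to the next multiple of 8192, the same bit trick used in
    // PREPLOGBUFFER above: add (alignment - 1), then clear the low 13 bits.
    static unsigned roundUpTo8K(unsigned len) {
        return (len + 8191u) & ~8191u;  // ~8191u == 0xffffe000 for 32-bit unsigned
    }

    int main() {
        assert(roundUpTo8K(0) == 0);
        assert(roundUpTo8K(1) == 8192u);
        assert(roundUpTo8K(8192u) == 8192u);
        assert(roundUpTo8K(8193u) == 16384u);
        return 0;
    }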
Exemple #12
0
    static void _logOpOld(OperationContext* txn,
                          const char *opstr,
                          const char *ns,
                          const char *logNS,
                          const BSONObj& obj,
                          BSONObj *o2,
                          bool *bb,
                          bool fromMigrate ) {
        Lock::DBLock lk(txn->lockState(), "local", newlm::MODE_X);
        WriteUnitOfWork wunit(txn);
        static BufBuilder bufbuilder(8*1024); // todo there is likely a mutex on this constructor

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
        }

        if ( localOplogMainCollection == 0 ) {
            Client::Context ctx(txn, logNS);
            localDB = ctx.db();
            verify( localDB );
            localOplogMainCollection = localDB->getCollection(txn, logNS);
            verify( localOplogMainCollection );
        }

        Client::Context ctx(txn, logNS , localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogMainCollection->insertDocument( txn, &writer, false ) );

        ctx.getClient()->setLastOp( ts );

        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        replCoord->setMyLastOptime(txn, ts);
        wunit.commit();
    }
Exemple #13
0
    static void _logOpRS(OperationContext* txn,
                         const char *opstr,
                         const char *ns,
                         const char *logNS,
                         const BSONObj& obj,
                         BSONObj *o2,
                         bool *bb,
                         bool fromMigrate ) {
        Lock::DBLock lk1(txn->lockState(), "local", newlm::MODE_X);
        WriteUnitOfWork wunit(txn);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        long long hashNew = BackgroundSync::get()->getLastAppliedHash();

        // Check to make sure logOp() is legal at this point.
        if (*opstr == 'n') {
            // 'n' operations are always logged
            invariant(*ns == '\0');

            // 'n' operations do not advance the hash, since they are not rolled back
        }
        else {
            if (!replCoord->canAcceptWritesForDatabase(nsToDatabaseSubstring(ns))) {
                severe() << "replSet error : logOp() but can't accept write to collection " << ns;
                fassertFailed(17405);
            }

            // Advance the hash
            hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
        }


        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("v", OPLOG_VERSION);
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();

        DEV verify( logNS == 0 ); // check this was never a master/slave master

        if ( localOplogRSCollection == 0 ) {
            Client::Context ctx(txn, rsoplog);
            localDB = ctx.db();
            verify( localDB );
            localOplogRSCollection = localDB->getCollection( txn, rsoplog );
            massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
        }

        Client::Context ctx(txn, rsoplog, localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

        BackgroundSync::get()->setLastAppliedHash(hashNew);
        ctx.getClient()->setLastOp( ts );
        replCoord->setMyLastOptime(txn, ts);

        wunit.commit();

    }
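
The hash advance in _logOpRS above mixes the previous hash, the new optime, and the writing member's id, so replicas whose histories diverge produce divergent hash chains. Here is the computation in isolation, as a sketch with an illustrative name:

    // Sketch: the rolling oplog hash used above. Multiplying by small primes
    // and adding the optime and member id chains each entry's hash to the
    // previous one, so different histories yield different hashes.
    static long long advanceOplogHash(long long prevHash, long long tsAsLL, int memberId) {
        return (prevHash * 131 + tsAsLL) * 17 + memberId;
    }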
Exemple #14
0
namespace mongo {

    BufBuilder profileBufBuilder; // reused, instead of allocated every time - avoids a malloc/free cycle

    void profile( const Client& c , CurOp& currentOp ) {
        verify( Lock::somethingWriteLocked() );

        Database *db = c.database();
        DEV verify( db );
        const char *ns = db->profileName.c_str();
        
        // build object
        profileBufBuilder.reset();
        BSONObjBuilder b(profileBufBuilder);

        const bool isQueryObjTooBig = !currentOp.debug().append(currentOp, b,
                MAX_PROFILE_DOC_SIZE_BYTES);

        b.appendDate("ts", jsTime());
        b.append("client", c.clientAddress());

        if (c.getAuthenticationInfo()) {
            b.append("user", c.getAuthenticationInfo()->getUser(nsToDatabase(ns)));
        }

        BSONObj p = b.done();

        if (static_cast<size_t>(p.objsize()) > MAX_PROFILE_DOC_SIZE_BYTES || isQueryObjTooBig) {
            string small = p.toString(/*isArray*/false, /*full*/false);

            warning() << "can't add full line to system.profile: " << small << endl;

            // rebuild with limited info
            BSONObjBuilder b(profileBufBuilder);
            b.appendDate("ts", jsTime());
            b.append("client", c.clientAddress() );
            if ( c.getAuthenticationInfo() )
                b.append( "user" , c.getAuthenticationInfo()->getUser( nsToDatabase( ns ) ) );

            b.append("err", "profile line too large (max is 100KB)");

            // should be much smaller but if not don't break anything
            if (small.size() < MAX_PROFILE_DOC_SIZE_BYTES){
                b.append("abbreviated", small);
            }

            p = b.done();
        }

        // write: not replicated
        // get or create the profiling collection
        NamespaceDetails *details = getOrCreateProfileCollection(db);
        if (details) {
            int len = p.objsize();
            Record *r = theDataFileMgr.fast_oplog_insert(details, ns, len);
            memcpy(getDur().writingPtr(r->data(), len), p.objdata(), len);
        }
    }

    NamespaceDetails* getOrCreateProfileCollection(Database *db, bool force) {
        fassert(16372, db);
        const char* profileName = db->profileName.c_str();
        NamespaceDetails* details = db->namespaceIndex.details(profileName);
        if (!details && (cmdLine.defaultProfile || force)) {
            // system.profile namespace doesn't exist; create it
            log() << "creating profile collection: " << profileName << endl;
            string errmsg;
            if (!userCreateNS(db->profileName.c_str(),
                              BSON("capped" << true << "size" << 1024 * 1024), errmsg , false)) {
                log() << "could not create ns " << db->profileName << ": " << errmsg << endl;
                return NULL;
            }
            details = db->namespaceIndex.details(profileName);
        }
        if (!details) {
            // failed to get or create profile collection
            static time_t last = time(0) - 10;  // warn the first time
            if( time(0) > last+10 ) {
                log() << "profile: warning ns " << profileName << " does not exist" << endl;
                last = time(0);
            }
        }
        return details;
    }

} // namespace mongo
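
profile() above has a two-step fallback: if the full document exceeds MAX_PROFILE_DOC_SIZE_BYTES, it is rebuilt with only the metadata, an error marker, and (when it fits) a stringified abbreviation of the oversized document. A sketch of that shape with plain std::string fields; all names here are illustrative:

    #include <cstddef>
    #include <string>

    // Illustrative sketch of the oversize fallback in profile() above:
    // keep the metadata, swap the body for an error marker, and attach the
    // abbreviated string form only if it is itself small enough.
    struct ProfileEntry {
        std::string ts, client, user;  // always-kept metadata
        std::string err;               // set when the full doc was too large
        std::string abbreviated;       // optional stringified form of the big doc
    };

    static ProfileEntry buildOversizeFallback(ProfileEntry meta,
                                              const std::string& small,
                                              std::size_t maxBytes) {
        meta.err = "profile line too large";
        // should be much smaller, but if not, don't break anything
        if (small.size() < maxBytes) {
            meta.abbreviated = small;
        }
        return meta;
    }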
Exemple #15
0
    static bool receivedQuery(Client& c, DbResponse& dbresponse, Message& m ) {
        bool ok = true;
        MSGID responseTo = m.header()->id;

        DbMessage d(m);
        QueryMessage q(d);
        auto_ptr< Message > resp( new Message() );

        CurOp& op = *(c.curop());

        shared_ptr<AssertionException> ex;

        try {
            dbresponse.exhaust = runQuery(m, q, op, *resp);
            assert( !resp->empty() );
        }
        catch ( SendStaleConfigException& e ){
            ex.reset( new SendStaleConfigException( e.getns(), e.getInfo().msg ) );
            ok = false;
        }
        catch ( AssertionException& e ) {
            ex.reset( new AssertionException( e.getInfo().msg, e.getCode() ) );
            ok = false;
        }

        if( ex ){

            op.debug().exceptionInfo = ex->getInfo();
            LOGWITHRATELIMIT {
                log() << "assertion " << ex->toString() << " ns:" << q.ns << " query:" <<
                (q.query.valid() ? q.query.toString() : "query object is corrupt") << endl;
                if( q.ntoskip || q.ntoreturn )
                    log() << " ntoskip:" << q.ntoskip << " ntoreturn:" << q.ntoreturn << endl;
            }

            SendStaleConfigException* scex = NULL;
            if ( ex->getCode() == SendStaleConfigCode ) scex = static_cast<SendStaleConfigException*>( ex.get() );

            BSONObjBuilder err;
            ex->getInfo().append( err );
            if( scex ) err.append( "ns", scex->getns() );
            BSONObj errObj = err.done();

            log() << errObj << endl;

            BufBuilder b;
            b.skip(sizeof(QueryResult));
            b.appendBuf((void*) errObj.objdata(), errObj.objsize());

            // todo: call replyToQuery() from here instead of this!!! see dbmessage.h
            QueryResult * msgdata = (QueryResult *) b.buf();
            b.decouple();
            QueryResult *qr = msgdata;
            qr->_resultFlags() = ResultFlag_ErrSet;
            if( scex ) qr->_resultFlags() |= ResultFlag_ShardConfigStale;
            qr->len = b.len();
            qr->setOperation(opReply);
            qr->cursorId = 0;
            qr->startingFrom = 0;
            qr->nReturned = 1;
            resp.reset( new Message() );
            resp->setData( msgdata, true );

        }

        op.debug().responseLength = resp->header()->dataLen();

        dbresponse.response = resp.release();
        dbresponse.responseTo = responseTo;

        return ok;
    }
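
The error path above assembles an OP_REPLY by hand: one BSON error object, nReturned = 1, and result flags marking the error (plus the stale-config flag when mongos needs to refresh its sharding metadata). A sketch of just the flag computation; the constants use the conventional OP_REPLY bit positions but should be treated as assumptions here:

    // Sketch of the error-reply flags set above. Bit positions follow the
    // conventional OP_REPLY layout but are written out here as assumptions.
    enum {
        kResultFlag_ErrSet           = 1 << 1,  // the single returned doc is an error object
        kResultFlag_ShardConfigStale = 1 << 2,  // mongos should refresh its config and retry
    };

    static int errorReplyFlags(bool staleConfig) {
        int flags = kResultFlag_ErrSet;
        if (staleConfig)
            flags |= kResultFlag_ShardConfigStale;
        return flags;
    }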
Exemple #16
0
    /**
     * Run a query -- includes checking for and running a Command.
     * @return the ns if exhaust mode, otherwise an empty string
     * @locks the db mutex for reading (and potentially for writing temporarily to create a new db).
     * @asserts on scan and order memory exhaustion and other cases.
     */
    string runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
        shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
        ParsedQuery& pq( *pq_shared );
        BSONObj jsobj = q.query;
        int queryOptions = q.queryOptions;
        const char *ns = q.ns;

        uassert( 16332 , "can't have an empty ns" , ns[0] );

        if( logLevel >= 2 )
            log() << "runQuery called " << ns << " " << jsobj << endl;

        curop.debug().ns = ns;
        curop.debug().ntoreturn = pq.getNumToReturn();
        curop.debug().query = jsobj;
        curop.setQuery(jsobj);

        uassert( 16256, str::stream() << "Invalid ns [" << ns << "]", NamespaceString::isValid(ns) );

        // Run a command.
        
        if ( pq.couldBeCommand() ) {
            curop.markCommand();
            BufBuilder bb;
            bb.skip(sizeof(QueryResult));
            BSONObjBuilder cmdResBuf;
            if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
                curop.debug().iscommand = true;
                curop.debug().query = jsobj;

                auto_ptr< QueryResult > qr;
                qr.reset( (QueryResult *) bb.buf() );
                bb.decouple();
                qr->setResultFlagsToOk();
                qr->len = bb.len();
                curop.debug().responseLength = bb.len();
                qr->setOperation(opReply);
                qr->cursorId = 0;
                qr->startingFrom = 0;
                qr->nReturned = 1;
                result.setData( qr.release(), true );
            }
            else {
                uasserted(13530, "bad or malformed command request?");
            }
            return "";
        }

        const bool explain = pq.isExplain();
        const bool tailable = pq.hasOption(QueryOption_CursorTailable);
        BSONObj order = pq.getOrder();
        BSONObj query = pq.getFilter();

        /* The ElemIter will not be happy if this isn't really an object. So throw an
           exception here when that is the case.
           (Which may indicate bad data from the client.)
        */
        if ( query.objsize() == 0 ) {
            out() << "Bad query object?\n  jsobj:";
            out() << jsobj.toString() << "\n  query:";
            out() << query.toString() << endl;
            uassert( 10110 , "bad query object", false);
        }

        // Tailable cursors need to read newly written entries from the tail
        // of the collection. They manually arbitrate with the collection over
        // what data is readable and when, so we choose read uncommitted isolation.
        OpSettings settings;
        settings.setQueryCursorMode(DEFAULT_LOCK_CURSOR);
        settings.setBulkFetch(true);
        settings.setCappedAppendPK(pq.hasOption(QueryOption_AddHiddenPK));
        cc().setOpSettings(settings);

        // If our caller has a transaction, it's multi-statement.
        const bool inMultiStatementTxn = cc().hasTxn();
        if (tailable) {
            // It's easier to simply disallow this; it shouldn't be happening in a normal system.
            uassert(16812, "May not perform a tailable query in a multi-statement transaction.",
                           !inMultiStatementTxn);
        }

        // Begin a read-only, snapshot transaction under normal circumstances.
        // If the cursor is tailable, we need to be able to read uncommitted data.
        const int txnFlags = (tailable ? DB_READ_UNCOMMITTED : DB_TXN_SNAPSHOT) | DB_TXN_READ_ONLY;
        LOCK_REASON(lockReason, "query");
        Client::ReadContext ctx(ns, lockReason);
        scoped_ptr<Client::Transaction> transaction(!inMultiStatementTxn ?
                                                    new Client::Transaction(txnFlags) : NULL);

        bool hasRetried = false;
        while ( 1 ) {
            try {
                replVerifyReadsOk(&pq);

                // Fast-path for primary key queries.
                if (!explain && !tailable) {
                    replVerifyReadsOk(&pq);
                    if (_tryQueryByPKHack(ns, query, pq, curop, result)) {
                        if (transaction) {
                            transaction->commit();
                        }
                        return "";
                    }
                }

                // sanity check the query and projection
                if (pq.getFields() != NULL) {
                    pq.getFields()->validateQuery( query );
                }

                        
                if (tailable) {
                    Collection *cl = getCollection( ns );
                    if (cl != NULL && !(cl->isCapped() || str::equals(ns, rsoplog))) {
                        uasserted( 13051, "tailable cursor requested on non-capped, non-oplog collection" );
                    }
                    const BSONObj nat1 = BSON( "$natural" << 1 );
                    if ( order.isEmpty() ) {
                        order = nat1;
                    } else {
                        uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
                    }
                }
                    
                // Run a regular query.

                // these may now be stored in a ClientCursor or somewhere else,
                // so make sure we use a real copy
                jsobj = jsobj.getOwned();
                query = query.getOwned();
                order = order.getOwned();
                const ConfigVersion shardingVersionAtStart = shardingState.getVersion( ns );
                const bool getCachedExplainPlan = ! hasRetried && explain && ! pq.hasIndexSpecifier();
                const bool savedCursor = queryWithQueryOptimizer( queryOptions, ns, jsobj, curop, query,
                                                                  order, pq_shared, shardingVersionAtStart,
                                                                  getCachedExplainPlan, inMultiStatementTxn,
                                                                  result );
                // Did not save the cursor, so we can commit the transaction now if it exists.
                if (transaction && !savedCursor) {
                    transaction->commit();
                }
                return curop.debug().exhaust ? ns : "";
            }
            catch ( const QueryRetryException & ) {
                // In some cases the query may be retried if there is an in memory sort size assertion.
                verify( ! hasRetried );
                hasRetried = true;
            }
        }
    }
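
The while(1) loop at the bottom of this runQuery implements a retry-exactly-once policy: an in-memory sort size assertion throws QueryRetryException, the query is attempted again, and the verify(!hasRetried) makes a second overflow fatal. The skeleton of that pattern, sketched with a placeholder attempt callback:

    // Sketch of the retry-once pattern above. QueryRetryException stands in
    // for the real exception type; attempt() stands in for one query attempt.
    struct QueryRetryException {};

    static int runWithOneRetry(int (*attempt)(bool isRetry)) {
        bool hasRetried = false;
        while (true) {
            try {
                return attempt(hasRetried);
            }
            catch (const QueryRetryException&) {
                if (hasRetried) throw;  // mirrors verify(!hasRetried) above
                hasRetried = true;      // retry exactly once
            }
        }
    }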
Exemple #17
0
    static bool receivedQuery(Client& c, DbResponse& dbresponse, Message& m ) {
        bool ok = true;
        MSGID responseTo = m.header()->id;

        DbMessage d(m);
        QueryMessage q(d);
        auto_ptr< Message > resp( new Message() );

        CurOp& op = *(c.curop());

        shared_ptr<AssertionException> ex;

        try {
            if (!NamespaceString(d.getns()).isCommand()) {
                // Auth checking for Commands happens later.
                Status status = cc().getAuthorizationManager()->checkAuthForQuery(d.getns());
                uassert(16550, status.reason(), status.isOK());
            }
            dbresponse.exhaustNS = runQuery(m, q, op, *resp);
            verify( !resp->empty() );
        }
        catch ( SendStaleConfigException& e ){
            ex.reset( new SendStaleConfigException( e.getns(), e.getInfo().msg, e.getVersionReceived(), e.getVersionWanted() ) );
            ok = false;
        }
        catch ( AssertionException& e ) {
            ex.reset( new AssertionException( e.getInfo().msg, e.getCode() ) );
            ok = false;
        }

        if( ex ){

            op.debug().exceptionInfo = ex->getInfo();
            LOGWITHRATELIMIT {
                log() << "assertion " << ex->toString() << " ns:" << q.ns << " query:" <<
                (q.query.valid() ? q.query.toString() : "query object is corrupt") << endl;
                if( q.ntoskip || q.ntoreturn )
                    log() << " ntoskip:" << q.ntoskip << " ntoreturn:" << q.ntoreturn << endl;
            }

            SendStaleConfigException* scex = NULL;
            if ( ex->getCode() == SendStaleConfigCode ) scex = static_cast<SendStaleConfigException*>( ex.get() );

            BSONObjBuilder err;
            ex->getInfo().append( err );
            if( scex ){
                err.append( "ns", scex->getns() );
                scex->getVersionReceived().addToBSON( err, "vReceived" );
                scex->getVersionWanted().addToBSON( err, "vWanted" );
            }
            BSONObj errObj = err.done();

            if( scex ){
                log() << "stale version detected during query over "
                      << q.ns << " : " << errObj << endl;
            }
            else{
                log() << "problem detected during query over "
                      << q.ns << " : " << errObj << endl;
            }

            BufBuilder b;
            b.skip(sizeof(QueryResult));
            b.appendBuf((void*) errObj.objdata(), errObj.objsize());

            // todo: call replyToQuery() from here instead of this!!! see dbmessage.h
            QueryResult * msgdata = (QueryResult *) b.buf();
            b.decouple();
            QueryResult *qr = msgdata;
            qr->_resultFlags() = ResultFlag_ErrSet;
            if( scex ) qr->_resultFlags() |= ResultFlag_ShardConfigStale;
            qr->len = b.len();
            qr->setOperation(opReply);
            qr->cursorId = 0;
            qr->startingFrom = 0;
            qr->nReturned = 1;
            resp.reset( new Message() );
            resp->setData( msgdata, true );

        }

        op.debug().responseLength = resp->header()->dataLen();

        dbresponse.response = resp.release();
        dbresponse.responseTo = responseTo;

        return ok;
    }
Exemple #18
0
    static void _logOpOld(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb, bool fromMigrate ) {
        Lock::DBWrite lk("local");
        static BufBuilder bufbuilder(8*1024); // todo there is likely a mutex on this constructor

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        mutex::scoped_lock lk2(OpTime::m);

        const OpTime ts = OpTime::now(lk2);
        Client::Context context("", 0);

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        int po_sz = partial.objsize();
        int len = po_sz + obj.objsize() + 1 + 2 /*o:*/;
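        // The arithmetic: partial already carries its 4-byte length prefix and a
        // trailing EOO byte. Appending the o: field replaces that EOO with one
        // type byte, the two-byte field name "o\0", obj's bytes, and a new EOO,
        // which nets out to po_sz + obj.objsize() + 3 (written as +1 +2 above).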

        Record *r;
        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
            if ( localOplogMainDetails == 0 ) {
                Client::Context ctx(logNS , dbpath);
                localDB = ctx.db();
                verify( localDB );
                localOplogMainDetails = nsdetails(logNS);
                verify( localOplogMainDetails );
            }
            Client::Context ctx(logNS , localDB);
            r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len);
        }
        else {
            Client::Context ctx(logNS, dbpath);
            verify( nsdetails( logNS ) );
            // first we allocate the space, then we fill it below.
            r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len);
        }

        append_O_Obj(r->data(), partial, obj);

        context.getClient()->setLastOp( ts );

        LOG( 6 ) << "logging op:" << BSONObj::make(r) << endl;
    } 
Exemple #19
0
    bool ShardedClientCursor::sendNextBatch(int ntoreturn, BufBuilder& buffer, int& docCount) {
        uassert( 10191 ,  "cursor already done" , ! _done );

        int maxSize = 1024 * 1024;
        if ( _totalSent > 0 )
            maxSize *= 3;

        docCount = 0;

        // If ntoreturn is negative, it means that we should send up to -ntoreturn results
        // back to the client, and that we should only send a *single batch*. An ntoreturn of
        // 1 is also a special case which means "return up to 1 result in a single batch" (so
        // that +1 actually has the same meaning as -1). For all other values of ntoreturn, we
        // may have to return multiple batches.
        const bool sendMoreBatches = ntoreturn == 0 || ntoreturn > 1;
        ntoreturn = abs( ntoreturn );

        bool cursorHasMore = true;
        while ( ( cursorHasMore = _cursor->more() ) ) {
            BSONObj o = _cursor->next();

            buffer.appendBuf( (void*)o.objdata() , o.objsize() );
            docCount++;
            // Ensure that the next batch will never wind up requesting more docs from the shard
            // than are remaining to satisfy the initial ntoreturn.
            if (ntoreturn != 0) {
                _cursor->setBatchSize(ntoreturn - docCount);
            }

            if ( buffer.len() > maxSize ) {
                break;
            }

            if ( docCount == ntoreturn ) {
                // soft limit aka batch size
                break;
            }

            if ( ntoreturn == 0 && _totalSent == 0 && docCount >= 100 ) {
                // first batch should be max 100 unless batch size specified
                break;
            }
        }

        // We need to request another batch if the following two conditions hold:
        //
        //  1. ntoreturn is either zero or greater than 1 (see the comment above). This
        //  condition is stored in 'sendMoreBatches'.
        //
        //  2. The last call to _cursor->more() was true (i.e. we never explicitly got a false
        //  value from _cursor->more()). This condition is stored in 'cursorHasMore'. If the server
        //  hits EOF while executing a query or a getmore, it will pass a cursorId of 0 in the
        //  query response to indicate that there are no more results. In this case, _cursor->more()
        //  will be explicitly false, and we know for sure that we do not have to send more batches.
        //
        //  On the other hand, if _cursor->more() is true there may or may not be more results.
        //  Suppose that the mongod generates enough results to fill this batch. In this case it
        //  does not know whether or not there are more, because finding out would require
        //  requesting an extra result and seeing whether we get EOF. The mongod sends a valid
        //  cursorId to indicate that there may be more. We do the same here: we indicate that
        //  there may be more results to retrieve by setting 'hasMoreBatches' to true.
        bool hasMoreBatches = sendMoreBatches && cursorHasMore;

        LOG(5) << "\t hasMoreBatches: " << hasMoreBatches
               << " sendMoreBatches: " << sendMoreBatches
               << " cursorHasMore: " << cursorHasMore
               << " ntoreturn: " << ntoreturn
               << " num: " << docCount
               << " id:" << getId()
               << " totalSent: " << _totalSent << endl;

        _totalSent += docCount;
        _done = ! hasMoreBatches;

        return hasMoreBatches;
    }
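
The ntoreturn handling above packs two decisions into one integer: the sign (or the special value 1) selects single-batch mode, and the magnitude caps the batch size. A sketch of that decision split into a small struct; the names are illustrative:

    #include <cstdlib>

    // Sketch of the ntoreturn semantics documented above: zero or a value
    // greater than 1 permits further batches; negative values and 1 mean
    // "single batch only"; the magnitude (0 = unlimited) caps the batch.
    struct BatchPolicy {
        bool mayRequestMoreBatches;
        int batchCap;  // 0 means no explicit cap
    };

    static BatchPolicy policyFor(int ntoreturn) {
        BatchPolicy p;
        p.mayRequestMoreBatches = (ntoreturn == 0 || ntoreturn > 1);
        p.batchCap = std::abs(ntoreturn);
        return p;
    }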
Exemple #20
0
std::string runQuery(OperationContext* opCtx,
                     QueryMessage& q,
                     const NamespaceString& nss,
                     Message& result) {
    CurOp& curOp = *CurOp::get(opCtx);
    curOp.ensureStarted();

    uassert(ErrorCodes::InvalidNamespace,
            str::stream() << "Invalid ns [" << nss.ns() << "]",
            nss.isValid());
    invariant(!nss.isCommand());

    // Set CurOp information.
    const auto upconvertedQuery = upconvertQueryEntry(q.query, nss, q.ntoreturn, q.ntoskip);
    beginQueryOp(opCtx, nss, upconvertedQuery, q.ntoreturn, q.ntoskip);

    // Parse the qm into a CanonicalQuery.
    const boost::intrusive_ptr<ExpressionContext> expCtx;
    auto cq = uassertStatusOKWithContext(
        CanonicalQuery::canonicalize(opCtx,
                                     q,
                                     expCtx,
                                     ExtensionsCallbackReal(opCtx, &nss),
                                     MatchExpressionParser::kAllowAllSpecialFeatures),
        "Can't canonicalize query");
    invariant(cq.get());

    LOG(5) << "Running query:\n" << redact(cq->toString());
    LOG(2) << "Running query: " << redact(cq->toStringShort());

    // Parse, canonicalize, plan, transcribe, and get a plan executor.
    AutoGetCollectionForReadCommand ctx(opCtx, nss, AutoGetCollection::ViewMode::kViewsForbidden);
    Collection* const collection = ctx.getCollection();

    {
        const QueryRequest& qr = cq->getQueryRequest();

        // Allow the query to run on secondaries if the read preference permits it. If no read
        // preference was specified, allow the query to run iff slaveOk has been set.
        const bool slaveOK = qr.hasReadPref()
            ? uassertStatusOK(ReadPreferenceSetting::fromContainingBSON(q.query))
                  .canRunOnSecondary()
            : qr.isSlaveOk();
        uassertStatusOK(
            repl::ReplicationCoordinator::get(opCtx)->checkCanServeReadsFor(opCtx, nss, slaveOK));
    }

    // We have a parsed query. Time to get the execution plan for it.
    auto exec = uassertStatusOK(getExecutorLegacyFind(opCtx, collection, nss, std::move(cq)));

    const QueryRequest& qr = exec->getCanonicalQuery()->getQueryRequest();

    // If it's actually an explain, do the explain and return rather than falling through
    // to the normal query execution loop.
    if (qr.isExplain()) {
        BufBuilder bb;
        bb.skip(sizeof(QueryResult::Value));

        BSONObjBuilder explainBob;
        Explain::explainStages(
            exec.get(), collection, ExplainOptions::Verbosity::kExecAllPlans, &explainBob);

        // Add the resulting object to the return buffer.
        BSONObj explainObj = explainBob.obj();
        bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

        // Set query result fields.
        QueryResult::View qr = bb.buf();
        qr.setResultFlagsToOk();
        qr.msgdata().setLen(bb.len());
        curOp.debug().responseLength = bb.len();
        qr.msgdata().setOperation(opReply);
        qr.setCursorId(0);
        qr.setStartingFrom(0);
        qr.setNReturned(1);
        result.setData(bb.release());
        return "";
    }

    // Handle query option $maxTimeMS (not used with commands).
    if (qr.getMaxTimeMS() > 0) {
        uassert(40116,
                "Illegal attempt to set operation deadline within DBDirectClient",
                !opCtx->getClient()->isInDirectClient());
        opCtx->setDeadlineAfterNowBy(Milliseconds{qr.getMaxTimeMS()});
    }
    opCtx->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

    // Run the query.
    // bb is used to hold query results. This buffer should contain either the
    // documents requested by the query or explain information, but not both.
    BufBuilder bb(FindCommon::kInitReplyBufferSize);
    bb.skip(sizeof(QueryResult::Value));

    // How many results have we obtained from the executor?
    int numResults = 0;

    BSONObj obj;
    PlanExecutor::ExecState state;

    // Get summary info about which plan the executor is using.
    {
        stdx::lock_guard<Client> lk(*opCtx->getClient());
        curOp.setPlanSummary_inlock(Explain::getPlanSummary(exec.get()));
    }

    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // If we can't fit this result inside the current batch, then we stash it for later.
        if (!FindCommon::haveSpaceForNext(obj, numResults, bb.len())) {
            exec->enqueue(obj);
            break;
        }

        // Add result to output buffer.
        bb.appendBuf((void*)obj.objdata(), obj.objsize());

        // Count the result.
        ++numResults;

        if (FindCommon::enoughForFirstBatch(qr, numResults)) {
            LOG(5) << "Enough for first batch, wantMore=" << qr.wantMore()
                   << " ntoreturn=" << qr.getNToReturn().value_or(0)
                   << " numResults=" << numResults;
            break;
        }
    }

    // Caller expects exceptions thrown in certain cases.
    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        error() << "Plan executor error during find: " << PlanExecutor::statestr(state)
                << ", stats: " << redact(Explain::getWinningPlanStats(exec.get()));
        uassertStatusOKWithContext(WorkingSetCommon::getMemberObjectStatus(obj),
                                   "Executor error during OP_QUERY find");
        MONGO_UNREACHABLE;
    }

    // Before saving the cursor, ensure that whatever plan we established happened with the
    // expected collection version.
    auto css = CollectionShardingState::get(opCtx, nss);
    css->checkShardVersionOrThrow(opCtx);

    // Fill out CurOp based on query results. If we have a cursorid, we will fill out CurOp with
    // this cursorid later.
    long long ccId = 0;

    if (shouldSaveCursor(opCtx, collection, state, exec.get())) {
        // We won't use the executor until it's getMore'd.
        exec->saveState();
        exec->detachFromOperationContext();

        // Allocate a new ClientCursor and register it with the cursor manager.
        ClientCursorPin pinnedCursor = collection->getCursorManager()->registerCursor(
            opCtx,
            {std::move(exec),
             nss,
             AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserNames(),
             opCtx->recoveryUnit()->getReadConcernLevel(),
             upconvertedQuery});
        ccId = pinnedCursor.getCursor()->cursorid();

        LOG(5) << "caching executor with cursorid " << ccId << " after returning " << numResults
               << " results";

        // TODO document
        if (qr.isExhaust()) {
            curOp.debug().exhaust = true;
        }

        pinnedCursor.getCursor()->setPos(numResults);

        // We assume that cursors created through a DBDirectClient are always used from their
        // original OperationContext, so we do not need to move time to and from the cursor.
        if (!opCtx->getClient()->isInDirectClient()) {
            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).
            pinnedCursor.getCursor()->setLeftoverMaxTimeMicros(opCtx->getRemainingMaxTimeMicros());
        }

        endQueryOp(opCtx, collection, *pinnedCursor.getCursor()->getExecutor(), numResults, ccId);
    } else {
        LOG(5) << "Not caching executor but returning " << numResults << " results.";
        endQueryOp(opCtx, collection, *exec, numResults, ccId);
    }

    // Fill out the output buffer's header.
    QueryResult::View queryResultView = bb.buf();
    queryResultView.setCursorId(ccId);
    queryResultView.setResultFlagsToOk();
    queryResultView.msgdata().setLen(bb.len());
    queryResultView.msgdata().setOperation(opReply);
    queryResultView.setStartingFrom(0);
    queryResultView.setNReturned(numResults);

    // Add the results from the query into the output buffer.
    result.setData(bb.release());

    // curOp.debug().exhaust is set above.
    return curOp.debug().exhaust ? nss.ns() : "";
}
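
The getNext loop in this version stops for two independent reasons: the reply buffer has no room for the next document (which is then enqueued for a later getMore) or the first-batch quota is met. Below is a sketch of a buffer-space predicate in the same spirit; the 16MB cap is a placeholder, not the actual value behind FindCommon::haveSpaceForNext.

    #include <cstddef>

    // Sketch of a "does the next document fit?" check in the spirit of the
    // FindCommon::haveSpaceForNext call above. The byte cap is a placeholder.
    static bool haveSpaceForNextSketch(std::size_t nextDocSize,
                                       int numDocsSoFar,
                                       std::size_t bytesBuffered) {
        const std::size_t kMaxReplyBytes = 16 * 1024 * 1024;
        // Always admit the first document, so a single large doc can be returned.
        if (numDocsSoFar == 0)
            return true;
        return bytesBuffered + nextDocSize <= kMaxReplyBytes;
    }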
Exemple #21
0
    /* we write to local.oplog.$main:
         { ts : ..., op: ..., ns: ..., o: ... }
       ts: an OpTime timestamp
       op:
        "i" insert
        "u" update
        "d" delete
        "c" db cmd
        "db" declares presence of a database (ns is set to the db name + '.')
        "n" no op
       logNS - where to log it.  0/null means "local.oplog.$main".
       bb:
         if not null, specifies a boolean to pass along to the other side as b: param.
         used for "justOne" or "upsert" flags on 'd', 'u'
       first: true
         when set, indicates this is the first thing we have logged for this database.
         thus, the slave does not need to copy down all the data when it sees this.

       note this is used for single collection logging even when --replSet is enabled.
    */
    static void _logOpOld(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) {
        DEV assertInWriteLock();
        static BufBuilder bufbuilder(8*1024);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        const OpTime ts = OpTime::now();
        Client::Context context;

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        int po_sz = partial.objsize();
        int len = po_sz + obj.objsize() + 1 + 2 /*o:*/;

        Record *r;
        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
            if ( localOplogMainDetails == 0 ) {
                Client::Context ctx( logNS , dbpath, 0, false);
                localDB = ctx.db();
                assert( localDB );
                localOplogMainDetails = nsdetails(logNS);
                assert( localOplogMainDetails );
            }
            Client::Context ctx( logNS , localDB, false );
            r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len);
        }
        else {
            Client::Context ctx( logNS, dbpath, 0, false );
            assert( nsdetails( logNS ) );
            // first we allocate the space, then we fill it below.
            r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len);
        }

        append_O_Obj(r->data, partial, obj);

        context.getClient()->setLastOp( ts.asDate() );

        if ( logLevel >= 6 ) {
            BSONObj temp(r);
            log( 6 ) << "logging op:" << temp << endl;
        }

    }
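
Given the field list in the comment at the top of this example, a single logged update would look roughly like the following; the values are invented for illustration:

    // Illustrative only -- the shape of one oplog entry for an update,
    // following the field list documented above. All values are made up.
    //
    //   {
    //     ts: Timestamp(1371234567, 1),    // OpTime of the write
    //     op: "u",                         // update
    //     ns: "test.users",                // collection written to
    //     b:  true,                        // boolean passed via bb ("justOne"/"upsert")
    //     o2: { _id: 42 },                 // update criteria
    //     o:  { $set: { name: "sara" } }   // update spec
    //   }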
Exemple #22
0
                massert(14821, "No client or lazy client specified, cannot store multi-host connection.", false);
        }
        else {
            _scopedHost = conn->getHost();
        }

        conn->done();
        _client = 0;
        _lazyHost = "";
    }

    DBClientCursor::~DBClientCursor() {
        DESTRUCTOR_GUARD (

        if ( cursorId && _ownCursor && ! inShutdown() ) {
            BufBuilder b;
            b.appendNum( (int)0 ); // reserved
            b.appendNum( (int)1 ); // number
            b.appendNum( cursorId );
            
            Message m;
            m.setData( dbKillCursors , b.buf() , b.len() );

            if ( _client ) {

                // Kill the cursor the same way the connection itself would.  Usually, non-lazily
                if( DBClientConnection::getLazyKillCursor() )
                    _client->sayPiggyBack( m );
                else
                    _client->say( m );
Exemple #23
0
int main( int argc, const char **argv ) {

    const char *port = "27017";
    if ( argc != 1 ) {
        if ( argc != 3 )
            throw -12;
        port = argv[ 2 ];
    }

    DBClientConnection conn;
    string errmsg;
    if ( ! conn.connect( string( "127.0.0.1:" ) + port , errmsg ) ) {
        cout << "couldn't connect : " << errmsg << endl;
        throw -11;
    }

    const char * ns = "test.test1";

    conn.dropCollection(ns);

    // clean up old data from any previous tests
    conn.remove( ns, BSONObj() );
    assert( conn.findOne( ns , BSONObj() ).isEmpty() );

    // test insert
    conn.insert( ns ,BSON( "name" << "eliot" << "num" << 1 ) );
    assert( ! conn.findOne( ns , BSONObj() ).isEmpty() );

    // test remove
    conn.remove( ns, BSONObj() );
    assert( conn.findOne( ns , BSONObj() ).isEmpty() );


    // insert, findOne testing
    conn.insert( ns , BSON( "name" << "eliot" << "num" << 1 ) );
    {
        BSONObj res = conn.findOne( ns , BSONObj() );
        assert( strstr( res.getStringField( "name" ) , "eliot" ) );
        assert( ! strstr( res.getStringField( "name2" ) , "eliot" ) );
        assert( 1 == res.getIntField( "num" ) );
    }


    // cursor
    conn.insert( ns ,BSON( "name" << "sara" << "num" << 2 ) );
    {
        auto_ptr<DBClientCursor> cursor = conn.query( ns , BSONObj() );
        int count = 0;
        while ( cursor->more() ) {
            count++;
            BSONObj obj = cursor->next();
        }
        assert( count == 2 );
    }

    {
        auto_ptr<DBClientCursor> cursor = conn.query( ns , BSON( "num" << 1 ) );
        int count = 0;
        while ( cursor->more() ) {
            count++;
            BSONObj obj = cursor->next();
        }
        assert( count == 1 );
    }

    {
        auto_ptr<DBClientCursor> cursor = conn.query( ns , BSON( "num" << 3 ) );
        int count = 0;
        while ( cursor->more() ) {
            count++;
            BSONObj obj = cursor->next();
        }
        assert( count == 0 );
    }

    // update
    {
        BSONObj res = conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() );
        assert( ! strstr( res.getStringField( "name2" ) , "eliot" ) );

        BSONObj after = BSONObjBuilder().appendElements( res ).append( "name2" , "h" ).obj();

        conn.update( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() , after );
        res = conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() );
        assert( ! strstr( res.getStringField( "name2" ) , "eliot" ) );
        assert( conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() ).isEmpty() );

        conn.update( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() , after );
        res = conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() );
        assert( strstr( res.getStringField( "name" ) , "eliot" ) );
        assert( strstr( res.getStringField( "name2" ) , "h" ) );
        assert( conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() ).isEmpty() );

        // upsert
        conn.update( ns , BSONObjBuilder().append( "name" , "eliot2" ).obj() , after , 1 );
        assert( ! conn.findOne( ns , BSONObjBuilder().append( "name" , "eliot" ).obj() ).isEmpty() );

    }

    { // ensure index
        assert( conn.ensureIndex( ns , BSON( "name" << 1 ) ) );
        assert( ! conn.ensureIndex( ns , BSON( "name" << 1 ) ) );
    }

    { // hint related tests
        assert( conn.findOne(ns, "{}")["name"].str() == "sara" );

        assert( conn.findOne(ns, "{ name : 'eliot' }")["name"].str() == "eliot" );
        assert( conn.getLastError() == "" );

        // nonexistent index test
        bool asserted = false;
        try {
            conn.findOne(ns, Query("{name:\"eliot\"}").hint("{foo:1}"));
        }
        catch ( ... ){
            asserted = true;
        }
        assert( asserted );

        //existing index
        assert( conn.findOne(ns, Query("{name:'eliot'}").hint("{name:1}")).hasElement("name") );

        // run validate
        assert( conn.validate( ns ) );
    }

    { // timestamp test

        const char * tsns = "test.tstest1";
        conn.dropCollection( tsns );

        {
            mongo::BSONObjBuilder b;
            b.appendTimestamp( "ts" );
            conn.insert( tsns , b.obj() );
        }

        mongo::BSONObj out = conn.findOne( tsns , mongo::BSONObj() );
        Date_t oldTime = out["ts"].timestampTime();
        unsigned int oldInc = out["ts"].timestampInc();

        {
            mongo::BSONObjBuilder b1;
            b1.append( out["_id"] );

            mongo::BSONObjBuilder b2;
            b2.append( out["_id"] );
            b2.appendTimestamp( "ts" );

            conn.update( tsns , b1.obj() , b2.obj() );
        }

        BSONObj found = conn.findOne( tsns , mongo::BSONObj() );
        cout << "old: " << out << "\nnew: " << found << endl;
        assert( ( oldTime < found["ts"].timestampTime() ) ||
                ( oldTime == found["ts"].timestampTime() && oldInc < found["ts"].timestampInc() ) );

    }
    
    { // check that killcursors doesn't affect last error
        assert( conn.getLastError().empty() );
        
        BufBuilder b;
        b.appendNum( (int)0 ); // reserved
        b.appendNum( (int)-1 ); // invalid # of cursors triggers exception
        b.appendNum( (int)-1 ); // bogus cursor id
        
        Message m;
        m.setData( dbKillCursors, b.buf(), b.len() );
        
        // say() is protected in DBClientConnection, so get superclass
        static_cast< DBConnector* >( &conn )->say( m );
        
        assert( conn.getLastError().empty() );
    }

    {
        list<string> l = conn.getDatabaseNames();
        for ( list<string>::iterator i = l.begin(); i != l.end(); i++ ){
            cout << "db name : " << *i << endl;
        }

        l = conn.getCollectionNames( "test" );
        for ( list<string>::iterator i = l.begin(); i != l.end(); i++ ){
            cout << "coll name : " << *i << endl;
        }
    }

    cout << "client test finished!" << endl;
}
Exemple #24
0
    /**
     * Run a query -- includes checking for and running a Command.
     * @return points to ns if exhaust mode. 0=normal mode
     * @locks the db mutex for reading (and potentially for writing temporarily to create a new db).
     * @yields the db mutex periodically after acquiring it.
     * @asserts on scan and order memory exhaustion and other cases.
     */
    const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
        shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
        ParsedQuery& pq( *pq_shared );
        BSONObj jsobj = q.query;
        int queryOptions = q.queryOptions;
        const char *ns = q.ns;

        if( logLevel >= 2 )
            log() << "runQuery called " << ns << " " << jsobj << endl;

        curop.debug().ns = ns;
        curop.debug().ntoreturn = pq.getNumToReturn();
        curop.debug().query = jsobj;
        curop.setQuery(jsobj);

        // Run a command.
        
        if ( pq.couldBeCommand() ) {
            BufBuilder bb;
            bb.skip(sizeof(QueryResult));
            BSONObjBuilder cmdResBuf;
            if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
                curop.debug().iscommand = true;
                curop.debug().query = jsobj;
                curop.markCommand();

                auto_ptr< QueryResult > qr;
                qr.reset( (QueryResult *) bb.buf() );
                bb.decouple();
                qr->setResultFlagsToOk();
                qr->len = bb.len();
                curop.debug().responseLength = bb.len();
                qr->setOperation(opReply);
                qr->cursorId = 0;
                qr->startingFrom = 0;
                qr->nReturned = 1;
                result.setData( qr.release(), true );
            }
            else {
                uasserted(13530, "bad or malformed command request?");
            }
            return 0;
        }

        bool explain = pq.isExplain();
        BSONObj order = pq.getOrder();
        BSONObj query = pq.getFilter();

        /* The ElemIter will not be happy if this isn't really an object, so throw
           an exception here when the query object is empty, which may indicate bad
           data from the client.
        */
        if ( query.objsize() == 0 ) {
            out() << "Bad query object?\n  jsobj:";
            out() << jsobj.toString() << "\n  query:";
            out() << query.toString() << endl;
            uassert( 10110 , "bad query object", false);
        }

        Client::ReadContext ctx( ns , dbpath ); // read locks
        const ConfigVersion shardingVersionAtStart = shardingState.getVersion( ns );

        replVerifyReadsOk(&pq);

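        // A tailable cursor stays open at the end of a capped collection so the
        // client can poll for newly inserted documents. Capped collections are
        // read back in insertion order, so only the default {$natural:1} order
        // is allowed here.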
        if ( pq.hasOption( QueryOption_CursorTailable ) ) {
            NamespaceDetails *d = nsdetails( ns );
            uassert( 13051, "tailable cursor requested on non capped collection", d && d->isCapped() );
            const BSONObj nat1 = BSON( "$natural" << 1 );
            if ( order.isEmpty() ) {
                order = nat1;
            }
            else {
                uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
            }
        }

        // Run a simple id query.
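        // Fast path ("idhack"): a query of the exact form { _id : <value> } can
        // be answered straight from the _id index, bypassing the query optimizer.
        // It is taken only when the collection is missing entirely or has an _id
        // index; otherwise we fall through to the regular query path below.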
        
        if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {

            int n = 0;
            bool nsFound = false;
            bool indexFound = false;

            BSONObj resObject;
            Client& c = cc();
            bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
            if ( nsFound == false || indexFound == true ) {
                
                if ( shardingState.needShardChunkManager( ns ) ) {
                    ShardChunkManagerPtr m = shardingState.getShardChunkManager( ns );
                    if ( m && ! m->belongsToMe( resObject ) ) {
                        // We have a document with this _id,
                        // but it doesn't belong to this shard,
                        // so return nothing.
                        resObject = BSONObj();
                        found = false;
                    }
                }

                BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
                bb.skip(sizeof(QueryResult));
                
                curop.debug().idhack = true;
                if ( found ) {
                    n = 1;
                    fillQueryResultFromObj( bb , pq.getFields() , resObject );
                }
                auto_ptr< QueryResult > qr;
                qr.reset( (QueryResult *) bb.buf() );
                bb.decouple();
                qr->setResultFlagsToOk();
                qr->len = bb.len();
                
                curop.debug().responseLength = bb.len();
                qr->setOperation(opReply);
                qr->cursorId = 0;
                qr->startingFrom = 0;
                qr->nReturned = n;
                result.setData( qr.release(), true );
                return NULL;
            }
        }
        
        // Run a regular query.
        
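        // If the optimizer already has a cached plan for this query shape,
        // capture its explain output up front so it can be reported as the
        // previously chosen plan in the explain results.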
        BSONObj oldPlan;
        if ( explain && ! pq.hasIndexSpecifier() ) {
            MultiPlanScanner mps( ns, query, order );
            if ( mps.usingCachedPlan() ) {
                oldPlan = mps.oldExplain().firstElement().embeddedObject()
                                          .firstElement().embeddedObject().getOwned();
            }
        }

        // The query may be retried if an in-memory sort overflows its size limit
        // and raises an assertion.
        for( int retry = 0; retry < 2; ++retry ) {
            try {
                return queryWithQueryOptimizer( m, queryOptions, ns, jsobj, curop, query, order,
                                               pq_shared, oldPlan, shardingVersionAtStart, result );
            } catch ( const QueryRetryException & ) {
                verify( retry == 0 );
            }
        }
        verify( false );
        return 0;
    }
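
Both reply-building branches above follow the same pattern: reserve room for the QueryResult header in a BufBuilder, append the BSON payload, then fill in the header fields in place and hand the buffer over to the Message. A distilled sketch of that pattern follows; replyWithObj is our name for it, not a server helper, while the types it uses (BufBuilder, QueryResult, Message, opReply) are the ones used in runQuery above.

    // Hypothetical helper: build a one-document OP_REPLY the way runQuery does.
    static void replyWithObj( const BSONObj& obj, Message& result ) {
        BufBuilder bb( sizeof( QueryResult ) + obj.objsize() + 32 );
        bb.skip( sizeof( QueryResult ) );             // reserve the header slot
        bb.appendBuf( obj.objdata(), obj.objsize() ); // payload: one BSON document

        auto_ptr< QueryResult > qr;
        qr.reset( (QueryResult *) bb.buf() );
        bb.decouple();                        // buffer ownership passes to qr
        qr->setResultFlagsToOk();
        qr->len = bb.len();
        qr->setOperation( opReply );
        qr->cursorId = 0;                     // no server-side cursor left open
        qr->startingFrom = 0;
        qr->nReturned = 1;
        result.setData( qr.release(), true ); // Message frees the buffer when done
    }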