Exemple #1
0
StatusWith<ReadPreferenceSetting> ClusterFind::extractUnwrappedReadPref(const BSONObj& cmdObj,
                                                                        const bool isSlaveOk) {
    BSONElement queryOptionsElt;
    auto status = bsonExtractTypedField(
        cmdObj, QueryRequest::kUnwrappedReadPrefField, BSONType::Object, &queryOptionsElt);
    if (status.isOK()) {
        // There must be a nested object containing the read preference if there is a queryOptions
        // field.
        BSONObj queryOptionsObj = queryOptionsElt.Obj();
        invariant(queryOptionsObj[QueryRequest::kWrappedReadPrefField].type() == BSONType::Object);
        BSONObj readPrefObj = queryOptionsObj[QueryRequest::kWrappedReadPrefField].Obj();

        auto readPref = ReadPreferenceSetting::fromBSON(readPrefObj);
        if (!readPref.isOK()) {
            return readPref.getStatus();
        }
        return readPref.getValue();
    } else if (status != ErrorCodes::NoSuchKey) {
        return status;
    }

    // If there is no explicit read preference, the value we use depends on the setting of the slave
    // ok bit.
    ReadPreference pref =
        isSlaveOk ? mongo::ReadPreference::SecondaryPreferred : mongo::ReadPreference::PrimaryOnly;
    return ReadPreferenceSetting(pref, TagSet());
}
    DBClientConnection& DBClientReplicaSet::slaveConn() {
        shared_ptr<ReadPreferenceSetting> readPref(
                new ReadPreferenceSetting(ReadPreference_SecondaryPreferred, TagSet()));
        DBClientConnection* conn = selectNodeUsingTags(readPref);

        uassert( 16369, str::stream() << "No good nodes available for set: "
                 << _getMonitor()->getName(), conn != NULL );

        return *conn;
    }
Exemple #3
0
StatusWith<ReadPreferenceSetting> ReadPreferenceSetting::fromBSON(const BSONObj& readPrefObj) {
    std::string modeStr;
    auto modeExtractStatus = bsonExtractStringField(readPrefObj, kModeFieldName, &modeStr);
    if (!modeExtractStatus.isOK()) {
        return modeExtractStatus;
    }

    ReadPreference mode;
    auto swReadPrefMode = parseReadPreferenceMode(modeStr);
    if (!swReadPrefMode.isOK()) {
        return swReadPrefMode.getStatus();
    }
    mode = std::move(swReadPrefMode.getValue());

    TagSet tags;
    BSONElement tagsElem;
    auto tagExtractStatus =
        bsonExtractTypedField(readPrefObj, kTagsFieldName, mongo::Array, &tagsElem);
    if (tagExtractStatus.isOK()) {
        tags = TagSet{BSONArray(tagsElem.Obj().getOwned())};

        // In accordance with the read preference spec, passing the default wildcard tagset
        // '[{}]' is the same as not passing a TagSet at all. Furthermore, passing an empty
        // TagSet with a non-primary ReadPreference is equivalent to passing the wildcard
        // ReadPreference.
        if (tags == TagSet() || tags == TagSet::primaryOnly()) {
            tags = defaultTagSetForMode(mode);
        }

        // If we are using a user supplied TagSet, check that it is compatible with
        // the readPreference mode.
        else if (ReadPreference::PrimaryOnly == mode && (tags != TagSet::primaryOnly())) {
            return Status(ErrorCodes::BadValue,
                          "Only empty tags are allowed with primary read preference");
        }
    }

    else if (ErrorCodes::NoSuchKey == tagExtractStatus) {
        tags = defaultTagSetForMode(mode);
    } else {
        return tagExtractStatus;
    }

    return ReadPreferenceSetting(mode, tags);
}
Exemple #4
0
StatusWith<ReadPreferenceSetting> ReadPreferenceSetting::fromInnerBSON(const BSONObj& readPrefObj) {
    std::string modeStr;
    auto modeExtractStatus = bsonExtractStringField(readPrefObj, kModeFieldName, &modeStr);
    if (!modeExtractStatus.isOK()) {
        return modeExtractStatus;
    }

    ReadPreference mode;
    auto swReadPrefMode = parseReadPreferenceMode(modeStr);
    if (!swReadPrefMode.isOK()) {
        return swReadPrefMode.getStatus();
    }
    mode = std::move(swReadPrefMode.getValue());

    TagSet tags;
    BSONElement tagsElem;
    auto tagExtractStatus =
        bsonExtractTypedField(readPrefObj, kTagsFieldName, mongo::Array, &tagsElem);
    if (tagExtractStatus.isOK()) {
        tags = TagSet{BSONArray(tagsElem.Obj().getOwned())};

        // In accordance with the read preference spec, passing the default wildcard tagset
        // '[{}]' is the same as not passing a TagSet at all. Furthermore, passing an empty
        // TagSet with a non-primary ReadPreference is equivalent to passing the wildcard
        // ReadPreference.
        if (tags == TagSet() || tags == TagSet::primaryOnly()) {
            tags = defaultTagSetForMode(mode);
        }

        // If we are using a user supplied TagSet, check that it is compatible with
        // the readPreference mode.
        else if (ReadPreference::PrimaryOnly == mode && (tags != TagSet::primaryOnly())) {
            return Status(ErrorCodes::BadValue,
                          "Only empty tags are allowed with primary read preference");
        }
    }

    else if (ErrorCodes::NoSuchKey == tagExtractStatus) {
        tags = defaultTagSetForMode(mode);
    } else {
        return tagExtractStatus;
    }

    long long maxStalenessSecondsValue;
    auto maxStalenessSecondsExtractStatus = bsonExtractIntegerFieldWithDefault(
        readPrefObj, kMaxStalenessSecondsFieldName, 0, &maxStalenessSecondsValue);

    if (!maxStalenessSecondsExtractStatus.isOK()) {
        return maxStalenessSecondsExtractStatus;
    }

    if (maxStalenessSecondsValue && maxStalenessSecondsValue < 0) {
        return Status(ErrorCodes::BadValue,
                      str::stream() << kMaxStalenessSecondsFieldName
                                    << " must be a non-negative integer");
    }

    if (maxStalenessSecondsValue && maxStalenessSecondsValue >= Seconds::max().count()) {
        return Status(ErrorCodes::BadValue,
                      str::stream() << kMaxStalenessSecondsFieldName << " value can not exceed "
                                    << Seconds::max().count());
    }

    if (maxStalenessSecondsValue && maxStalenessSecondsValue < kMinimalMaxStalenessValue.count()) {
        return Status(ErrorCodes::MaxStalenessOutOfRange,
                      str::stream() << kMaxStalenessSecondsFieldName
                                    << " value can not be less than "
                                    << kMinimalMaxStalenessValue.count());
    }

    if ((mode == ReadPreference::PrimaryOnly) && maxStalenessSecondsValue) {
        return Status(ErrorCodes::BadValue,
                      str::stream() << kMaxStalenessSecondsFieldName
                                    << " can not be set for the primary mode");
    }

    return ReadPreferenceSetting(mode, tags, Seconds(maxStalenessSecondsValue));
}
Exemple #5
0
void Strategy::queryOp(OperationContext* txn, Request& request) {
    verify(!NamespaceString(request.getns()).isCommand());

    Timer queryTimer;

    globalOpCounters.gotQuery();

    QueryMessage q(request.d());

    NamespaceString ns(q.ns);
    ClientBasic* client = txn->getClient();
    AuthorizationSession* authSession = AuthorizationSession::get(client);
    Status status = authSession->checkAuthForFind(ns, false);
    audit::logQueryAuthzCheck(client, ns, q.query, status.code());
    uassertStatusOK(status);

    LOG(3) << "query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn
           << " options: " << q.queryOptions;

    if (q.ntoreturn == 1 && strstr(q.ns, ".$cmd"))
        throw UserException(8010, "something is wrong, shouldn't see a command here");

    if (q.queryOptions & QueryOption_Exhaust) {
        uasserted(18526,
                  string("the 'exhaust' query option is invalid for mongos queries: ") + q.ns +
                      " " + q.query.toString());
    }

    // Spigot which controls whether OP_QUERY style find on mongos uses the new ClusterClientCursor
    // code path.
    // TODO: Delete the spigot and always use the new code.
    if (useClusterClientCursor) {
        // Determine the default read preference mode based on the value of the slaveOk flag.
        ReadPreference readPreferenceOption = (q.queryOptions & QueryOption_SlaveOk)
            ? ReadPreference::SecondaryPreferred
            : ReadPreference::PrimaryOnly;
        ReadPreferenceSetting readPreference(readPreferenceOption, TagSet());

        BSONElement rpElem;
        auto readPrefExtractStatus = bsonExtractTypedField(
            q.query, LiteParsedQuery::kWrappedReadPrefField, mongo::Object, &rpElem);

        if (readPrefExtractStatus.isOK()) {
            auto parsedRps = ReadPreferenceSetting::fromBSON(rpElem.Obj());
            uassertStatusOK(parsedRps.getStatus());
            readPreference = parsedRps.getValue();
        } else if (readPrefExtractStatus != ErrorCodes::NoSuchKey) {
            uassertStatusOK(readPrefExtractStatus);
        }

        auto canonicalQuery = CanonicalQuery::canonicalize(q, WhereCallbackNoop());
        uassertStatusOK(canonicalQuery.getStatus());

        // If the $explain flag was set, we must run the operation on the shards as an explain
        // command rather than a find command.
        if (canonicalQuery.getValue()->getParsed().isExplain()) {
            const LiteParsedQuery& lpq = canonicalQuery.getValue()->getParsed();
            BSONObj findCommand = lpq.asFindCommand();

            // We default to allPlansExecution verbosity.
            auto verbosity = ExplainCommon::EXEC_ALL_PLANS;

            const bool secondaryOk = (readPreference.pref != ReadPreference::PrimaryOnly);
            rpc::ServerSelectionMetadata metadata(secondaryOk, readPreference);

            BSONObjBuilder explainBuilder;
            uassertStatusOK(ClusterFind::runExplain(
                txn, findCommand, lpq, verbosity, metadata, &explainBuilder));

            BSONObj explainObj = explainBuilder.done();
            replyToQuery(0,  // query result flags
                         request.p(),
                         request.m(),
                         static_cast<const void*>(explainObj.objdata()),
                         explainObj.objsize(),
                         1,  // numResults
                         0,  // startingFrom
                         CursorId(0));
            return;
        }

        // Do the work to generate the first batch of results. This blocks waiting to get responses
        // from the shard(s).
        std::vector<BSONObj> batch;

        // 0 means the cursor is exhausted and
        // otherwise we assume that a cursor with the returned id can be retrieved via the
        // ClusterCursorManager
        auto cursorId =
            ClusterFind::runQuery(txn, *canonicalQuery.getValue(), readPreference, &batch);
        uassertStatusOK(cursorId.getStatus());

        // TODO: this constant should be shared between mongos and mongod, and should
        // not be inside ShardedClientCursor.
        BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE);

        // Fill out the response buffer.
        int numResults = 0;
        for (const auto& obj : batch) {
            buffer.appendBuf((void*)obj.objdata(), obj.objsize());
            numResults++;
        }

        replyToQuery(0,  // query result flags
                     request.p(),
                     request.m(),
                     buffer.buf(),
                     buffer.len(),
                     numResults,
                     0,  // startingFrom
                     cursorId.getValue());
        return;
    }

    QuerySpec qSpec((string)q.ns, q.query, q.fields, q.ntoskip, q.ntoreturn, q.queryOptions);

    // Parse "$maxTimeMS".
    StatusWith<int> maxTimeMS = LiteParsedQuery::parseMaxTimeMSQuery(q.query);
    uassert(17233, maxTimeMS.getStatus().reason(), maxTimeMS.isOK());

    if (_isSystemIndexes(q.ns) && doShardedIndexQuery(txn, request, qSpec)) {
        return;
    }

    ParallelSortClusteredCursor* cursor = new ParallelSortClusteredCursor(qSpec, CommandInfo());
    verify(cursor);

    // TODO:  Move out to Request itself, not strategy based
    try {
        cursor->init(txn);

        if (qSpec.isExplain()) {
            BSONObjBuilder explain_builder;
            cursor->explain(explain_builder);
            explain_builder.appendNumber("executionTimeMillis",
                                         static_cast<long long>(queryTimer.millis()));
            BSONObj b = explain_builder.obj();

            replyToQuery(0, request.p(), request.m(), b);
            delete (cursor);
            return;
        }
    } catch (...) {
        delete cursor;
        throw;
    }

    // TODO: Revisit all of this when we revisit the sharded cursor cache

    if (cursor->getNumQueryShards() != 1) {
        // More than one shard (or zero), manage with a ShardedClientCursor
        // NOTE: We may also have *zero* shards here when the returnPartial flag is set.
        // Currently the code in ShardedClientCursor handles this.

        ShardedClientCursorPtr cc(new ShardedClientCursor(q, cursor));

        BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE);
        int docCount = 0;
        const int startFrom = cc->getTotalSent();
        bool hasMore = cc->sendNextBatch(q.ntoreturn, buffer, docCount);

        if (hasMore) {
            LOG(5) << "storing cursor : " << cc->getId();

            int cursorLeftoverMillis = maxTimeMS.getValue() - queryTimer.millis();
            if (maxTimeMS.getValue() == 0) {  // 0 represents "no limit".
                cursorLeftoverMillis = kMaxTimeCursorNoTimeLimit;
            } else if (cursorLeftoverMillis <= 0) {
                cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired;
            }

            cursorCache.store(cc, cursorLeftoverMillis);
        }

        replyToQuery(0,
                     request.p(),
                     request.m(),
                     buffer.buf(),
                     buffer.len(),
                     docCount,
                     startFrom,
                     hasMore ? cc->getId() : 0);
    } else {
        // Only one shard is used

        // Remote cursors are stored remotely, we shouldn't need this around.
        unique_ptr<ParallelSortClusteredCursor> cursorDeleter(cursor);

        ShardPtr shard = grid.shardRegistry()->getShard(txn, cursor->getQueryShardId());
        verify(shard.get());
        DBClientCursorPtr shardCursor = cursor->getShardCursor(shard->getId());

        // Implicitly stores the cursor in the cache
        request.reply(*(shardCursor->getMessage()), shardCursor->originalHost());

        // We don't want to kill the cursor remotely if there's still data left
        shardCursor->decouple();
    }
}
    void DBClientReplicaSet::_auth( const BSONObj& params ) {

        // We prefer to authenticate against a primary, but otherwise a secondary is ok too
        // Empty tag matches every secondary
        shared_ptr<ReadPreferenceSetting> readPref(
            new ReadPreferenceSetting( ReadPreference_PrimaryPreferred, TagSet() ) );

        LOG(3) << "dbclient_rs authentication of " << _getMonitor()->getName() << endl;

        // NOTE that we retry MAX_RETRY + 1 times, since we're always primary preferred we don't
        // fallback to the primary.
        Status lastNodeStatus = Status::OK();
        for ( size_t retry = 0; retry < MAX_RETRY + 1; retry++ ) {
            try {
                DBClientConnection* conn = selectNodeUsingTags( readPref );

                if ( conn == NULL ) {
                    break;
                }

                conn->auth( params );

                // Cache the new auth information since we now validated it's good
                _auths[params[saslCommandUserDBFieldName].str()] = params.getOwned();

                // Ensure the only child connection open is the one we authenticated against - other
                // child connections may not have full authentication information.
                // NOTE: _lastSlaveOkConn may or may not be the same as _master
                dassert(_lastSlaveOkConn.get() == conn || _master.get() == conn);
                if ( conn != _lastSlaveOkConn.get() ) {
                    _lastSlaveOkHost = HostAndPort();
                    _lastSlaveOkConn.reset();
                }
                if ( conn != _master.get() ) {
                    _masterHost = HostAndPort();
                    _master.reset();
                }

                return;
            }
            catch ( const DBException &ex ) {

                // We care if we can't authenticate (i.e. bad password) in credential params.
                if ( isAuthenticationException( ex ) ) {
                    throw;
                }

                StringBuilder errMsgB;
                errMsgB << "can't authenticate against replica set node "
                        << _lastSlaveOkHost.toString();
                lastNodeStatus = ex.toStatus( errMsgB.str() );

                LOG(1) << lastNodeStatus.reason() << endl;
                invalidateLastSlaveOkCache();
            }
        }

        if ( lastNodeStatus.isOK() ) {
            StringBuilder assertMsgB;
            assertMsgB << "Failed to authenticate, no good nodes in " << _getMonitor()->getName();
            uasserted( ErrorCodes::NodeNotFound, assertMsgB.str() );
        }
        else {
            uasserted( lastNodeStatus.code(), lastNodeStatus.reason() );
        }
    }