Пример #1
0
/**
 * Apply the read concern from the cursor to this operation.
 */
void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArgs) {
    const auto replicationMode = repl::ReplicationCoordinator::get(opCtx)->getReplicationMode();

    // Select the appropriate read source.
    if (replicationMode == repl::ReplicationCoordinator::modeReplSet &&
        rcArgs.getLevel() == repl::ReadConcernLevel::kMajorityReadConcern) {
        switch (rcArgs.getMajorityReadMechanism()) {
            case repl::ReadConcernArgs::MajorityReadMechanism::kMajoritySnapshot: {
                // Make sure we read from the majority snapshot.
                opCtx->recoveryUnit()->setTimestampReadSource(
                    RecoveryUnit::ReadSource::kMajorityCommitted);
                uassertStatusOK(opCtx->recoveryUnit()->obtainMajorityCommittedSnapshot());
                break;
            }
            case repl::ReadConcernArgs::MajorityReadMechanism::kSpeculative: {
                // Mark the operation as speculative and select the correct read source.
                repl::SpeculativeMajorityReadInfo::get(opCtx).setIsSpeculativeRead();
                opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
                break;
            }
        }
    }

    // For cursor commands that take locks internally, the read concern on the
    // OperationContext may affect the timestamp read source selected by the storage engine.
    // We place the cursor read concern onto the OperationContext so the lock acquisition
    // respects the cursor's read concern.
    {
        stdx::lock_guard<Client> lk(*opCtx->getClient());
        repl::ReadConcernArgs::get(opCtx) = rcArgs;
    }
}
Пример #2
0
StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig(
    OperationContext* txn,
    const ReadPreferenceSetting& readPref,
    const repl::ReadConcernLevel& readConcernLevel,
    const NamespaceString& nss,
    const BSONObj& query,
    const BSONObj& sort,
    boost::optional<long long> limit) {
    invariant(getId() == "config");
    ReadPreferenceSetting readPrefWithMinOpTime(readPref);
    readPrefWithMinOpTime.minOpTime = grid.configOpTime();

    const auto host = _targeter->findHost(txn, readPrefWithMinOpTime);
    if (!host.isOK()) {
        return host.getStatus();
    }

    QueryResponse response;

    // If for some reason the callback never gets invoked, we will return this status in response.
    Status status = Status(ErrorCodes::InternalError, "Internal error running find command");

    auto fetcherCallback =
        [this, &status, &response](const Fetcher::QueryResponseStatus& dataStatus,
                                   Fetcher::NextAction* nextAction,
                                   BSONObjBuilder* getMoreBob) {

            // Throw out any accumulated results on error
            if (!dataStatus.isOK()) {
                status = dataStatus.getStatus();
                response.docs.clear();
                return;
            }

            auto& data = dataStatus.getValue();
            if (data.otherFields.metadata.hasField(rpc::kReplSetMetadataFieldName)) {
                auto replParseStatus =
                    rpc::ReplSetMetadata::readFromMetadata(data.otherFields.metadata);

                if (!replParseStatus.isOK()) {
                    status = replParseStatus.getStatus();
                    response.docs.clear();
                    return;
                }

                response.opTime = replParseStatus.getValue().getLastOpCommitted();

                // We return the config opTime that was returned for this particular request, but as
                // a safeguard we ensure our global configOpTime is at least as large as it.
                invariant(grid.configOpTime() >= response.opTime);
            }

            for (const BSONObj& doc : data.documents) {
                response.docs.push_back(doc.getOwned());
            }

            status = Status::OK();

            if (!getMoreBob) {
                return;
            }
            getMoreBob->append("getMore", data.cursorId);
            getMoreBob->append("collection", data.nss.coll());
        };

    BSONObj readConcernObj;
    {
        invariant(readConcernLevel == repl::ReadConcernLevel::kMajorityReadConcern);
        const repl::ReadConcernArgs readConcern{grid.configOpTime(), readConcernLevel};
        BSONObjBuilder bob;
        readConcern.appendInfo(&bob);
        readConcernObj =
            bob.done().getObjectField(repl::ReadConcernArgs::kReadConcernFieldName).getOwned();
    }

    const Milliseconds maxTimeMS =
        std::min(txn->getRemainingMaxTimeMillis(), kDefaultConfigCommandTimeout);

    BSONObjBuilder findCmdBuilder;

    {
        QueryRequest qr(nss);
        qr.setFilter(query);
        qr.setSort(sort);
        qr.setReadConcern(readConcernObj);
        qr.setLimit(limit);

        if (maxTimeMS < Milliseconds::max()) {
            qr.setMaxTimeMS(durationCount<Milliseconds>(maxTimeMS));
        }

        qr.asFindCommand(&findCmdBuilder);
    }

    Fetcher fetcher(Grid::get(txn)->getExecutorPool()->getFixedExecutor(),
                    host.getValue(),
                    nss.db().toString(),
                    findCmdBuilder.done(),
                    fetcherCallback,
                    _appendMetadataForCommand(txn, readPrefWithMinOpTime),
                    maxTimeMS);
    Status scheduleStatus = fetcher.schedule();
    if (!scheduleStatus.isOK()) {
        return scheduleStatus;
    }

    fetcher.join();

    updateReplSetMonitor(host.getValue(), status);

    if (!status.isOK()) {
        if (status.compareCode(ErrorCodes::ExceededTimeLimit)) {
            LOG(0) << "Operation timed out " << causedBy(status);
        }
        return status;
    }

    return response;
}
Пример #3
0
StatusWith<Shard::QueryResponse> ShardRemote::_exhaustiveFindOnConfig(
    OperationContext* txn,
    const ReadPreferenceSetting& readPref,
    const repl::ReadConcernLevel& readConcernLevel,
    const NamespaceString& nss,
    const BSONObj& query,
    const BSONObj& sort,
    boost::optional<long long> limit) {
    // Do not allow exhaustive finds to be run against regular shards.
    invariant(getId() == "config");

    const auto host =
        _targeter->findHost(readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(txn));
    if (!host.isOK()) {
        return host.getStatus();
    }

    QueryResponse response;

    // If for some reason the callback never gets invoked, we will return this status in response.
    Status status = Status(ErrorCodes::InternalError, "Internal error running find command");

    auto fetcherCallback =
        [this, &status, &response](const Fetcher::QueryResponseStatus& dataStatus,
                                   Fetcher::NextAction* nextAction,
                                   BSONObjBuilder* getMoreBob) {

            // Throw out any accumulated results on error
            if (!dataStatus.isOK()) {
                status = dataStatus.getStatus();
                response.docs.clear();
                return;
            }

            auto& data = dataStatus.getValue();
            if (data.otherFields.metadata.hasField(rpc::kReplSetMetadataFieldName)) {
                auto replParseStatus =
                    rpc::ReplSetMetadata::readFromMetadata(data.otherFields.metadata);

                if (!replParseStatus.isOK()) {
                    status = replParseStatus.getStatus();
                    response.docs.clear();
                    return;
                }

                response.opTime = replParseStatus.getValue().getLastOpCommitted();

                // We return the config opTime that was returned for this particular request, but as
                // a safeguard we ensure our global configOpTime is at least as large as it.
                invariant(grid.configOpTime() >= response.opTime);
            }

            for (const BSONObj& doc : data.documents) {
                response.docs.push_back(doc.getOwned());
            }

            status = Status::OK();

            if (!getMoreBob) {
                return;
            }
            getMoreBob->append("getMore", data.cursorId);
            getMoreBob->append("collection", data.nss.coll());
        };

    BSONObj readConcernObj;
    {
        invariant(readConcernLevel == repl::ReadConcernLevel::kMajorityReadConcern);
        const repl::ReadConcernArgs readConcern{grid.configOpTime(), readConcernLevel};
        BSONObjBuilder bob;
        readConcern.appendInfo(&bob);
        readConcernObj =
            bob.done().getObjectField(repl::ReadConcernArgs::kReadConcernFieldName).getOwned();
    }

    auto qr = stdx::make_unique<QueryRequest>(nss);
    qr->setFilter(query);
    qr->setSort(sort);
    qr->setReadConcern(readConcernObj);
    qr->setLimit(limit);

    BSONObjBuilder findCmdBuilder;
    qr->asFindCommand(&findCmdBuilder);

    Microseconds maxTime = std::min(duration_cast<Microseconds>(kConfigCommandTimeout),
                                    txn->getRemainingMaxTimeMicros());
    if (maxTime < Milliseconds{1}) {
        // If there is less than 1ms remaining before the maxTime timeout expires, set the max time
        // to 1ms, since setting maxTimeMs to 1ms in a find command means "no max time".
        maxTime = Milliseconds{1};
    }

    findCmdBuilder.append(QueryRequest::cmdOptionMaxTimeMS, durationCount<Milliseconds>(maxTime));

    Fetcher fetcher(Grid::get(txn)->getExecutorPool()->getFixedExecutor(),
                    host.getValue(),
                    nss.db().toString(),
                    findCmdBuilder.done(),
                    fetcherCallback,
                    _getMetadataForCommand(readPref),
                    duration_cast<Milliseconds>(maxTime));
    Status scheduleStatus = fetcher.schedule();
    if (!scheduleStatus.isOK()) {
        return scheduleStatus;
    }

    fetcher.wait();

    updateReplSetMonitor(host.getValue(), status);

    if (!status.isOK()) {
        if (status.compareCode(ErrorCodes::ExceededTimeLimit)) {
            LOG(0) << "Operation timed out with status " << status;
        }
        return status;
    }

    return response;
}
Пример #4
0
Status waitForReadConcern(OperationContext* txn, const repl::ReadConcernArgs& readConcernArgs) {
    repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(txn);

    if (readConcernArgs.getLevel() == repl::ReadConcernLevel::kLinearizableReadConcern) {
        if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) {
            // For master/slave and standalone nodes, Linearizable Read is not supported.
            return {ErrorCodes::NotAReplicaSet,
                    "node needs to be a replica set member to use read concern"};
        }

        // Replica sets running pv0 do not support linearizable read concern until further testing
        // is completed (SERVER-27025).
        if (!replCoord->isV1ElectionProtocol()) {
            return {
                ErrorCodes::IncompatibleElectionProtocol,
                "Replica sets running protocol version 0 do not support readConcern: linearizable"};
        }

        if (!readConcernArgs.getOpTime().isNull()) {
            return {ErrorCodes::FailedToParse,
                    "afterOpTime not compatible with linearizable read concern"};
        }

        if (!replCoord->getMemberState().primary()) {
            return {ErrorCodes::NotMaster,
                    "cannot satisfy linearizable read concern on non-primary node"};
        }
    }

    // Skip waiting for the OpTime when testing snapshot behavior
    if (!testingSnapshotBehaviorInIsolation && !readConcernArgs.isEmpty()) {
        Status status = replCoord->waitUntilOpTimeForRead(txn, readConcernArgs);
        if (!status.isOK()) {
            return status;
        }
    }

    if ((replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet ||
         testingSnapshotBehaviorInIsolation) &&
        readConcernArgs.getLevel() == repl::ReadConcernLevel::kMajorityReadConcern) {
        // ReadConcern Majority is not supported in ProtocolVersion 0.
        if (!testingSnapshotBehaviorInIsolation && !replCoord->isV1ElectionProtocol()) {
            return {ErrorCodes::ReadConcernMajorityNotEnabled,
                    str::stream() << "Replica sets running protocol version 0 do not support "
                                     "readConcern: majority"};
        }

        const int debugLevel = serverGlobalParams.clusterRole == ClusterRole::ConfigServer ? 1 : 2;

        LOG(debugLevel) << "Waiting for 'committed' snapshot to be available for reading: "
                        << readConcernArgs;

        Status status = txn->recoveryUnit()->setReadFromMajorityCommittedSnapshot();

        // Wait until a snapshot is available.
        while (status == ErrorCodes::ReadConcernMajorityNotAvailableYet) {
            LOG(debugLevel) << "Snapshot not available yet.";
            replCoord->waitUntilSnapshotCommitted(txn, SnapshotName::min());
            status = txn->recoveryUnit()->setReadFromMajorityCommittedSnapshot();
        }

        if (!status.isOK()) {
            return status;
        }

        LOG(debugLevel) << "Using 'committed' snapshot: " << CurOp::get(txn)->query();
    }

    return Status::OK();
}