Example no. 1
    Status LegacyReplicationCoordinator::canServeReadsFor(OperationContext* txn,
                                                          const NamespaceString& ns,
                                                          bool slaveOk) {
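        // Decision ladder: internal ("god") clients, primaries, and SimpleSlave
        // master/slave nodes may always read; all other reads require slaveOk
        // and a member state that permits reads.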
        if (txn->getClient()->isGod()) {
            return Status::OK();
        }
        if (canAcceptWritesForDatabase(ns.db())) {
            return Status::OK();
        }
        if (getReplicationMode() == modeMasterSlave && _settings.slave == SimpleSlave) {
            return Status::OK();
        }
        if (slaveOk) {
            if (getReplicationMode() == modeMasterSlave || getReplicationMode() == modeNone) {
                return Status::OK();
            }
            if (getCurrentMemberState().secondary()) {
                return Status::OK();
            }
            return Status(ErrorCodes::NotMasterOrSecondaryCode,
                          "not master or secondary; cannot currently read from this replSet member");
        }
        return Status(ErrorCodes::NotMasterNoSlaveOkCode,
                      "not master and slaveOk=false");
    }
Example no. 2
void MongoDSessionCatalog::invalidateSessions(OperationContext* opCtx,
                                              boost::optional<BSONObj> singleSessionDoc) {
    const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    bool isReplSet = replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
    if (isReplSet) {
        uassert(40528,
                str::stream() << "Direct writes against "
                              << NamespaceString::kSessionTransactionsTableNamespace.ns()
                              << " cannot be performed using a transaction or on a session.",
                !opCtx->getLogicalSessionId());
    }

    const auto catalog = SessionCatalog::get(opCtx);

    // The use of shared_ptr here is in order to work around the limitation of stdx::function that
    // the functor must be copyable.
    auto sessionKillTokens = std::make_shared<std::vector<SessionCatalog::KillToken>>();

    if (singleSessionDoc) {
        sessionKillTokens->emplace_back(catalog->killSession(LogicalSessionId::parse(
            IDLParserErrorContext("lsid"), singleSessionDoc->getField("_id").Obj())));
    } else {
        SessionKiller::Matcher matcher(
            KillAllSessionsByPatternSet{makeKillAllSessionsByPattern(opCtx)});
        catalog->scanSessions(matcher, [&sessionKillTokens](const ObservableSession& session) {
            sessionKillTokens->emplace_back(session.kill());
        });
    }

    killSessionTokensFunction(opCtx, sessionKillTokens);
}
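
The shared_ptr trick commented above is a general C++ idiom: a lambda that captures a move-only value is itself move-only, so it cannot be stored in a copyable function wrapper such as std::function. A minimal standalone sketch of the idiom, with a hypothetical MoveOnlyToken standing in for SessionCatalog::KillToken:

    #include <functional>
    #include <memory>
    #include <vector>

    // Hypothetical stand-in for a move-only type like SessionCatalog::KillToken.
    struct MoveOnlyToken {
        MoveOnlyToken() = default;
        MoveOnlyToken(MoveOnlyToken&&) = default;
        MoveOnlyToken(const MoveOnlyToken&) = delete;
    };

    int main() {
        // Capturing the vector itself by value would make the lambda move-only
        // and unstorable in std::function; sharing it through a shared_ptr keeps
        // the lambda copyable.
        auto tokens = std::make_shared<std::vector<MoveOnlyToken>>();
        std::function<void()> fn = [tokens] { tokens->emplace_back(); };
        fn();
        return tokens->size() == 1 ? 0 : 1;
    }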
Example no. 3
    void appendReplyMetadata(OperationContext* opCtx,
                             const OpMsgRequest& request,
                             BSONObjBuilder* metadataBob) const override {
        const bool isShardingAware = ShardingState::get(opCtx)->enabled();
        const bool isConfig = serverGlobalParams.clusterRole == ClusterRole::ConfigServer;
        auto const replCoord = repl::ReplicationCoordinator::get(opCtx);
        const bool isReplSet =
            replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;

        if (isReplSet) {
            // Attach our own last opTime.
            repl::OpTime lastOpTimeFromClient =
                repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
            replCoord->prepareReplMetadata(request.body, lastOpTimeFromClient, metadataBob);
            // For commands from mongos, append some info to help getLastError(w) work.
            // TODO: refactor out of here as part of SERVER-18236
            if (isShardingAware || isConfig) {
                rpc::ShardingMetadata(lastOpTimeFromClient, replCoord->getElectionId())
                    .writeToMetadata(metadataBob)
                    .transitional_ignore();

                auto lastCommittedOpTime = replCoord->getLastCommittedOpTime();
                metadataBob->append(kLastCommittedOpTimeFieldName,
                                    lastCommittedOpTime.getTimestamp());
            }
        }

        // If we're a shard other than the config shard, attach the last configOpTime we know about.
        if (isShardingAware && !isConfig) {
            auto opTime = Grid::get(opCtx)->configOpTime();
            rpc::ConfigServerMetadata(opTime).writeToMetadata(metadataBob);
        }
    }
Example no. 4
    void LegacyReplicationCoordinator::prepareReplSetUpdatePositionCommandHandshakes(
            OperationContext* txn,
            std::vector<BSONObj>* handshakes) {
        invariant(getReplicationMode() == modeReplSet);
        boost::lock_guard<boost::mutex> lock(_mutex);
        // handshake obj for us
        BSONObjBuilder cmd;
        cmd.append("replSetUpdatePosition", 1);
        BSONObjBuilder sub (cmd.subobjStart("handshake"));
        sub.append("handshake", getMyRID(txn));
        sub.append("member", theReplSet->selfId());
        sub.append("config", theReplSet->myConfig().asBson());
        sub.doneFast();
        handshakes->push_back(cmd.obj());

        // handshake objs for all chained members
        for (OIDMemberMap::const_iterator itr = _ridMemberMap.begin();
             itr != _ridMemberMap.end(); ++itr) {
            BSONObjBuilder cmd;
            cmd.append("replSetUpdatePosition", 1);
            // outer handshake indicates this is a handshake command
            // inner is needed as part of the structure to be passed to gotHandshake
            BSONObjBuilder subCmd (cmd.subobjStart("handshake"));
            subCmd.append("handshake", itr->first);
            subCmd.append("member", itr->second->id());
            subCmd.append("config", itr->second->config().asBson());
            subCmd.doneFast();
            handshakes->push_back(cmd.obj());
        }
    }
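
For reference, each handshake command pushed onto the vector above has the shape below (field names taken directly from the code; the concrete contents of config depend on the replica set configuration):

    // { replSetUpdatePosition: 1,
    //   handshake: { handshake: <rid>, member: <memberId>, config: <memberConfig> } }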
Example no. 5
    void LegacyReplicationCoordinator::prepareReplSetUpdatePositionCommand(
            OperationContext* txn,
            BSONObjBuilder* cmdBuilder) {
        invariant(getReplicationMode() == modeReplSet);
        boost::lock_guard<boost::mutex> lock(_mutex);
        cmdBuilder->append("replSetUpdatePosition", 1);
        // create an array containing an entry for each member connected to us and for ourself
        BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
        OID myID = getMyRID(txn);
        {
            for (SlaveOpTimeMap::const_iterator itr = _slaveOpTimeMap.begin();
                    itr != _slaveOpTimeMap.end(); ++itr) {
                const OID& rid = itr->first;
                const BSONObj& config = mapFindWithDefault(_ridConfigMap, rid, BSONObj());
                BSONObjBuilder entry(arrayBuilder.subobjStart());
                entry.append("_id", rid);
                entry.append("optime", itr->second);
                // SERVER-14550 Even though the "config" field isn't used on the other end in 2.8,
                // we need to keep sending it for 2.6 compatibility.
                // TODO(spencer): Remove this after 2.8 is released.
                if (rid == myID) {
                    entry.append("config", theReplSet->myConfig().asBson());
                } else {
                    entry.append("config", config);
                }
            }
        }
    }
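
The resulting command document has the shape below (one optimes entry per tracked node, including ourself):

    // { replSetUpdatePosition: 1,
    //   optimes: [ { _id: <rid>, optime: <opTime>, config: <memberConfig> }, ... ] }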
Example no. 6
    bool ReplicationCoordinatorImpl::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
        if (!idx->unique()) {
            return false;
        }
        // Never ignore _id index
        if (idx->isIdIndex()) {
            return false;
        }
        if (getReplicationMode() != modeReplSet) {
            return false;
        }
        // see SERVER-6671
        MemberState ms = getCurrentMemberState();
        if (! ((ms == MemberState::RS_STARTUP2) ||
               (ms == MemberState::RS_RECOVERING) ||
               (ms == MemberState::RS_ROLLBACK))) {
            return false;
        }
        // TODO(spencer): SERVER-14233 Remove support for old oplog versions, or move oplogVersion
        // into the repl coordinator
        /* // 2 is the oldest oplog version where operations
        // are fully idempotent.
        if (theReplSet->oplogVersion < 2) {
            return false;
        }*/

        return true;
    }
Example no. 7
    Status LegacyReplicationCoordinator::_stepDownHelper(OperationContext* txn,
                                                         bool force,
                                                         const Milliseconds& initialWaitTime,
                                                         const Milliseconds& stepdownTime,
                                                         const Milliseconds& postStepdownWaitTime) {
        invariant(getReplicationMode() == modeReplSet);
        if (!getCurrentMemberState().primary()) {
            return Status(ErrorCodes::NotMaster, "not primary so can't step down");
        }

        if (!force) {
            Status status = _waitForSecondary(initialWaitTime, Milliseconds(10 * 1000));
            if (!status.isOK()) {
                return status;
            }
        }

        // step down
        bool worked = repl::theReplSet->stepDown(txn, stepdownTime.total_seconds());
        if (!worked) {
            return Status(ErrorCodes::NotMaster, "not primary so can't step down");
        }

        if (postStepdownWaitTime.total_milliseconds() > 0) {
            log() << "waiting for secondaries to catch up" << endl;

            // The only caller of this with a non-zero postStepdownWaitTime is
            // stepDownAndWaitForSecondary, and the only caller of that is the shutdown command
            // which doesn't actually care if secondaries failed to catch up here, so we ignore the
            // return status of _waitForSecondary
            _waitForSecondary(postStepdownWaitTime, Milliseconds(0));
        }
        return Status::OK();
    }
Example no. 8
void ShardingInitializationMongoD::initializeShardingEnvironmentOnShardServer(
    OperationContext* opCtx, const ShardIdentity& shardIdentity, StringData distLockProcessId) {
    initializeGlobalShardingStateForMongoD(
        opCtx, shardIdentity.getConfigsvrConnectionString(), distLockProcessId);

    _replicaSetChangeListener =
        ReplicaSetMonitor::getNotifier().makeListener<ShardingReplicaSetChangeListener>(
            opCtx->getServiceContext());

    // Determine primary/secondary/standalone state in order to properly initialize sharding
    // components.
    const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    bool isReplSet = replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
    bool isStandaloneOrPrimary =
        !isReplSet || (replCoord->getMemberState() == repl::MemberState::RS_PRIMARY);

    CatalogCacheLoader::get(opCtx).initializeReplicaSetRole(isStandaloneOrPrimary);
    ChunkSplitter::get(opCtx).onShardingInitialization(isStandaloneOrPrimary);
    PeriodicBalancerConfigRefresher::get(opCtx).onShardingInitialization(opCtx->getServiceContext(),
                                                                         isStandaloneOrPrimary);

    // Start the transaction coordinator service only if the node is the primary of a replica set
    TransactionCoordinatorService::get(opCtx)->onShardingInitialization(
        opCtx, isReplSet && isStandaloneOrPrimary);

    Grid::get(opCtx)->setShardingInitialized();

    LOG(0) << "Finished initializing sharding components for "
           << (isStandaloneOrPrimary ? "primary" : "secondary") << " node.";
}
Example no. 9
bool ReplicationCoordinator::isOplogDisabledFor(OperationContext* opCtx,
                                                const NamespaceString& nss) {
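    // The oplog is disabled when replication is off entirely, when the operation
    // explicitly suppresses replication, and for namespaces that are never
    // replicated: the local database, system.profile, and drop-pending collections.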
    if (getReplicationMode() == ReplicationCoordinator::modeNone) {
        return true;
    }

    if (!opCtx->writesAreReplicated()) {
        return true;
    }

    if (nss.db() == "local") {
        return true;
    }

    if (nss.isSystemDotProfile()) {
        return true;
    }

    if (nss.isDropPendingNamespace()) {
        return true;
    }

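    // Anything that reaches this point will be written to the oplog, so the
    // operation must have a recovery unit in which to do that write.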
    fassert(28626, opCtx->recoveryUnit());

    return false;
}
Example no. 10
const ConnectionString ShardLocal::getConnString() const {
    auto replCoord = repl::getGlobalReplicationCoordinator();

    // Currently ShardLocal only works for config servers, which must be replica sets.  If we
    // ever start using ShardLocal on shards we'll need to consider how to handle shards that are
    // not replica sets.
    invariant(replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet);
    return replCoord->getConfig().getConnectionString();
}
Example no. 11
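// Returns this node's replication ID (RID). An RID only exists when replication
// is enabled, hence the invariant on the fall-through path.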
OID LegacyReplicationCoordinator::getMyRID(OperationContext* txn) {
    Mode mode = getReplicationMode();
    if (mode == modeReplSet) {
        return theReplSet->syncSourceFeedback.getMyRID();
    } else if (mode == modeMasterSlave) {
        ReplSource source(txn);
        return source.getMyRID();
    }
    invariant(false); // Don't have an RID if no replication is enabled
}
Example no. 12
    ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
            const OperationContext* txn,
            const OpTime& opId,
            const WriteConcernOptions& writeConcern) {
        // TODO(spencer): handle killop

        if (writeConcern.wNumNodes <= 1 && writeConcern.wMode.empty()) {
            // no desired replication check
            return StatusAndDuration(Status::OK(), Milliseconds(0));
        }

        const Mode replMode = getReplicationMode();
        if (replMode == modeNone || serverGlobalParams.configsvr) {
            // no replication check needed (validated above)
            return StatusAndDuration(Status::OK(), Milliseconds(0));
        }

        if (writeConcern.wMode == "majority" && replMode == modeMasterSlave) {
            // with master/slave, majority is equivalent to w=1
            return StatusAndDuration(Status::OK(), Milliseconds(0));
        }

        Timer timer;
        boost::condition_variable condVar;
        boost::unique_lock<boost::mutex> lk(_mutex);
        // Must hold _mutex before constructing waitInfo as it will modify _replicationWaiterList
        WaiterInfo waitInfo(&_replicationWaiterList, &opId, &writeConcern, &condVar);

        while (!_opReplicatedEnough_inlock(opId, writeConcern)) {
            const int elapsed = timer.millis();
            if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout &&
                    elapsed > writeConcern.wTimeout) {
                return StatusAndDuration(Status(ErrorCodes::ExceededTimeLimit,
                                                "waiting for replication timed out"),
                                         Milliseconds(elapsed));
            }

            if (_inShutdown) {
                return StatusAndDuration(Status(ErrorCodes::ShutdownInProgress,
                                                "Replication is being shut down"),
                                         Milliseconds(elapsed));
            }

            try {
                if (writeConcern.wTimeout == WriteConcernOptions::kNoTimeout) {
                    condVar.wait(lk);
                } else {
                    condVar.timed_wait(lk, Milliseconds(writeConcern.wTimeout - elapsed));
                }
            } catch (const boost::thread_interrupted&) {}
        }

        return StatusAndDuration(Status::OK(), Milliseconds(timer.millis()));
    }
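
A hedged usage sketch for the API above (the wNumNodes and wTimeout field names are taken from the snippet itself; the status member of StatusAndDuration and the replCoord/txn/opId context are assumptions):

    // Hypothetical call site: wait up to five seconds for opId to replicate
    // to at least two nodes.
    WriteConcernOptions writeConcern;
    writeConcern.wNumNodes = 2;
    writeConcern.wTimeout = 5000;  // milliseconds, per the timer comparison above
    ReplicationCoordinator::StatusAndDuration result =
        replCoord->awaitReplication(txn, opId, writeConcern);
    if (!result.status.isOK()) {
        // timed out (ExceededTimeLimit) or shutting down (ShutdownInProgress)
    }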
Example no. 13
    Status LegacyReplicationCoordinator::setLastOptime(const OID& rid,
                                                       const OpTime& ts,
                                                       const BSONObj& config) {
        std::string oplogNs = getReplicationMode() == modeReplSet ?
                "local.oplog.rs" : "local.oplog.$main";
        if (!updateSlaveTracking(BSON("_id" << rid), config, oplogNs, ts)) {
            return Status(ErrorCodes::NodeNotFound,
                          str::stream() << "could not update node with _id: "
                                        << config["_id"].Int()
                                        << " because it cannot be found in current ReplSetConfig");
        }

        if (getReplicationMode() == modeReplSet && !getCurrentMemberState().primary()) {
            // pass along if we are not primary
            LOG(2) << "received notification that " << config << " has reached optime: "
                   << ts.toStringPretty();
            theReplSet->syncSourceFeedback.updateMap(rid, ts);
        }
        return Status::OK();
    }
Example no. 14
    void ReplicationCoordinatorImpl::setCurrentReplicaSetConfig(const ReplicaSetConfig& newConfig,
                                                                int myIndex) {
        invariant(getReplicationMode() == modeReplSet);
        boost::lock_guard<boost::mutex> lk(_mutex);
        _rsConfig = newConfig;
        _thisMembersConfigIndex = myIndex;

        cancelHeartbeats();
        _startHeartbeats();

        // TODO(SERVER-14591): instead of this, use WriteConcernOptions and store in replcoord;
        // in getLastError command, fetch the defaults via a getter in replcoord.
        // replcoord is responsible for replacing its gledefault with a new config's.
        /*
        if (getLastErrorDefault || !c.getLastErrorDefaults.isEmpty()) {
            // see comment in dbcommands.cpp for getlasterrordefault
            getLastErrorDefault = new BSONObj(c.getLastErrorDefaults);
        }
        */
    }
Example no. 15
    Status LegacyReplicationCoordinator::setLastOptime(OperationContext* txn,
                                                       const OID& rid,
                                                       const OpTime& ts) {
        {
            boost::lock_guard<boost::mutex> lock(_mutex);
            if (ts <= mapFindWithDefault(_slaveOpTimeMap, rid, OpTime())) {
                // Only update if ts is newer than what we have already
                return Status::OK();
            }
            BSONObj config = mapFindWithDefault(_ridConfigMap, rid, BSONObj());
            LOG(2) << "received notification that node with RID " << rid << " and config " << config
                    << " has reached optime: " << ts.toStringPretty();

            if (rid != getMyRID(txn)) {
                // TODO(spencer): Remove this invariant for backwards compatibility
                invariant(!config.isEmpty());
                // This is what updates the progress information used for satisfying write concern
                // and wakes up threads waiting for replication.
                if (!updateSlaveTracking(BSON("_id" << rid), config, ts)) {
                    return Status(ErrorCodes::NodeNotFound,
                                  str::stream() << "could not update node with _id: "
                                                << config["_id"].Int()
                                                << " because it cannot be found in current ReplSetConfig");
                }
            }

            // This updates the _slaveOpTimeMap which is used for forwarding slave progress
            // upstream in chained replication.
            LOG(2) << "Updating our knowledge of the replication progress for node with RID " <<
                    rid << " to be at optime " << ts;
            _slaveOpTimeMap[rid] = ts;
        }

        if (getReplicationMode() == modeReplSet && !getCurrentMemberState().primary()) {
            // pass along if we are not primary
            theReplSet->syncSourceFeedback.forwardSlaveProgress();
        }
        return Status::OK();
    }
Example no. 16
void ReplicationCoordinatorImpl::setCurrentMemberState(const MemberState& newState) {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lk(_mutex);
    _currentState = newState;
}
Example no. 17
void ReplicationCoordinatorImpl::setCurrentReplicaSetConfig(
        const TopologyCoordinator::ReplicaSetConfig& newConfig) {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lk(_mutex);
    _rsConfig = newConfig;
}
Example no. 18
MemberState ReplicationCoordinatorImpl::getCurrentMemberState() const {
    invariant(getReplicationMode() == modeReplSet);
    boost::lock_guard<boost::mutex> lk(_mutex);
    return _currentState;
}