Status CatalogManagerReplicaSet::getChunks(const BSONObj& query,
                                           const BSONObj& sort,
                                           boost::optional<int> limit,
                                           vector<ChunkType>* chunks) {
    chunks->clear();

    auto configShard = grid.shardRegistry()->getShard("config");
    auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    // Convert boost::optional<int> to boost::optional<long long>.
    auto longLimit = limit ? boost::optional<long long>(*limit) : boost::none;
    auto findStatus = grid.shardRegistry()->exhaustiveFind(
        readHostStatus.getValue(), NamespaceString(ChunkType::ConfigNS), query, sort, longLimit);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    for (const BSONObj& obj : findStatus.getValue()) {
        auto chunkRes = ChunkType::fromBSON(obj);
        if (!chunkRes.isOK()) {
            chunks->clear();
            return {ErrorCodes::FailedToParse,
                    stream() << "Failed to parse chunk with id ("
                             << obj[ChunkType::name()].toString()
                             << "): " << chunkRes.getStatus().toString()};
        }

        chunks->push_back(chunkRes.getValue());
    }

    return Status::OK();
}
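// --- Illustration (not from the source tree) ---
// A minimal standalone sketch of the optional-limit widening used in getChunks above: an
// absent limit stays absent, and a present int limit is widened to long long before it is
// handed to exhaustiveFind. Only Boost.Optional is assumed.
#include <boost/optional.hpp>
#include <cassert>

int main() {
    boost::optional<int> limit = 10;
    auto longLimit = limit ? boost::optional<long long>(*limit) : boost::none;
    assert(longLimit && *longLimit == 10);

    boost::optional<int> noLimit;  // boost::none by default
    auto noLongLimit = noLimit ? boost::optional<long long>(*noLimit) : boost::none;
    assert(!noLongLimit);
    return 0;
}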
Status CatalogManagerReplicaSet::getDatabasesForShard(const string& shardName,
                                                      vector<string>* dbs) {
    auto configShard = grid.shardRegistry()->getShard("config");
    auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

    auto findStatus = grid.shardRegistry()->exhaustiveFind(readHost.getValue(),
                                                           NamespaceString(DatabaseType::ConfigNS),
                                                           BSON(DatabaseType::primary(shardName)),
                                                           BSONObj(),
                                                           boost::none);  // no limit
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    for (const BSONObj& obj : findStatus.getValue()) {
        string dbName;
        Status status = bsonExtractStringField(obj, DatabaseType::name(), &dbName);
        if (!status.isOK()) {
            dbs->clear();
            return status;
        }

        dbs->push_back(dbName);
    }

    return Status::OK();
}
Status CatalogManagerReplicaSet::getTagsForCollection(const std::string& collectionNs,
                                                      std::vector<TagsType>* tags) {
    tags->clear();

    auto configShard = grid.shardRegistry()->getShard("config");
    auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    auto findStatus = grid.shardRegistry()->exhaustiveFind(readHostStatus.getValue(),
                                                           NamespaceString(TagsType::ConfigNS),
                                                           BSON(TagsType::ns(collectionNs)),
                                                           BSON(TagsType::min() << 1),
                                                           boost::none);  // no limit
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }
    for (const BSONObj& obj : findStatus.getValue()) {
        auto tagRes = TagsType::fromBSON(obj);
        if (!tagRes.isOK()) {
            tags->clear();
            return Status(ErrorCodes::FailedToParse,
                          str::stream()
                              << "Failed to parse tag: " << tagRes.getStatus().toString());
        }

        tags->push_back(tagRes.getValue());
    }

    return Status::OK();
}
StatusWith<OpTimePair<CollectionType>> CatalogManagerReplicaSet::getCollection(
    OperationContext* txn, const std::string& collNs) {
    auto configShard = grid.shardRegistry()->getShard(txn, "config");

    auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    auto statusFind = _exhaustiveFindOnConfig(readHostStatus.getValue(),
                                              NamespaceString(CollectionType::ConfigNS),
                                              BSON(CollectionType::fullNs(collNs)),
                                              BSONObj(),
                                              1);
    if (!statusFind.isOK()) {
        return statusFind.getStatus();
    }

    const auto& retOpTimePair = statusFind.getValue();
    const auto& retVal = retOpTimePair.value;
    if (retVal.empty()) {
        return Status(ErrorCodes::NamespaceNotFound,
                      stream() << "collection " << collNs << " not found");
    }

    invariant(retVal.size() == 1);

    auto parseStatus = CollectionType::fromBSON(retVal.front());
    if (!parseStatus.isOK()) {
        return parseStatus.getStatus();
    }

    return OpTimePair<CollectionType>(parseStatus.getValue(), retOpTimePair.opTime);
}
StatusWith<string> CatalogManagerReplicaSet::getTagForChunk(const std::string& collectionNs,
                                                            const ChunkType& chunk) {
    auto configShard = grid.shardRegistry()->getShard("config");
    auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    BSONObj query =
        BSON(TagsType::ns(collectionNs) << TagsType::min() << BSON("$lte" << chunk.getMin())
                                        << TagsType::max() << BSON("$gte" << chunk.getMax()));
    auto findStatus = grid.shardRegistry()->exhaustiveFind(
        readHostStatus.getValue(), NamespaceString(TagsType::ConfigNS), query, BSONObj(), 1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue();
    if (docs.empty()) {
        return string{};
    }

    invariant(docs.size() == 1);
    BSONObj tagsDoc = docs.front();

    const auto tagsResult = TagsType::fromBSON(tagsDoc);
    if (!tagsResult.isOK()) {
        return {ErrorCodes::FailedToParse,
                stream() << "error while parsing " << TagsType::ConfigNS << " document: " << tagsDoc
                         << " : " << tagsResult.getStatus().toString()};
    }
    return tagsResult.getValue().getTag();
}
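// --- Illustration (not from the source tree) ---
// A standalone sketch of the containment test that the tags query in getTagForChunk expresses
// in BSON: a chunk belongs to a tag range when the tag's min is <= the chunk's min and the
// tag's max is >= the chunk's max. Plain integers stand in for shard key values here.
#include <cassert>

static bool chunkWithinTagRange(int chunkMin, int chunkMax, int tagMin, int tagMax) {
    return tagMin <= chunkMin && tagMax >= chunkMax;
}

int main() {
    assert(chunkWithinTagRange(10, 20, 0, 50));   // fully contained -> tagged
    assert(!chunkWithinTagRange(10, 60, 0, 50));  // spills past the tag's max -> not tagged
    return 0;
}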
StatusWith<CollectionType> CatalogManagerReplicaSet::getCollection(const std::string& collNs) {
    auto configShard = grid.shardRegistry()->getShard("config");

    auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    auto statusFind =
        grid.shardRegistry()->exhaustiveFind(readHostStatus.getValue(),
                                             NamespaceString(CollectionType::ConfigNS),
                                             BSON(CollectionType::fullNs(collNs)),
                                             BSONObj(),
                                             1);
    if (!statusFind.isOK()) {
        return statusFind.getStatus();
    }

    const auto& retVal = statusFind.getValue();
    if (retVal.empty()) {
        return Status(ErrorCodes::NamespaceNotFound,
                      stream() << "collection " << collNs << " not found");
    }

    invariant(retVal.size() == 1);

    return CollectionType::fromBSON(retVal.front());
}
Status CatalogManagerReplicaSet::getAllShards(vector<ShardType>* shards) {
    const auto configShard = grid.shardRegistry()->getShard("config");
    const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

    auto findStatus = grid.shardRegistry()->exhaustiveFind(readHost.getValue(),
                                                           NamespaceString(ShardType::ConfigNS),
                                                           BSONObj(),     // no query filter
                                                           BSONObj(),     // no sort
                                                           boost::none);  // no limit
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    for (const BSONObj& doc : findStatus.getValue()) {
        auto shardRes = ShardType::fromBSON(doc);
        if (!shardRes.isOK()) {
            shards->clear();
            return {ErrorCodes::FailedToParse,
                    stream() << "Failed to parse shard with id ("
                             << doc[ShardType::name()].toString()
                             << "): " << shardRes.getStatus().toString()};
        }

        shards->push_back(shardRes.getValue());
    }

    return Status::OK();
}
StatusWith<VersionType> CatalogManagerReplicaSet::_getConfigVersion(OperationContext* txn) {
    const auto configShard = grid.shardRegistry()->getShard(txn, "config");
    const auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    auto readHost = readHostStatus.getValue();
    auto findStatus = _exhaustiveFindOnConfig(readHost,
                                              NamespaceString(VersionType::ConfigNS),
                                              BSONObj(),
                                              BSONObj(),
                                              boost::none /* no limit */);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    auto queryResults = findStatus.getValue().value;

    if (queryResults.size() > 1) {
        return {ErrorCodes::RemoteValidationError,
                str::stream() << "should only have 1 document in " << VersionType::ConfigNS};
    }

    if (queryResults.empty()) {
        auto countStatus =
            _runCountCommandOnConfig(readHost, NamespaceString(ShardType::ConfigNS), BSONObj());

        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }

        const auto& shardCount = countStatus.getValue();
        if (shardCount > 0) {
            // Version document doesn't exist, but config.shards is not empty. Assuming that
            // the current config metadata is pre v2.4.
            VersionType versionInfo;
            versionInfo.setMinCompatibleVersion(UpgradeHistory_UnreportedVersion);
            versionInfo.setCurrentVersion(UpgradeHistory_UnreportedVersion);
            return versionInfo;
        }

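        // There is no version document and config.shards is empty, so this is a freshly
        // initialized (empty) config metadata store.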
        VersionType versionInfo;
        versionInfo.setMinCompatibleVersion(UpgradeHistory_EmptyVersion);
        versionInfo.setCurrentVersion(UpgradeHistory_EmptyVersion);
        return versionInfo;
    }

    BSONObj versionDoc = queryResults.front();
    auto versionTypeResult = VersionType::fromBSON(versionDoc);
    if (!versionTypeResult.isOK()) {
        return Status(ErrorCodes::UnsupportedFormat,
                      str::stream() << "invalid config version document: " << versionDoc
                                    << versionTypeResult.getStatus().toString());
    }

    return versionTypeResult.getValue();
}
Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* txn) {
    invariant(!txn->lockState()->isLocked());
    auto scopedGuard = MakeGuard([&] { cancelClone(txn); });

    // Resolve the donor and recipient shards and their connection strings

    {
        auto donorShardStatus = grid.shardRegistry()->getShard(txn, _args.getFromShardId());
        if (!donorShardStatus.isOK()) {
            return donorShardStatus.getStatus();
        }
        _donorCS = donorShardStatus.getValue()->getConnString();
    }

    {
        auto recipientShardStatus = grid.shardRegistry()->getShard(txn, _args.getToShardId());
        if (!recipientShardStatus.isOK()) {
            return recipientShardStatus.getStatus();
        }
        auto recipientShard = recipientShardStatus.getValue();

        auto shardHostStatus = recipientShard->getTargeter()->findHost(
            ReadPreferenceSetting{ReadPreference::PrimaryOnly});
        if (!shardHostStatus.isOK()) {
            return shardHostStatus.getStatus();
        }

        _recipientHost = std::move(shardHostStatus.getValue());
    }

    // Prepare the currently available documents
    Status status = _storeCurrentLocs(txn);
    if (!status.isOK()) {
        return status;
    }

    // Tell the recipient shard to start cloning
    BSONObjBuilder cmdBuilder;
    StartChunkCloneRequest::appendAsCommand(&cmdBuilder,
                                            _args.getNss(),
                                            _sessionId,
                                            _args.getConfigServerCS(),
                                            _donorCS,
                                            _args.getFromShardId(),
                                            _args.getToShardId(),
                                            _args.getMinKey(),
                                            _args.getMaxKey(),
                                            _shardKeyPattern.toBSON(),
                                            _args.getSecondaryThrottle());

    auto responseStatus = _callRecipient(cmdBuilder.obj());
    if (!responseStatus.isOK()) {
        return responseStatus.getStatus();
    }

    scopedGuard.Dismiss();
    return Status::OK();
}
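// --- Illustration (not from the source tree) ---
// A standalone sketch of the dismissable-guard pattern used in startClone above: the cleanup
// action runs automatically on any early error return, and is skipped once the function
// reaches its success path and dismisses the guard. A tiny hand-rolled guard stands in for
// the MakeGuard/ScopeGuard utility used in the source.
#include <functional>
#include <iostream>
#include <utility>

class ScopedCleanup {
public:
    explicit ScopedCleanup(std::function<void()> fn) : _fn(std::move(fn)) {}
    ~ScopedCleanup() {
        if (_armed) {
            _fn();
        }
    }
    void dismiss() {
        _armed = false;
    }

private:
    std::function<void()> _fn;
    bool _armed = true;
};

bool doCloneStep(bool succeed) {
    ScopedCleanup guard([] { std::cout << "cancelling clone" << std::endl; });
    if (!succeed) {
        return false;  // guard fires and cancels the clone
    }
    guard.dismiss();  // success: skip the cancellation
    return true;
}

int main() {
    doCloneStep(false);  // prints "cancelling clone"
    doCloneStep(true);   // prints nothing
    return 0;
}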
Example #10
void ChunkManager::calcInitSplitsAndShards(OperationContext* txn,
                                           const ShardId& primaryShardId,
                                           const vector<BSONObj>* initPoints,
                                           const set<ShardId>* initShardIds,
                                           vector<BSONObj>* splitPoints,
                                           vector<ShardId>* shardIds) const {
    verify(_chunkMap.size() == 0);

    Chunk c(this,
            _keyPattern.getKeyPattern().globalMin(),
            _keyPattern.getKeyPattern().globalMax(),
            primaryShardId);

    if (!initPoints || !initPoints->size()) {
        // discover split points
        const auto primaryShard = grid.shardRegistry()->getShard(txn, primaryShardId);
        auto targetStatus =
            primaryShard->getTargeter()->findHost({ReadPreference::PrimaryPreferred, TagSet{}});
        uassertStatusOK(targetStatus);

        NamespaceString nss(getns());
        auto result = grid.shardRegistry()->runCommand(
            txn, targetStatus.getValue(), nss.db().toString(), BSON("count" << nss.coll()));

        long long numObjects = 0;
        uassertStatusOK(result.getStatus());
        uassertStatusOK(Command::getStatusFromCommandResult(result.getValue()));
        uassertStatusOK(bsonExtractIntegerField(result.getValue(), "n", &numObjects));

        if (numObjects > 0)
            c.pickSplitVector(txn, *splitPoints, Chunk::MaxChunkSize);

        // since docs already exist, we must use the primary shard
        shardIds->push_back(primaryShardId);
    } else {
        // make sure points are unique and ordered
        set<BSONObj> orderedPts;
        for (unsigned i = 0; i < initPoints->size(); ++i) {
            BSONObj pt = (*initPoints)[i];
            orderedPts.insert(pt);
        }
        for (set<BSONObj>::iterator it = orderedPts.begin(); it != orderedPts.end(); ++it) {
            splitPoints->push_back(*it);
        }

        if (!initShardIds || !initShardIds->size()) {
            // If not specified, only use the primary shard (note that it's not safe for mongos
            // to put initial chunks on other shards without the primary mongod knowing).
            shardIds->push_back(primaryShardId);
        } else {
            std::copy(initShardIds->begin(), initShardIds->end(), std::back_inserter(*shardIds));
        }
    }
}
Example #11
Status AsyncRequestsSender::RemoteData::resolveShardIdToHostAndPort(
    AsyncRequestsSender* ars, const ReadPreferenceSetting& readPref) {
    const auto shard = getShard();
    if (!shard) {
        return Status(ErrorCodes::ShardNotFound,
                      str::stream() << "Could not find shard " << shardId);
    }

    auto findHostStatus = shard->getTargeter()->findHost(ars->_opCtx, readPref);
    if (findHostStatus.isOK())
        shardHostAndPort = std::move(findHostStatus.getValue());

    return findHostStatus.getStatus();
}
Status CatalogManagerReplicaSet::getCollections(OperationContext* txn,
                                                const std::string* dbName,
                                                std::vector<CollectionType>* collections,
                                                OpTime* opTime) {
    BSONObjBuilder b;
    if (dbName) {
        invariant(!dbName->empty());
        b.appendRegex(CollectionType::fullNs(),
                      string(str::stream() << "^" << pcrecpp::RE::QuoteMeta(*dbName) << "\\."));
    }

    auto configShard = grid.shardRegistry()->getShard(txn, "config");
    auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

    auto findStatus = _exhaustiveFindOnConfig(readHost.getValue(),
                                              NamespaceString(CollectionType::ConfigNS),
                                              b.obj(),
                                              BSONObj(),
                                              boost::none);  // no limit
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docsOpTimePair = findStatus.getValue();

    for (const BSONObj& obj : docsOpTimePair.value) {
        const auto collectionResult = CollectionType::fromBSON(obj);
        if (!collectionResult.isOK()) {
            collections->clear();
            return {ErrorCodes::FailedToParse,
                    str::stream() << "error while parsing " << CollectionType::ConfigNS
                                  << " document: " << obj << " : "
                                  << collectionResult.getStatus().toString()};
        }

        collections->push_back(collectionResult.getValue());
    }

    if (opTime) {
        *opTime = docsOpTimePair.opTime;
    }

    return Status::OK();
}
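// --- Illustration (not from the source tree) ---
// A standalone sketch, using std::regex rather than the PCRE/BSON regex machinery above, of
// the "^<dbName>\." prefix match that getCollections builds: it selects every namespace in
// the given database and nothing else. The database name is a literal here; the real code
// escapes it with pcrecpp::RE::QuoteMeta first.
#include <cassert>
#include <regex>
#include <string>

int main() {
    std::regex nsRegex("^mydb\\.");  // '\\.' matches a literal dot
    assert(std::regex_search(std::string("mydb.users"), nsRegex));
    assert(!std::regex_search(std::string("mydbx.users"), nsRegex));  // different database
    assert(!std::regex_search(std::string("other.mydb"), nsRegex));   // not a prefix match
    return 0;
}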
Status CatalogManagerReplicaSet::_checkDbDoesNotExist(OperationContext* txn,
                                                      const string& dbName,
                                                      DatabaseType* db) {
    BSONObjBuilder queryBuilder;
    queryBuilder.appendRegex(
        DatabaseType::name(), (string) "^" + pcrecpp::RE::QuoteMeta(dbName) + "$", "i");

    const auto configShard = grid.shardRegistry()->getShard(txn, "config");
    const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

    auto findStatus = _exhaustiveFindOnConfig(readHost.getValue(),
                                              NamespaceString(DatabaseType::ConfigNS),
                                              queryBuilder.obj(),
                                              BSONObj(),
                                              1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue().value;
    if (docs.empty()) {
        return Status::OK();
    }

    BSONObj dbObj = docs.front();
    std::string actualDbName = dbObj[DatabaseType::name()].String();
    if (actualDbName == dbName) {
        if (db) {
            auto parseDBStatus = DatabaseType::fromBSON(dbObj);
            if (!parseDBStatus.isOK()) {
                return parseDBStatus.getStatus();
            }

            *db = parseDBStatus.getValue();
        }

        return Status(ErrorCodes::NamespaceExists,
                      str::stream() << "database " << dbName << " already exists");
    }

    return Status(ErrorCodes::DatabaseDifferCase,
                  str::stream() << "can't have 2 databases that just differ on case "
                                << " have: " << actualDbName << " want to add: " << dbName);
}
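// --- Illustration (not from the source tree) ---
// A standalone sketch, using std::regex instead of the BSON/PCRE regex above, of the
// case-insensitive exact match that _checkDbDoesNotExist performs: a database whose name
// differs only in case is found, while unrelated names are not. The literal name "foo"
// stands in for the escaped dbName.
#include <cassert>
#include <regex>
#include <string>

int main() {
    std::regex dbRegex("^foo$", std::regex::ECMAScript | std::regex::icase);
    assert(std::regex_match(std::string("FOO"), dbRegex));      // same name, different case
    assert(!std::regex_match(std::string("foobar"), dbRegex));  // different database name
    return 0;
}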
StatusWith<OpTimePair<DatabaseType>> CatalogManagerReplicaSet::getDatabase(
    OperationContext* txn, const std::string& dbName) {
    invariant(nsIsDbOnly(dbName));

    // The two databases that are hosted on the config server are config and admin
    if (dbName == "config" || dbName == "admin") {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setSharded(false);
        dbt.setPrimary("config");

        return OpTimePair<DatabaseType>(dbt);
    }

    const auto configShard = grid.shardRegistry()->getShard(txn, "config");
    const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

    auto findStatus = _exhaustiveFindOnConfig(readHost.getValue(),
                                              NamespaceString(DatabaseType::ConfigNS),
                                              BSON(DatabaseType::name(dbName)),
                                              BSONObj(),
                                              1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docsWithOpTime = findStatus.getValue();
    if (docsWithOpTime.value.empty()) {
        return {ErrorCodes::DatabaseNotFound, stream() << "database " << dbName << " not found"};
    }

    invariant(docsWithOpTime.value.size() == 1);

    auto parseStatus = DatabaseType::fromBSON(docsWithOpTime.value.front());
    if (!parseStatus.isOK()) {
        return parseStatus.getStatus();
    }

    return OpTimePair<DatabaseType>(parseStatus.getValue(), docsWithOpTime.opTime);
}
StatusWith<std::string> CatalogManagerReplicaSet::_generateNewShardName() const {
    const auto configShard = grid.shardRegistry()->getShard("config");
    const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

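    // Find the highest-numbered auto-generated shard: match names with the "shard" prefix,
    // sort by name in descending order, and take at most one document.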
    BSONObjBuilder shardNameRegex;
    shardNameRegex.appendRegex(ShardType::name(), "^shard");

    auto findStatus = grid.shardRegistry()->exhaustiveFind(readHost.getValue(),
                                                           NamespaceString(ShardType::ConfigNS),
                                                           shardNameRegex.obj(),
                                                           BSON(ShardType::name() << -1),
                                                           1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue();

    int count = 0;
    if (!docs.empty()) {
        const auto shardStatus = ShardType::fromBSON(docs.front());
        if (!shardStatus.isOK()) {
            return shardStatus.getStatus();
        }

        std::istringstream is(shardStatus.getValue().getName().substr(5));
        is >> count;
        count++;
    }

    // TODO fix so that we can have more than 10000 automatically generated shard names
    if (count < 9999) {
        std::stringstream ss;
        ss << "shard" << std::setfill('0') << std::setw(4) << count;
        return ss.str();
    }

    return Status(ErrorCodes::OperationFailed, "unable to generate new shard name");
}
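// --- Illustration (not from the source tree) ---
// A standalone sketch of the name generation above: take the numeric suffix of the highest
// existing "shardNNNN" name, increment it, and zero-pad the result back to four digits.
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

int main() {
    const std::string lastName = "shard0042";   // assumed highest existing shard name
    int count = 0;
    std::istringstream is(lastName.substr(5));  // strip the "shard" prefix
    is >> count;
    ++count;

    std::stringstream ss;
    ss << "shard" << std::setfill('0') << std::setw(4) << count;
    std::cout << ss.str() << std::endl;  // prints "shard0043"
    return 0;
}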
Example #16
Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
    const ReadPreferenceSetting& readPref) {
    invariant(shardId);
    invariant(!cursorId);

    const auto shard = getShard();
    if (!shard) {
        return Status(ErrorCodes::ShardNotFound,
                      str::stream() << "Could not find shard " << *shardId);
    }

    // TODO: Pass down an OperationContext* to use here.
    auto findHostStatus = shard->getTargeter()->findHostWithMaxWait(readPref, Seconds{20});
    if (!findHostStatus.isOK()) {
        return findHostStatus.getStatus();
    }

    _shardHostAndPort = std::move(findHostStatus.getValue());

    return Status::OK();
}
Example #17
Status AsyncResultsMerger::RemoteCursorData::resolveShardIdToHostAndPort(
    OperationContext* txn, const ReadPreferenceSetting& readPref) {
    invariant(shardId);
    invariant(!cursorId);

    const auto shard = grid.shardRegistry()->getShard(txn, *shardId);
    if (!shard) {
        return Status(ErrorCodes::ShardNotFound,
                      str::stream() << "Could not find shard " << *shardId);
    }

    auto findHostStatus = shard->getTargeter()->findHost(
        readPref, RemoteCommandTargeter::selectFindHostMaxWaitTime(txn));
    if (!findHostStatus.isOK()) {
        return findHostStatus.getStatus();
    }

    _shardHostAndPort = std::move(findHostStatus.getValue());

    return Status::OK();
}
StatusWith<SettingsType> CatalogManagerReplicaSet::getGlobalSettings(OperationContext* txn,
                                                                     const string& key) {
    const auto configShard = grid.shardRegistry()->getShard(txn, "config");
    const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

    auto findStatus = _exhaustiveFindOnConfig(readHost.getValue(),
                                              NamespaceString(SettingsType::ConfigNS),
                                              BSON(SettingsType::key(key)),
                                              BSONObj(),
                                              1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue().value;
    if (docs.empty()) {
        return {ErrorCodes::NoMatchingDocument,
                str::stream() << "can't find settings document with key: " << key};
    }

    BSONObj settingsDoc = docs.front();
    StatusWith<SettingsType> settingsResult = SettingsType::fromBSON(settingsDoc);
    if (!settingsResult.isOK()) {
        return {ErrorCodes::FailedToParse,
                str::stream() << "error while parsing settings document: " << settingsDoc << " : "
                              << settingsResult.getStatus().toString()};
    }

    const SettingsType& settings = settingsResult.getValue();

    Status validationStatus = settings.validate();
    if (!validationStatus.isOK()) {
        return validationStatus;
    }

    return settingsResult;
}
Example #19
void MigrationManager::_executeMigrations(OperationContext* txn,
                                          MigrationStatuses* migrationStatuses) {
    for (auto& migration : _activeMigrations) {
        const NamespaceString nss(migration.chunkInfo.migrateInfo.ns);

        auto scopedCMStatus = ScopedChunkManager::getExisting(txn, nss);
        if (!scopedCMStatus.isOK()) {
            // Unable to find the ChunkManager for "nss" for whatever reason; abandon this
            // migration and proceed to the next.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(MigrationStatuses::value_type(
                migration.chunkInfo.migrateInfo.getName(), std::move(scopedCMStatus.getStatus())));
            continue;
        }

        ChunkManager* const chunkManager = scopedCMStatus.getValue().cm();
        auto chunk =
            chunkManager->findIntersectingChunk(txn, migration.chunkInfo.migrateInfo.minKey);

        {
            // No need to lock the mutex. Only this function and _takeDistLockForAMigration
            // manipulate "_distributedLocks". No need to protect serial actions.
            if (!_takeDistLockForAMigration(txn, migration, migrationStatuses)) {
                // If there is a lock conflict between the balancer and the shard, or a shard and a
                // shard, the migration has been rescheduled. Otherwise an attempt to take the lock
                // failed for whatever reason and this migration is being abandoned.
                continue;
            }
        }

        const MigrationRequest& migrationRequest = migration.chunkInfo;

        BSONObjBuilder builder;
        MoveChunkRequest::appendAsCommand(
            &builder,
            nss,
            chunkManager->getVersion(),
            Grid::get(txn)->shardRegistry()->getConfigServerConnectionString(),
            migrationRequest.migrateInfo.from,
            migrationRequest.migrateInfo.to,
            ChunkRange(chunk->getMin(), chunk->getMax()),
            migrationRequest.maxChunkSizeBytes,
            migrationRequest.secondaryThrottle,
            migrationRequest.waitForDelete,
            migration.oldShard ? true : false);  // takeDistLock flag.

        BSONObj moveChunkRequestObj = builder.obj();

        const auto recipientShard =
            grid.shardRegistry()->getShard(txn, migration.chunkInfo.migrateInfo.from);
        const auto host = recipientShard->getTargeter()->findHost(
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            RemoteCommandTargeter::selectFindHostMaxWaitTime(txn));
        if (!host.isOK()) {
            // Unable to find a target shard for whatever reason; abandon this migration and proceed
            // to the next.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(MigrationStatuses::value_type(
                migration.chunkInfo.migrateInfo.getName(), std::move(host.getStatus())));
            continue;
        }

        RemoteCommandRequest remoteRequest(host.getValue(), "admin", moveChunkRequestObj);

        StatusWith<RemoteCommandResponse> remoteCommandResponse(
            Status{ErrorCodes::InternalError, "Uninitialized value"});

        executor::TaskExecutor* executor = Grid::get(txn)->getExecutorPool()->getFixedExecutor();

        StatusWith<executor::TaskExecutor::CallbackHandle> callbackHandleWithStatus =
            executor->scheduleRemoteCommand(remoteRequest,
                                            stdx::bind(&MigrationManager::_checkMigrationCallback,
                                                       this,
                                                       stdx::placeholders::_1,
                                                       txn,
                                                       &migration,
                                                       migrationStatuses));

        if (!callbackHandleWithStatus.isOK()) {
            // Scheduling the migration moveChunk failed.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(
                MigrationStatuses::value_type(migration.chunkInfo.migrateInfo.getName(),
                                              std::move(callbackHandleWithStatus.getStatus())));
            continue;
        }

        // The moveChunk command was successfully scheduled. Store the callback handle so that the
        // command's return can be waited for later.
        stdx::lock_guard<stdx::mutex> lk(_mutex);
        migration.setCallbackHandle(std::move(callbackHandleWithStatus.getValue()));
    }

    _waitForMigrations(txn);
    // At this point, there are no parallel running threads so it is safe not to lock the mutex.

    // All the migrations have returned, release all of the distributed locks that are no longer
    // being used.
    _distributedLocks.clear();

    // If there are rescheduled migrations, move them to active and run the function again.
    if (!_rescheduledMigrations.empty()) {
        // Clear all the callback handles of the rescheduled migrations.
        for (auto& migration : _rescheduledMigrations) {
            migration.clearCallbackHandle();
        }

        _activeMigrations = std::move(_rescheduledMigrations);
        _rescheduledMigrations.clear();
        _executeMigrations(txn, migrationStatuses);
    } else {
        _activeMigrations.clear();
    }
}
StatusWith<ShardDrainingStatus> CatalogManagerReplicaSet::removeShard(OperationContext* txn,
                                                                      const std::string& name) {
    const auto configShard = grid.shardRegistry()->getShard("config");
    const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

    // Check preconditions for removing the shard
    auto countStatus =
        _runCountCommand(readHost.getValue(),
                         NamespaceString(ShardType::ConfigNS),
                         BSON(ShardType::name() << NE << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() > 0) {
        return Status(ErrorCodes::ConflictingOperationInProgress,
                      "Can't have more than one draining shard at a time");
    }

    countStatus = _runCountCommand(readHost.getValue(),
                                   NamespaceString(ShardType::ConfigNS),
                                   BSON(ShardType::name() << NE << name));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() == 0) {
        return Status(ErrorCodes::IllegalOperation, "Can't remove last shard");
    }

    // Figure out if shard is already draining
    countStatus = _runCountCommand(readHost.getValue(),
                                   NamespaceString(ShardType::ConfigNS),
                                   BSON(ShardType::name() << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() == 0) {
        log() << "going to start draining shard: " << name;

        Status status = update(ShardType::ConfigNS,
                               BSON(ShardType::name() << name),
                               BSON("$set" << BSON(ShardType::draining(true))),
                               false,  // upsert
                               false,  // multi
                               NULL);
        if (!status.isOK()) {
            log() << "error starting removeShard: " << name << "; err: " << status.reason();
            return status;
        }

        grid.shardRegistry()->reload();

        // Record start in changelog
        logChange(
            txn->getClient()->clientAddress(true), "removeShard.start", "", BSON("shard" << name));
        return ShardDrainingStatus::STARTED;
    }

    // Draining has already started, now figure out how many chunks and databases are still on the
    // shard.
    countStatus = _runCountCommand(
        readHost.getValue(), NamespaceString(ChunkType::ConfigNS), BSON(ChunkType::shard(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long chunkCount = countStatus.getValue();

    countStatus = _runCountCommand(readHost.getValue(),
                                   NamespaceString(DatabaseType::ConfigNS),
                                   BSON(DatabaseType::primary(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long databaseCount = countStatus.getValue();

    if (chunkCount > 0 || databaseCount > 0) {
        // Still more draining to do
        return ShardDrainingStatus::ONGOING;
    }

    // Draining is done, now finish removing the shard.
    log() << "going to remove shard: " << name;
    audit::logRemoveShard(txn->getClient(), name);

    Status status = remove(ShardType::ConfigNS, BSON(ShardType::name() << name), 0, NULL);
    if (!status.isOK()) {
        log() << "Error concluding removeShard operation on: " << name
              << "; err: " << status.reason();
        return status;
    }

    grid.shardRegistry()->remove(name);
    grid.shardRegistry()->reload();

    // Record finish in changelog
    logChange(txn->getClient()->clientAddress(true), "removeShard", "", BSON("shard" << name));

    return ShardDrainingStatus::COMPLETED;
}
Status CatalogManagerReplicaSet::shardCollection(OperationContext* txn,
                                                 const string& ns,
                                                 const ShardKeyPattern& fieldsAndOrder,
                                                 bool unique,
                                                 const vector<BSONObj>& initPoints,
                                                 const set<ShardId>& initShardIds) {
    // Lock the collection globally so that no other mongos can try to shard or drop the collection
    // at the same time.
    auto scopedDistLock = getDistLockManager()->lock(ns, "shardCollection");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    StatusWith<DatabaseType> status = getDatabase(nsToDatabase(ns));
    if (!status.isOK()) {
        return status.getStatus();
    }

    DatabaseType dbt = status.getValue();
    ShardId dbPrimaryShardId = dbt.getPrimary();
    const auto primaryShard = grid.shardRegistry()->getShard(dbPrimaryShardId);

    {
        // In 3.0 and prior we include this extra safety check that the collection is not getting
        // sharded concurrently by two different mongos instances. It is not 100%-proof, but it
        // reduces the chance that two invocations of shard collection will step on each other's
        // toes.  Now we take the distributed lock so going forward this check won't be necessary
        // but we leave it around for compatibility with other mongoses from 3.0.
        // TODO(spencer): Remove this after 3.2 ships.
        const auto configShard = grid.shardRegistry()->getShard("config");
        const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
        if (!readHost.isOK()) {
            return readHost.getStatus();
        }

        auto countStatus = _runCountCommand(
            readHost.getValue(), NamespaceString(ChunkType::ConfigNS), BSON(ChunkType::ns(ns)));
        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }
        if (countStatus.getValue() > 0) {
            return Status(ErrorCodes::AlreadyInitialized,
                          str::stream() << "collection " << ns << " already sharded with "
                                        << countStatus.getValue() << " chunks.");
        }
    }

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", ns);
        collectionDetail.append("primary", primaryShard->toString());

        {
            BSONArrayBuilder initialShards(collectionDetail.subarrayStart("initShards"));
            for (const ShardId& shardId : initShardIds) {
                initialShards.append(shardId);
            }
        }

        collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1));

        logChange(txn->getClient()->clientAddress(true),
                  "shardCollection.start",
                  ns,
                  collectionDetail.obj());
    }

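    // Build the in-memory chunk manager, create the initial chunk documents from the provided
    // split points and shards, and then reload the authoritative chunk ranges.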
    ChunkManagerPtr manager(new ChunkManager(ns, fieldsAndOrder, unique));
    manager->createFirstChunks(dbPrimaryShardId, &initPoints, &initShardIds);
    manager->loadExistingRanges(nullptr);

    CollectionInfo collInfo;
    collInfo.useChunkManager(manager);
    collInfo.save(ns);
    manager->reload(true);

    // TODO(spencer) SERVER-19319: Send setShardVersion to primary shard so it knows to start
    // rejecting unversioned writes.

    BSONObj finishDetail = BSON("version"
                                << "");  // TODO(spencer) SERVER-19319 Report actual version used

    logChange(txn->getClient()->clientAddress(true), "shardCollection", ns, finishDetail);

    return Status::OK();
}
Status CatalogManagerReplicaSet::shardCollection(OperationContext* txn,
                                                 const string& ns,
                                                 const ShardKeyPattern& fieldsAndOrder,
                                                 bool unique,
                                                 const vector<BSONObj>& initPoints,
                                                 const set<ShardId>& initShardIds) {
    // Lock the collection globally so that no other mongos can try to shard or drop the collection
    // at the same time.
    auto scopedDistLock = getDistLockManager()->lock(ns, "shardCollection");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    auto status = getDatabase(txn, nsToDatabase(ns));
    if (!status.isOK()) {
        return status.getStatus();
    }

    ShardId dbPrimaryShardId = status.getValue().value.getPrimary();
    const auto primaryShard = grid.shardRegistry()->getShard(txn, dbPrimaryShardId);

    {
        // In 3.0 and prior we include this extra safety check that the collection is not getting
        // sharded concurrently by two different mongos instances. It is not 100%-proof, but it
        // reduces the chance that two invocations of shard collection will step on each other's
        // toes.  Now we take the distributed lock so going forward this check won't be necessary
        // but we leave it around for compatibility with other mongoses from 3.0.
        // TODO(spencer): Remove this after 3.2 ships.
        const auto configShard = grid.shardRegistry()->getShard(txn, "config");
        const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
        if (!readHost.isOK()) {
            return readHost.getStatus();
        }

        auto countStatus = _runCountCommandOnConfig(
            readHost.getValue(), NamespaceString(ChunkType::ConfigNS), BSON(ChunkType::ns(ns)));
        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }
        if (countStatus.getValue() > 0) {
            return Status(ErrorCodes::AlreadyInitialized,
                          str::stream() << "collection " << ns << " already sharded with "
                                        << countStatus.getValue() << " chunks.");
        }
    }

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", ns);
        collectionDetail.append("primary", primaryShard->toString());

        {
            BSONArrayBuilder initialShards(collectionDetail.subarrayStart("initShards"));
            for (const ShardId& shardId : initShardIds) {
                initialShards.append(shardId);
            }
        }

        collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1));

        logChange(txn,
                  txn->getClient()->clientAddress(true),
                  "shardCollection.start",
                  ns,
                  collectionDetail.obj());
    }

    shared_ptr<ChunkManager> manager(new ChunkManager(ns, fieldsAndOrder, unique));
    manager->createFirstChunks(txn, dbPrimaryShardId, &initPoints, &initShardIds);
    manager->loadExistingRanges(txn, nullptr);

    CollectionInfo collInfo;
    collInfo.useChunkManager(manager);
    collInfo.save(txn, ns);
    manager->reload(txn, true);

    // Tell the primary mongod to refresh its data
    // TODO:  Think the real fix here is for mongos to just
    //        assume that all collections are sharded, when we get there
    SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
        grid.shardRegistry()->getConfigServerConnectionString(),
        dbPrimaryShardId,
        primaryShard->getConnString(),
        NamespaceString(ns),
        ChunkVersionAndOpTime(manager->getVersion(), manager->getConfigOpTime()),
        true);

    auto ssvStatus = grid.shardRegistry()->runCommandWithNotMasterRetries(
        txn, dbPrimaryShardId, "admin", ssv.toBSON());
    if (!ssvStatus.isOK()) {
        warning() << "could not update initial version of " << ns << " on shard primary "
                  << dbPrimaryShardId << ssvStatus.getStatus();
    }

    logChange(txn,
              txn->getClient()->clientAddress(true),
              "shardCollection",
              ns,
              BSON("version" << manager->getVersion().toString()));

    return Status::OK();
}
Example #23
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData, size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // If we're killed and we're not waiting on any more batches to come back, then we are ready
        // to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting down,
            // and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock();
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }
        return;
    }

    // Any early return from this point on should signal anyone waiting on an event, if
    // ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    if (!cbData.response.isOK()) {
        remote.status = cbData.response.getStatus();

        // If we failed to retrieve the batch because we couldn't contact the remote, we notify the
        // targeter that the host is unreachable. The caller can then retry on a new host.
        if (remote.status == ErrorCodes::HostUnreachable && remote.shardId) {
            auto shard = _params.shardRegistry->getShard(_params.txn, *remote.shardId);
            if (!shard) {
                remote.status =
                    Status(ErrorCodes::HostUnreachable,
                           str::stream() << "Could not find shard " << *remote.shardId
                                         << " containing host " << remote.hostAndPort.toString());
            } else {
                shard->getTargeter()->markHostUnreachable(remote.hostAndPort);
            }
        }

        return;
    }

    auto getMoreParseStatus = CursorResponse::parseFromBSON(cbData.response.getValue().data);
    if (!getMoreParseStatus.isOK()) {
        remote.status = getMoreParseStatus.getStatus();
        return;
    }

    auto cursorResponse = getMoreParseStatus.getValue();

    // If we have a cursor established, and we get a non-zero cursorid that is not equal to the
    // established cursorid, we will fail the operation.
    if (remote.cursorId && cursorResponse.cursorId != 0 &&
        *remote.cursorId != cursorResponse.cursorId) {
        remote.status = Status(ErrorCodes::BadValue,
                               str::stream() << "Expected cursorid " << *remote.cursorId
                                             << " but received " << cursorResponse.cursorId);
        return;
    }

    remote.cursorId = cursorResponse.cursorId;
    remote.cmdObj = boost::none;

    for (const auto& obj : cursorResponse.batch) {
        // If there's a sort, we're expecting the remote node to give us back a sort key.
        if (!_params.sort.isEmpty() &&
            obj[ClusterClientCursorParams::kSortKeyField].type() != BSONType::Object) {
            remote.status = Status(ErrorCodes::InternalError,
                                   str::stream() << "Missing field '"
                                                 << ClusterClientCursorParams::kSortKeyField
                                                 << "' in document: " << obj);
            return;
        }

        remote.docBuffer.push(obj);
        ++remote.fetchedCount;
    }

    // If we're doing a sorted merge, then we have to make sure to put this remote onto the
    // merge queue.
    if (!_params.sort.isEmpty() && !cursorResponse.batch.empty()) {
        _mergeQueue.push(remoteIndex);
    }

    // If the cursor is tailable and we just received an empty batch, the next return value should
    // be boost::none in order to indicate the end of the batch.
    if (_params.isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the batchSize
    // was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from remote
    // tailable cursors should be passed through to the client without asking for more batches.
    if (!_params.isTailable && !remote.hasNext() && !remote.exhausted()) {
        auto nextBatchStatus = askForNextBatch_inlock(remoteIndex);
        if (!nextBatchStatus.isOK()) {
            remote.status = nextBatchStatus;
            return;
        }
    }

    // ScopeGuard requires dismiss on success, but we want waiter to be signalled on success as
    // well as failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}
StatusWith<VersionType> CatalogManagerReplicaSet::_getConfigVersion() {
    const auto configShard = grid.shardRegistry()->getShard("config");
    const auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    auto readHost = readHostStatus.getValue();
    auto findStatus = grid.shardRegistry()->exhaustiveFind(readHost,
                                                           NamespaceString(VersionType::ConfigNS),
                                                           BSONObj(),
                                                           BSONObj(),
                                                           boost::none /* no limit */);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    auto queryResults = findStatus.getValue();

    if (queryResults.size() > 1) {
        return {ErrorCodes::RemoteValidationError,
                str::stream() << "should only have 1 document in " << VersionType::ConfigNS};
    }

    if (queryResults.empty()) {
        auto cmdStatus =
            grid.shardRegistry()->runCommand(readHost, "admin", BSON("listDatabases" << 1));
        if (!cmdStatus.isOK()) {
            return cmdStatus.getStatus();
        }

        const BSONObj& cmdResult = cmdStatus.getValue();

        Status cmdResultStatus = getStatusFromCommandResult(cmdResult);
        if (!cmdResultStatus.isOK()) {
            return cmdResultStatus;
        }

        for (const auto& dbEntry : cmdResult["databases"].Obj()) {
            const string& dbName = dbEntry["name"].String();

            if (dbName != "local" && dbName != "admin") {
                VersionType versionInfo;
                versionInfo.setMinCompatibleVersion(UpgradeHistory_UnreportedVersion);
                versionInfo.setCurrentVersion(UpgradeHistory_UnreportedVersion);
                return versionInfo;
            }
        }

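        // Only the 'local' and 'admin' databases exist, so the cluster metadata is
        // effectively empty and can be treated as a fresh install.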
        VersionType versionInfo;
        versionInfo.setMinCompatibleVersion(UpgradeHistory_EmptyVersion);
        versionInfo.setCurrentVersion(UpgradeHistory_EmptyVersion);
        return versionInfo;
    }

    BSONObj versionDoc = queryResults.front();
    auto versionTypeResult = VersionType::fromBSON(versionDoc);
    if (!versionTypeResult.isOK()) {
        return Status(ErrorCodes::UnsupportedFormat,
                      str::stream() << "invalid config version document: " << versionDoc
                                    << versionTypeResult.getStatus().toString());
    }

    return versionTypeResult.getValue();
}
Example #25
void AsyncResultsMerger::handleBatchResponse(
    const executor::TaskExecutor::RemoteCommandCallbackArgs& cbData, size_t remoteIndex) {
    stdx::lock_guard<stdx::mutex> lk(_mutex);

    auto& remote = _remotes[remoteIndex];

    // Clear the callback handle. This indicates that we are no longer waiting on a response from
    // 'remote'.
    remote.cbHandle = executor::TaskExecutor::CallbackHandle();

    // If we're in the process of shutting down then there's no need to process the batch.
    if (_lifecycleState != kAlive) {
        invariant(_lifecycleState == kKillStarted);

        // Make sure to wake up anyone waiting on '_currentEvent' if we're shutting down.
        signalCurrentEventIfReady_inlock();

        // Make a best effort to parse the response and retrieve the cursor id. We need the cursor
        // id in order to issue a killCursors command against it.
        if (cbData.response.isOK()) {
            auto cursorResponse = parseCursorResponse(cbData.response.getValue().data, remote);
            if (cursorResponse.isOK()) {
                remote.cursorId = cursorResponse.getValue().getCursorId();
            }
        }

        // If we're killed and we're not waiting on any more batches to come back, then we are ready
        // to kill the cursors on the remote hosts and clean up this cursor. Schedule the
        // killCursors command and signal that this cursor is now safe to destroy. We have to
        // promise not to touch any members of this class because 'this' could become invalid as
        // soon as we signal the event.
        if (!haveOutstandingBatchRequests_inlock()) {
            // If the event handle is invalid, then the executor is in the middle of shutting down,
            // and we can't schedule any more work for it to complete.
            if (_killCursorsScheduledEvent.isValid()) {
                scheduleKillCursors_inlock();
                _executor->signalEvent(_killCursorsScheduledEvent);
            }

            _lifecycleState = kKillComplete;
        }

        return;
    }

    // Any early return from this point on should signal anyone waiting on an event, if
    // ready() is true.
    ScopeGuard signaller = MakeGuard(&AsyncResultsMerger::signalCurrentEventIfReady_inlock, this);

    StatusWith<CursorResponse> cursorResponseStatus(
        cbData.response.isOK() ? parseCursorResponse(cbData.response.getValue().data, remote)
                               : cbData.response.getStatus());

    if (!cursorResponseStatus.isOK()) {
        // Notify the shard registry of the failure.
        if (remote.shardId) {
            auto shard = grid.shardRegistry()->getShard(_params.txn, *remote.shardId);
            if (!shard) {
                remote.status = Status(cursorResponseStatus.getStatus().code(),
                                       str::stream() << "Could not find shard " << *remote.shardId
                                                     << " containing host "
                                                     << remote.getTargetHost().toString());
            } else {
                ShardRegistry::updateReplSetMonitor(
                    shard->getTargeter(), remote.getTargetHost(), cursorResponseStatus.getStatus());
            }
        }

        // If the error is retriable, schedule another request.
        if (!remote.cursorId && remote.retryCount < kMaxNumFailedHostRetryAttempts &&
            isPerShardRetriableError(cursorResponseStatus.getStatus().code())) {
            ++remote.retryCount;

            // Since we potentially updated the targeter that the last host it chose might be
            // faulty, the call below may end up getting a different host.
            remote.status = askForNextBatch_inlock(remoteIndex);
            if (remote.status.isOK()) {
                return;
            }

            // If we end up here, it means we failed to schedule the retry request, which is a more
            // severe error that should not be retried. Just pass through to the error handling
            // logic below.
        } else {
            remote.status = cursorResponseStatus.getStatus();
        }

        // Unreachable host errors are swallowed if the 'allowPartialResults' option is set. We
        // remove the unreachable host entirely from consideration by marking it as exhausted.
        if (_params.isAllowPartialResults) {
            remote.status = Status::OK();

            // Clear the results buffer and cursor id.
            std::queue<BSONObj> emptyBuffer;
            std::swap(remote.docBuffer, emptyBuffer);
            remote.cursorId = 0;
        }

        return;
    }

    // Cursor id successfully established.
    auto cursorResponse = std::move(cursorResponseStatus.getValue());
    remote.cursorId = cursorResponse.getCursorId();
    remote.initialCmdObj = boost::none;

    for (const auto& obj : cursorResponse.getBatch()) {
        // If there's a sort, we're expecting the remote node to give us back a sort key.
        if (!_params.sort.isEmpty() &&
            obj[ClusterClientCursorParams::kSortKeyField].type() != BSONType::Object) {
            remote.status = Status(ErrorCodes::InternalError,
                                   str::stream() << "Missing field '"
                                                 << ClusterClientCursorParams::kSortKeyField
                                                 << "' in document: " << obj);
            return;
        }

        remote.docBuffer.push(obj);
        ++remote.fetchedCount;
    }

    // If we're doing a sorted merge, then we have to make sure to put this remote onto the
    // merge queue.
    if (!_params.sort.isEmpty() && !cursorResponse.getBatch().empty()) {
        _mergeQueue.push(remoteIndex);
    }

    // If the cursor is tailable and we just received an empty batch, the next return value should
    // be boost::none in order to indicate the end of the batch.
    if (_params.isTailable && !remote.hasNext()) {
        _eofNext = true;
    }

    // If even after receiving this batch we still don't have anything buffered (i.e. the batchSize
    // was zero), then we can schedule work to retrieve the next batch right away.
    //
    // We do not ask for the next batch if the cursor is tailable, as batches received from remote
    // tailable cursors should be passed through to the client without asking for more batches.
    if (!_params.isTailable && !remote.hasNext() && !remote.exhausted()) {
        remote.status = askForNextBatch_inlock(remoteIndex);
        if (!remote.status.isOK()) {
            return;
        }
    }

    // ScopeGuard requires dismiss on success, but we want waiter to be signalled on success as
    // well as failure.
    signaller.Dismiss();
    signalCurrentEventIfReady_inlock();
}