示例#1
0
/**
 * Looks up the sharding state recovery document in the collection identified by
 * NamespaceString::kConfigCollectionNamespace and, when one exists, uses it to initialize the
 * sharding state and to advance the config server opTime known to this node.
 *
 * Returns Status::OK() when there is no recovery document or when recovery completes. A failure
 * to clear the recovery document at the end is only logged as a warning and does not fail the
 * call. Returns a non-OK status when reading the document throws a DBException, when the
 * document cannot be parsed, or when the logging write to the config server fails.
 */
Status ShardingStateRecovery::recover(OperationContext* txn) {
    BSONObj recoveryDocBSON;

    try {
        AutoGetCollection autoColl(txn, NamespaceString::kConfigCollectionNamespace, MODE_IS);
        if (!Helpers::findOne(
                txn, autoColl.getCollection(), RecoveryDocument::getQuery(), recoveryDocBSON)) {
            // No recovery document, so there is nothing to recover
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto recoveryDocStatus = RecoveryDocument::fromBSON(recoveryDocBSON);
    if (!recoveryDocStatus.isOK())
        return recoveryDocStatus.getStatus();

    // recoveryDocStatus is const, so std::move on its value would silently degrade to a copy.
    // Bind by reference instead; recoveryDocStatus outlives every use of recoveryDoc below.
    const auto& recoveryDoc = recoveryDocStatus.getValue();

    log() << "Sharding state recovery process found document " << recoveryDoc.toBSON();

    // Make sure the sharding state is initialized
    ShardingState* const shardingState = ShardingState::get(txn);

    shardingState->initialize(txn, recoveryDoc.getConfigsvr().toString());
    shardingState->setShardName(recoveryDoc.getShardName());

    if (!recoveryDoc.getMinOpTimeUpdaters()) {
        // Treat the minOpTime as up-to-date
        grid.shardRegistry()->advanceConfigOpTime(recoveryDoc.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were "
          << recoveryDoc.getMinOpTimeUpdaters()
          << " metadata change operations in flight. Contacting the config server primary in order "
             "to retrieve the most recent opTime.";

    // Need to fetch the latest opTime from the config server, so do a logging write
    Status status =
        grid.catalogManager(txn)->logChange(txn,
                                            "Sharding recovery thread",
                                            "Sharding minOpTime recovery",
                                            NamespaceString::kConfigCollectionNamespace.ns(),
                                            recoveryDocBSON);
    if (!status.isOK())
        return status;

    log() << "Sharding state recovered. New config server opTime is "
          << grid.shardRegistry()->getConfigOpTime();

    // Finally, clear the recovery document so next time we don't need to recover. This is
    // best-effort: a failure here is logged but does not fail the recovery.
    status = modifyRecoveryDocument(txn, RecoveryDocument::Clear, kMajorityWriteConcern);
    if (!status.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << status;
    }

    return Status::OK();
}
/**
 * Looks up the sharding state recovery document in the collection identified by
 * NamespaceString::kConfigCollectionNamespace and, when one exists, uses it to advance the
 * config server opTime known to this node, initializing the sharding state first if it is not
 * already enabled.
 *
 * Does nothing and returns Status::OK() when the server's cluster role is not
 * ClusterRole::ShardServer. Also returns Status::OK() when there is no recovery document or when
 * recovery completes. A failure to clear the recovery document at the end is only logged as a
 * warning and does not fail the call. Returns a non-OK status when reading the document throws a
 * DBException, when the document cannot be parsed, or when the logging write to the config
 * server fails.
 */
Status ShardingStateRecovery::recover(OperationContext* txn) {
    if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) {
        return Status::OK();
    }

    BSONObj recoveryDocBSON;

    try {
        AutoGetCollection autoColl(txn, NamespaceString::kConfigCollectionNamespace, MODE_IS);
        if (!Helpers::findOne(
                txn, autoColl.getCollection(), RecoveryDocument::getQuery(), recoveryDocBSON)) {
            // No recovery document, so there is nothing to recover
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto recoveryDocStatus = RecoveryDocument::fromBSON(recoveryDocBSON);
    if (!recoveryDocStatus.isOK())
        return recoveryDocStatus.getStatus();

    // recoveryDocStatus is const, so std::move on its value would silently degrade to a copy.
    // Bind by reference instead; recoveryDocStatus outlives every use of recoveryDoc below.
    const auto& recoveryDoc = recoveryDocStatus.getValue();

    log() << "Sharding state recovery process found document " << recoveryDoc.toBSON();

    // Make sure the sharding state is initialized
    ShardingState* const shardingState = ShardingState::get(txn);

    // For backwards compatibility. Shards added by a v3.4 cluster should have been initialized by
    // the shard identity document.
    // TODO(SERVER-25276): Remove this after 3.4 since 3.4 shards should always have ShardingState
    // initialized by this point.
    if (!shardingState->enabled()) {
        shardingState->initializeFromConfigConnString(txn, recoveryDoc.getConfigsvr().toString());
        shardingState->setShardName(recoveryDoc.getShardName());
    }

    if (!recoveryDoc.getMinOpTimeUpdaters()) {
        // Treat the minOpTime as up-to-date
        grid.advanceConfigOpTime(recoveryDoc.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were "
          << recoveryDoc.getMinOpTimeUpdaters()
          << " metadata change operations in flight. Contacting the config server primary in order "
             "to retrieve the most recent opTime.";

    // Need to fetch the latest opTime from the config server, so do a logging write
    Status status =
        grid.catalogClient(txn)->logChange(txn,
                                           "Sharding minOpTime recovery",
                                           NamespaceString::kConfigCollectionNamespace.ns(),
                                           recoveryDocBSON,
                                           ShardingCatalogClient::kMajorityWriteConcern);
    if (!status.isOK())
        return status;

    log() << "Sharding state recovered. New config server opTime is " << grid.configOpTime();

    // Finally, clear the recovery document so next time we don't need to recover. This is
    // best-effort: a failure here is logged but does not fail the recovery.
    status = modifyRecoveryDocument(txn, RecoveryDocument::Clear, kLocalWriteConcern);
    if (!status.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << status;
    }

    return Status::OK();
}