/**
 * Runs the sharding state recovery procedure using the recovery document stored in the config
 * collection namespace.
 *
 * Returns Status::OK() without doing any work when this node is not a shard server or when no
 * recovery document is found. Returns a non-OK status when the lookup throws a DBException, when
 * the recovery document cannot be parsed, or when the logging write to the config server fails.
 * A failure to clear the recovery document at the end is only logged as a warning and does not
 * fail the recovery.
 */
Status ShardingStateRecovery::recover(OperationContext* opCtx) {
    // Only shard servers carry out recovery.
    if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) {
        return Status::OK();
    }

    BSONObj rawRecoveryDoc;

    try {
        // The collection accessor is only needed for the lookup, so it is confined to this scope.
        AutoGetCollection configColl(opCtx, NamespaceString::kConfigCollectionNamespace, MODE_IS);

        const bool found = Helpers::findOne(
            opCtx, configColl.getCollection(), RecoveryDocument::getQuery(), rawRecoveryDoc);
        if (!found) {
            // No recovery document means there is nothing to recover.
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto swRecoveryDoc = RecoveryDocument::fromBSON(rawRecoveryDoc);
    if (!swRecoveryDoc.isOK()) {
        return swRecoveryDoc.getStatus();
    }

    const auto recoveryInfo = std::move(swRecoveryDoc.getValue());

    log() << "Sharding state recovery process found document " << redact(recoveryInfo.toBSON());

    // Sharding state is required to be enabled already by the time recovery runs.
    invariant(ShardingState::get(opCtx)->enabled());

    const auto updatersInFlight = recoveryInfo.getMinOpTimeUpdaters();
    if (!updatersInFlight) {
        // No updaters were in flight, so the recorded minOpTime is treated as up-to-date.
        grid.advanceConfigOpTime(recoveryInfo.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were " << updatersInFlight
          << " metadata change operations in flight. Contacting the config server primary in order "
             "to retrieve the most recent opTime.";

    // The latest opTime has to come from the config server, so perform a logging write against it.
    Status logChangeStatus =
        grid.catalogClient(opCtx)->logChange(opCtx,
                                             "Sharding minOpTime recovery",
                                             NamespaceString::kConfigCollectionNamespace.ns(),
                                             rawRecoveryDoc,
                                             ShardingCatalogClient::kMajorityWriteConcern);
    if (!logChangeStatus.isOK()) {
        return logChangeStatus;
    }

    log() << "Sharding state recovered. New config server opTime is " << grid.configOpTime();

    // Clear the recovery document so that the next startup does not need to recover again. This
    // step is best-effort: a failure is reported but does not fail the recovery.
    const Status clearStatus =
        modifyRecoveryDocument(opCtx, RecoveryDocument::Clear, kLocalWriteConcern);
    if (!clearStatus.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to "
                  << redact(clearStatus);
    }

    return Status::OK();
}
/**
 * Runs the sharding state recovery procedure using the recovery document stored in the config
 * collection namespace.
 *
 * Returns Status::OK() without doing any work when no recovery document is found. Returns a
 * non-OK status when the lookup throws a DBException, when the recovery document cannot be
 * parsed, or when the logging write to the config server fails. A failure to clear the recovery
 * document at the end is only logged as a warning and does not fail the recovery.
 */
Status ShardingStateRecovery::recover(OperationContext* txn) {
    BSONObj rawRecoveryDoc;

    try {
        // The collection accessor is only needed for the lookup, so it is confined to this scope.
        AutoGetCollection configColl(txn, NamespaceString::kConfigCollectionNamespace, MODE_IS);

        const bool found = Helpers::findOne(
            txn, configColl.getCollection(), RecoveryDocument::getQuery(), rawRecoveryDoc);
        if (!found) {
            // No recovery document means there is nothing to recover.
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto swRecoveryDoc = RecoveryDocument::fromBSON(rawRecoveryDoc);
    if (!swRecoveryDoc.isOK()) {
        return swRecoveryDoc.getStatus();
    }

    const auto recoveryInfo = std::move(swRecoveryDoc.getValue());

    log() << "Sharding state recovery process found document " << recoveryInfo.toBSON();

    // Initialize the sharding state from the config server string and shard name recorded in the
    // recovery document.
    ShardingState* const state = ShardingState::get(txn);
    state->initialize(txn, recoveryInfo.getConfigsvr().toString());
    state->setShardName(recoveryInfo.getShardName());

    const auto updatersInFlight = recoveryInfo.getMinOpTimeUpdaters();
    if (!updatersInFlight) {
        // No updaters were in flight, so the recorded minOpTime is treated as up-to-date.
        grid.shardRegistry()->advanceConfigOpTime(recoveryInfo.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were " << updatersInFlight
          << " metadata change operations in flight. Contacting the config server primary in order "
             "to retrieve the most recent opTime.";

    // The latest opTime has to come from the config server, so perform a logging write against it.
    Status logChangeStatus =
        grid.catalogManager(txn)->logChange(txn,
                                            "Sharding recovery thread",
                                            "Sharding minOpTime recovery",
                                            NamespaceString::kConfigCollectionNamespace.ns(),
                                            rawRecoveryDoc);
    if (!logChangeStatus.isOK()) {
        return logChangeStatus;
    }

    log() << "Sharding state recovered. New config server opTime is "
          << grid.shardRegistry()->getConfigOpTime();

    // Clear the recovery document so that the next startup does not need to recover again. This
    // step is best-effort: a failure is reported but does not fail the recovery.
    const Status clearStatus =
        modifyRecoveryDocument(txn, RecoveryDocument::Clear, kMajorityWriteConcern);
    if (!clearStatus.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << clearStatus;
    }

    return Status::OK();
}
/**
 * Runs the sharding state recovery procedure using the recovery document stored in the config
 * collection namespace.
 *
 * Returns Status::OK() without doing any work when this node is not a shard server or when no
 * recovery document is found. Returns a non-OK status when the lookup throws a DBException, when
 * the recovery document cannot be parsed, or when the logging write to the config server fails.
 * A failure to clear the recovery document at the end is only logged as a warning and does not
 * fail the recovery.
 */
Status ShardingStateRecovery::recover(OperationContext* txn) {
    // Only shard servers carry out recovery.
    if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) {
        return Status::OK();
    }

    BSONObj rawRecoveryDoc;

    try {
        // The collection accessor is only needed for the lookup, so it is confined to this scope.
        AutoGetCollection configColl(txn, NamespaceString::kConfigCollectionNamespace, MODE_IS);

        const bool found = Helpers::findOne(
            txn, configColl.getCollection(), RecoveryDocument::getQuery(), rawRecoveryDoc);
        if (!found) {
            // No recovery document means there is nothing to recover.
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto swRecoveryDoc = RecoveryDocument::fromBSON(rawRecoveryDoc);
    if (!swRecoveryDoc.isOK()) {
        return swRecoveryDoc.getStatus();
    }

    const auto recoveryInfo = std::move(swRecoveryDoc.getValue());

    log() << "Sharding state recovery process found document " << recoveryInfo.toBSON();

    // Make sure the sharding state is initialized.
    //
    // For backwards compatibility. Shards added by a v3.4 cluster should already have been
    // initialized by the shard identity document.
    // TODO(SERVER-25276): Remove this after 3.4 since 3.4 shards should always have ShardingState
    // initialized by this point.
    ShardingState* const state = ShardingState::get(txn);
    if (!state->enabled()) {
        state->initializeFromConfigConnString(txn, recoveryInfo.getConfigsvr().toString());
        state->setShardName(recoveryInfo.getShardName());
    }

    const auto updatersInFlight = recoveryInfo.getMinOpTimeUpdaters();
    if (!updatersInFlight) {
        // No updaters were in flight, so the recorded minOpTime is treated as up-to-date.
        grid.advanceConfigOpTime(recoveryInfo.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were " << updatersInFlight
          << " metadata change operations in flight. Contacting the config server primary in order "
             "to retrieve the most recent opTime.";

    // The latest opTime has to come from the config server, so perform a logging write against it.
    Status logChangeStatus =
        grid.catalogClient(txn)->logChange(txn,
                                           "Sharding minOpTime recovery",
                                           NamespaceString::kConfigCollectionNamespace.ns(),
                                           rawRecoveryDoc,
                                           ShardingCatalogClient::kMajorityWriteConcern);
    if (!logChangeStatus.isOK()) {
        return logChangeStatus;
    }

    log() << "Sharding state recovered. New config server opTime is " << grid.configOpTime();

    // Clear the recovery document so that the next startup does not need to recover again. This
    // step is best-effort: a failure is reported but does not fail the recovery.
    const Status clearStatus =
        modifyRecoveryDocument(txn, RecoveryDocument::Clear, kLocalWriteConcern);
    if (!clearStatus.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << clearStatus;
    }

    return Status::OK();
}