ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, const NamespaceString& nss, bool forceRefreshFromThisThread) { invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->getClient()->isInDirectClient()); auto const shardingState = ShardingState::get(opCtx); invariant(shardingState->canAcceptShardedCommands()); const auto routingInfo = uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh( opCtx, nss, forceRefreshFromThisThread)); const auto cm = routingInfo.cm(); if (!cm) { // No chunk manager, so unsharded. // Exclusive collection lock needed since we're now changing the metadata AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X); auto css = CollectionShardingState::get(opCtx, nss); css->refreshMetadata(opCtx, nullptr); return ChunkVersion::UNSHARDED(); } { AutoGetCollection autoColl(opCtx, nss, MODE_IS); auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); // We already have newer version if (metadata && metadata->getCollVersion().epoch() == cm->getVersion().epoch() && metadata->getCollVersion() >= cm->getVersion()) { LOG(1) << "Skipping refresh of metadata for " << nss << " " << metadata->getCollVersion() << " with an older " << cm->getVersion(); return metadata->getShardVersion(); } } // Exclusive collection lock needed since we're now changing the metadata AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X); auto css = CollectionShardingState::get(opCtx, nss); auto metadata = css->getMetadata(opCtx); // We already have newer version if (metadata && metadata->getCollVersion().epoch() == cm->getVersion().epoch() && metadata->getCollVersion() >= cm->getVersion()) { LOG(1) << "Skipping refresh of metadata for " << nss << " " << metadata->getCollVersion() << " with an older " << cm->getVersion(); return metadata->getShardVersion(); } std::unique_ptr<CollectionMetadata> newCollectionMetadata = stdx::make_unique<CollectionMetadata>(cm, shardingState->getShardName()); css->refreshMetadata(opCtx, std::move(newCollectionMetadata)); return css->getMetadata(opCtx)->getShardVersion(); }
void onDbVersionMismatch(OperationContext* opCtx, const StringData dbName, const DatabaseVersion& clientDbVersion, const boost::optional<DatabaseVersion>& serverDbVersion) noexcept { invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->getClient()->isInDirectClient()); auto const shardingState = ShardingState::get(opCtx); invariant(shardingState->canAcceptShardedCommands()); if (serverDbVersion && serverDbVersion->getUuid() == clientDbVersion.getUuid() && serverDbVersion->getLastMod() >= clientDbVersion.getLastMod()) { // The client was stale; do not trigger server-side refresh. return; } try { // TODO SERVER-33773 if the 'waitForMovePrimaryCriticalSection' flag is set on the // OperationShardingState, wait for the movePrimary critical section to complete before // attempting a refresh. } catch (const DBException& ex) { log() << "Failed to wait for movePrimary critical section to complete " << causedBy(redact(ex)); return; } try { forceDatabaseRefresh(opCtx, dbName); } catch (const DBException& ex) { log() << "Failed to refresh databaseVersion for database " << dbName << causedBy(redact(ex)); } }
void forceDatabaseRefresh(OperationContext* opCtx, const StringData dbName) { invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->getClient()->isInDirectClient()); auto const shardingState = ShardingState::get(opCtx); invariant(shardingState->canAcceptShardedCommands()); const auto refreshedDbVersion = uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabaseWithRefresh(opCtx, dbName)) .databaseVersion(); // First, check under a shared lock if another thread already updated the cached version. // This is a best-effort optimization to make as few threads as possible to convoy on the // exclusive lock below. auto databaseHolder = DatabaseHolder::get(opCtx); { // Take the DBLock directly rather than using AutoGetDb, to prevent a recursive call // into checkDbVersion(). Lock::DBLock dbLock(opCtx, dbName, MODE_IS); auto db = databaseHolder->getDb(opCtx, dbName); if (!db) { log() << "Database " << dbName << " has been dropped; not caching the refreshed databaseVersion"; return; } auto& dss = DatabaseShardingState::get(db); auto dssLock = DatabaseShardingState::DSSLock::lock(opCtx, &dss); const auto cachedDbVersion = dss.getDbVersion(opCtx, dssLock); if (cachedDbVersion && cachedDbVersion->getUuid() == refreshedDbVersion.getUuid() && cachedDbVersion->getLastMod() >= refreshedDbVersion.getLastMod()) { LOG(2) << "Skipping setting cached databaseVersion for " << dbName << " to refreshed version " << refreshedDbVersion.toBSON() << " because current cached databaseVersion is already " << cachedDbVersion->toBSON(); return; } } // The cached version is older than the refreshed version; update the cached version. Lock::DBLock dbLock(opCtx, dbName, MODE_X); auto db = databaseHolder->getDb(opCtx, dbName); if (!db) { log() << "Database " << dbName << " has been dropped; not caching the refreshed databaseVersion"; return; } auto& dss = DatabaseShardingState::get(db); auto dssLock = DatabaseShardingState::DSSLock::lockExclusive(opCtx, &dss); dss.setDbVersion(opCtx, std::move(refreshedDbVersion), dssLock); }
Status onShardVersionMismatch(OperationContext* opCtx, const NamespaceString& nss, ChunkVersion shardVersionReceived, bool forceRefreshFromThisThread) noexcept { invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->getClient()->isInDirectClient()); auto const shardingState = ShardingState::get(opCtx); invariant(shardingState->canAcceptShardedCommands()); LOG(2) << "Metadata refresh requested for " << nss.ns() << " at shard version " << shardVersionReceived; ShardingStatistics::get(opCtx).countStaleConfigErrors.addAndFetch(1); // Ensure any ongoing migrations have completed before trying to do the refresh. This wait is // just an optimization so that MongoS does not exhaust its maximum number of StaleConfig retry // attempts while the migration is being committed. try { auto& oss = OperationShardingState::get(opCtx); oss.waitForMigrationCriticalSectionSignal(opCtx); } catch (const DBException& ex) { return ex.toStatus(); } const auto currentShardVersion = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); const auto currentMetadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); if (currentMetadata) { return currentMetadata->getShardVersion(); } return ChunkVersion::UNSHARDED(); }(); if (currentShardVersion.epoch() == shardVersionReceived.epoch() && currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) { // Don't need to remotely reload if we're in the same epoch and the requested version is // smaller than the one we know about. This means that the remote side is behind. return Status::OK(); } try { forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread); return Status::OK(); } catch (const DBException& ex) { log() << "Failed to refresh metadata for collection" << nss << causedBy(redact(ex)); return ex.toStatus(); } }