/**
 * Reacts to a shard version mismatch reported for 'nss' by refreshing this shard's filtering
 * metadata, unless the locally known shard version already has the same epoch as
 * 'shardVersionReceived' and a major version that is at least as high.
 *
 * Preconditions (enforced through invariants): the operation holds no locks, is not running inside
 * a direct client, and the sharding state can accept sharded commands.
 *
 * @param opCtx operation context on whose behalf the check and the refresh are executed
 * @param nss namespace whose metadata is being checked
 * @param shardVersionReceived shard version that was attached to the incoming request
 * @param forceRefreshFromThisThread forwarded unchanged to forceShardFilteringMetadataRefresh
 *
 * @return Status::OK() if no refresh was necessary or the refresh succeeded; otherwise the status
 * of the DBException thrown while waiting for the migration critical section or while refreshing.
 *
 * NOTE(review): the function is noexcept, so an exception escaping from anything outside the two
 * try blocks below (for example from the lambda which reads the current metadata) would terminate
 * the process instead of being converted to a Status.
 */
Status onShardVersionMismatch(OperationContext* opCtx,
                              const NamespaceString& nss,
                              ChunkVersion shardVersionReceived,
                              bool forceRefreshFromThisThread) noexcept {
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());

    auto const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());

    LOG(2) << "Metadata refresh requested for " << nss.ns() << " at shard version "
           << shardVersionReceived;

    ShardingStatistics::get(opCtx).countStaleConfigErrors.addAndFetch(1);

    // Ensure any ongoing migrations have completed before trying to do the refresh. This wait is
    // just an optimization so that MongoS does not exhaust its maximum number of StaleConfig retry
    // attempts while the migration is being committed.
    try {
        auto& oss = OperationShardingState::get(opCtx);
        oss.waitForMigrationCriticalSectionSignal(opCtx);
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    // Read the currently known shard version under an intent-shared collection lock. The lock is
    // released as soon as the lambda returns, i.e. before any refresh is attempted.
    const auto currentShardVersion = [&] {
        AutoGetCollection autoColl(opCtx, nss, MODE_IS);
        const auto currentMetadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
        if (currentMetadata) {
            return currentMetadata->getShardVersion();
        }

        return ChunkVersion::UNSHARDED();
    }();

    if (currentShardVersion.epoch() == shardVersionReceived.epoch() &&
        currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) {
        // Don't need to remotely reload if we're in the same epoch and the requested version is
        // smaller than the one we know about. This means that the remote side is behind.
        return Status::OK();
    }

    try {
        forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread);
        return Status::OK();
    } catch (const DBException& ex) {
        // The trailing space keeps the namespace from being glued to the word "collection".
        log() << "Failed to refresh metadata for collection " << nss << causedBy(redact(ex));
        return ex.toStatus();
    }
}
// Merges two adjacent chunks whose bounds are expressed on the compound key pattern {x: 1, y: 1}
// and verifies that a single chunk spanning both original ranges remains, with the major version
// unchanged and the minor version increased.
TEST_F(MergeChunkTests, CompoundMerge) {
    const NamespaceString nss("foo.bar");
    const BSONObj kp = BSON("x" << 1 << "y" << 1);
    const OID epoch = OID::gen();

    // Bounds of the two adjacent chunks: [lowerBound, splitPoint) and [splitPoint, upperBound)
    const BSONObj lowerBound = BSON("x" << 0 << "y" << 1);
    const BSONObj splitPoint = BSON("x" << 1 << "y" << 0);
    const BSONObj upperBound = BSON("x" << 2 << "y" << 1);

    // Store the initial chunk metadata
    vector<KeyRange> ranges;
    ranges.emplace_back(nss.ns(), lowerBound, splitPoint, kp);
    ranges.emplace_back(nss.ns(), splitPoint, upperBound, kp);
    storeCollectionRanges(nss, shardName(), ranges, ChunkVersion(1, 0, epoch));

    ShardingState* const shardingState = ShardingState::get(getGlobalServiceContext());

    // Remember the version before the merge, then drop the cached metadata for the namespace
    ChunkVersion latestVersion;
    shardingState->refreshMetadataNow(&_txn, nss.ns(), &latestVersion);
    shardingState->resetMetadata(nss.ns());

    // Merge across the whole [lowerBound, upperBound) span
    string errMsg;
    bool result = mergeChunks(&_txn, nss, lowerBound, upperBound, epoch, &errMsg);
    ASSERT_EQUALS(errMsg, "");
    ASSERT(result);

    // Exactly one chunk covering the full span must remain
    CollectionMetadataPtr metadata = shardingState->getCollectionMetadata(nss.ns());

    ChunkType chunk;
    ASSERT(metadata->getNextChunk(lowerBound, &chunk));
    ASSERT(chunk.getMin().woCompare(lowerBound) == 0);
    ASSERT(chunk.getMax().woCompare(upperBound) == 0);
    ASSERT_EQUALS(metadata->getNumChunks(), 1u);

    // Same major version as before the merge, strictly greater minor version
    const ChunkVersion mergedVersion = metadata->getShardVersion();
    ASSERT_EQUALS(mergedVersion.majorVersion(), latestVersion.majorVersion());
    ASSERT_GREATER_THAN(mergedVersion.minorVersion(), latestVersion.minorVersion());

    assertWrittenAsMerged(ranges);
}
/**
 * Validates a moveChunk request and snapshots the collection metadata it will operate on.
 *
 * Must be invoked without any locks held. Takes ownership of 'request', refreshes the metadata for
 * the request's namespace, stores the current metadata in '_committedMetadata' and checks that a
 * chunk with exactly the requested bounds is present in it.
 *
 * Throws (via uassert) InvalidOptions if the operation carries no shard version, the refresh
 * status' code if the refresh fails, IncompatibleShardingMetadata if the refreshed shard version
 * has a zero major version, 40145 if no chunk is found for the requested min key and 40146 if the
 * found chunk's bounds differ from the requested ones. Throws SendStaleConfigException if the
 * epoch sent with the command differs from the epoch of the snapshotted collection version.
 */
MigrationSourceManager::MigrationSourceManager(OperationContext* txn, MoveChunkRequest request)
    : _args(std::move(request)), _startTime() {
    invariant(!txn->lockState()->isLocked());

    const auto& opShardingState = OperationShardingState::get(txn);
    uassert(ErrorCodes::InvalidOptions,
            "collection version is missing",
            opShardingState.hasShardVersion());

    // Even though the moveChunk command transmits a value in the operation's shardVersion field,
    // this value does not actually contain the shard version, but the global collection version.
    const ChunkVersion expectedCollectionVersion = opShardingState.getShardVersion(_args.getNss());

    // Bounds of the chunk being moved; only used for building log and error messages
    const ChunkRange chunkRange(_args.getMinKey(), _args.getMaxKey());

    log() << "Starting chunk migration for " << chunkRange.toString()
          << " with expected collection version " << expectedCollectionVersion;

    // Refresh the metadata for the namespace and obtain the resulting shard version. No locks are
    // held at this point (see the invariant above).
    ShardingState* const shardingState = ShardingState::get(txn);

    ChunkVersion shardVersion;
    const Status refreshStatus =
        shardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
    uassert(refreshStatus.code(),
            str::stream() << "cannot start migrate of chunk " << chunkRange.toString()
                          << " due to "
                          << refreshStatus.toString(),
            refreshStatus.isOK());

    // If the major version is zero, this means we do not have any chunks locally to migrate in
    // the first place
    uassert(ErrorCodes::IncompatibleShardingMetadata,
            str::stream() << "cannot start migrate of chunk " << chunkRange.toString()
                          << " with zero shard version",
            shardVersion.majorVersion() != 0);

    // Snapshot the committed metadata from the time the migration starts. The collection lock is
    // only held for the duration of this scope.
    {
        ScopedTransaction scopedXact(txn, MODE_IS);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

        auto css = CollectionShardingState::get(txn, _args.getNss());
        _committedMetadata = css->getMetadata();
    }

    const ChunkVersion collectionVersion = _committedMetadata->getCollVersion();

    // A differing epoch is reported to the caller as a stale config error
    if (expectedCollectionVersion.epoch() != collectionVersion.epoch()) {
        throw SendStaleConfigException(
            _args.getNss().ns(),
            str::stream() << "cannot move chunk " << chunkRange.toString()
                          << " because collection may have been dropped. "
                          << "current epoch: "
                          << collectionVersion.epoch()
                          << ", cmd epoch: "
                          << expectedCollectionVersion.epoch(),
            expectedCollectionVersion,
            collectionVersion);
    }

    // With nonzero shard version, we must have a coll version >= our shard version
    invariant(collectionVersion >= shardVersion);

    // With nonzero shard version, we must have a shard key
    invariant(!_committedMetadata->getKeyPattern().isEmpty());

    // If this assertion is hit, it means that whoever called the shard moveChunk command
    // (mongos or the CSRS balancer) did not check whether the chunk actually belongs to this
    // shard. It is a benign error and does not indicate data corruption.
    ChunkType origChunk;
    uassert(40145,
            str::stream() << "Chunk with bounds " << chunkRange.toString()
                          << " is not owned by this shard.",
            _committedMetadata->getNextChunk(_args.getMinKey(), &origChunk));

    // The chunk found for the min key must match the requested bounds exactly
    const bool sameMin = origChunk.getMin().woCompare(_args.getMinKey()) == 0;
    const bool sameMax = origChunk.getMax().woCompare(_args.getMaxKey()) == 0;
    uassert(40146,
            str::stream() << "Unable to find chunk with the exact bounds "
                          << chunkRange.toString()
                          << " at collection version "
                          << collectionVersion.toString()
                          << ". This indicates corrupted metadata.",
            sameMin && sameMax);
}
/** * @ return true if not in sharded mode or if version for this client is ok */ bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) { if ( ! shardingState.enabled() ) return true; if ( ! isMasterNs( ns.c_str() ) ) { // right now connections to secondaries aren't versioned at all return true; } ShardedConnectionInfo* info = ShardedConnectionInfo::get( false ); if ( ! info ) { // this means the client has nothing sharded // so this allows direct connections to do whatever they want // which i think is the correct behavior return true; } if ( info->inForceVersionOkMode() ) { return true; } // TODO : all collections at some point, be sharded or not, will have a version // (and a CollectionMetadata) received = info->getVersion( ns ); wanted = shardingState.getVersion( ns ); if( received.isWriteCompatibleWith( wanted ) ) return true; // // Figure out exactly why not compatible, send appropriate error message // The versions themselves are returned in the error, so not needed in messages here // // Check epoch first, to send more meaningful message, since other parameters probably // won't match either if( ! wanted.hasCompatibleEpoch( received ) ){ errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", " << "the collection may have been dropped and recreated"; return false; } if( ! wanted.isSet() && received.isSet() ){ errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", " << "the collection may have been dropped"; return false; } if( wanted.isSet() && ! 
received.isSet() ){ errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", " << "but no version set in request"; return false; } if( wanted.majorVersion() != received.majorVersion() ){ // // Could be > or < - wanted is > if this is the source of a migration, // wanted < if this is the target of a migration // errmsg = str::stream() << "version mismatch detected for " << ns << ", " << "stored major version " << wanted.majorVersion() << " does not match received " << received.majorVersion(); return false; } // Those are all the reasons the versions can mismatch verify( false ); return false; }