void MetadataManager::append(BSONObjBuilder* builder) const {
    stdx::lock_guard<stdx::mutex> lg(_managerLock);

    _rangesToClean.append(builder);

    BSONArrayBuilder pcArr(builder->subarrayStart("pendingChunks"));
    for (const auto& entry : _receivingChunks) {
        BSONObjBuilder obj;
        ChunkRange r = ChunkRange(entry.first, entry.second);
        r.append(&obj);
        pcArr.append(obj.done());
    }
    pcArr.done();

    if (_metadata.empty()) {
        return;
    }

    BSONArrayBuilder amrArr(builder->subarrayStart("activeMetadataRanges"));
    for (const auto& entry : _metadata.back()->metadata.getChunks()) {
        BSONObjBuilder obj;
        ChunkRange r = ChunkRange(entry.first, entry.second);
        r.append(&obj);
        amrArr.append(obj.done());
    }
    amrArr.done();
}
void MetadataManager::_removeRangeToClean_inlock(const ChunkRange& range) {
    auto it = _rangesToClean.upper_bound(range.getMin());
    // We want our iterator to point at the greatest value
    // that is still less than or equal to range.
    if (it != _rangesToClean.begin()) {
        --it;
    }

    for (; it != _rangesToClean.end() && it->first < range.getMax();) {
        if (it->second <= range.getMin()) {
            ++it;
            continue;
        }

        // There's overlap between *it and range so we remove *it
        // and then replace with new ranges.
        BSONObj oldMin = it->first, oldMax = it->second;
        _rangesToClean.erase(it++);
        if (oldMin < range.getMin()) {
            _addRangeToClean_inlock(ChunkRange(oldMin, range.getMin()));
        }

        if (oldMax > range.getMax()) {
            _addRangeToClean_inlock(ChunkRange(range.getMax(), oldMax));
        }
    }
}
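// Illustrative only: a minimal, self-contained sketch (not MongoDB code) of the
// overlap-splitting idea in _removeRangeToClean_inlock above, using int bounds in
// place of BSONObj keys. All names below are hypothetical.
#include <map>

// Removes [min, max) from a map of disjoint half-open ranges keyed by their lower
// bound, re-inserting the leftover pieces of any partially overlapped entry.
inline void removeRange(std::map<int, int>& ranges, int min, int max) {
    auto it = ranges.upper_bound(min);
    if (it != ranges.begin()) {
        --it;  // Step back to the last range that starts at or before 'min'.
    }
    while (it != ranges.end() && it->first < max) {
        if (it->second <= min) {
            ++it;  // Ends before 'min'; no overlap.
            continue;
        }
        const int oldMin = it->first, oldMax = it->second;
        it = ranges.erase(it);
        if (oldMin < min) {
            ranges.emplace(oldMin, min);  // Keep the piece to the left of [min, max).
        }
        if (oldMax > max) {
            ranges.emplace(max, oldMax);  // Keep the piece to the right of [min, max).
        }
    }
}

// For example, removing [5, 15) from {[0, 10), [12, 20)} leaves {[0, 5), [15, 20)}.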
void MetadataManager::append(BSONObjBuilder* builder) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    BSONArrayBuilder rtcArr(builder->subarrayStart("rangesToClean"));
    for (const auto& entry : _rangesToClean) {
        BSONObjBuilder obj;
        ChunkRange r = ChunkRange(entry.first, entry.second);
        r.append(&obj);
        rtcArr.append(obj.done());
    }
    rtcArr.done();

    BSONArrayBuilder pcArr(builder->subarrayStart("pendingChunks"));
    for (const auto& entry : _receivingChunks) {
        BSONObjBuilder obj;
        ChunkRange r = ChunkRange(entry.first, entry.second);
        r.append(&obj);
        pcArr.append(obj.done());
    }
    pcArr.done();

    BSONArrayBuilder amrArr(builder->subarrayStart("activeMetadataRanges"));
    for (const auto& entry : _activeMetadataTracker->metadata->getChunks()) {
        BSONObjBuilder obj;
        ChunkRange r = ChunkRange(entry.first, entry.second);
        r.append(&obj);
        amrArr.append(obj.done());
    }
    amrArr.done();
}
StatusWith<DistLockManager::ScopedDistLock> MigrationManager::_getDistLock(
    OperationContext* txn, const Migration& migration) {
    const std::string whyMessage(str::stream()
                                 << "migrating chunk "
                                 << ChunkRange(migration.chunkInfo.migrateInfo.minKey,
                                               migration.chunkInfo.migrateInfo.maxKey)
                                        .toString()
                                 << " in " << migration.chunkInfo.migrateInfo.ns);

    StatusWith<DistLockManager::ScopedDistLock> distLockStatus =
        Grid::get(txn)->catalogClient(txn)->distLock(
            txn, migration.chunkInfo.migrateInfo.ns, whyMessage);

    if (!distLockStatus.isOK()) {
        const std::string msg = str::stream()
            << "Could not acquire collection lock for " << migration.chunkInfo.migrateInfo.ns
            << " to migrate chunk "
            << redact(ChunkRange(migration.chunkInfo.migrateInfo.minKey,
                                 migration.chunkInfo.migrateInfo.maxKey)
                          .toString())
            << " due to " << distLockStatus.getStatus().toString();
        warning() << msg;
        return {distLockStatus.getStatus().code(), msg};
    }

    return std::move(distLockStatus.getValue());
}
StatusWith<ChunkRange> ChunkRange::fromBSON(const BSONObj& obj) {
    BSONElement minKey;
    {
        Status minKeyStatus = extractObject(obj, kMinKey, &minKey);
        if (!minKeyStatus.isOK()) {
            return minKeyStatus;
        }
    }

    BSONElement maxKey;
    {
        Status maxKeyStatus = extractObject(obj, kMaxKey, &maxKey);
        if (!maxKeyStatus.isOK()) {
            return maxKeyStatus;
        }
    }

    if (SimpleBSONObjComparator::kInstance.evaluate(minKey.Obj() >= maxKey.Obj())) {
        return {ErrorCodes::FailedToParse,
                str::stream() << "min: " << minKey.Obj()
                              << " should be less than max: " << maxKey.Obj()};
    }

    return ChunkRange(minKey.Obj().getOwned(), maxKey.Obj().getOwned());
}
std::string MoveChunkRequest::toString() const {
    std::stringstream ss;
    ss << "ns: " << getNss().ns() << ", " << redact(ChunkRange(getMinKey(), getMaxKey()).toString())
       << ", fromShard: " << getFromShardId() << ", toShard: " << getToShardId();

    return ss.str();
}
StatusWith<ChunkRange> ChunkRange::fromBSON(const BSONObj& obj) { BSONElement minKey; { Status minKeyStatus = bsonExtractTypedField(obj, kMinKey, Object, &minKey); if (!minKeyStatus.isOK()) { return {minKeyStatus.code(), str::stream() << "Invalid min key due to " << minKeyStatus.reason()}; } if (minKey.Obj().isEmpty()) { return {ErrorCodes::BadValue, "The min key cannot be empty"}; } } BSONElement maxKey; { Status maxKeyStatus = bsonExtractTypedField(obj, kMaxKey, Object, &maxKey); if (!maxKeyStatus.isOK()) { return {maxKeyStatus.code(), str::stream() << "Invalid max key due to " << maxKeyStatus.reason()}; } if (maxKey.Obj().isEmpty()) { return {ErrorCodes::BadValue, "The max key cannot be empty"}; } } if (SimpleBSONObjComparator::kInstance.evaluate(minKey.Obj() >= maxKey.Obj())) { return {ErrorCodes::FailedToParse, str::stream() << "min: " << minKey.Obj() << " should be less than max: " << maxKey.Obj()}; } return ChunkRange(minKey.Obj().getOwned(), maxKey.Obj().getOwned()); }
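// A minimal usage sketch for fromBSON(), assuming the constants kMinKey/kMaxKey map to
// the "min" and "max" field names (hypothetical example values):
//
//     auto parsedRange = ChunkRange::fromBSON(BSON("min" << BSON("x" << 0)
//                                                  << "max" << BSON("x" << 10)));
//     if (!parsedRange.isOK()) {
//         return parsedRange.getStatus();  // e.g. FailedToParse when min >= max
//     }
//     ChunkRange range = std::move(parsedRange.getValue());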
boost::optional<ChunkRange> ChunkRange::overlapWith(ChunkRange const& other) const {
    auto le = [](auto const& a, auto const& b) { return a.woCompare(b) <= 0; };
    if (le(other._maxKey, _minKey) || le(_maxKey, other._minKey)) {
        return boost::none;
    }
    return ChunkRange(le(_minKey, other._minKey) ? other._minKey : _minKey,
                      le(_maxKey, other._maxKey) ? _maxKey : other._maxKey);
}
Status ActiveMigrationsRegistry::ActiveMoveChunkState::constructErrorStatus() const {
    return {ErrorCodes::ConflictingOperationInProgress,
            str::stream() << "Unable to start new migration because this shard is currently "
                             "donating chunk "
                          << ChunkRange(args.getMinKey(), args.getMaxKey()).toString()
                          << " for namespace " << args.getNss().ns() << " to "
                          << args.getToShardId()};
}
StatusWith<ChunkRange> ChunkRange::fromBSON(const BSONObj& obj) { BSONElement minKey; { Status minKeyStatus = bsonExtractTypedField(obj, kMinKey, Object, &minKey); if (!minKeyStatus.isOK()) { return {minKeyStatus.code(), str::stream() << "Invalid min key due to " << minKeyStatus.reason()}; } } BSONElement maxKey; { Status maxKeyStatus = bsonExtractTypedField(obj, kMaxKey, Object, &maxKey); if (!maxKeyStatus.isOK()) { return {maxKeyStatus.code(), str::stream() << "Invalid max key due to " << maxKeyStatus.reason()}; } } return ChunkRange(minKey.Obj().getOwned(), maxKey.Obj().getOwned()); }
ChunkRange ChunkRange::unionWith(ChunkRange const& other) const {
    auto le = [](auto const& a, auto const& b) { return a.woCompare(b) <= 0; };
    return ChunkRange(le(_minKey, other._minKey) ? _minKey : other._minKey,
                      le(_maxKey, other._maxKey) ? other._maxKey : _maxKey);
}
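// Illustrative only: the interval semantics of overlapWith() and unionWith() above,
// restated with plain int bounds instead of BSONObj shard keys. The helper names and
// types below are hypothetical, not part of the ChunkRange API.
#include <algorithm>
#include <optional>
#include <utility>

using Interval = std::pair<int, int>;  // half-open [first, second)

// Mirrors ChunkRange::overlapWith: no result when the intervals are disjoint or merely touch.
inline std::optional<Interval> overlapOf(const Interval& a, const Interval& b) {
    if (b.second <= a.first || a.second <= b.first) {
        return std::nullopt;
    }
    return Interval{std::max(a.first, b.first), std::min(a.second, b.second)};
}

// Mirrors ChunkRange::unionWith: the smallest interval covering both inputs.
inline Interval unionOf(const Interval& a, const Interval& b) {
    return Interval{std::min(a.first, b.first), std::max(a.second, b.second)};
}

// For example, overlapOf({0, 10}, {5, 20}) is {5, 10}, overlapOf({0, 5}, {5, 10}) is
// empty, and unionOf({0, 5}, {8, 10}) is {0, 10}.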
MigrationSourceManager::MigrationSourceManager(OperationContext* txn, MoveChunkRequest request)
    : _args(std::move(request)), _startTime() {
    invariant(!txn->lockState()->isLocked());

    const auto& oss = OperationShardingState::get(txn);
    if (!oss.hasShardVersion()) {
        uasserted(ErrorCodes::InvalidOptions, "collection version is missing");
    }

    // Even though the moveChunk command transmits a value in the operation's shardVersion field,
    // this value does not actually contain the shard version, but the global collection version.
    const ChunkVersion expectedCollectionVersion = oss.getShardVersion(_args.getNss());

    log() << "Starting chunk migration for "
          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
          << " with expected collection version " << expectedCollectionVersion;

    // Now that the collection is locked, snapshot the metadata and fetch the latest versions
    ShardingState* const shardingState = ShardingState::get(txn);

    ChunkVersion shardVersion;

    Status refreshStatus =
        shardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
    if (!refreshStatus.isOK()) {
        uasserted(refreshStatus.code(),
                  str::stream() << "cannot start migrate of chunk "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " due to " << refreshStatus.toString());
    }

    if (shardVersion.majorVersion() == 0) {
        // If the major version is zero, this means we do not have any chunks locally to migrate in
        // the first place
        uasserted(ErrorCodes::IncompatibleShardingMetadata,
                  str::stream() << "cannot start migrate of chunk "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " with zero shard version");
    }

    // Snapshot the committed metadata from the time the migration starts
    {
        ScopedTransaction scopedXact(txn, MODE_IS);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

        auto css = CollectionShardingState::get(txn, _args.getNss());
        _committedMetadata = css->getMetadata();
    }

    const ChunkVersion collectionVersion = _committedMetadata->getCollVersion();

    if (expectedCollectionVersion.epoch() != collectionVersion.epoch()) {
        throw SendStaleConfigException(
            _args.getNss().ns(),
            str::stream() << "cannot move chunk "
                          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                          << " because collection may have been dropped. "
                          << "current epoch: " << collectionVersion.epoch()
                          << ", cmd epoch: " << expectedCollectionVersion.epoch(),
            expectedCollectionVersion,
            collectionVersion);
    }

    // With nonzero shard version, we must have a coll version >= our shard version
    invariant(collectionVersion >= shardVersion);

    // With nonzero shard version, we must have a shard key
    invariant(!_committedMetadata->getKeyPattern().isEmpty());

    ChunkType origChunk;
    if (!_committedMetadata->getNextChunk(_args.getMinKey(), &origChunk)) {
        // If this assertion is hit, it means that whoever called the shard moveChunk command
        // (mongos or the CSRS balancer) did not check whether the chunk actually belongs to this
        // shard. It is a benign error and does not indicate data corruption.
        uasserted(40145,
                  str::stream() << "Chunk with bounds "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " is not owned by this shard.");
    }

    uassert(40146,
            str::stream() << "Unable to find chunk with the exact bounds "
                          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                          << " at collection version " << collectionVersion.toString()
                          << ". This indicates corrupted metadata.",
            origChunk.getMin().woCompare(_args.getMinKey()) == 0 &&
                origChunk.getMax().woCompare(_args.getMaxKey()) == 0);
}
void ChunkSplitter::_runAutosplit(const NamespaceString& nss,
                                  const BSONObj& min,
                                  const BSONObj& max,
                                  long dataWritten) {
    if (!_isPrimary) {
        return;
    }

    try {
        const auto opCtx = cc().makeOperationContext();
        const auto routingInfo = uassertStatusOK(
            Grid::get(opCtx.get())->catalogCache()->getCollectionRoutingInfo(opCtx.get(), nss));

        uassert(ErrorCodes::NamespaceNotSharded,
                "Could not split chunk. Collection is no longer sharded",
                routingInfo.cm());

        const auto cm = routingInfo.cm();
        const auto chunk = cm->findIntersectingChunkWithSimpleCollation(min);

        // Stop if chunk's range differs from the range we were expecting to split.
        if ((0 != chunk.getMin().woCompare(min)) || (0 != chunk.getMax().woCompare(max)) ||
            (chunk.getShardId() != ShardingState::get(opCtx.get())->getShardName())) {
            LOG(1) << "Cannot auto-split chunk with range '"
                   << redact(ChunkRange(min, max).toString()) << "' for nss '" << nss
                   << "' on shard '" << ShardingState::get(opCtx.get())->getShardName()
                   << "' because since scheduling auto-split the chunk has been changed to '"
                   << redact(chunk.toString()) << "'";
            return;
        }

        const ChunkRange chunkRange(chunk.getMin(), chunk.getMax());

        const auto balancerConfig = Grid::get(opCtx.get())->getBalancerConfiguration();
        // Ensure we have the most up-to-date balancer configuration
        uassertStatusOK(balancerConfig->refreshAndCheck(opCtx.get()));

        if (!balancerConfig->getShouldAutoSplit()) {
            return;
        }

        const uint64_t maxChunkSizeBytes = balancerConfig->getMaxChunkSizeBytes();

        LOG(1) << "about to initiate autosplit: " << redact(chunk.toString())
               << " dataWritten since last check: " << dataWritten
               << " maxChunkSizeBytes: " << maxChunkSizeBytes;

        auto splitPoints = uassertStatusOK(splitVector(opCtx.get(),
                                                       nss,
                                                       cm->getShardKeyPattern().toBSON(),
                                                       chunk.getMin(),
                                                       chunk.getMax(),
                                                       false,
                                                       boost::none,
                                                       boost::none,
                                                       boost::none,
                                                       maxChunkSizeBytes));

        if (splitPoints.size() <= 1) {
            // No split points means there isn't enough data to split on; 1 split point means we
            // have between half the chunk size to full chunk size so there is no need to split yet
            return;
        }

        // We assume that if the chunk being split is the first (or last) one on the collection,
        // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the
        // very first (or last) key as a split point.
        //
        // This heuristic is skipped for "special" shard key patterns that are not likely to
        // produce monotonically increasing or decreasing values (e.g. hashed shard keys).

        // Keeps track of the minKey of the top chunk after the split so we can migrate the chunk.
        BSONObj topChunkMinKey;

        if (KeyPattern::isOrderedKeyPattern(cm->getShardKeyPattern().toBSON())) {
            if (0 ==
                cm->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk.getMin())) {
                // MinKey is infinity (This is the first chunk on the collection)
                BSONObj key =
                    findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), true);
                if (!key.isEmpty()) {
                    splitPoints.front() = key.getOwned();
                    topChunkMinKey = cm->getShardKeyPattern().getKeyPattern().globalMin();
                }
            } else if (0 ==
                       cm->getShardKeyPattern().getKeyPattern().globalMax().woCompare(
                           chunk.getMax())) {
                // MaxKey is infinity (This is the last chunk on the collection)
                BSONObj key =
                    findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), false);
                if (!key.isEmpty()) {
                    splitPoints.back() = key.getOwned();
                    topChunkMinKey = key.getOwned();
                }
            }
        }

        uassertStatusOK(splitChunkAtMultiplePoints(opCtx.get(),
                                                   chunk.getShardId(),
                                                   nss,
                                                   cm->getShardKeyPattern(),
                                                   cm->getVersion(),
                                                   chunkRange,
                                                   splitPoints));

        const bool shouldBalance = isAutoBalanceEnabled(opCtx.get(), nss, balancerConfig);

        log() << "autosplitted " << nss << " chunk: " << redact(chunk.toString()) << " into "
              << (splitPoints.size() + 1) << " parts (maxChunkSizeBytes " << maxChunkSizeBytes
              << ")"
              << (topChunkMinKey.isEmpty() ? ""
                                           : " (top chunk migration suggested" +
                          (std::string)(shouldBalance ? ")" : ", but no migrations allowed)"));

        // Balance the resulting chunks if the autobalance option is enabled and if we split at the
        // first or last chunk on the collection as part of top chunk optimization.
        if (!shouldBalance || topChunkMinKey.isEmpty()) {
            return;
        }

        // Tries to move the top chunk out of the shard to prevent the hot spot from staying on a
        // single shard. This is based on the assumption that succeeding inserts will fall on the
        // top chunk.
        moveChunk(opCtx.get(), nss, topChunkMinKey);
    } catch (const DBException& ex) {
        log() << "Unable to auto-split chunk " << redact(ChunkRange(min, max).toString())
              << " in nss " << nss << causedBy(redact(ex.toStatus()));
    } catch (const std::exception& e) {
        log() << "caught exception while splitting chunk: " << redact(e.what());
    }
}
void MigrationManager::_executeMigrations(OperationContext* txn,
                                          MigrationStatuses* migrationStatuses) {
    for (auto& migration : _activeMigrations) {
        const NamespaceString nss(migration.chunkInfo.migrateInfo.ns);

        auto scopedCMStatus = ScopedChunkManager::getExisting(txn, nss);
        if (!scopedCMStatus.isOK()) {
            // Unable to find the ChunkManager for "nss" for whatever reason; abandon this
            // migration and proceed to the next.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(MigrationStatuses::value_type(
                migration.chunkInfo.migrateInfo.getName(), std::move(scopedCMStatus.getStatus())));
            continue;
        }

        ChunkManager* const chunkManager = scopedCMStatus.getValue().cm();

        auto chunk =
            chunkManager->findIntersectingChunk(txn, migration.chunkInfo.migrateInfo.minKey);

        {
            // No need to lock the mutex. Only this function and _takeDistLockForAMigration
            // manipulate "_distributedLocks". No need to protect serial actions.
            if (!_takeDistLockForAMigration(txn, migration, migrationStatuses)) {
                // If there is a lock conflict between the balancer and the shard, or a shard and a
                // shard, the migration has been rescheduled. Otherwise an attempt to take the lock
                // failed for whatever reason and this migration is being abandoned.
                continue;
            }
        }

        const MigrationRequest& migrationRequest = migration.chunkInfo;

        BSONObjBuilder builder;
        MoveChunkRequest::appendAsCommand(
            &builder,
            nss,
            chunkManager->getVersion(),
            Grid::get(txn)->shardRegistry()->getConfigServerConnectionString(),
            migrationRequest.migrateInfo.from,
            migrationRequest.migrateInfo.to,
            ChunkRange(chunk->getMin(), chunk->getMax()),
            migrationRequest.maxChunkSizeBytes,
            migrationRequest.secondaryThrottle,
            migrationRequest.waitForDelete,
            migration.oldShard ? true : false);  // takeDistLock flag.

        BSONObj moveChunkRequestObj = builder.obj();

        const auto recipientShard =
            grid.shardRegistry()->getShard(txn, migration.chunkInfo.migrateInfo.from);
        const auto host = recipientShard->getTargeter()->findHost(
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            RemoteCommandTargeter::selectFindHostMaxWaitTime(txn));
        if (!host.isOK()) {
            // Unable to find a target shard for whatever reason; abandon this migration and
            // proceed to the next.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(MigrationStatuses::value_type(
                migration.chunkInfo.migrateInfo.getName(), std::move(host.getStatus())));
            continue;
        }

        RemoteCommandRequest remoteRequest(host.getValue(), "admin", moveChunkRequestObj);

        StatusWith<RemoteCommandResponse> remoteCommandResponse(
            Status{ErrorCodes::InternalError, "Uninitialized value"});

        executor::TaskExecutor* executor = Grid::get(txn)->getExecutorPool()->getFixedExecutor();

        StatusWith<executor::TaskExecutor::CallbackHandle> callbackHandleWithStatus =
            executor->scheduleRemoteCommand(remoteRequest,
                                            stdx::bind(&MigrationManager::_checkMigrationCallback,
                                                       this,
                                                       stdx::placeholders::_1,
                                                       txn,
                                                       &migration,
                                                       migrationStatuses));

        if (!callbackHandleWithStatus.isOK()) {
            // Scheduling the migration moveChunk failed.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(
                MigrationStatuses::value_type(migration.chunkInfo.migrateInfo.getName(),
                                              std::move(callbackHandleWithStatus.getStatus())));
            continue;
        }

        // The moveChunk command was successfully scheduled. Store the callback handle so that the
        // command's return can be waited for later.
        stdx::lock_guard<stdx::mutex> lk(_mutex);
        migration.setCallbackHandle(std::move(callbackHandleWithStatus.getValue()));
    }

    _waitForMigrations(txn);

    // At this point, there are no parallel running threads so it is safe not to lock the mutex.

    // All the migrations have returned, release all of the distributed locks that are no longer
    // being used.
    _distributedLocks.clear();

    // If there are rescheduled migrations, move them to active and run the function again.
    if (!_rescheduledMigrations.empty()) {
        // Clear all the callback handles of the rescheduled migrations.
        for (auto& migration : _rescheduledMigrations) {
            migration.clearCallbackHandle();
        }

        _activeMigrations = std::move(_rescheduledMigrations);
        _rescheduledMigrations.clear();
        _executeMigrations(txn, migrationStatuses);
    } else {
        _activeMigrations.clear();
    }
}
StatusWith<ScopedMigrationRequest> ScopedMigrationRequest::writeMigration(
    OperationContext* opCtx, const MigrateInfo& migrateInfo, bool waitForDelete) {

    // Try to write a unique migration document to config.migrations.
    const MigrationType migrationType(migrateInfo, waitForDelete);

    for (int retry = 0; retry < kDuplicateKeyErrorMaxRetries; ++retry) {
        Status result = grid.catalogClient()->insertConfigDocument(
            opCtx, MigrationType::ConfigNS, migrationType.toBSON(), kMajorityWriteConcern);

        if (result == ErrorCodes::DuplicateKey) {
            // If the exact migration described by "migrateInfo" is active, return a scoped object
            // for the request because this migration request will join the active one once
            // scheduled.
            auto statusWithMigrationQueryResult =
                grid.shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
                    opCtx,
                    ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                    repl::ReadConcernLevel::kLocalReadConcern,
                    NamespaceString(MigrationType::ConfigNS),
                    BSON(MigrationType::name(migrateInfo.getName())),
                    BSONObj(),
                    boost::none);
            if (!statusWithMigrationQueryResult.isOK()) {
                return {statusWithMigrationQueryResult.getStatus().code(),
                        str::stream()
                            << "Failed to verify whether conflicting migration is in "
                            << "progress for migration '" << redact(migrateInfo.toString())
                            << "' while trying to query config.migrations."
                            << causedBy(redact(statusWithMigrationQueryResult.getStatus()))};
            }

            if (statusWithMigrationQueryResult.getValue().docs.empty()) {
                // The document that caused the DuplicateKey error is no longer in the collection,
                // so retrying the insert might succeed.
                continue;
            }

            invariant(statusWithMigrationQueryResult.getValue().docs.size() == 1);

            BSONObj activeMigrationBSON = statusWithMigrationQueryResult.getValue().docs.front();
            auto statusWithActiveMigration = MigrationType::fromBSON(activeMigrationBSON);
            if (!statusWithActiveMigration.isOK()) {
                return {statusWithActiveMigration.getStatus().code(),
                        str::stream()
                            << "Failed to verify whether conflicting migration is in "
                            << "progress for migration '" << redact(migrateInfo.toString())
                            << "' while trying to parse active migration document '"
                            << redact(activeMigrationBSON.toString()) << "'."
                            << causedBy(redact(statusWithActiveMigration.getStatus()))};
            }

            MigrateInfo activeMigrateInfo = statusWithActiveMigration.getValue().toMigrateInfo();
            if (activeMigrateInfo.to != migrateInfo.to ||
                activeMigrateInfo.from != migrateInfo.from) {
                log() << "Failed to write document '" << redact(migrateInfo.toString())
                      << "' to config.migrations because there is already an active migration for"
                      << " that chunk: '" << redact(activeMigrateInfo.toString()) << "'."
                      << causedBy(redact(result));
                return result;
            }

            result = Status::OK();
        }

        // As long as there isn't a DuplicateKey error, the document may have been written, and
        // it's safe (won't delete another migration's document) and necessary to try to clean up
        // the document via the destructor.
        ScopedMigrationRequest scopedMigrationRequest(
            opCtx, NamespaceString(migrateInfo.ns), migrateInfo.minKey);

        // If there was a write error, let the object go out of scope and clean up in the
        // destructor.
        if (!result.isOK()) {
            return result;
        }

        return std::move(scopedMigrationRequest);
    }

    return Status(ErrorCodes::OperationFailed,
                  str::stream() << "Failed to insert the config.migrations document after max "
                                << "number of retries. Chunk '"
                                << ChunkRange(migrateInfo.minKey, migrateInfo.maxKey).toString()
                                << "' in collection '" << migrateInfo.ns
                                << "' was being moved (somewhere) by another operation.");
}
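// Illustrative only: a stripped-down, self-contained sketch of the insert/verify/retry
// pattern used by writeMigration() above. The store and document types below are
// hypothetical stand-ins, not MongoDB interfaces.
#include <map>
#include <optional>
#include <string>

enum class InsertResult { kOk, kDuplicateKey };

struct MigrationDoc {
    std::string name, from, to;
};

struct MigrationStore {
    std::map<std::string, MigrationDoc> docs;  // keyed by migration name

    InsertResult insert(const MigrationDoc& d) {
        return docs.emplace(d.name, d).second ? InsertResult::kOk : InsertResult::kDuplicateKey;
    }

    std::optional<MigrationDoc> findByName(const std::string& name) const {
        auto it = docs.find(name);
        if (it == docs.end()) {
            return std::nullopt;
        }
        return it->second;
    }
};

// Returns true if the caller either wrote a fresh document or can join an identical
// active migration; false if a conflicting migration owns the document or retries ran out.
inline bool writeOrJoinMigration(MigrationStore& store, const MigrationDoc& doc, int maxRetries) {
    for (int retry = 0; retry < maxRetries; ++retry) {
        if (store.insert(doc) == InsertResult::kOk) {
            return true;  // Fresh document written; this caller owns the migration.
        }
        auto existing = store.findByName(doc.name);
        if (!existing) {
            continue;  // The conflicting document vanished; retry the insert.
        }
        // Same source and destination shards: join the already-active migration.
        return existing->from == doc.from && existing->to == doc.to;
    }
    return false;  // Exhausted retries while racing with other writers.
}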
void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata> remoteMetadata) {
    LOG(1) << "Refreshing the active metadata from "
           << (_activeMetadataTracker->metadata ? _activeMetadataTracker->metadata->toStringBasic()
                                                : "(empty)")
           << ", to " << (remoteMetadata ? remoteMetadata->toStringBasic() : "(empty)");

    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    // Collection is not sharded anymore
    if (!remoteMetadata) {
        log() << "Marking collection as not sharded.";

        _receivingChunks.clear();
        _rangesToClean.clear();

        _setActiveMetadata_inlock(nullptr);
        return;
    }

    invariant(!remoteMetadata->getCollVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));
    invariant(!remoteMetadata->getShardVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));

    // Collection is not sharded currently
    if (!_activeMetadataTracker->metadata) {
        log() << "Marking collection as sharded with version " << remoteMetadata->toStringBasic();

        invariant(_receivingChunks.empty());
        invariant(_rangesToClean.empty());

        _setActiveMetadata_inlock(std::move(remoteMetadata));
        return;
    }

    // If the metadata being installed has a different epoch from ours, this means the collection
    // was dropped and recreated, so we must entirely reset the metadata state
    if (_activeMetadataTracker->metadata->getCollVersion().epoch() !=
        remoteMetadata->getCollVersion().epoch()) {
        log() << "Overwriting collection metadata due to epoch change.";

        _receivingChunks.clear();
        _rangesToClean.clear();

        _setActiveMetadata_inlock(std::move(remoteMetadata));
        return;
    }

    // We already have newer version
    if (_activeMetadataTracker->metadata->getCollVersion() >= remoteMetadata->getCollVersion()) {
        LOG(1) << "Attempted to refresh active metadata "
               << _activeMetadataTracker->metadata->toStringBasic() << " with an older version "
               << remoteMetadata->toStringBasic();

        return;
    }

    // Resolve any receiving chunks, which might have completed by now
    for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
        const BSONObj min = it->first;
        const BSONObj max = it->second;

        // Our pending range overlaps at least one chunk
        if (rangeMapContains(remoteMetadata->getChunks(), min, max)) {
            // The remote metadata contains a chunk we were earlier in the process of receiving, so
            // we deem it successfully received.
            LOG(2) << "Verified chunk " << ChunkRange(min, max).toString()
                   << " was migrated earlier to this shard";

            _receivingChunks.erase(it++);
            continue;
        } else if (!rangeMapOverlaps(remoteMetadata->getChunks(), min, max)) {
            ++it;
            continue;
        }

        // Partial overlap indicates that the earlier migration has failed, but the chunk being
        // migrated underwent some splits and other migrations and ended up here again. In this
        // case, we will request full reload of the metadata. Currently this cannot happen, because
        // all migrations are with the explicit knowledge of the recipient shard. However, we leave
        // the option open so that chunk splits can do empty chunk move without having to notify
        // the recipient.
        RangeVector overlappedChunks;
        getRangeMapOverlap(remoteMetadata->getChunks(), min, max, &overlappedChunks);

        for (const auto& overlapChunkMin : overlappedChunks) {
            auto itRecv = _receivingChunks.find(overlapChunkMin.first);
            invariant(itRecv != _receivingChunks.end());

            const ChunkRange receivingRange(itRecv->first, itRecv->second);

            _receivingChunks.erase(itRecv);

            // Make sure any potentially partially copied chunks are scheduled to be cleaned up
            _addRangeToClean_inlock(receivingRange);
        }

        // Need to reset the iterator
        it = _receivingChunks.begin();
    }

    // For compatibility with the current range deleter, which is driven entirely by the contents
    // of the CollectionMetadata update the pending chunks
    for (const auto& receivingChunk : _receivingChunks) {
        ChunkType chunk;
        chunk.setMin(receivingChunk.first);
        chunk.setMax(receivingChunk.second);
        remoteMetadata = remoteMetadata->clonePlusPending(chunk);
    }

    _setActiveMetadata_inlock(std::move(remoteMetadata));
}
ChunkRange MetadataManager::getNextRangeToClean() {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
    invariant(!_rangesToClean.empty());
    auto it = _rangesToClean.begin();
    return ChunkRange(it->first, it->second.getMax());
}