StatusWith<string> DistributionStatus::getTagForSingleChunk(const string& configServer,
                                                            const string& ns,
                                                            const ChunkType& chunk) {
    BSONObj tagRangeDoc;

    try {
        ScopedDbConnection conn(configServer, 30);

        Query query(QUERY(TagsType::ns(ns)
                          << TagsType::min() << BSON("$lte" << chunk.getMin())
                          << TagsType::max() << BSON("$gte" << chunk.getMax())));

        tagRangeDoc = conn->findOne(TagsType::ConfigNS, query);
        conn.done();
    } catch (const DBException& excep) {
        return StatusWith<string>(excep.toStatus());
    }

    if (tagRangeDoc.isEmpty()) {
        return StatusWith<string>("");
    }

    TagsType tagRange;
    string errMsg;
    if (!tagRange.parseBSON(tagRangeDoc, &errMsg)) {
        return StatusWith<string>(ErrorCodes::FailedToParse, errMsg);
    }

    return StatusWith<string>(tagRange.getTag());
}
StatusWith<string> CatalogManagerReplicaSet::getTagForChunk(OperationContext* txn,
                                                            const std::string& collectionNs,
                                                            const ChunkType& chunk) {
    BSONObj query =
        BSON(TagsType::ns(collectionNs)
             << TagsType::min() << BSON("$lte" << chunk.getMin())
             << TagsType::max() << BSON("$gte" << chunk.getMax()));

    auto findStatus =
        _exhaustiveFindOnConfig(txn, NamespaceString(TagsType::ConfigNS), query, BSONObj(), 1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue().value;
    if (docs.empty()) {
        return string{};
    }

    invariant(docs.size() == 1);
    BSONObj tagsDoc = docs.front();

    const auto tagsResult = TagsType::fromBSON(tagsDoc);
    if (!tagsResult.isOK()) {
        return {ErrorCodes::FailedToParse,
                stream() << "error while parsing " << TagsType::ConfigNS << " document: " << tagsDoc
                         << " : " << tagsResult.getStatus().toString()};
    }

    return tagsResult.getValue().getTag();
}
StatusWith<string> CatalogManagerReplicaSet::getTagForChunk(const std::string& collectionNs,
                                                            const ChunkType& chunk) {
    auto configShard = grid.shardRegistry()->getShard("config");
    auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    BSONObj query =
        BSON(TagsType::ns(collectionNs)
             << TagsType::min() << BSON("$lte" << chunk.getMin())
             << TagsType::max() << BSON("$gte" << chunk.getMax()));

    auto findStatus = grid.shardRegistry()->exhaustiveFind(
        readHostStatus.getValue(), NamespaceString(TagsType::ConfigNS), query, BSONObj(), 1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue();
    if (docs.empty()) {
        return string{};
    }

    invariant(docs.size() == 1);
    BSONObj tagsDoc = docs.front();

    const auto tagsResult = TagsType::fromBSON(tagsDoc);
    if (!tagsResult.isOK()) {
        return {ErrorCodes::FailedToParse,
                stream() << "error while parsing " << TagsType::ConfigNS << " document: " << tagsDoc
                         << " : " << tagsResult.getStatus().toString()};
    }

    return tagsResult.getValue().getTag();
}
Status CollectionMetadata::checkChunkIsValid(const ChunkType& chunk) {
    ChunkType existingChunk;

    if (!getNextChunk(chunk.getMin(), &existingChunk)) {
        return {ErrorCodes::IncompatibleShardingMetadata,
                str::stream() << "Chunk with bounds "
                              << ChunkRange(chunk.getMin(), chunk.getMax()).toString()
                              << " is not owned by this shard."};
    }

    if (existingChunk.getMin().woCompare(chunk.getMin()) ||
        existingChunk.getMax().woCompare(chunk.getMax())) {
        return {ErrorCodes::IncompatibleShardingMetadata,
                str::stream() << "Unable to find chunk with the exact bounds "
                              << ChunkRange(chunk.getMin(), chunk.getMax()).toString()
                              << " at collection version " << getCollVersion().toString()};
    }

    return Status::OK();
}
std::unique_ptr<CollectionMetadata> CollectionMetadata::clonePlusPending(
    const ChunkType& chunk) const {
    invariant(!rangeMapOverlaps(_chunksMap, chunk.getMin(), chunk.getMax()));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_rangesMap = _rangesMap;
    metadata->_shardVersion = _shardVersion;
    metadata->_collVersion = _collVersion;

    // If there are any pending chunks on the interval to be added this is ok, since pending chunks
    // aren't officially tracked yet and something may have changed on servers we do not see yet.
    //
    // We remove any chunks we overlap because the remote request starting a chunk migration is
    // what is authoritative.

    if (rangeMapOverlaps(_pendingMap, chunk.getMin(), chunk.getMax())) {
        RangeVector pendingOverlap;
        getRangeMapOverlap(_pendingMap, chunk.getMin(), chunk.getMax(), &pendingOverlap);

        warning() << "new pending chunk " << redact(rangeToString(chunk.getMin(), chunk.getMax()))
                  << " overlaps existing pending chunks " << redact(overlapToString(pendingOverlap))
                  << ", a migration may not have completed";

        for (RangeVector::iterator it = pendingOverlap.begin(); it != pendingOverlap.end(); ++it) {
            metadata->_pendingMap.erase(it->first);
        }
    }

    // The pending map entry cannot contain a specific chunk version because we don't know what
    // version would be generated for it at commit time. That's why we insert an IGNORED value.
    metadata->_pendingMap.insert(
        make_pair(chunk.getMin(), CachedChunkInfo(chunk.getMax(), ChunkVersion::IGNORED())));

    invariant(metadata->isValid());
    return metadata;
}
bool CollectionManager::chunkExists(const ChunkType& chunk, string* errMsg) const {
    RangeMap::const_iterator it = _chunksMap.find(chunk.getMin());
    if (it == _chunksMap.end()) {
        *errMsg = stream() << "couldn't find chunk " << chunk.getMin() << "->" << chunk.getMax();
        return false;
    }

    if (it->second.woCompare(chunk.getMax()) != 0) {
        *errMsg = stream() << "ranges differ, "
                           << "requested: " << chunk.getMin() << " -> " << chunk.getMax() << " "
                           << "existing: "
                           << ((it == _chunksMap.end())
                                   ? "<empty>"
                                   : it->first.toString() + " -> " + it->second.toString());
        return false;
    }

    return true;
}
string DistributionStatus::getTagForChunk(const ChunkType& chunk) const {
    const auto minIntersect = _tagRanges.upper_bound(chunk.getMin());
    const auto maxIntersect = _tagRanges.lower_bound(chunk.getMax());

    // We should never have a partial overlap with a chunk range. If it happens, treat it as if
    // this chunk doesn't belong to a tag
    if (minIntersect != maxIntersect) {
        return "";
    }

    if (minIntersect == _tagRanges.end()) {
        return "";
    }

    const TagRange& intersectRange = minIntersect->second;

    // Check for containment
    if (intersectRange.min <= chunk.getMin() && chunk.getMax() <= intersectRange.max) {
        return intersectRange.tag;
    }

    return "";
}
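// Illustrative sketch (not MongoDB code): the lookup in getTagForChunk() above relies on
// _tagRanges being an ordered map keyed by each tag range's *max* bound, which is what the
// upper_bound/lower_bound pairing suggests. The toy program below reproduces that pattern with
// integer keys to show why the two searches landing on the same entry, plus the containment
// check, selects the single range that fully covers the chunk. All names here (Range,
// rangesByMax, tagForChunk) are hypothetical.
#include <iostream>
#include <map>
#include <string>

struct Range {
    int min;
    int max;  // exclusive upper bound
    std::string tag;
};

std::string tagForChunk(const std::map<int, Range>& rangesByMax, int chunkMin, int chunkMax) {
    // First range whose max is strictly greater than the chunk's min...
    const auto minIntersect = rangesByMax.upper_bound(chunkMin);
    // ...and first range whose max is greater than or equal to the chunk's max.
    const auto maxIntersect = rangesByMax.lower_bound(chunkMax);

    // Different entries mean a partial overlap; end() means no candidate range at all.
    if (minIntersect != maxIntersect || minIntersect == rangesByMax.end()) {
        return "";
    }

    const Range& r = minIntersect->second;
    return (r.min <= chunkMin && chunkMax <= r.max) ? r.tag : std::string("");
}

int main() {
    std::map<int, Range> rangesByMax;
    rangesByMax.emplace(10, Range{0, 10, "a"});
    rangesByMax.emplace(20, Range{10, 20, "b"});

    std::cout << "[2, 8)   -> '" << tagForChunk(rangesByMax, 2, 8) << "'\n";    // 'a' (contained)
    std::cout << "[8, 12)  -> '" << tagForChunk(rangesByMax, 8, 12) << "'\n";   // ''  (partial overlap)
    std::cout << "[25, 30) -> '" << tagForChunk(rangesByMax, 25, 30) << "'\n";  // ''  (past all ranges)
    return 0;
}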
std::unique_ptr<CollectionMetadata> CollectionMetadata::clonePlusPending(
    const ChunkType& chunk) const {
    invariant(!rangeMapOverlaps(_chunksMap, chunk.getMin(), chunk.getMax()));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_rangesMap = _rangesMap;
    metadata->_shardVersion = _shardVersion;
    metadata->_collVersion = _collVersion;

    // If there are any pending chunks on the interval to be added this is ok, since pending
    // chunks aren't officially tracked yet and something may have changed on servers we do not
    // see yet.
    //
    // We remove any chunks we overlap; the remote request starting a chunk migration must have
    // been authoritative.

    if (rangeMapOverlaps(_pendingMap, chunk.getMin(), chunk.getMax())) {
        RangeVector pendingOverlap;
        getRangeMapOverlap(_pendingMap, chunk.getMin(), chunk.getMax(), &pendingOverlap);

        warning() << "new pending chunk " << rangeToString(chunk.getMin(), chunk.getMax())
                  << " overlaps existing pending chunks " << overlapToString(pendingOverlap)
                  << ", a migration may not have completed";

        for (RangeVector::iterator it = pendingOverlap.begin(); it != pendingOverlap.end(); ++it) {
            metadata->_pendingMap.erase(it->first);
        }
    }

    metadata->_pendingMap.insert(make_pair(chunk.getMin(), chunk.getMax()));

    invariant(metadata->isValid());
    return metadata;
}
std::unique_ptr<CollectionMetadata> CollectionMetadata::cloneMinusPending(
    const ChunkType& chunk) const {
    invariant(rangeMapContains(_pendingMap, chunk.getMin(), chunk.getMax()));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_pendingMap.erase(chunk.getMin());
    metadata->_chunksMap = _chunksMap;
    metadata->_rangesMap = _rangesMap;
    metadata->_shardVersion = _shardVersion;
    metadata->_collVersion = _collVersion;

    invariant(metadata->isValid());
    return metadata;
}
Chunk::Chunk(OperationContext* txn, ChunkManager* manager, const ChunkType& from)
    : _manager(manager), _lastmod(from.getVersion()), _dataWritten(mkDataWritten()) {
    string ns = from.getNS();
    _shardId = from.getShard();

    verify(_lastmod.isSet());

    _min = from.getMin().getOwned();
    _max = from.getMax().getOwned();
    _jumbo = from.getJumbo();

    uassert(10170, "Chunk needs a ns", !ns.empty());
    uassert(13327, "Chunk ns must match server ns", ns == _manager->getns());
    uassert(10172, "Chunk needs a min", !_min.isEmpty());
    uassert(10173, "Chunk needs a max", !_max.isEmpty());
    uassert(10171, "Chunk needs a server", grid.shardRegistry()->getShard(txn, _shardId));
}
TEST_F(MergeChunkTests, CompoundMerge) {
    const NamespaceString nss("foo.bar");
    const BSONObj kp = BSON("x" << 1 << "y" << 1);
    const OID epoch = OID::gen();
    vector<KeyRange> ranges;

    // Setup chunk metadata
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 0 << "y" << 1), BSON("x" << 1 << "y" << 0), kp));
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 1 << "y" << 0), BSON("x" << 2 << "y" << 1), kp));
    storeCollectionRanges(nss, shardName(), ranges, ChunkVersion(1, 0, epoch));

    // Get latest version
    ChunkVersion latestVersion;
    ShardingState::get(getGlobalServiceContext())
        ->refreshMetadataNow(&_txn, nss.ns(), &latestVersion);
    ShardingState::get(getGlobalServiceContext())->resetMetadata(nss.ns());

    // Do merge
    string errMsg;
    bool result = mergeChunks(
        &_txn, nss, BSON("x" << 0 << "y" << 1), BSON("x" << 2 << "y" << 1), epoch, &errMsg);
    ASSERT_EQUALS(errMsg, "");
    ASSERT(result);

    // Verify result
    CollectionMetadataPtr metadata =
        ShardingState::get(getGlobalServiceContext())->getCollectionMetadata(nss.ns());

    ChunkType chunk;
    ASSERT(metadata->getNextChunk(BSON("x" << 0 << "y" << 1), &chunk));
    ASSERT(chunk.getMin().woCompare(BSON("x" << 0 << "y" << 1)) == 0);
    ASSERT(chunk.getMax().woCompare(BSON("x" << 2 << "y" << 1)) == 0);
    ASSERT_EQUALS(metadata->getNumChunks(), 1u);

    ASSERT_EQUALS(metadata->getShardVersion().majorVersion(), latestVersion.majorVersion());
    ASSERT_GREATER_THAN(metadata->getShardVersion().minorVersion(), latestVersion.minorVersion());

    assertWrittenAsMerged(ranges);
}
std::unique_ptr<CollectionMetadata> CollectionMetadata::cloneMigrate(
    const ChunkType& chunk, const ChunkVersion& newCollectionVersion) const {
    invariant(newCollectionVersion.epoch() == _collVersion.epoch());
    invariant(newCollectionVersion > _collVersion);
    invariant(rangeMapContains(_chunksMap, chunk.getMin(), chunk.getMax()));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_chunksMap.erase(chunk.getMin());
    metadata->_shardVersion = (metadata->_chunksMap.empty()
                                   ? ChunkVersion(0, 0, newCollectionVersion.epoch())
                                   : newCollectionVersion);
    metadata->_collVersion = newCollectionVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return metadata;
}
StatusWith<ForwardingCatalogManager::ScopedDistLock*> ChunkMoveOperationState::acquireMoveMetadata(
    OperationContext* txn) {
    // Get the distributed lock
    const string whyMessage(stream() << "migrating chunk [" << minKey << ", " << maxKey << ") in "
                                     << _nss.ns());
    _distLockStatus = grid.forwardingCatalogManager()->distLock(txn, _nss.ns(), whyMessage);

    if (!_distLockStatus->isOK()) {
        const string msg = stream() << "could not acquire collection lock for " << _nss.ns()
                                    << " to migrate chunk [" << minKey << "," << maxKey << ")"
                                    << causedBy(_distLockStatus->getStatus());
        warning() << msg;
        return Status(_distLockStatus->getStatus().code(), msg);
    }

    ShardingState* const shardingState = ShardingState::get(txn);

    // Snapshot the metadata
    Status refreshStatus = shardingState->refreshMetadataNow(txn, _nss.ns(), &_shardVersion);
    if (!refreshStatus.isOK()) {
        const string msg = stream() << "moveChunk cannot start migrate of chunk "
                                    << "[" << minKey << "," << maxKey << ")"
                                    << causedBy(refreshStatus.reason());
        warning() << msg;
        return Status(refreshStatus.code(), msg);
    }

    if (_shardVersion.majorVersion() == 0) {
        // It makes no sense to migrate if our version is zero and we have no chunks
        const string msg = stream() << "moveChunk cannot start migrate of chunk "
                                    << "[" << minKey << "," << maxKey << ")"
                                    << " with zero shard version";
        warning() << msg;
        return Status(ErrorCodes::IncompatibleShardingMetadata, msg);
    }

    if (_collectionEpoch != _shardVersion.epoch()) {
        const string msg = stream() << "moveChunk cannot move chunk "
                                    << "[" << minKey << "," << maxKey << "), "
                                    << "collection may have been dropped. "
                                    << "current epoch: " << _shardVersion.epoch()
                                    << ", cmd epoch: " << _collectionEpoch;
        warning() << msg;
        return Status(ErrorCodes::IncompatibleShardingMetadata, msg);
    }

    _collMetadata = shardingState->getCollectionMetadata(_nss.ns());

    // With nonzero shard version, we must have a coll version >= our shard version
    invariant(_collMetadata->getCollVersion() >= _shardVersion);

    // With nonzero shard version, we must have a shard key
    invariant(!_collMetadata->getKeyPattern().isEmpty());

    ChunkType origChunk;
    if (!_collMetadata->getNextChunk(getMinKey(), &origChunk) ||
        origChunk.getMin().woCompare(getMinKey()) || origChunk.getMax().woCompare(getMaxKey())) {
        // Our boundaries are different from those passed in
        const string msg = stream() << "moveChunk cannot find chunk "
                                    << "[" << minKey << "," << maxKey << ")"
                                    << " to migrate, the chunk boundaries may be stale";
        warning() << msg;
        return Status(ErrorCodes::IncompatibleShardingMetadata, msg);
    }

    return &_distLockStatus->getValue();
}
bool mergeChunks(OperationContext* txn,
                 const NamespaceString& nss,
                 const BSONObj& minKey,
                 const BSONObj& maxKey,
                 const OID& epoch,
                 string* errMsg) {
    // Get the distributed lock
    string whyMessage = stream() << "merging chunks in " << nss.ns() << " from " << minKey
                                 << " to " << maxKey;
    auto scopedDistLock = grid.catalogManager(txn)->distLock(
        txn, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout);

    if (!scopedDistLock.isOK()) {
        *errMsg = stream() << "could not acquire collection lock for " << nss.ns()
                           << " to merge chunks in [" << minKey << "," << maxKey << ")"
                           << causedBy(scopedDistLock.getStatus());
        warning() << *errMsg;
        return false;
    }

    ShardingState* shardingState = ShardingState::get(txn);

    //
    // We now have the collection lock, refresh metadata to latest version and sanity check
    //

    ChunkVersion shardVersion;
    Status status = shardingState->refreshMetadataNow(txn, nss.ns(), &shardVersion);

    if (!status.isOK()) {
        *errMsg = str::stream() << "could not merge chunks, failed to refresh metadata for "
                                << nss.ns() << causedBy(status.reason());
        warning() << *errMsg;
        return false;
    }

    if (epoch.isSet() && shardVersion.epoch() != epoch) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns() << " has changed"
                           << " since merge was sent"
                           << "(sent epoch : " << epoch.toString()
                           << ", current epoch : " << shardVersion.epoch().toString() << ")";
        warning() << *errMsg;
        return false;
    }

    shared_ptr<CollectionMetadata> metadata = shardingState->getCollectionMetadata(nss.ns());

    if (!metadata || metadata->getKeyPattern().isEmpty()) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " is not sharded";
        warning() << *errMsg;
        return false;
    }

    dassert(metadata->getShardVersion().equals(shardVersion));

    if (!metadata->isValidKey(minKey) || !metadata->isValidKey(maxKey)) {
        *errMsg = stream() << "could not merge chunks, the range "
                           << rangeToString(minKey, maxKey) << " is not valid"
                           << " for collection " << nss.ns() << " with key pattern "
                           << metadata->getKeyPattern();
        warning() << *errMsg;
        return false;
    }

    //
    // Get merged chunk information
    //

    ChunkVersion mergeVersion = metadata->getCollVersion();
    mergeVersion.incMinor();

    std::vector<ChunkType> chunksToMerge;

    ChunkType itChunk;
    itChunk.setMin(minKey);
    itChunk.setMax(minKey);
    itChunk.setNS(nss.ns());
    itChunk.setShard(shardingState->getShardName());

    while (itChunk.getMax().woCompare(maxKey) < 0 &&
           metadata->getNextChunk(itChunk.getMax(), &itChunk)) {
        chunksToMerge.push_back(itChunk);
    }

    if (chunksToMerge.empty()) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " and ending at " << maxKey
                           << " does not belong to shard " << shardingState->getShardName();
        warning() << *errMsg;
        return false;
    }

    //
    // Validate the range starts and ends at chunks and has no holes, error if not valid
    //

    BSONObj firstDocMin = chunksToMerge.front().getMin();
    BSONObj firstDocMax = chunksToMerge.front().getMax();
    // minKey is inclusive
    bool minKeyInRange = rangeContains(firstDocMin, firstDocMax, minKey);

    if (!minKeyInRange) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " does not belong to shard "
                           << shardingState->getShardName();
        warning() << *errMsg;
        return false;
    }

    BSONObj lastDocMin = chunksToMerge.back().getMin();
    BSONObj lastDocMax = chunksToMerge.back().getMax();
    // maxKey is exclusive
    bool maxKeyInRange = lastDocMin.woCompare(maxKey) < 0 && lastDocMax.woCompare(maxKey) >= 0;

    if (!maxKeyInRange) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range ending at " << maxKey << " does not belong to shard "
                           << shardingState->getShardName();
        warning() << *errMsg;
        return false;
    }

    bool validRangeStartKey = firstDocMin.woCompare(minKey) == 0;
    bool validRangeEndKey = lastDocMax.woCompare(maxKey) == 0;

    if (!validRangeStartKey || !validRangeEndKey) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " does not contain a chunk "
                           << (!validRangeStartKey ? "starting at " + minKey.toString() : "")
                           << (!validRangeStartKey && !validRangeEndKey ? " or " : "")
                           << (!validRangeEndKey ? "ending at " + maxKey.toString() : "");
        warning() << *errMsg;
        return false;
    }

    if (chunksToMerge.size() == 1) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " already contains chunk for " << rangeToString(minKey, maxKey);
        warning() << *errMsg;
        return false;
    }

    // Look for hole in range
    for (size_t i = 1; i < chunksToMerge.size(); ++i) {
        if (chunksToMerge[i - 1].getMax().woCompare(chunksToMerge[i].getMin()) != 0) {
            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " has a hole in the range " << rangeToString(minKey, maxKey)
                               << " at "
                               << rangeToString(chunksToMerge[i - 1].getMax(),
                                                chunksToMerge[i].getMin());
            warning() << *errMsg;
            return false;
        }
    }

    //
    // Run apply ops command
    //
    Status applyOpsStatus = runApplyOpsCmd(txn, chunksToMerge, shardVersion, mergeVersion);
    if (!applyOpsStatus.isOK()) {
        warning() << applyOpsStatus;
        return false;
    }

    //
    // Install merged chunk metadata
    //

    {
        ScopedTransaction transaction(txn, MODE_IX);
        Lock::DBLock writeLk(txn->lockState(), nss.db(), MODE_IX);
        Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_X);

        shardingState->mergeChunks(txn, nss.ns(), minKey, maxKey, mergeVersion);
    }

    //
    // Log change
    //

    BSONObj mergeLogEntry = buildMergeLogEntry(chunksToMerge, shardVersion, mergeVersion);

    grid.catalogManager(txn)->logChange(txn, "merge", nss.ns(), mergeLogEntry);

    return true;
}
virtual pair<BSONObj, BSONObj> rangeFor(const ChunkType& chunk) const {
    return make_pair(chunk.getMin(), chunk.getMax());
}
Status ChunkMoveOperationState::commitMigration() {
    invariant(_distLockStatus.is_initialized());
    invariant(_distLockStatus->isOK());

    log() << "About to enter migrate critical section";

    // We're under the collection distributed lock here, so no other migrate can change maxVersion
    // or CollectionMetadata state.
    ShardingState* const shardingState = ShardingState::get(_txn);

    Status startStatus = ShardingStateRecovery::startMetadataOp(_txn);
    if (!startStatus.isOK())
        return startStatus;

    shardingState->migrationSourceManager()->setInCriticalSection(true);

    const ChunkVersion originalCollVersion = getCollMetadata()->getCollVersion();

    ChunkVersion myVersion = originalCollVersion;
    myVersion.incMajor();

    {
        ScopedTransaction transaction(_txn, MODE_IX);
        Lock::DBLock lk(_txn->lockState(), _nss.db(), MODE_IX);
        Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

        invariant(myVersion > shardingState->getVersion(_nss.ns()));

        // Bump the metadata's version up and "forget" about the chunk being moved. This is
        // not the commit point, but in practice the state in this shard won't change until
        // the commit is done.
        shardingState->donateChunk(_txn, _nss.ns(), _minKey, _maxKey, myVersion);
    }

    log() << "moveChunk setting version to: " << myVersion << migrateLog;

    // We're under the collection lock here, too, so we can undo the chunk donation because
    // no other state change could be ongoing
    BSONObj res;
    Status recvChunkCommitStatus{ErrorCodes::InternalError, "status not set"};

    try {
        ScopedDbConnection connTo(_toShardCS, 35.0);
        connTo->runCommand("admin", BSON("_recvChunkCommit" << 1), res);
        connTo.done();
        recvChunkCommitStatus = getStatusFromCommandResult(res);
    } catch (const DBException& e) {
        const string msg = stream() << "moveChunk could not contact to shard " << _toShard
                                    << " to commit transfer" << causedBy(e);
        warning() << msg;
        recvChunkCommitStatus = Status(e.toStatus().code(), msg);
    }

    if (MONGO_FAIL_POINT(failMigrationCommit) && recvChunkCommitStatus.isOK()) {
        recvChunkCommitStatus =
            Status(ErrorCodes::InternalError, "Failing _recvChunkCommit due to failpoint.");
    }

    if (!recvChunkCommitStatus.isOK()) {
        log() << "moveChunk migrate commit not accepted by TO-shard: " << res
              << " resetting shard version to: " << getShardVersion() << migrateLog;

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            log() << "moveChunk collection lock acquired to reset shard version from "
                     "failed migration";

            // Revert the chunk manager back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        return Status(recvChunkCommitStatus.code(),
                      stream() << "_recvChunkCommit failed: " << causedBy(recvChunkCommitStatus));
    }

    log() << "moveChunk migrate commit accepted by TO-shard: " << res << migrateLog;

    BSONArrayBuilder updates;

    {
        // Update for the chunk being moved
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        myVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), _minKey);
        n.append(ChunkType::max(), _maxKey);
        n.append(ChunkType::shard(), _toShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        q.done();

        updates.append(op.obj());
    }

    // Version at which the next highest lastmod will be set. If the chunk being moved is the last
    // in the shard, nextVersion is that chunk's lastmod; otherwise the highest version is from the
    // chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = myVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    const std::shared_ptr<CollectionMetadata> bumpedCollMetadata(
        shardingState->getCollectionMetadata(_nss.ns()));
    if (bumpedCollMetadata->getNumChunks() > 0) {
        // get another chunk on that shard
        ChunkType bumpChunk;
        invariant(bumpedCollMetadata->getNextChunk(bumpedCollMetadata->getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();

        dassert(bumpMin.woCompare(_minKey) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        nextVersion.incMinor();  // same as used on donateChunk

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _fromShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _nss.ns() << "'" << migrateLog;
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _nss.ns() << "'"
              << migrateLog;
    }

    BSONArrayBuilder preCond;
    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_nss.ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), originalCollVersion.toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    Status applyOpsStatus{Status::OK()};
    try {
        // For testing migration failures
        if (MONGO_FAIL_POINT(failMigrationConfigWritePrepare)) {
            throw DBException("mock migration failure before config write",
                              ErrorCodes::PrepareConfigsFailed);
        }

        applyOpsStatus =
            grid.catalogManager(_txn)->applyChunkOpsDeprecated(_txn, updates.arr(), preCond.arr());

        if (MONGO_FAIL_POINT(failMigrationApplyOps)) {
            throw SocketException(SocketException::RECV_ERROR,
                                  shardingState->getConfigServer(_txn).toString());
        }
    } catch (const DBException& ex) {
        warning() << ex << migrateLog;
        applyOpsStatus = ex.toStatus();
    }

    if (applyOpsStatus == ErrorCodes::PrepareConfigsFailed) {
        // In the process of issuing the migrate commit, the SyncClusterConnection checks that
        // the config servers are reachable. If they are not, we are sure that the applyOps
        // command was not sent to any of the configs, so we can safely back out of the
        // migration here, by resetting the shard version that we bumped up to in the
        // donateChunk() call above.
        log() << "About to acquire moveChunk coll lock to reset shard version from "
              << "failed migration";

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            // Revert the metadata back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        const string msg = stream() << "Failed to send migrate commit to configs "
                                    << causedBy(applyOpsStatus);
        return Status(applyOpsStatus.code(), msg);
    } else if (!applyOpsStatus.isOK()) {
        // This could be a blip in the connectivity. Wait out a few seconds and check if the
        // commit request made it.
        //
        // If the commit made it to the config, we'll see the chunk in the new shard and
        // there's no further action to be done.
        //
        // If the commit did not make it, currently the only way to fix this state is to
        // bounce the mongod so that the old state (before migrating) is brought in.
        warning() << "moveChunk commit outcome ongoing" << migrateLog;
        sleepsecs(10);

        // Look for the chunk in this shard whose version got bumped. We assume that if that
        // mod made it to the config server, then applyOps was successful.
        try {
            std::vector<ChunkType> newestChunk;
            Status status =
                grid.catalogManager(_txn)->getChunks(_txn,
                                                     BSON(ChunkType::ns(_nss.ns())),
                                                     BSON(ChunkType::DEPRECATED_lastmod() << -1),
                                                     1,
                                                     &newestChunk,
                                                     nullptr);
            uassertStatusOK(status);

            ChunkVersion checkVersion;
            if (!newestChunk.empty()) {
                invariant(newestChunk.size() == 1);
                checkVersion = newestChunk[0].getVersion();
            }

            if (checkVersion.equals(nextVersion)) {
                log() << "moveChunk commit confirmed" << migrateLog;
            } else {
                error() << "moveChunk commit failed: version is at " << checkVersion
                        << " instead of " << nextVersion << migrateLog;
                error() << "TERMINATING" << migrateLog;

                dbexit(EXIT_SHARDING_ERROR);
            }
        } catch (...) {
            error() << "moveChunk failed to get confirmation of commit" << migrateLog;
            error() << "TERMINATING" << migrateLog;

            dbexit(EXIT_SHARDING_ERROR);
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    shardingState->migrationSourceManager()->setInCriticalSection(false);
    ShardingStateRecovery::endMetadataOp(_txn);

    // Migration is done, just log some diagnostics information
    BSONObj chunkInfo =
        BSON("min" << _minKey << "max" << _maxKey << "from" << _fromShard << "to" << _toShard);

    BSONObjBuilder commitInfo;
    commitInfo.appendElements(chunkInfo);
    if (res["counts"].type() == Object) {
        commitInfo.appendElements(res["counts"].Obj());
    }

    grid.catalogManager(_txn)->logChange(_txn, "moveChunk.commit", _nss.ns(), commitInfo.obj());

    shardingState->migrationSourceManager()->done(_txn);
    _isRunning = false;

    return Status::OK();
}
StatusWith<ForwardingCatalogManager::ScopedDistLock*>
ChunkMoveOperationState::acquireMoveMetadata() {
    // Get the distributed lock
    const string whyMessage(stream() << "migrating chunk [" << _minKey << ", " << _maxKey
                                     << ") in " << _nss.ns());
    _distLockStatus = grid.forwardingCatalogManager()->distLock(_txn, _nss.ns(), whyMessage);

    if (!_distLockStatus->isOK()) {
        const string msg = stream() << "could not acquire collection lock for " << _nss.ns()
                                    << " to migrate chunk [" << _minKey << "," << _maxKey << ")"
                                    << causedBy(_distLockStatus->getStatus());
        warning() << msg;
        return Status(_distLockStatus->getStatus().code(), msg);
    }

    ShardingState* const shardingState = ShardingState::get(_txn);

    // Snapshot the metadata
    Status refreshStatus = shardingState->refreshMetadataNow(_txn, _nss.ns(), &_shardVersion);
    if (!refreshStatus.isOK()) {
        const string msg = stream() << "moveChunk cannot start migrate of chunk "
                                    << "[" << _minKey << "," << _maxKey << ")"
                                    << causedBy(refreshStatus.reason());
        warning() << msg;
        return Status(refreshStatus.code(), msg);
    }

    if (_shardVersion.majorVersion() == 0) {
        // It makes no sense to migrate if our version is zero and we have no chunks
        const string msg = stream() << "moveChunk cannot start migrate of chunk "
                                    << "[" << _minKey << "," << _maxKey << ")"
                                    << " with zero shard version";
        warning() << msg;
        return Status(ErrorCodes::IncompatibleShardingMetadata, msg);
    }

    {
        // Mongos >= v3.2 sends the full version, v3.0 only sends the epoch.
        // TODO(SERVER-20742): Stop parsing epoch separately after 3.2.
        auto& operationVersion = OperationShardVersion::get(_txn);
        if (operationVersion.hasShardVersion()) {
            _collectionVersion = operationVersion.getShardVersion(_nss);
            _collectionEpoch = _collectionVersion.epoch();
        }  // else the epoch will already be set from the parsing of the ChunkMoveOperationState

        if (_collectionEpoch != _shardVersion.epoch()) {
            const string msg = stream() << "moveChunk cannot move chunk "
                                        << "[" << _minKey << "," << _maxKey << "), "
                                        << "collection may have been dropped. "
                                        << "current epoch: " << _shardVersion.epoch()
                                        << ", cmd epoch: " << _collectionEpoch;
            warning() << msg;
            throw SendStaleConfigException(_nss.toString(), msg, _collectionVersion, _shardVersion);
        }
    }

    _collMetadata = shardingState->getCollectionMetadata(_nss.ns());

    // With nonzero shard version, we must have a coll version >= our shard version
    invariant(_collMetadata->getCollVersion() >= _shardVersion);

    // With nonzero shard version, we must have a shard key
    invariant(!_collMetadata->getKeyPattern().isEmpty());

    ChunkType origChunk;
    if (!_collMetadata->getNextChunk(_minKey, &origChunk) ||
        origChunk.getMin().woCompare(_minKey) || origChunk.getMax().woCompare(_maxKey)) {
        // Our boundaries are different from those passed in
        const string msg = stream() << "moveChunk cannot find chunk "
                                    << "[" << _minKey << "," << _maxKey << ")"
                                    << " to migrate, the chunk boundaries may be stale";
        warning() << msg;
        throw SendStaleConfigException(_nss.toString(), msg, _collectionVersion, _shardVersion);
    }

    return &_distLockStatus->getValue();
}
static bool overlap(const ChunkType& chunk, const BSONObj& l2, const BSONObj& h2) {
    return !((chunk.getMax().woCompare(l2) <= 0) || (h2.woCompare(chunk.getMin()) <= 0));
}
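// Illustrative sketch (not MongoDB code): overlap() above is the usual half-open interval test --
// [min1, max1) and [l2, h2) intersect unless one range ends at or before the point where the
// other begins. The toy version below uses ints in place of BSONObj shard key bounds; the name
// overlapsHalfOpen is hypothetical.
#include <cassert>

// Returns true when [min1, max1) and [min2, max2) share at least one point.
static bool overlapsHalfOpen(int min1, int max1, int min2, int max2) {
    return !((max1 <= min2) || (max2 <= min1));
}

int main() {
    assert(overlapsHalfOpen(0, 10, 5, 15));    // partial overlap
    assert(!overlapsHalfOpen(0, 10, 10, 20));  // touching at a boundary is NOT an overlap
    assert(!overlapsHalfOpen(0, 10, 20, 30));  // fully disjoint
    return 0;
}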
bool mergeChunks( OperationContext* txn,
                  const NamespaceString& nss,
                  const BSONObj& minKey,
                  const BSONObj& maxKey,
                  const OID& epoch,
                  string* errMsg ) {
    //
    // Get sharding state up-to-date
    //

    ConnectionString configLoc = ConnectionString::parse( shardingState.getConfigServer(),
                                                          *errMsg );
    if ( !configLoc.isValid() ) {
        warning() << *errMsg << endl;
        return false;
    }

    //
    // Get the distributed lock
    //

    ScopedDistributedLock collLock( configLoc, nss.ns() );
    collLock.setLockMessage( stream() << "merging chunks in " << nss.ns() << " from " << minKey
                                      << " to " << maxKey );

    Status acquisitionStatus = collLock.tryAcquire();
    if (!acquisitionStatus.isOK()) {
        *errMsg = stream() << "could not acquire collection lock for " << nss.ns()
                           << " to merge chunks in [" << minKey << "," << maxKey << ")"
                           << causedBy(acquisitionStatus);

        warning() << *errMsg << endl;
        return false;
    }

    //
    // We now have the collection lock, refresh metadata to latest version and sanity check
    //

    ChunkVersion shardVersion;
    Status status = shardingState.refreshMetadataNow(txn, nss.ns(), &shardVersion);

    if ( !status.isOK() ) {
        *errMsg = str::stream() << "could not merge chunks, failed to refresh metadata for "
                                << nss.ns() << causedBy( status.reason() );

        warning() << *errMsg << endl;
        return false;
    }

    if ( epoch.isSet() && shardVersion.epoch() != epoch ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " has changed" << " since merge was sent"
                           << "(sent epoch : " << epoch.toString()
                           << ", current epoch : " << shardVersion.epoch().toString() << ")";

        warning() << *errMsg << endl;
        return false;
    }

    CollectionMetadataPtr metadata = shardingState.getCollectionMetadata( nss.ns() );

    if ( !metadata || metadata->getKeyPattern().isEmpty() ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " is not sharded";

        warning() << *errMsg << endl;
        return false;
    }

    dassert( metadata->getShardVersion().equals( shardVersion ) );

    if ( !metadata->isValidKey( minKey ) || !metadata->isValidKey( maxKey ) ) {
        *errMsg = stream() << "could not merge chunks, the range "
                           << rangeToString( minKey, maxKey ) << " is not valid"
                           << " for collection " << nss.ns() << " with key pattern "
                           << metadata->getKeyPattern();

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Get merged chunk information
    //

    ChunkVersion mergeVersion = metadata->getCollVersion();
    mergeVersion.incMinor();

    OwnedPointerVector<ChunkType> chunksToMerge;

    ChunkType itChunk;
    itChunk.setMin( minKey );
    itChunk.setMax( minKey );
    itChunk.setNS( nss.ns() );
    itChunk.setShard( shardingState.getShardName() );

    while ( itChunk.getMax().woCompare( maxKey ) < 0 &&
            metadata->getNextChunk( itChunk.getMax(), &itChunk ) ) {
        auto_ptr<ChunkType> saved( new ChunkType );
        itChunk.cloneTo( saved.get() );
        chunksToMerge.mutableVector().push_back( saved.release() );
    }

    if ( chunksToMerge.empty() ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " and ending at " << maxKey
                           << " does not belong to shard " << shardingState.getShardName();

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Validate the range starts and ends at chunks and has no holes, error if not valid
    //

    BSONObj firstDocMin = ( *chunksToMerge.begin() )->getMin();
    BSONObj firstDocMax = ( *chunksToMerge.begin() )->getMax();
    // minKey is inclusive
    bool minKeyInRange = rangeContains( firstDocMin, firstDocMax, minKey );

    if ( !minKeyInRange ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " does not belong to shard "
                           << shardingState.getShardName();

        warning() << *errMsg << endl;
        return false;
    }

    BSONObj lastDocMin = ( *chunksToMerge.rbegin() )->getMin();
    BSONObj lastDocMax = ( *chunksToMerge.rbegin() )->getMax();
    // maxKey is exclusive
    bool maxKeyInRange = lastDocMin.woCompare( maxKey ) < 0 &&
                         lastDocMax.woCompare( maxKey ) >= 0;

    if ( !maxKeyInRange ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range ending at " << maxKey << " does not belong to shard "
                           << shardingState.getShardName();

        warning() << *errMsg << endl;
        return false;
    }

    bool validRangeStartKey = firstDocMin.woCompare( minKey ) == 0;
    bool validRangeEndKey = lastDocMax.woCompare( maxKey ) == 0;

    if ( !validRangeStartKey || !validRangeEndKey ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " does not contain a chunk "
                           << ( !validRangeStartKey ? "starting at " + minKey.toString() : "" )
                           << ( !validRangeStartKey && !validRangeEndKey ? " or " : "" )
                           << ( !validRangeEndKey ? "ending at " + maxKey.toString() : "" );

        warning() << *errMsg << endl;
        return false;
    }

    if ( chunksToMerge.size() == 1 ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " already contains chunk for " << rangeToString( minKey, maxKey );

        warning() << *errMsg << endl;
        return false;
    }

    bool holeInRange = false;

    // Look for hole in range
    ChunkType* prevChunk = *chunksToMerge.begin();
    ChunkType* nextChunk = NULL;
    for ( OwnedPointerVector<ChunkType>::const_iterator it = chunksToMerge.begin();
          it != chunksToMerge.end(); ++it ) {
        if ( it == chunksToMerge.begin() ) continue;

        nextChunk = *it;
        if ( prevChunk->getMax().woCompare( nextChunk->getMin() ) != 0 ) {
            holeInRange = true;
            break;
        }
        prevChunk = nextChunk;
    }

    if ( holeInRange ) {
        dassert( NULL != nextChunk );
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " has a hole in the range " << rangeToString( minKey, maxKey )
                           << " at " << rangeToString( prevChunk->getMax(),
                                                       nextChunk->getMin() );

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Run apply ops command
    //

    BSONObj applyOpsCmd = buildApplyOpsCmd( chunksToMerge, shardVersion, mergeVersion );

    bool ok;
    BSONObj result;
    try {
        ScopedDbConnection conn( configLoc, 30.0 );
        ok = conn->runCommand( "config", applyOpsCmd, result );
        if ( !ok ) *errMsg = result.toString();
        conn.done();
    }
    catch( const DBException& ex ) {
        ok = false;
        *errMsg = ex.toString();
    }

    if ( !ok ) {
        *errMsg = stream() << "could not merge chunks for " << nss.ns()
                           << ", writing to config failed" << causedBy( errMsg );

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Install merged chunk metadata
    //

    {
        Lock::DBLock writeLk(txn->lockState(), nss.db(), newlm::MODE_X);
        shardingState.mergeChunks(txn, nss.ns(), minKey, maxKey, mergeVersion);
    }

    //
    // Log change
    //

    BSONObj mergeLogEntry = buildMergeLogEntry( chunksToMerge, shardVersion, mergeVersion );

    configServer.logChange( "merge", nss.ns(), mergeLogEntry );

    return true;
}
Status MigrationSourceManager::commitDonateChunk(OperationContext* txn) {
    invariant(!txn->lockState()->isLocked());
    invariant(_state == kCriticalSection);
    auto scopedGuard = MakeGuard([&] { cleanupOnError(txn); });

    // Tell the recipient shard to fetch the latest changes
    Status commitCloneStatus = _cloneDriver->commitClone(txn);

    if (MONGO_FAIL_POINT(failMigrationCommit) && commitCloneStatus.isOK()) {
        commitCloneStatus = {ErrorCodes::InternalError,
                             "Failing _recvChunkCommit due to failpoint."};
    }

    if (!commitCloneStatus.isOK()) {
        return {commitCloneStatus.code(),
                str::stream() << "commit clone failed due to " << commitCloneStatus.toString()};
    }

    // Generate the next collection version.
    ChunkVersion uncommittedCollVersion = _committedMetadata->getCollVersion();
    uncommittedCollVersion.incMajor();

    // applyOps preparation for reflecting the uncommitted metadata on the config server

    // Preconditions
    BSONArrayBuilder preCond;
    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_args.getNss().ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(),
                               _committedMetadata->getCollVersion().toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    // Update for the chunk which is being donated
    BSONArrayBuilder updates;
    {
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), _args.getMinKey()));
        uncommittedCollVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _args.getNss().ns());
        n.append(ChunkType::min(), _args.getMinKey());
        n.append(ChunkType::max(), _args.getMaxKey());
        n.append(ChunkType::shard(), _args.getToShardId());
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), _args.getMinKey()));
        q.done();

        updates.append(op.obj());
    }

    // Update for the chunk being moved

    // Version at which the next highest lastmod will be set. If the chunk being moved is the last
    // in the shard, nextVersion is that chunk's lastmod; otherwise the highest version is from the
    // chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = uncommittedCollVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    if (_committedMetadata->getNumChunks() > 1) {
        ChunkType bumpChunk;
        invariant(_committedMetadata->getDifferentChunk(_args.getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();
        nextVersion.incMinor();

        dassert(bumpMin.woCompare(_args.getMinKey()) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _args.getNss().ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _args.getFromShardId());
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _args.getNss().ns() << "'";
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _args.getNss().ns() << "'";
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeCommitMigration);

    Status applyOpsStatus = grid.catalogClient(txn)->applyChunkOpsDeprecated(
        txn, updates.arr(), preCond.arr(), _args.getNss().ns(), nextVersion);

    if (MONGO_FAIL_POINT(failCommitMigrationCommand)) {
        applyOpsStatus = Status(ErrorCodes::InternalError,
                                "Failpoint 'failCommitMigrationCommand' generated error");
    }

    if (applyOpsStatus.isOK()) {
        // Now that applyOps succeeded and the new collection version is committed, update the
        // collection metadata to the new collection version and forget the migrated chunk.
        ScopedTransaction scopedXact(txn, MODE_IX);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IX, MODE_X);

        ChunkType migratingChunkToForget;
        migratingChunkToForget.setMin(_args.getMinKey());
        migratingChunkToForget.setMax(_args.getMaxKey());
        _committedMetadata =
            _committedMetadata->cloneMigrate(migratingChunkToForget, uncommittedCollVersion);
        auto css = CollectionShardingState::get(txn, _args.getNss().ns());
        css->setMetadata(_committedMetadata);
    } else {
        // This could be an unrelated error (e.g. network error). Check whether the metadata update
        // succeeded by refreshing the collection metadata from the config server and checking that
        // the original chunks no longer exist.
        warning() << "Migration metadata commit may have failed: refreshing metadata to check"
                  << causedBy(applyOpsStatus);

        // Need to get the latest optime in case the refresh request goes to a secondary --
        // otherwise the read won't wait for the write that applyChunkOpsDeprecated may have done.
        Status status = grid.catalogClient(txn)->logChange(
            txn,
            "moveChunk.validating",
            _args.getNss().ns(),
            BSON("min" << _args.getMinKey() << "max" << _args.getMaxKey() << "from"
                       << _args.getFromShardId() << "to" << _args.getToShardId()));
        if (!status.isOK()) {
            fassertStatusOK(
                40137,
                {status.code(),
                 str::stream() << "applyOps failed to commit chunk [" << _args.getMinKey() << ","
                               << _args.getMaxKey() << ") due to " << causedBy(applyOpsStatus)
                               << ", and updating the optime with a write before refreshing the "
                               << "metadata also failed: " << causedBy(status)});
        }

        ShardingState* const shardingState = ShardingState::get(txn);
        ChunkVersion shardVersion;
        Status refreshStatus =
            shardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
        fassertStatusOK(34431,
                        {refreshStatus.code(),
                         str::stream() << "applyOps failed to commit chunk [" << _args.getMinKey()
                                       << "," << _args.getMaxKey() << ") due to "
                                       << causedBy(applyOpsStatus)
                                       << ", and refreshing collection metadata failed: "
                                       << causedBy(refreshStatus)});

        {
            ScopedTransaction scopedXact(txn, MODE_IS);
            AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

            auto css = CollectionShardingState::get(txn, _args.getNss());
            std::shared_ptr<CollectionMetadata> refreshedMetadata = css->getMetadata();

            if (refreshedMetadata->keyBelongsToMe(_args.getMinKey())) {
                invariant(refreshedMetadata->getCollVersion() ==
                          _committedMetadata->getCollVersion());

                // After refresh, the collection metadata indicates that the donor shard still owns
                // the chunk, so no migration changes were written to the config server metadata.
                return {applyOpsStatus.code(),
                        str::stream() << "Migration was not committed, applyOps failed: "
                                      << causedBy(applyOpsStatus)};
            }

            ChunkVersion refreshedCollectionVersion = refreshedMetadata->getCollVersion();
            if (!refreshedCollectionVersion.equals(nextVersion)) {
                // The refreshed collection metadata's collection version does not match the
                // control chunk's updated collection version, which should now be the highest.
                // The control chunk was not committed, but the migrated chunk was. This state is
                // not recoverable.
                fassertStatusOK(40138,
                                {applyOpsStatus.code(),
                                 str::stream() << "Migration was partially committed, state is "
                                               << "unrecoverable. applyOps error: "
                                               << causedBy(applyOpsStatus)});
            }
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    scopedGuard.Dismiss();
    _cleanup(txn);

    grid.catalogClient(txn)->logChange(txn,
                                       "moveChunk.commit",
                                       _args.getNss().ns(),
                                       BSON("min" << _args.getMinKey() << "max"
                                                  << _args.getMaxKey() << "from"
                                                  << _args.getFromShardId() << "to"
                                                  << _args.getToShardId()));

    return Status::OK();
}
MigrationSourceManager::MigrationSourceManager(OperationContext* txn, MoveChunkRequest request)
    : _args(std::move(request)), _startTime() {
    invariant(!txn->lockState()->isLocked());

    const auto& oss = OperationShardingState::get(txn);
    if (!oss.hasShardVersion()) {
        uasserted(ErrorCodes::InvalidOptions, "collection version is missing");
    }

    // Even though the moveChunk command transmits a value in the operation's shardVersion field,
    // this value does not actually contain the shard version, but the global collection version.
    const ChunkVersion expectedCollectionVersion = oss.getShardVersion(_args.getNss());

    log() << "Starting chunk migration for "
          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
          << " with expected collection version " << expectedCollectionVersion;

    // Now that the collection is locked, snapshot the metadata and fetch the latest versions
    ShardingState* const shardingState = ShardingState::get(txn);

    ChunkVersion shardVersion;

    Status refreshStatus =
        shardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
    if (!refreshStatus.isOK()) {
        uasserted(refreshStatus.code(),
                  str::stream() << "cannot start migrate of chunk "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " due to " << refreshStatus.toString());
    }

    if (shardVersion.majorVersion() == 0) {
        // If the major version is zero, this means we do not have any chunks locally to migrate in
        // the first place
        uasserted(ErrorCodes::IncompatibleShardingMetadata,
                  str::stream() << "cannot start migrate of chunk "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " with zero shard version");
    }

    // Snapshot the committed metadata from the time the migration starts
    {
        ScopedTransaction scopedXact(txn, MODE_IS);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

        auto css = CollectionShardingState::get(txn, _args.getNss());
        _committedMetadata = css->getMetadata();
    }

    const ChunkVersion collectionVersion = _committedMetadata->getCollVersion();

    if (expectedCollectionVersion.epoch() != collectionVersion.epoch()) {
        throw SendStaleConfigException(
            _args.getNss().ns(),
            str::stream() << "cannot move chunk "
                          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                          << " because collection may have been dropped. "
                          << "current epoch: " << collectionVersion.epoch()
                          << ", cmd epoch: " << expectedCollectionVersion.epoch(),
            expectedCollectionVersion,
            collectionVersion);
    }

    // With nonzero shard version, we must have a coll version >= our shard version
    invariant(collectionVersion >= shardVersion);

    // With nonzero shard version, we must have a shard key
    invariant(!_committedMetadata->getKeyPattern().isEmpty());

    ChunkType origChunk;
    if (!_committedMetadata->getNextChunk(_args.getMinKey(), &origChunk)) {
        // If this assertion is hit, it means that whoever called the shard moveChunk command
        // (mongos or the CSRS balancer) did not check whether the chunk actually belongs to this
        // shard. It is a benign error and does not indicate data corruption.
        uasserted(40145,
                  str::stream() << "Chunk with bounds "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " is not owned by this shard.");
    }

    uassert(40146,
            str::stream() << "Unable to find chunk with the exact bounds "
                          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                          << " at collection version " << collectionVersion.toString()
                          << ". This indicates corrupted metadata.",
            origChunk.getMin().woCompare(_args.getMinKey()) == 0 &&
                origChunk.getMax().woCompare(_args.getMaxKey()) == 0);
}