void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       ChunkVersion chunkVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    chunkVersion.appendToCommand(builder);

    // 3.4 shard compatibility
    builder->append(kEpoch, chunkVersion.epoch());

    // config connection string is included for 3.4 shard compatibility
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());

    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, false);
}
void ChunkManagerTargeter::noteStaleResponse(const ShardEndpoint& endpoint,
                                             const StaleConfigInfo& staleInfo) {
    dassert(!_needsTargetingRefresh);

    ChunkVersion remoteShardVersion;
    if (!staleInfo.getVersionWanted()) {
        // If we don't have a vWanted sent, assume the version is higher than our current version.
        remoteShardVersion = getShardVersion(*_routingInfo, endpoint.shardName);
        remoteShardVersion.incMajor();
    } else {
        remoteShardVersion = *staleInfo.getVersionWanted();
    }

    ShardVersionMap::iterator it = _remoteShardVersions.find(endpoint.shardName);
    if (it == _remoteShardVersions.end()) {
        _remoteShardVersions.insert(std::make_pair(endpoint.shardName, remoteShardVersion));
    } else {
        ChunkVersion& previouslyNotedVersion = it->second;
        if (previouslyNotedVersion.epoch() == remoteShardVersion.epoch()) {
            if (previouslyNotedVersion.isOlderThan(remoteShardVersion)) {
                previouslyNotedVersion = remoteShardVersion;
            }
        } else {
            // Epoch changed midway while applying the batch, so set the version to something
            // unique and non-existent to force a reload when refreshIsNeeded is called.
            previouslyNotedVersion = ChunkVersion::IGNORED();
        }
    }
}
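The snippet above relies on ChunkVersion ordering being meaningful only within one epoch. The following is a minimal, self-contained sketch of that rule using a hypothetical MiniChunkVersion type (not the real mongo::ChunkVersion API): versions compare by (major, minor), and an epoch mismatch means the comparison is not valid and the cached entry must be invalidated.

// Simplified, standalone model of the version-comparison semantics assumed by noteStaleResponse().
#include <cstdint>
#include <iostream>
#include <string>

struct MiniChunkVersion {
    uint32_t majorVersion;
    uint32_t minorVersion;
    std::string epoch;  // stands in for the OID epoch

    bool isOlderThan(const MiniChunkVersion& other) const {
        // Only meaningful when the epochs match, mirroring the epoch check above.
        return majorVersion < other.majorVersion ||
               (majorVersion == other.majorVersion && minorVersion < other.minorVersion);
    }
};

int main() {
    MiniChunkVersion noted{3, 1, "e1"};              // version previously noted for this shard
    MiniChunkVersion fromStaleResponse{4, 0, "e1"};  // vWanted reported back by the shard

    if (noted.epoch == fromStaleResponse.epoch && noted.isOlderThan(fromStaleResponse)) {
        noted = fromStaleResponse;  // keep the newest version seen for this shard
    }

    std::cout << noted.majorVersion << "|" << noted.minorVersion << " @ " << noted.epoch
              << std::endl;  // prints "4|0 @ e1"
    return 0;
}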
static bool checkShardVersion( ShardingState* shardingState,
                               const BatchedCommandRequest& request,
                               WriteErrorDetail** error ) {

    const NamespaceString nss( request.getTargetingNS() );
    Lock::assertWriteLocked( nss.ns() );

    ChunkVersion requestShardVersion =
        request.isMetadataSet() && request.getMetadata()->isShardVersionSet() ?
            request.getMetadata()->getShardVersion() : ChunkVersion::IGNORED();

    if ( shardingState->enabled() ) {

        CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );

        if ( !ChunkVersion::isIgnoredVersion( requestShardVersion ) ) {

            ChunkVersion shardVersion =
                metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

            if ( !requestShardVersion.isWriteCompatibleWith( shardVersion ) ) {
                *error = new WriteErrorDetail;
                buildStaleError( requestShardVersion, shardVersion, *error );
                return false;
            }
        }
    }

    return true;
}
static bool checkShardVersion(OperationContext* txn,
                              ShardingState* shardingState,
                              const BatchedCommandRequest& request,
                              WriteOpResult* result) {

    const NamespaceString nss( request.getTargetingNS() );
    txn->lockState()->assertWriteLocked( nss.ns() );

    ChunkVersion requestShardVersion =
        request.isMetadataSet() && request.getMetadata()->isShardVersionSet() ?
            request.getMetadata()->getShardVersion() : ChunkVersion::IGNORED();

    if ( shardingState->enabled() ) {

        CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );

        if ( !ChunkVersion::isIgnoredVersion( requestShardVersion ) ) {

            ChunkVersion shardVersion =
                metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

            if ( !requestShardVersion.isWriteCompatibleWith( shardVersion ) ) {
                result->setError(new WriteErrorDetail);
                buildStaleError(requestShardVersion, shardVersion, result->getError());
                return false;
            }
        }
    }

    return true;
}
void ChunkManagerTargeter::noteStaleResponse( const ShardEndpoint& endpoint,
                                              const BSONObj& staleInfo ) {
    dassert( !_needsTargetingRefresh );

    ChunkVersion remoteShardVersion;
    if ( staleInfo["vWanted"].eoo() ) {
        // If we don't have a vWanted sent, assume the version is higher than our current
        // version.
        remoteShardVersion = getShardVersion( endpoint.shardName, _manager, _primary );
        remoteShardVersion.incMajor();
    }
    else {
        remoteShardVersion = ChunkVersion::fromBSON( staleInfo, "vWanted" );
    }

    ShardVersionMap::iterator it = _remoteShardVersions.find( endpoint.shardName );
    if ( it == _remoteShardVersions.end() ) {
        _remoteShardVersions.insert( make_pair( endpoint.shardName, remoteShardVersion ) );
    }
    else {
        ChunkVersion& previouslyNotedVersion = it->second;
        if ( previouslyNotedVersion.hasCompatibleEpoch( remoteShardVersion ) ) {
            if ( previouslyNotedVersion.isOlderThan( remoteShardVersion ) ) {
                remoteShardVersion.cloneTo( &previouslyNotedVersion );
            }
        }
        else {
            // Epoch changed midway while applying the batch so set the version to
            // something unique and non-existent to force a reload when
            // refreshIsNeeded is called.
            ChunkVersion::IGNORED().cloneTo( &previouslyNotedVersion );
        }
    }
}
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       ChunkVersion collectionVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       ChunkVersion chunkVersion,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete,
                                       bool takeDistLock) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    collectionVersion.appendForCommands(builder);
    builder->append(kEpoch, collectionVersion.epoch());
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    chunkVersion.appendWithFieldForCommands(builder, kChunkVersion);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, takeDistLock);
}
Status onShardVersionMismatch(OperationContext* opCtx,
                              const NamespaceString& nss,
                              ChunkVersion shardVersionReceived,
                              bool forceRefreshFromThisThread) noexcept {
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());

    auto const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());

    LOG(2) << "Metadata refresh requested for " << nss.ns() << " at shard version "
           << shardVersionReceived;

    ShardingStatistics::get(opCtx).countStaleConfigErrors.addAndFetch(1);

    // Ensure any ongoing migrations have completed before trying to do the refresh. This wait is
    // just an optimization so that MongoS does not exhaust its maximum number of StaleConfig retry
    // attempts while the migration is being committed.
    try {
        auto& oss = OperationShardingState::get(opCtx);
        oss.waitForMigrationCriticalSectionSignal(opCtx);
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto currentShardVersion = [&] {
        AutoGetCollection autoColl(opCtx, nss, MODE_IS);
        const auto currentMetadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
        if (currentMetadata) {
            return currentMetadata->getShardVersion();
        }

        return ChunkVersion::UNSHARDED();
    }();

    if (currentShardVersion.epoch() == shardVersionReceived.epoch() &&
        currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) {
        // Don't need to remotely reload if we're in the same epoch and the requested version is
        // smaller than the one we know about. This means that the remote side is behind.
        return Status::OK();
    }

    try {
        forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread);
        return Status::OK();
    } catch (const DBException& ex) {
        log() << "Failed to refresh metadata for collection " << nss << causedBy(redact(ex));
        return ex.toStatus();
    }
}
CollectionMetadata::CollectionMetadata(const BSONObj& keyPattern, ChunkVersion collectionVersion)
    : _collVersion(collectionVersion),
      _shardVersion(ChunkVersion(0, 0, collectionVersion.epoch())),
      _keyPattern(keyPattern.getOwned()),
      _pendingMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()),
      _chunksMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()),
      _rangesMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()) {}
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       const ChunkVersion& shardVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const std::string& fromShardId,
                                       const std::string& toShardId,
                                       const BSONObj& chunkMinKey,
                                       const BSONObj& chunkMaxKey,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    shardVersion.appendForCommands(builder);
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId);
    builder->append(kToShardId, toShardId);
    builder->append(kChunkMinKey, chunkMinKey);
    builder->append(kChunkMaxKey, chunkMaxKey);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
}
bool setShardVersion(DBClientBase& conn,
                     const string& ns,
                     const string& configServerPrimary,
                     ChunkVersion version,
                     ChunkManager* manager,
                     bool authoritative,
                     BSONObj& result) {
    BSONObjBuilder cmdBuilder;
    cmdBuilder.append("setShardVersion", ns);
    cmdBuilder.append("configdb", configServerPrimary);

    Shard s = Shard::make(conn.getServerAddress());
    cmdBuilder.append("shard", s.getName());
    cmdBuilder.append("shardHost", s.getConnString());

    if (ns.size() > 0) {
        version.addToBSON(cmdBuilder);
    } else {
        cmdBuilder.append("init", true);
    }

    if (authoritative) {
        cmdBuilder.appendBool("authoritative", 1);
    }

    BSONObj cmd = cmdBuilder.obj();

    LOG(1) << " setShardVersion " << s.getName() << " " << conn.getServerAddress() << " " << ns
           << " " << cmd
           << (manager ? string(str::stream() << " " << manager->getSequenceNumber()) : "");

    return conn.runCommand("admin", cmd, result, 0);
}
static void buildStaleError( const ChunkVersion& shardVersionRecvd,
                             const ChunkVersion& shardVersionWanted,
                             BatchedErrorDetail* error ) {
    // Write stale error to results
    error->setErrCode( ErrorCodes::StaleShardVersion );

    BSONObjBuilder infoB;
    shardVersionWanted.addToBSON( infoB, "vWanted" );
    error->setErrInfo( infoB.obj() );

    string errMsg = stream() << "stale shard version detected before write, received "
                             << shardVersionRecvd.toString() << " but local version is "
                             << shardVersionWanted.toString();
    error->setErrMessage( errMsg );
}
void Chunk::serialize(BSONObjBuilder& to, ChunkVersion myLastMod) {
    to.append("_id", genID(_manager->getns(), _min));

    if (myLastMod.isSet()) {
        myLastMod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else if (_lastmod.isSet()) {
        _lastmod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else {
        verify(0);
    }

    to << ChunkType::ns(_manager->getns());
    to << ChunkType::min(_min);
    to << ChunkType::max(_max);
    to << ChunkType::shard(_shardId);
}
BSONObj buildMergeLogEntry(const std::vector<ChunkType>& chunksToMerge,
                           const ChunkVersion& currShardVersion,
                           const ChunkVersion& newMergedVersion) {
    BSONObjBuilder logDetailB;

    BSONArrayBuilder mergedB(logDetailB.subarrayStart("merged"));
    for (const ChunkType& chunkToMerge : chunksToMerge) {
        mergedB.append(chunkToMerge.toBSON());
    }
    mergedB.done();

    currShardVersion.addToBSON(logDetailB, "prevShardVersion");
    newMergedVersion.addToBSON(logDetailB, "mergedVersion");

    return logDetailB.obj();
}
unique_ptr<CollectionMetadata> CollectionMetadata::clonePlusChunk(
    const BSONObj& minKey, const BSONObj& maxKey, const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion.isSet());
    invariant(minKey.woCompare(maxKey) < 0);
    invariant(!rangeMapOverlaps(_chunksMap, minKey, maxKey));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_chunksMap.insert(make_pair(minKey.getOwned(), maxKey.getOwned()));
    metadata->_shardVersion = newShardVersion;
    metadata->_collVersion = newShardVersion > _collVersion ? newShardVersion : _collVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return metadata;
}
void ChunkManagerTargeter::noteStaleResponse( const ShardEndpoint& endpoint,
                                              const BSONObj& staleInfo ) {
    dassert( !_needsTargetingRefresh );

    ChunkVersion remoteShardVersion;
    if ( staleInfo["vWanted"].eoo() ) {
        // If we don't have a vWanted sent, assume the version is higher than our current
        // version.
        remoteShardVersion = getShardVersion( endpoint.shardName, _manager, _primary );
        remoteShardVersion.incMajor();
    }
    else {
        remoteShardVersion = ChunkVersion::fromBSON( staleInfo, "vWanted" );
    }

    // We assume here that we can't have more than one stale config per-shard
    dassert( _remoteShardVersions.find( endpoint.shardName ) == _remoteShardVersions.end() );
    _remoteShardVersions.insert( make_pair( endpoint.shardName, remoteShardVersion ) );
}
TEST_F(MergeChunkTests, CompoundMerge) {
    const NamespaceString nss("foo.bar");
    const BSONObj kp = BSON("x" << 1 << "y" << 1);
    const OID epoch = OID::gen();
    vector<KeyRange> ranges;

    // Setup chunk metadata
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 0 << "y" << 1), BSON("x" << 1 << "y" << 0), kp));
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 1 << "y" << 0), BSON("x" << 2 << "y" << 1), kp));
    storeCollectionRanges(nss, shardName(), ranges, ChunkVersion(1, 0, epoch));

    // Get latest version
    ChunkVersion latestVersion;
    ShardingState::get(getGlobalServiceContext())
        ->refreshMetadataNow(&_txn, nss.ns(), &latestVersion);
    ShardingState::get(getGlobalServiceContext())->resetMetadata(nss.ns());

    // Do merge
    string errMsg;
    bool result = mergeChunks(
        &_txn, nss, BSON("x" << 0 << "y" << 1), BSON("x" << 2 << "y" << 1), epoch, &errMsg);
    ASSERT_EQUALS(errMsg, "");
    ASSERT(result);

    // Verify result
    CollectionMetadataPtr metadata =
        ShardingState::get(getGlobalServiceContext())->getCollectionMetadata(nss.ns());

    ChunkType chunk;
    ASSERT(metadata->getNextChunk(BSON("x" << 0 << "y" << 1), &chunk));
    ASSERT(chunk.getMin().woCompare(BSON("x" << 0 << "y" << 1)) == 0);
    ASSERT(chunk.getMax().woCompare(BSON("x" << 2 << "y" << 1)) == 0);
    ASSERT_EQUALS(metadata->getNumChunks(), 1u);

    ASSERT_EQUALS(metadata->getShardVersion().majorVersion(), latestVersion.majorVersion());
    ASSERT_GREATER_THAN(metadata->getShardVersion().minorVersion(), latestVersion.minorVersion());

    assertWrittenAsMerged(ranges);
}
BSONObj buildMergeLogEntry( const OwnedPointerVector<ChunkType>& chunksToMerge,
                            const ChunkVersion& currShardVersion,
                            const ChunkVersion& newMergedVersion ) {
    BSONObjBuilder logDetailB;

    BSONArrayBuilder mergedB( logDetailB.subarrayStart( "merged" ) );
    for ( OwnedPointerVector<ChunkType>::const_iterator it = chunksToMerge.begin();
          it != chunksToMerge.end(); ++it ) {
        ChunkType* chunkToMerge = *it;
        mergedB.append( chunkToMerge->toBSON() );
    }
    mergedB.done();

    currShardVersion.addToBSON( logDetailB, "prevShardVersion" );
    newMergedVersion.addToBSON( logDetailB, "mergedVersion" );

    return logDetailB.obj();
}
/**
 * Stores ranges for a particular collection and shard starting from some version
 */
void storeCollectionRanges( const NamespaceString& nss,
                            const string& shardName,
                            const vector<KeyRange>& ranges,
                            const ChunkVersion& startVersion ) {
    // Get key pattern from first range
    ASSERT_GREATER_THAN( ranges.size(), 0u );

    CollectionType coll;
    coll.setNS( nss.ns() );
    coll.setKeyPattern( ranges.begin()->keyPattern );
    coll.setEpoch( startVersion.epoch() );
    coll.setUpdatedAt( 1ULL );

    string errMsg;
    ASSERT( coll.isValid( &errMsg ) );

    DBDirectClient client(&_txn);
    client.update( CollectionType::ConfigNS,
                   BSON( CollectionType::ns( coll.getNS() ) ),
                   coll.toBSON(),
                   true,
                   false );

    ChunkVersion nextVersion = startVersion;
    for ( vector<KeyRange>::const_iterator it = ranges.begin(); it != ranges.end(); ++it ) {
        ChunkType chunk;
        // TODO: We should not rely on the serialized ns, minkey being unique in the future,
        // causes problems since it links string serialization to correctness.
        chunk.setName( Chunk::genID( nss, it->minKey ) );
        chunk.setShard( shardName );
        chunk.setNS( nss.ns() );
        chunk.setVersion( nextVersion );
        chunk.setMin( it->minKey );
        chunk.setMax( it->maxKey );
        nextVersion.incMajor();

        client.insert( ChunkType::ConfigNS, chunk.toBSON() );
    }
}
std::unique_ptr<CollectionMetadata> CollectionMetadata::cloneMigrate(
    const ChunkType& chunk, const ChunkVersion& newCollectionVersion) const {
    invariant(newCollectionVersion.epoch() == _collVersion.epoch());
    invariant(newCollectionVersion > _collVersion);
    invariant(rangeMapContains(_chunksMap, chunk.getMin(), chunk.getMax()));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_chunksMap.erase(chunk.getMin());

    metadata->_shardVersion =
        (metadata->_chunksMap.empty() ? ChunkVersion(0, 0, newCollectionVersion.epoch())
                                      : newCollectionVersion);
    metadata->_collVersion = newCollectionVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return metadata;
}
BSONObj buildOpPrecond( const string& ns,
                        const string& shardName,
                        const ChunkVersion& shardVersion ) {
    BSONObjBuilder condB;
    condB.append( "ns", ChunkType::ConfigNS );
    condB.append( "q", BSON( "query" << BSON( ChunkType::ns( ns ) )
                                     << "orderby"
                                     << BSON( ChunkType::DEPRECATED_lastmod() << -1 ) ) );
    {
        BSONObjBuilder resB( condB.subobjStart( "res" ) );
        shardVersion.addToBSON( resB, ChunkType::DEPRECATED_lastmod() );
        resB.done();
    }

    return condB.obj();
}
void ChunkManager::createFirstChunks(OperationContext* txn,
                                     const ShardId& primaryShardId,
                                     const vector<BSONObj>* initPoints,
                                     const set<ShardId>* initShardIds) {
    // TODO distlock?
    // TODO: Race condition if we shard the collection and insert data while we split across
    // the non-primary shard.

    vector<BSONObj> splitPoints;
    vector<ShardId> shardIds;
    calcInitSplitsAndShards(txn, primaryShardId, initPoints, initShardIds, &splitPoints, &shardIds);

    // this is the first chunk; start the versioning from scratch
    ChunkVersion version;
    version.incEpoch();
    version.incMajor();

    log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << _ns
          << " using new epoch " << version.epoch();

    for (unsigned i = 0; i <= splitPoints.size(); i++) {
        BSONObj min = i == 0 ? _keyPattern.getKeyPattern().globalMin() : splitPoints[i - 1];
        BSONObj max =
            i < splitPoints.size() ? splitPoints[i] : _keyPattern.getKeyPattern().globalMax();

        Chunk temp(this, min, max, shardIds[i % shardIds.size()], version);

        BSONObjBuilder chunkBuilder;
        temp.serialize(chunkBuilder);

        BSONObj chunkObj = chunkBuilder.obj();

        Status result = grid.catalogManager(txn)->update(txn,
                                                          ChunkType::ConfigNS,
                                                          BSON(ChunkType::name(temp.genID())),
                                                          chunkObj,
                                                          true,
                                                          false,
                                                          NULL);

        version.incMinor();

        if (!result.isOK()) {
            string ss = str::stream()
                << "creating first chunks failed. result: " << result.reason();
            error() << ss;
            msgasserted(15903, ss);
        }
    }

    _version = ChunkVersion(0, 0, version.epoch());
}
void ShardingTestFixture::expectSetShardVersion(const HostAndPort& expectedHost,
                                                const ShardType& expectedShard,
                                                const NamespaceString& expectedNs,
                                                const ChunkVersion& expectedChunkVersion) {
    onCommand([&](const RemoteCommandRequest& request) {
        ASSERT_EQ(expectedHost, request.target);
        ASSERT_EQUALS(rpc::makeEmptyMetadata(), request.metadata);

        SetShardVersionRequest ssv =
            assertGet(SetShardVersionRequest::parseFromBSON(request.cmdObj));

        ASSERT(!ssv.isInit());
        ASSERT(ssv.isAuthoritative());
        ASSERT_EQ(grid.shardRegistry()->getConfigServerConnectionString().toString(),
                  ssv.getConfigServer().toString());
        ASSERT_EQ(expectedShard.getHost(), ssv.getShardConnectionString().toString());
        ASSERT_EQ(expectedNs.toString(), ssv.getNS().ns());
        ASSERT_EQ(expectedChunkVersion.toString(), ssv.getNSVersion().toString());

        return BSON("ok" << true);
    });
}
int ConfigDiffTracker<ValType, ShardType>::calculateConfigDiff(
    const std::vector<ChunkType>& chunks) {
    _assertAttached();

    // Apply the chunk changes to the ranges and versions
    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //    per-collection, and remove any conflicting chunks from the ranges.
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on
    //    the shard for mongod) add them to the ranges.

    std::vector<ChunkType> newTracked;

    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;

    for (const ChunkType& chunk : chunks) {
        ChunkVersion chunkVersion =
            ChunkVersion::fromBSON(chunk.toBSON(), ChunkType::DEPRECATED_lastmod());

        if (!chunkVersion.isSet() || !chunkVersion.hasEqualEpoch(currEpoch)) {
            warning() << "got invalid chunk version " << chunkVersion << " in document "
                      << chunk.toString()
                      << " when trying to load differing chunks at version "
                      << ChunkVersion(
                             _maxVersion->majorVersion(), _maxVersion->minorVersion(), currEpoch);

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if (chunkVersion > *_maxVersion) {
            *_maxVersion = chunkVersion;
        }

        // Chunk version changes
        ShardType shard = shardFor(chunk.getShard());

        typename MaxChunkVersionMap::const_iterator shardVersionIt =
            _maxShardVersions->find(shard);
        if (shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion) {
            (*_maxShardVersions)[shard] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking because of this
        // chunk's changes
        removeOverlapping(chunk.getMin(), chunk.getMax());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore
        // or unbuffer.
        if (isTracked(chunk)) {
            newTracked.push_back(chunk);
        }
    }

    LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns << " (tracking "
           << newTracked.size() << "), new version is " << *_maxVersion;

    for (const ChunkType& chunk : newTracked) {
        // Invariant enforced by sharding - it's possible to read inconsistent state due to
        // getMore and yielding, so we want to detect it as early as possible.
        //
        // TODO: This checks for overlap, we also should check for holes here iff we're
        // tracking all chunks.
        if (isOverlapping(chunk.getMin(), chunk.getMax())) {
            return -1;
        }

        _currMap->insert(rangeFor(chunk));
    }

    return _validDiffs;
}
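The diff pass above boils down to simple bookkeeping: keep the highest version seen per shard and overall, and bail out on an epoch mismatch. The following is a minimal, self-contained sketch of that bookkeeping with hypothetical MiniVersion/ChunkDoc types (not the real ConfigDiffTracker), intended only to make the control flow above easier to follow.

// Simplified, standalone model of the per-shard max-version tracking done by calculateConfigDiff().
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct MiniVersion {
    uint32_t majorVersion;
    uint32_t minorVersion;
    std::string epoch;

    bool operator<(const MiniVersion& o) const {
        return majorVersion < o.majorVersion ||
               (majorVersion == o.majorVersion && minorVersion < o.minorVersion);
    }
};

struct ChunkDoc {
    std::string shard;
    MiniVersion version;
};

// Returns the number of applied diffs, or -1 on an epoch mismatch (caller must do a full reload).
int applyDiffs(const std::vector<ChunkDoc>& chunks,
               MiniVersion* maxVersion,
               std::map<std::string, MiniVersion>* maxShardVersions) {
    const std::string currEpoch = maxVersion->epoch;  // store epoch before maxVersion changes
    int validDiffs = 0;

    for (const ChunkDoc& c : chunks) {
        if (c.version.epoch != currEpoch) {
            return -1;  // collection was dropped/recreated under us
        }
        ++validDiffs;

        if (*maxVersion < c.version) {
            *maxVersion = c.version;  // collection-wide maximum
        }

        auto it = maxShardVersions->find(c.shard);
        if (it == maxShardVersions->end() || it->second < c.version) {
            (*maxShardVersions)[c.shard] = c.version;  // per-shard maximum
        }
    }
    return validDiffs;
}

int main() {
    MiniVersion maxVersion{1, 0, "e1"};
    std::map<std::string, MiniVersion> maxShardVersions;
    std::vector<ChunkDoc> chunks = {{"shard0", {2, 1, "e1"}}, {"shard1", {2, 2, "e1"}}};

    std::cout << applyDiffs(chunks, &maxVersion, &maxShardVersions) << " diffs, max version "
              << maxVersion.majorVersion << "|" << maxVersion.minorVersion << std::endl;
    return 0;
}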
Status ChunkMoveOperationState::commitMigration() {
    invariant(_distLockStatus.is_initialized());
    invariant(_distLockStatus->isOK());

    log() << "About to enter migrate critical section";

    // We're under the collection distributed lock here, so no other migrate can change maxVersion
    // or CollectionMetadata state.
    ShardingState* const shardingState = ShardingState::get(_txn);

    Status startStatus = ShardingStateRecovery::startMetadataOp(_txn);
    if (!startStatus.isOK())
        return startStatus;

    shardingState->migrationSourceManager()->setInCriticalSection(true);

    const ChunkVersion originalCollVersion = getCollMetadata()->getCollVersion();

    ChunkVersion myVersion = originalCollVersion;
    myVersion.incMajor();

    {
        ScopedTransaction transaction(_txn, MODE_IX);
        Lock::DBLock lk(_txn->lockState(), _nss.db(), MODE_IX);
        Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

        invariant(myVersion > shardingState->getVersion(_nss.ns()));

        // Bump the metadata's version up and "forget" about the chunk being moved. This is
        // not the commit point, but in practice the state in this shard won't change until
        // the commit is done.
        shardingState->donateChunk(_txn, _nss.ns(), _minKey, _maxKey, myVersion);
    }

    log() << "moveChunk setting version to: " << myVersion << migrateLog;

    // We're under the collection lock here, too, so we can undo the chunk donation because
    // no other state change could be ongoing
    BSONObj res;
    Status recvChunkCommitStatus{ErrorCodes::InternalError, "status not set"};

    try {
        ScopedDbConnection connTo(_toShardCS, 35.0);
        connTo->runCommand("admin", BSON("_recvChunkCommit" << 1), res);
        connTo.done();
        recvChunkCommitStatus = getStatusFromCommandResult(res);
    } catch (const DBException& e) {
        const string msg = stream() << "moveChunk could not contact to shard " << _toShard
                                    << " to commit transfer" << causedBy(e);
        warning() << msg;
        recvChunkCommitStatus = Status(e.toStatus().code(), msg);
    }

    if (MONGO_FAIL_POINT(failMigrationCommit) && recvChunkCommitStatus.isOK()) {
        recvChunkCommitStatus =
            Status(ErrorCodes::InternalError, "Failing _recvChunkCommit due to failpoint.");
    }

    if (!recvChunkCommitStatus.isOK()) {
        log() << "moveChunk migrate commit not accepted by TO-shard: " << res
              << " resetting shard version to: " << getShardVersion() << migrateLog;

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            log() << "moveChunk collection lock acquired to reset shard version from "
                     "failed migration";

            // Revert the chunk manager back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        return Status(recvChunkCommitStatus.code(),
                      stream() << "_recvChunkCommit failed: "
                               << causedBy(recvChunkCommitStatus));
    }

    log() << "moveChunk migrate commit accepted by TO-shard: " << res << migrateLog;

    BSONArrayBuilder updates;

    {
        // Update for the chunk being moved
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        myVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), _minKey);
        n.append(ChunkType::max(), _maxKey);
        n.append(ChunkType::shard(), _toShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        q.done();

        updates.append(op.obj());
    }

    // Version at which the next highest lastmod will be set. If the chunk being moved is the last
    // in the shard, nextVersion is that chunk's lastmod otherwise the highest version is from the
    // chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = myVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    const std::shared_ptr<CollectionMetadata> bumpedCollMetadata(
        shardingState->getCollectionMetadata(_nss.ns()));
    if (bumpedCollMetadata->getNumChunks() > 0) {
        // get another chunk on that shard
        ChunkType bumpChunk;
        invariant(bumpedCollMetadata->getNextChunk(bumpedCollMetadata->getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();

        dassert(bumpMin.woCompare(_minKey) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        nextVersion.incMinor();  // same as used on donateChunk

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _fromShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _nss.ns() << "'" << migrateLog;
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _nss.ns() << "'"
              << migrateLog;
    }

    BSONArrayBuilder preCond;

    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_nss.ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), originalCollVersion.toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    Status applyOpsStatus{Status::OK()};

    try {
        // For testing migration failures
        if (MONGO_FAIL_POINT(failMigrationConfigWritePrepare)) {
            throw DBException("mock migration failure before config write",
                              ErrorCodes::PrepareConfigsFailed);
        }

        applyOpsStatus =
            grid.catalogManager(_txn)->applyChunkOpsDeprecated(_txn, updates.arr(), preCond.arr());

        if (MONGO_FAIL_POINT(failMigrationApplyOps)) {
            throw SocketException(SocketException::RECV_ERROR,
                                  shardingState->getConfigServer(_txn).toString());
        }
    } catch (const DBException& ex) {
        warning() << ex << migrateLog;
        applyOpsStatus = ex.toStatus();
    }

    if (applyOpsStatus == ErrorCodes::PrepareConfigsFailed) {
        // In the process of issuing the migrate commit, the SyncClusterConnection checks that
        // the config servers are reachable. If they are not, we are sure that the applyOps
        // command was not sent to any of the configs, so we can safely back out of the
        // migration here, by resetting the shard version that we bumped up to in the
        // donateChunk() call above.
        log() << "About to acquire moveChunk coll lock to reset shard version from "
              << "failed migration";

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            // Revert the metadata back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        const string msg = stream() << "Failed to send migrate commit to configs "
                                    << causedBy(applyOpsStatus);
        return Status(applyOpsStatus.code(), msg);
    } else if (!applyOpsStatus.isOK()) {
        // This could be a blip in the connectivity. Wait out a few seconds and check if the
        // commit request made it.
        //
        // If the commit made it to the config, we'll see the chunk in the new shard and
        // there's no further action to be done.
        //
        // If the commit did not make it, currently the only way to fix this state is to
        // bounce the mongod so that the old state (before migrating) is brought in.
        warning() << "moveChunk commit outcome ongoing" << migrateLog;
        sleepsecs(10);

        // Look for the chunk in this shard whose version got bumped. We assume that if that
        // mod made it to the config server, then applyOps was successful.
        try {
            std::vector<ChunkType> newestChunk;
            Status status =
                grid.catalogManager(_txn)->getChunks(_txn,
                                                     BSON(ChunkType::ns(_nss.ns())),
                                                     BSON(ChunkType::DEPRECATED_lastmod() << -1),
                                                     1,
                                                     &newestChunk,
                                                     nullptr);
            uassertStatusOK(status);

            ChunkVersion checkVersion;
            if (!newestChunk.empty()) {
                invariant(newestChunk.size() == 1);
                checkVersion = newestChunk[0].getVersion();
            }

            if (checkVersion.equals(nextVersion)) {
                log() << "moveChunk commit confirmed" << migrateLog;
            } else {
                error() << "moveChunk commit failed: version is at " << checkVersion
                        << " instead of " << nextVersion << migrateLog;
                error() << "TERMINATING" << migrateLog;

                dbexit(EXIT_SHARDING_ERROR);
            }
        } catch (...) {
            error() << "moveChunk failed to get confirmation of commit" << migrateLog;
            error() << "TERMINATING" << migrateLog;

            dbexit(EXIT_SHARDING_ERROR);
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    shardingState->migrationSourceManager()->setInCriticalSection(false);

    ShardingStateRecovery::endMetadataOp(_txn);

    // Migration is done, just log some diagnostics information
    BSONObj chunkInfo =
        BSON("min" << _minKey << "max" << _maxKey << "from" << _fromShard << "to" << _toShard);

    BSONObjBuilder commitInfo;
    commitInfo.appendElements(chunkInfo);
    if (res["counts"].type() == Object) {
        commitInfo.appendElements(res["counts"].Obj());
    }

    grid.catalogManager(_txn)->logChange(_txn, "moveChunk.commit", _nss.ns(), commitInfo.obj());

    shardingState->migrationSourceManager()->done(_txn);
    _isRunning = false;

    return Status::OK();
}
StatusWith<std::unique_ptr<CollectionMetadata>> CollectionMetadata::cloneMerge(
    const BSONObj& minKey, const BSONObj& maxKey, const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion > _shardVersion);

    RangeVector overlap;
    getRangeMapOverlap(_chunksMap, minKey, maxKey, &overlap);

    if (overlap.empty() || overlap.size() == 1) {
        return {ErrorCodes::IllegalOperation,
                stream() << "cannot merge range " << rangeToString(minKey, maxKey)
                         << (overlap.empty() ? ", no chunks found in this range"
                                             : ", only one chunk found in this range")};
    }

    bool validStartEnd = true;
    bool validNoHoles = true;

    if (overlap.begin()->first.woCompare(minKey) != 0) {
        // First chunk doesn't start with minKey
        validStartEnd = false;
    } else if (overlap.rbegin()->second.woCompare(maxKey) != 0) {
        // Last chunk doesn't end with maxKey
        validStartEnd = false;
    } else {
        // Check that there are no holes
        BSONObj prevMaxKey = minKey;
        for (RangeVector::iterator it = overlap.begin(); it != overlap.end(); ++it) {
            if (it->first.woCompare(prevMaxKey) != 0) {
                validNoHoles = false;
                break;
            }
            prevMaxKey = it->second;
        }
    }

    if (!validStartEnd || !validNoHoles) {
        return {ErrorCodes::IllegalOperation,
                stream() << "cannot merge range " << rangeToString(minKey, maxKey)
                         << ", overlapping chunks " << overlapToString(overlap)
                         << (!validStartEnd ? " do not have the same min and max key"
                                            : " are not all adjacent")};
    }

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_rangesMap = _rangesMap;
    metadata->_shardVersion = newShardVersion;
    metadata->_collVersion = newShardVersion > _collVersion ? newShardVersion : this->_collVersion;

    for (RangeVector::iterator it = overlap.begin(); it != overlap.end(); ++it) {
        metadata->_chunksMap.erase(it->first);
    }

    metadata->_chunksMap.insert(make_pair(minKey, maxKey));

    invariant(metadata->isValid());
    return std::move(metadata);
}
void ChunkType::setVersion(const ChunkVersion& version) {
    invariant(version.isSet());
    _version = version;
}
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const BSONObj& minKey,
    const BSONObj& maxKey,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());
    invariant(minKey.woCompare(maxKey) < 0);

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                              << " parts at a time."};
    }

    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    cmd.append(kMinKey, minKey);
    cmd.append(kMaxKey, maxKey);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shard = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shard) {
        status = Status(ErrorCodes::ShardNotFound,
                        str::stream() << "shard " << shardId << " not found");
    } else {
        auto cmdStatus = shard->runCommand(txn,
                                           ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                           "admin",
                                           cmdObj,
                                           Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
            << "Chunk migration will be skipped because splitChunk returned invalid response: "
            << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
            << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
int ConfigDiffTracker<ValType, ShardType>::calculateConfigDiff( DBClientCursorInterface& diffCursor ) {
    verifyAttached();

    // Apply the chunk changes to the ranges and versions
    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //    per-collection, and remove any conflicting chunks from the ranges
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on the
    //    shard for mongod) add them to the ranges
    //

    vector<BSONObj> newTracked;

    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;
    while( diffCursor.more() ) {

        BSONObj diffChunkDoc = diffCursor.next();

        ChunkVersion chunkVersion =
            ChunkVersion::fromBSON(diffChunkDoc, ChunkType::DEPRECATED_lastmod());

        if( diffChunkDoc[ChunkType::min()].type() != Object ||
            diffChunkDoc[ChunkType::max()].type() != Object ||
            diffChunkDoc[ChunkType::shard()].type() != String )
        {
            warning() << "got invalid chunk document " << diffChunkDoc
                      << " when trying to load differing chunks" << endl;
            continue;
        }

        if( ! chunkVersion.isSet() || ! chunkVersion.hasCompatibleEpoch( currEpoch ) ) {

            warning() << "got invalid chunk version " << chunkVersion << " in document "
                      << diffChunkDoc << " when trying to load differing chunks at version "
                      << ChunkVersion( _maxVersion->toLong(), currEpoch ) << endl;

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if( chunkVersion > *_maxVersion ) *_maxVersion = chunkVersion;

        // Chunk version changes
        ShardType shard = shardFor( diffChunkDoc[ChunkType::shard()].String() );
        typename map<ShardType, ChunkVersion>::iterator shardVersionIt =
            _maxShardVersions->find( shard );
        if( shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion ) {
            (*_maxShardVersions)[ shard ] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking b/c of this chunk's
        // changes
        removeOverlapping(diffChunkDoc[ChunkType::min()].Obj(),
                          diffChunkDoc[ChunkType::max()].Obj());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore or
        // unbuffer
        if( isTracked( diffChunkDoc ) ) newTracked.push_back( diffChunkDoc.getOwned() );
    }

    LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns << " (tracking "
           << newTracked.size() << "), new version is " << *_maxVersion << endl;

    for( vector<BSONObj>::iterator it = newTracked.begin(); it != newTracked.end(); it++ ) {

        BSONObj chunkDoc = *it;

        // Important - we need to make sure we actually own the min and max here
        BSONObj min = chunkDoc[ChunkType::min()].Obj().getOwned();
        BSONObj max = chunkDoc[ChunkType::max()].Obj().getOwned();

        // Invariant enforced by sharding
        // It's possible to read inconsistent state b/c of getMore() and yielding, so we want
        // to detect as early as possible.
        // TODO: This checks for overlap, we also should check for holes here iff we're tracking
        // all chunks
        if( isOverlapping( min, max ) ) return -1;

        _currMap->insert( rangeFor( chunkDoc, min, max ) );
    }

    return _validDiffs;
}
StatusWith<std::unique_ptr<CollectionMetadata>> CollectionMetadata::cloneSplit(
    const BSONObj& minKey,
    const BSONObj& maxKey,
    const std::vector<BSONObj>& splitKeys,
    const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion > _shardVersion);

    // The version required in both resulting chunks could be simply an increment in the
    // minor portion of the current version. However, we are enforcing uniqueness over the
    // attributes <ns, version> of the configdb collection 'chunks'. So in practice, a
    // migrate somewhere may force this split to pick up a version that has the major
    // portion higher than the one that this shard has been using.
    //
    // TODO drop the uniqueness constraint and tighten the check below so that only the
    // minor portion of version changes

    // Check that we have the exact chunk that will be subtracted.
    if (!rangeMapContains(_chunksMap, minKey, maxKey)) {
        stream errMsg;
        errMsg << "cannot split chunk " << rangeToString(minKey, maxKey)
               << ", this shard does not contain the chunk";

        if (rangeMapOverlaps(_chunksMap, minKey, maxKey)) {
            RangeVector overlap;
            getRangeMapOverlap(_chunksMap, minKey, maxKey, &overlap);

            errMsg << " and it overlaps " << overlapToString(overlap);
        }

        return {ErrorCodes::IllegalOperation, errMsg};
    }

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_shardVersion = newShardVersion;  // will increment 2nd, 3rd,... chunks below

    BSONObj startKey = minKey;
    for (const auto& split : splitKeys) {
        // Check that the split key is valid
        if (!rangeContains(minKey, maxKey, split)) {
            return {ErrorCodes::IllegalOperation,
                    stream() << "cannot split chunk " << rangeToString(minKey, maxKey)
                             << " at key " << split};
        }

        // Check that the split keys are in order
        if (split.woCompare(startKey) <= 0) {
            // The split keys came in out of order, this probably indicates a bug, so fail the
            // operation. Re-iterate splitKeys to build a useful error message including the array
            // of splitKeys in the order received.
            str::stream errMsg;
            errMsg << "Invalid input to splitChunk, split keys must be in order, got: [";
            for (auto it2 = splitKeys.cbegin(); it2 != splitKeys.cend(); ++it2) {
                if (it2 != splitKeys.begin()) {
                    errMsg << ", ";
                }
                errMsg << it2->toString();
            }
            errMsg << "]";
            return {ErrorCodes::IllegalOperation, errMsg};
        }

        metadata->_chunksMap[startKey] = split.getOwned();
        metadata->_chunksMap.insert(make_pair(split.getOwned(), maxKey.getOwned()));
        metadata->_shardVersion.incMinor();
        startKey = split;
    }

    metadata->_collVersion =
        metadata->_shardVersion > _collVersion ? metadata->_shardVersion : _collVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return std::move(metadata);
}
bool mergeChunks( OperationContext* txn,
                  const NamespaceString& nss,
                  const BSONObj& minKey,
                  const BSONObj& maxKey,
                  const OID& epoch,
                  string* errMsg ) {
    //
    // Get sharding state up-to-date
    //

    ConnectionString configLoc = ConnectionString::parse( shardingState.getConfigServer(),
                                                          *errMsg );
    if ( !configLoc.isValid() ) {
        warning() << *errMsg << endl;
        return false;
    }

    //
    // Get the distributed lock
    //

    ScopedDistributedLock collLock( configLoc, nss.ns() );
    collLock.setLockMessage( stream() << "merging chunks in " << nss.ns() << " from " << minKey
                                      << " to " << maxKey );

    Status acquisitionStatus = collLock.tryAcquire();
    if ( !acquisitionStatus.isOK() ) {
        *errMsg = stream() << "could not acquire collection lock for " << nss.ns()
                           << " to merge chunks in [" << minKey << "," << maxKey << ")"
                           << causedBy( acquisitionStatus );

        warning() << *errMsg << endl;
        return false;
    }

    //
    // We now have the collection lock, refresh metadata to latest version and sanity check
    //

    ChunkVersion shardVersion;
    Status status = shardingState.refreshMetadataNow(txn, nss.ns(), &shardVersion);

    if ( !status.isOK() ) {
        *errMsg = str::stream() << "could not merge chunks, failed to refresh metadata for "
                                << nss.ns() << causedBy( status.reason() );

        warning() << *errMsg << endl;
        return false;
    }

    if ( epoch.isSet() && shardVersion.epoch() != epoch ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " has changed since merge was sent (sent epoch : "
                           << epoch.toString()
                           << ", current epoch : " << shardVersion.epoch().toString() << ")";

        warning() << *errMsg << endl;
        return false;
    }

    CollectionMetadataPtr metadata = shardingState.getCollectionMetadata( nss.ns() );

    if ( !metadata || metadata->getKeyPattern().isEmpty() ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " is not sharded";

        warning() << *errMsg << endl;
        return false;
    }

    dassert( metadata->getShardVersion().equals( shardVersion ) );

    if ( !metadata->isValidKey( minKey ) || !metadata->isValidKey( maxKey ) ) {
        *errMsg = stream() << "could not merge chunks, the range "
                           << rangeToString( minKey, maxKey ) << " is not valid"
                           << " for collection " << nss.ns() << " with key pattern "
                           << metadata->getKeyPattern();

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Get merged chunk information
    //

    ChunkVersion mergeVersion = metadata->getCollVersion();
    mergeVersion.incMinor();

    OwnedPointerVector<ChunkType> chunksToMerge;

    ChunkType itChunk;
    itChunk.setMin( minKey );
    itChunk.setMax( minKey );
    itChunk.setNS( nss.ns() );
    itChunk.setShard( shardingState.getShardName() );

    while ( itChunk.getMax().woCompare( maxKey ) < 0 &&
            metadata->getNextChunk( itChunk.getMax(), &itChunk ) ) {
        auto_ptr<ChunkType> saved( new ChunkType );
        itChunk.cloneTo( saved.get() );
        chunksToMerge.mutableVector().push_back( saved.release() );
    }

    if ( chunksToMerge.empty() ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " and ending at " << maxKey
                           << " does not belong to shard " << shardingState.getShardName();

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Validate the range starts and ends at chunks and has no holes, error if not valid
    //

    BSONObj firstDocMin = ( *chunksToMerge.begin() )->getMin();
    BSONObj firstDocMax = ( *chunksToMerge.begin() )->getMax();
    // minKey is inclusive
    bool minKeyInRange = rangeContains( firstDocMin, firstDocMax, minKey );

    if ( !minKeyInRange ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " does not belong to shard "
                           << shardingState.getShardName();

        warning() << *errMsg << endl;
        return false;
    }

    BSONObj lastDocMin = ( *chunksToMerge.rbegin() )->getMin();
    BSONObj lastDocMax = ( *chunksToMerge.rbegin() )->getMax();
    // maxKey is exclusive
    bool maxKeyInRange = lastDocMin.woCompare( maxKey ) < 0 &&
                         lastDocMax.woCompare( maxKey ) >= 0;

    if ( !maxKeyInRange ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range ending at " << maxKey << " does not belong to shard "
                           << shardingState.getShardName();

        warning() << *errMsg << endl;
        return false;
    }

    bool validRangeStartKey = firstDocMin.woCompare( minKey ) == 0;
    bool validRangeEndKey = lastDocMax.woCompare( maxKey ) == 0;

    if ( !validRangeStartKey || !validRangeEndKey ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " does not contain a chunk "
                           << ( !validRangeStartKey ? "starting at " + minKey.toString() : "" )
                           << ( !validRangeStartKey && !validRangeEndKey ? " or " : "" )
                           << ( !validRangeEndKey ? "ending at " + maxKey.toString() : "" );

        warning() << *errMsg << endl;
        return false;
    }

    if ( chunksToMerge.size() == 1 ) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " already contains chunk for " << rangeToString( minKey, maxKey );

        warning() << *errMsg << endl;
        return false;
    }

    bool holeInRange = false;

    // Look for hole in range
    ChunkType* prevChunk = *chunksToMerge.begin();
    ChunkType* nextChunk = NULL;
    for ( OwnedPointerVector<ChunkType>::const_iterator it = chunksToMerge.begin();
          it != chunksToMerge.end(); ++it ) {
        if ( it == chunksToMerge.begin() ) continue;

        nextChunk = *it;
        if ( prevChunk->getMax().woCompare( nextChunk->getMin() ) != 0 ) {
            holeInRange = true;
            break;
        }
        prevChunk = nextChunk;
    }

    if ( holeInRange ) {
        dassert( NULL != nextChunk );
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " has a hole in the range " << rangeToString( minKey, maxKey )
                           << " at " << rangeToString( prevChunk->getMax(), nextChunk->getMin() );

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Run apply ops command
    //

    BSONObj applyOpsCmd = buildApplyOpsCmd( chunksToMerge, shardVersion, mergeVersion );

    bool ok;
    BSONObj result;
    try {
        ScopedDbConnection conn( configLoc, 30.0 );
        ok = conn->runCommand( "config", applyOpsCmd, result );
        if ( !ok ) *errMsg = result.toString();
        conn.done();
    }
    catch( const DBException& ex ) {
        ok = false;
        *errMsg = ex.toString();
    }

    if ( !ok ) {
        *errMsg = stream() << "could not merge chunks for " << nss.ns()
                           << ", writing to config failed" << causedBy( errMsg );

        warning() << *errMsg << endl;
        return false;
    }

    //
    // Install merged chunk metadata
    //

    {
        Lock::DBLock writeLk(txn->lockState(), nss.db(), newlm::MODE_X);
        shardingState.mergeChunks(txn, nss.ns(), minKey, maxKey, mergeVersion);
    }

    //
    // Log change
    //

    BSONObj mergeLogEntry = buildMergeLogEntry( chunksToMerge, shardVersion, mergeVersion );

    configServer.logChange( "merge", nss.ns(), mergeLogEntry );

    return true;
}