/**
 * Runs the commit phase of a chunk migration on the donor side.
 *
 * Steps, in the order performed below:
 *   1. Asserts that the distributed lock status is initialized and OK.
 *   2. Calls ShardingStateRecovery::startMetadataOp() (returning its status on failure) and flags
 *      the migration source manager as being in the critical section.
 *   3. Increments the major component of the collection version and calls donateChunk() for
 *      [_minKey, _maxKey) while holding the collection lock in MODE_X.
 *   4. Sends _recvChunkCommit to the recipient. On any failure, calls undoDonateChunk() and returns
 *      an error status carrying the failure's code.
 *   5. Builds the chunk update operations (the moved chunk, plus one remaining chunk on this shard,
 *      if any) together with a precondition, and passes them to applyChunkOpsDeprecated().
 *   6. If that fails with PrepareConfigsFailed, calls undoDonateChunk() and returns an error. On
 *      any other failure, sleeps, re-reads the newest chunk and calls dbexit() unless its version
 *      equals the expected one.
 *   7. Clears the critical section flag, calls endMetadataOp(), logs the "moveChunk.commit" change
 *      and marks the operation as no longer running.
 *
 * Returns Status::OK() on success, otherwise the error status described above.
 *
 * NOTE(review): the two early error returns (steps 4 and 6) do not themselves clear the critical
 * section flag or call ShardingStateRecovery::endMetadataOp(). Presumably that cleanup happens
 * elsewhere while _isRunning is still true - confirm against the owner of this object.
 */
Status ChunkMoveOperationState::commitMigration() {
    invariant(_distLockStatus.is_initialized());
    invariant(_distLockStatus->isOK());

    log() << "About to enter migrate critical section";

    // We're under the collection distributed lock here, so no other migrate can change maxVersion
    // or CollectionMetadata state.
    ShardingState* const shardingState = ShardingState::get(_txn);

    Status startStatus = ShardingStateRecovery::startMetadataOp(_txn);
    if (!startStatus.isOK())
        return startStatus;

    shardingState->migrationSourceManager()->setInCriticalSection(true);

    // Remember the version from before the bump; it is used further down as the expected value
    // in the config update's precondition.
    const ChunkVersion originalCollVersion = getCollMetadata()->getCollVersion();

    ChunkVersion myVersion = originalCollVersion;
    myVersion.incMajor();

    {
        ScopedTransaction transaction(_txn, MODE_IX);
        Lock::DBLock lk(_txn->lockState(), _nss.db(), MODE_IX);
        Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

        invariant(myVersion > shardingState->getVersion(_nss.ns()));

        // Bump the metadata's version up and "forget" about the chunk being moved. This is
        // not the commit point, but in practice the state in this shard won't change until
        // the commit is done.
        shardingState->donateChunk(_txn, _nss.ns(), _minKey, _maxKey, myVersion);
    }

    log() << "moveChunk setting version to: " << myVersion << migrateLog;

    // We're under the collection lock here, too, so we can undo the chunk donation because
    // no other state change could be ongoing
    //
    // NOTE(review): the local collection lock taken above has already gone out of scope at this
    // point, so "collection lock" presumably refers to the distributed lock - confirm.
    BSONObj res;
    Status recvChunkCommitStatus{ErrorCodes::InternalError, "status not set"};

    try {
        // NOTE(review): 35.0 is presumably the socket timeout in seconds - confirm against
        // ScopedDbConnection.
        ScopedDbConnection connTo(_toShardCS, 35.0);
        connTo->runCommand("admin", BSON("_recvChunkCommit" << 1), res);
        connTo.done();
        recvChunkCommitStatus = getStatusFromCommandResult(res);
    } catch (const DBException& e) {
        const string msg = stream() << "moveChunk could not contact to shard " << _toShard
                                    << " to commit transfer" << causedBy(e);
        warning() << msg;
        recvChunkCommitStatus = Status(e.toStatus().code(), msg);
    }

    // The fail point only turns an otherwise successful commit into a failure; a real failure is
    // left untouched.
    if (MONGO_FAIL_POINT(failMigrationCommit) && recvChunkCommitStatus.isOK()) {
        recvChunkCommitStatus =
            Status(ErrorCodes::InternalError, "Failing _recvChunkCommit due to failpoint.");
    }

    if (!recvChunkCommitStatus.isOK()) {
        log() << "moveChunk migrate commit not accepted by TO-shard: " << res
              << " resetting shard version to: " << getShardVersion() << migrateLog;

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            log() << "moveChunk collection lock acquired to reset shard version from "
                     "failed migration";

            // Revert the chunk manager back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        return Status(recvChunkCommitStatus.code(),
                      stream() << "_recvChunkCommit failed: "
                               << causedBy(recvChunkCommitStatus));
    }

    log() << "moveChunk migrate commit accepted by TO-shard: " << res << migrateLog;

    // Operations handed to applyChunkOpsDeprecated() below.
    BSONArrayBuilder updates;

    {
        // Update for the chunk being moved
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        // "o" holds the new chunk document: same bounds, bumped version, owned by _toShard.
        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        myVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), _minKey);
        n.append(ChunkType::max(), _maxKey);
        n.append(ChunkType::shard(), _toShard);
        n.done();

        // "o2" identifies the chunk document to update by its generated id.
        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        q.done();

        updates.append(op.obj());
    }

    // Version at which the next highest lastmod will be set. If the chunk being moved is the last
    // in the shard, nextVersion is that chunk's lastmod otherwise the highest version is from the
    // chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = myVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    const std::shared_ptr<CollectionMetadata> bumpedCollMetadata(
        shardingState->getCollectionMetadata(_nss.ns()));
    if (bumpedCollMetadata->getNumChunks() > 0) {
        // get another chunk on that shard
        ChunkType bumpChunk;
        invariant(bumpedCollMetadata->getNextChunk(bumpedCollMetadata->getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();
        // The chunk being bumped must not be the chunk that was just donated.
        dassert(bumpMin.woCompare(_minKey) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        nextVersion.incMinor();  // same as used on donateChunk

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _fromShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _nss.ns() << "'" << migrateLog;
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _nss.ns() << "'"
              << migrateLog;
    }

    // Precondition passed alongside the updates: the chunk of this namespace with the highest
    // lastmod is expected to still carry the collection version read before the bump.
    BSONArrayBuilder preCond;
    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_nss.ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), originalCollVersion.toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    Status applyOpsStatus{Status::OK()};
    try {
        // For testing migration failures
        if (MONGO_FAIL_POINT(failMigrationConfigWritePrepare)) {
            throw DBException("mock migration failure before config write",
                              ErrorCodes::PrepareConfigsFailed);
        }

        applyOpsStatus =
            grid.catalogManager(_txn)->applyChunkOpsDeprecated(_txn, updates.arr(), preCond.arr());

        // This fail point throws after the applyOps call has already been made, so it ends up in
        // the "outcome unknown" branch below rather than the PrepareConfigsFailed branch.
        if (MONGO_FAIL_POINT(failMigrationApplyOps)) {
            throw SocketException(SocketException::RECV_ERROR,
                                  shardingState->getConfigServer(_txn).toString());
        }
    } catch (const DBException& ex) {
        warning() << ex << migrateLog;
        applyOpsStatus = ex.toStatus();
    }

    if (applyOpsStatus == ErrorCodes::PrepareConfigsFailed) {
        // In the process of issuing the migrate commit, the SyncClusterConnection checks that
        // the config servers are reachable. If they are not, we are sure that the applyOps
        // command was not sent to any of the configs, so we can safely back out of the
        // migration here, by resetting the shard version that we bumped up to in the
        // donateChunk() call above.
        log() << "About to acquire moveChunk coll lock to reset shard version from "
              << "failed migration";

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            // Revert the metadata back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        const string msg = stream() << "Failed to send migrate commit to configs "
                                    << causedBy(applyOpsStatus);
        return Status(applyOpsStatus.code(), msg);
    } else if (!applyOpsStatus.isOK()) {
        // This could be a blip in the connectivity. Wait out a few seconds and check if the
        // commit request made it.
        //
        // If the commit made it to the config, we'll see the chunk in the new shard and
        // there's no further action to be done.
        //
        // If the commit did not make it, currently the only way to fix this state is to
        // bounce the mongod so that the old state (before migrating) is brought in.

        warning() << "moveChunk commit outcome ongoing" << migrateLog;
        sleepsecs(10);

        // Look for the chunk in this shard whose version got bumped. We assume that if that
        // mod made it to the config server, then applyOps was successful.
        try {
            // Fetch at most one chunk for this namespace, ordered by lastmod descending.
            std::vector<ChunkType> newestChunk;
            Status status = grid.catalogManager(_txn)->getChunks(_txn,
                                                                 BSON(ChunkType::ns(_nss.ns())),
                                                                 BSON(ChunkType::DEPRECATED_lastmod()
                                                                      << -1),
                                                                 1,
                                                                 &newestChunk,
                                                                 nullptr);
            uassertStatusOK(status);

            // Stays default-constructed when no chunk was returned.
            ChunkVersion checkVersion;
            if (!newestChunk.empty()) {
                invariant(newestChunk.size() == 1);
                checkVersion = newestChunk[0].getVersion();
            }

            if (checkVersion.equals(nextVersion)) {
                log() << "moveChunk commit confirmed" << migrateLog;
            } else {
                error() << "moveChunk commit failed: version is at " << checkVersion
                        << " instead of " << nextVersion << migrateLog;
                error() << "TERMINATING" << migrateLog;
                dbexit(EXIT_SHARDING_ERROR);
            }
        } catch (...) {
            // Any failure to verify the outcome (including the uassert above) ends the process.
            error() << "moveChunk failed to get confirmation of commit" << migrateLog;
            error() << "TERMINATING" << migrateLog;
            dbexit(EXIT_SHARDING_ERROR);
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    shardingState->migrationSourceManager()->setInCriticalSection(false);
    ShardingStateRecovery::endMetadataOp(_txn);

    // Migration is done, just log some diagnostics information
    BSONObj chunkInfo =
        BSON("min" << _minKey << "max" << _maxKey << "from" << _fromShard << "to" << _toShard);

    BSONObjBuilder commitInfo;
    commitInfo.appendElements(chunkInfo);
    // Include the "counts" sub-document from the _recvChunkCommit reply when it is an object.
    if (res["counts"].type() == Object) {
        commitInfo.appendElements(res["counts"].Obj());
    }

    grid.catalogManager(_txn)->logChange(_txn, "moveChunk.commit", _nss.ns(), commitInfo.obj());

    shardingState->migrationSourceManager()->done(_txn);
    _isRunning = false;

    return Status::OK();
}
/**
 * Builds the query/sort pair for reading chunks at or after a given collection version.
 *
 * The query matches documents whose lastmod field is greater than or equal to the timestamp
 * form of 'collectionVersion'; the sort orders results by lastmod ascending.
 */
QueryAndSort createShardChunkDiffQuery(const ChunkVersion& collectionVersion) {
    // Lower bound for lastmod, derived from the version's combined long representation.
    const Timestamp minLastmod(collectionVersion.toLong());

    // { lastmod: { $gte: <minLastmod> } }
    const BSONObj lastmodAtLeast = BSON("$gte" << minLastmod);
    const BSONObj filter = BSON(ChunkType::DEPRECATED_lastmod() << lastmodAtLeast);

    // { lastmod: 1 }
    const BSONObj ordering = BSON(ChunkType::DEPRECATED_lastmod() << 1);

    return {filter, ordering};
}