bool setShardVersion(DBClientBase& conn,
                     const string& ns,
                     const string& configServerPrimary,
                     ChunkVersion version,
                     ChunkManager* manager,
                     bool authoritative,
                     BSONObj& result) {
    BSONObjBuilder cmdBuilder;
    cmdBuilder.append("setShardVersion", ns);
    cmdBuilder.append("configdb", configServerPrimary);

    Shard s = Shard::make(conn.getServerAddress());
    cmdBuilder.append("shard", s.getName());
    cmdBuilder.append("shardHost", s.getConnString());

    if (ns.size() > 0) {
        version.addToBSON(cmdBuilder);
    } else {
        cmdBuilder.append("init", true);
    }

    if (authoritative) {
        cmdBuilder.appendBool("authoritative", 1);
    }

    BSONObj cmd = cmdBuilder.obj();

    LOG(1) << " setShardVersion " << s.getName() << " " << conn.getServerAddress() << " " << ns
           << " " << cmd
           << (manager ? string(str::stream() << " " << manager->getSequenceNumber()) : "");

    return conn.runCommand("admin", cmd, result, 0);
}
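// Illustrative sketch only: given the appends above, the admin command sent to the shard
// looks roughly like this (host names, namespace, and version are hypothetical
// placeholders; the version is serialized under "version" by ChunkVersion::addToBSON, and
// "init" takes its place when no namespace is given):
//
//   {
//       setShardVersion: "test.foo",
//       configdb: "config1.example.net:27019",
//       shard: "shard0000",
//       shardHost: "shard0000.example.net:27018",
//       version: <ChunkVersion>,
//       authoritative: true          // only when requested
//   }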
BSONObj buildMergeLogEntry(const std::vector<ChunkType>& chunksToMerge,
                           const ChunkVersion& currShardVersion,
                           const ChunkVersion& newMergedVersion) {
    BSONObjBuilder logDetailB;

    BSONArrayBuilder mergedB(logDetailB.subarrayStart("merged"));

    for (const ChunkType& chunkToMerge : chunksToMerge) {
        mergedB.append(chunkToMerge.toBSON());
    }

    mergedB.done();

    currShardVersion.addToBSON(logDetailB, "prevShardVersion");
    newMergedVersion.addToBSON(logDetailB, "mergedVersion");

    return logDetailB.obj();
}
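// Minimal usage sketch, with hypothetical key ranges and versions; it assumes only the
// ChunkType setters (setMin/setMax) and the ChunkVersion(major, minor, epoch) constructor
// that appear elsewhere in this section, and omits the surrounding includes.
namespace {
void exampleBuildMergeLogEntry() {
    std::vector<ChunkType> chunksToMerge(2);
    chunksToMerge[0].setMin(BSON("x" << 0));
    chunksToMerge[0].setMax(BSON("x" << 10));
    chunksToMerge[1].setMin(BSON("x" << 10));
    chunksToMerge[1].setMax(BSON("x" << 20));

    const OID epoch = OID::gen();
    const ChunkVersion currShardVersion(2, 1, epoch);
    const ChunkVersion newMergedVersion(3, 0, epoch);

    BSONObj logEntry = buildMergeLogEntry(chunksToMerge, currShardVersion, newMergedVersion);

    // Expected shape: { merged: [ <chunk docs> ], prevShardVersion: ..., mergedVersion: ... }
    log() << "merge log entry: " << logEntry;
}
}  // namespace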
BSONObj buildMergeLogEntry( const OwnedPointerVector<ChunkType>& chunksToMerge,
                            const ChunkVersion& currShardVersion,
                            const ChunkVersion& newMergedVersion ) {

    BSONObjBuilder logDetailB;

    BSONArrayBuilder mergedB( logDetailB.subarrayStart( "merged" ) );

    for ( OwnedPointerVector<ChunkType>::const_iterator it = chunksToMerge.begin();
          it != chunksToMerge.end();
          ++it ) {
        ChunkType* chunkToMerge = *it;
        mergedB.append( chunkToMerge->toBSON() );
    }

    mergedB.done();

    currShardVersion.addToBSON( logDetailB, "prevShardVersion" );
    newMergedVersion.addToBSON( logDetailB, "mergedVersion" );

    return logDetailB.obj();
}
BSONObj buildOpPrecond( const string& ns,
                        const string& shardName,
                        const ChunkVersion& shardVersion ) {
    BSONObjBuilder condB;
    condB.append( "ns", ChunkType::ConfigNS );
    condB.append( "q", BSON( "query" << BSON( ChunkType::ns( ns ) )
                                     << "orderby" << BSON( ChunkType::DEPRECATED_lastmod() << -1 ) ) );
    {
        BSONObjBuilder resB( condB.subobjStart( "res" ) );
        shardVersion.addToBSON( resB, ChunkType::DEPRECATED_lastmod() );
        resB.done();
    }

    return condB.obj();
}
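// Illustrative sketch only: for a hypothetical namespace "test.foo", the precondition
// document built by buildOpPrecond() has roughly this shape (the literal collection name
// behind ChunkType::ConfigNS and the encoding of the version under the lastmod field are
// whatever those helpers produce):
//
//   {
//       ns:  <ChunkType::ConfigNS>,
//       q:   { query: { ns: "test.foo" }, orderby: { <lastmod>: -1 } },
//       res: { <lastmod>: <shardVersion> }
//   }
//
// applyOps consumes each precondition by running the query "q" against "ns" and checking
// that the result matches "res" before applying the accompanying updates.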
static void buildStaleError( const ChunkVersion& shardVersionRecvd,
                             const ChunkVersion& shardVersionWanted,
                             BatchedErrorDetail* error ) {
    // Write stale error to results
    error->setErrCode( ErrorCodes::StaleShardVersion );

    BSONObjBuilder infoB;
    shardVersionWanted.addToBSON( infoB, "vWanted" );
    error->setErrInfo( infoB.obj() );

    string errMsg = stream() << "stale shard version detected before write, received "
                             << shardVersionRecvd.toString() << " but local version is "
                             << shardVersionWanted.toString();
    error->setErrMessage( errMsg );
}
void Chunk::serialize(BSONObjBuilder& to, ChunkVersion myLastMod) {
    to.append("_id", genID(_manager->getns(), _min));

    if (myLastMod.isSet()) {
        myLastMod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else if (_lastmod.isSet()) {
        _lastmod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else {
        verify(0);
    }

    to << ChunkType::ns(_manager->getns());
    to << ChunkType::min(_min);
    to << ChunkType::max(_max);
    to << ChunkType::shard(_shardId);
}
Status ChunkMoveOperationState::commitMigration() {
    invariant(_distLockStatus.is_initialized());
    invariant(_distLockStatus->isOK());

    log() << "About to enter migrate critical section";

    // We're under the collection distributed lock here, so no other migrate can change maxVersion
    // or CollectionMetadata state.
    ShardingState* const shardingState = ShardingState::get(_txn);

    Status startStatus = ShardingStateRecovery::startMetadataOp(_txn);
    if (!startStatus.isOK())
        return startStatus;

    shardingState->migrationSourceManager()->setInCriticalSection(true);

    const ChunkVersion originalCollVersion = getCollMetadata()->getCollVersion();

    ChunkVersion myVersion = originalCollVersion;
    myVersion.incMajor();

    {
        ScopedTransaction transaction(_txn, MODE_IX);
        Lock::DBLock lk(_txn->lockState(), _nss.db(), MODE_IX);
        Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

        invariant(myVersion > shardingState->getVersion(_nss.ns()));

        // Bump the metadata's version up and "forget" about the chunk being moved. This is
        // not the commit point, but in practice the state in this shard won't change until
        // the commit is done.
        shardingState->donateChunk(_txn, _nss.ns(), _minKey, _maxKey, myVersion);
    }

    log() << "moveChunk setting version to: " << myVersion << migrateLog;

    // We're under the collection lock here, too, so we can undo the chunk donation because
    // no other state change could be ongoing
    BSONObj res;
    Status recvChunkCommitStatus{ErrorCodes::InternalError, "status not set"};

    try {
        ScopedDbConnection connTo(_toShardCS, 35.0);
        connTo->runCommand("admin", BSON("_recvChunkCommit" << 1), res);
        connTo.done();
        recvChunkCommitStatus = getStatusFromCommandResult(res);
    } catch (const DBException& e) {
        const string msg = stream() << "moveChunk could not contact TO-shard " << _toShard
                                    << " to commit transfer" << causedBy(e);
        warning() << msg;
        recvChunkCommitStatus = Status(e.toStatus().code(), msg);
    }

    if (MONGO_FAIL_POINT(failMigrationCommit) && recvChunkCommitStatus.isOK()) {
        recvChunkCommitStatus =
            Status(ErrorCodes::InternalError, "Failing _recvChunkCommit due to failpoint.");
    }

    if (!recvChunkCommitStatus.isOK()) {
        log() << "moveChunk migrate commit not accepted by TO-shard: " << res
              << " resetting shard version to: " << getShardVersion() << migrateLog;

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            log() << "moveChunk collection lock acquired to reset shard version from "
                     "failed migration";

            // Revert the chunk manager back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        return Status(recvChunkCommitStatus.code(),
                      stream() << "_recvChunkCommit failed: " << causedBy(recvChunkCommitStatus));
    }

    log() << "moveChunk migrate commit accepted by TO-shard: " << res << migrateLog;

    BSONArrayBuilder updates;

    {
        // Update for the chunk being moved
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        myVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), _minKey);
        n.append(ChunkType::max(), _maxKey);
        n.append(ChunkType::shard(), _toShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        q.done();

        updates.append(op.obj());
    }

    // Version at which the next highest lastmod will be set. If the chunk being moved is the last
    // in the shard, nextVersion is that chunk's lastmod; otherwise the highest version is from the
    // chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = myVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    const std::shared_ptr<CollectionMetadata> bumpedCollMetadata(
        shardingState->getCollectionMetadata(_nss.ns()));
    if (bumpedCollMetadata->getNumChunks() > 0) {
        // get another chunk on that shard
        ChunkType bumpChunk;
        invariant(bumpedCollMetadata->getNextChunk(bumpedCollMetadata->getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();

        dassert(bumpMin.woCompare(_minKey) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        nextVersion.incMinor();  // same as used on donateChunk

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _fromShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _nss.ns() << "'" << migrateLog;
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _nss.ns() << "'"
              << migrateLog;
    }

    BSONArrayBuilder preCond;

    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_nss.ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), originalCollVersion.toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    Status applyOpsStatus{Status::OK()};

    try {
        // For testing migration failures
        if (MONGO_FAIL_POINT(failMigrationConfigWritePrepare)) {
            throw DBException("mock migration failure before config write",
                              ErrorCodes::PrepareConfigsFailed);
        }

        applyOpsStatus =
            grid.catalogManager(_txn)->applyChunkOpsDeprecated(_txn, updates.arr(), preCond.arr());

        if (MONGO_FAIL_POINT(failMigrationApplyOps)) {
            throw SocketException(SocketException::RECV_ERROR,
                                  shardingState->getConfigServer(_txn).toString());
        }
    } catch (const DBException& ex) {
        warning() << ex << migrateLog;
        applyOpsStatus = ex.toStatus();
    }

    if (applyOpsStatus == ErrorCodes::PrepareConfigsFailed) {
        // In the process of issuing the migrate commit, the SyncClusterConnection checks that
        // the config servers are reachable. If they are not, we are sure that the applyOps
        // command was not sent to any of the configs, so we can safely back out of the
        // migration here, by resetting the shard version that we bumped up to in the
        // donateChunk() call above.
        log() << "About to acquire moveChunk coll lock to reset shard version from "
              << "failed migration";

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            // Revert the metadata back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        const string msg = stream() << "Failed to send migrate commit to configs "
                                    << causedBy(applyOpsStatus);
        return Status(applyOpsStatus.code(), msg);
    } else if (!applyOpsStatus.isOK()) {
        // This could be a blip in the connectivity. Wait out a few seconds and check if the
        // commit request made it.
        //
        // If the commit made it to the config, we'll see the chunk in the new shard and
        // there's no further action to be done.
        //
        // If the commit did not make it, currently the only way to fix this state is to
        // bounce the mongod so that the old state (before migrating) is brought in.
        warning() << "moveChunk commit outcome ongoing" << migrateLog;
        sleepsecs(10);

        // Look for the chunk in this shard whose version got bumped. We assume that if that
        // mod made it to the config server, then applyOps was successful.
        try {
            std::vector<ChunkType> newestChunk;
            Status status =
                grid.catalogManager(_txn)->getChunks(_txn,
                                                     BSON(ChunkType::ns(_nss.ns())),
                                                     BSON(ChunkType::DEPRECATED_lastmod() << -1),
                                                     1,
                                                     &newestChunk,
                                                     nullptr);
            uassertStatusOK(status);

            ChunkVersion checkVersion;
            if (!newestChunk.empty()) {
                invariant(newestChunk.size() == 1);
                checkVersion = newestChunk[0].getVersion();
            }

            if (checkVersion.equals(nextVersion)) {
                log() << "moveChunk commit confirmed" << migrateLog;
            } else {
                error() << "moveChunk commit failed: version is at " << checkVersion
                        << " instead of " << nextVersion << migrateLog;
                error() << "TERMINATING" << migrateLog;

                dbexit(EXIT_SHARDING_ERROR);
            }
        } catch (...) {
            error() << "moveChunk failed to get confirmation of commit" << migrateLog;
            error() << "TERMINATING" << migrateLog;

            dbexit(EXIT_SHARDING_ERROR);
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    shardingState->migrationSourceManager()->setInCriticalSection(false);
    ShardingStateRecovery::endMetadataOp(_txn);

    // Migration is done, just log some diagnostics information
    BSONObj chunkInfo =
        BSON("min" << _minKey << "max" << _maxKey << "from" << _fromShard << "to" << _toShard);

    BSONObjBuilder commitInfo;
    commitInfo.appendElements(chunkInfo);
    if (res["counts"].type() == Object) {
        commitInfo.appendElements(res["counts"].Obj());
    }

    grid.catalogManager(_txn)->logChange(_txn, "moveChunk.commit", _nss.ns(), commitInfo.obj());

    shardingState->migrationSourceManager()->done(_txn);
    _isRunning = false;

    return Status::OK();
}
Status MigrationSourceManager::commitDonateChunk(OperationContext* txn) {
    invariant(!txn->lockState()->isLocked());
    invariant(_state == kCriticalSection);
    auto scopedGuard = MakeGuard([&] { cleanupOnError(txn); });

    // Tell the recipient shard to fetch the latest changes
    Status commitCloneStatus = _cloneDriver->commitClone(txn);

    if (MONGO_FAIL_POINT(failMigrationCommit) && commitCloneStatus.isOK()) {
        commitCloneStatus = {ErrorCodes::InternalError,
                             "Failing _recvChunkCommit due to failpoint."};
    }

    if (!commitCloneStatus.isOK()) {
        return {commitCloneStatus.code(),
                str::stream() << "commit clone failed due to " << commitCloneStatus.toString()};
    }

    // Generate the next collection version.
    ChunkVersion uncommittedCollVersion = _committedMetadata->getCollVersion();
    uncommittedCollVersion.incMajor();

    // applyOps preparation for reflecting the uncommitted metadata on the config server

    // Preconditions
    BSONArrayBuilder preCond;
    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_args.getNss().ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(),
                               _committedMetadata->getCollVersion().toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    // Update for the chunk which is being donated
    BSONArrayBuilder updates;
    {
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), _args.getMinKey()));
        uncommittedCollVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _args.getNss().ns());
        n.append(ChunkType::min(), _args.getMinKey());
        n.append(ChunkType::max(), _args.getMaxKey());
        n.append(ChunkType::shard(), _args.getToShardId());
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), _args.getMinKey()));
        q.done();

        updates.append(op.obj());
    }

    // Update for the chunk being moved

    // Version at which the next highest lastmod will be set. If the chunk being moved is the last
    // in the shard, nextVersion is that chunk's lastmod otherwise the highest version is from the
    // chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = uncommittedCollVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    if (_committedMetadata->getNumChunks() > 1) {
        ChunkType bumpChunk;
        invariant(_committedMetadata->getDifferentChunk(_args.getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();
        nextVersion.incMinor();

        dassert(bumpMin.woCompare(_args.getMinKey()) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _args.getNss().ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _args.getFromShardId());
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), ChunkType::genID(_args.getNss().ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _args.getNss().ns() << "'";
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _args.getNss().ns() << "'";
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeCommitMigration);

    Status applyOpsStatus = grid.catalogClient(txn)->applyChunkOpsDeprecated(
        txn, updates.arr(), preCond.arr(), _args.getNss().ns(), nextVersion);

    if (MONGO_FAIL_POINT(failCommitMigrationCommand)) {
        applyOpsStatus = Status(ErrorCodes::InternalError,
                                "Failpoint 'failCommitMigrationCommand' generated error");
    }

    if (applyOpsStatus.isOK()) {
        // Now that applyOps succeeded and the new collection version is committed, update the
        // collection metadata to the new collection version and forget the migrated chunk.
        ScopedTransaction scopedXact(txn, MODE_IX);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IX, MODE_X);

        ChunkType migratingChunkToForget;
        migratingChunkToForget.setMin(_args.getMinKey());
        migratingChunkToForget.setMax(_args.getMaxKey());
        _committedMetadata =
            _committedMetadata->cloneMigrate(migratingChunkToForget, uncommittedCollVersion);
        auto css = CollectionShardingState::get(txn, _args.getNss().ns());
        css->setMetadata(_committedMetadata);
    } else {
        // This could be an unrelated error (e.g. network error). Check whether the metadata update
        // succeeded by refreshing the collection metadata from the config server and checking that
        // the original chunks no longer exist.
        warning() << "Migration metadata commit may have failed: refreshing metadata to check"
                  << causedBy(applyOpsStatus);

        // Need to get the latest optime in case the refresh request goes to a secondary --
        // otherwise the read won't wait for the write that applyChunkOpsDeprecated may have done.
        Status status = grid.catalogClient(txn)->logChange(
            txn,
            "moveChunk.validating",
            _args.getNss().ns(),
            BSON("min" << _args.getMinKey() << "max" << _args.getMaxKey() << "from"
                       << _args.getFromShardId() << "to" << _args.getToShardId()));
        if (!status.isOK()) {
            fassertStatusOK(
                40137,
                {status.code(),
                 str::stream() << "applyOps failed to commit chunk [" << _args.getMinKey() << ","
                               << _args.getMaxKey() << ") due to " << causedBy(applyOpsStatus)
                               << ", and updating the optime with a write before refreshing the "
                               << "metadata also failed: " << causedBy(status)});
        }

        ShardingState* const shardingState = ShardingState::get(txn);
        ChunkVersion shardVersion;
        Status refreshStatus =
            shardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
        fassertStatusOK(34431,
                        {refreshStatus.code(),
                         str::stream() << "applyOps failed to commit chunk [" << _args.getMinKey()
                                       << "," << _args.getMaxKey() << ") due to "
                                       << causedBy(applyOpsStatus)
                                       << ", and refreshing collection metadata failed: "
                                       << causedBy(refreshStatus)});

        {
            ScopedTransaction scopedXact(txn, MODE_IS);
            AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

            auto css = CollectionShardingState::get(txn, _args.getNss());
            std::shared_ptr<CollectionMetadata> refreshedMetadata = css->getMetadata();

            if (refreshedMetadata->keyBelongsToMe(_args.getMinKey())) {
                invariant(refreshedMetadata->getCollVersion() ==
                          _committedMetadata->getCollVersion());

                // After refresh, the collection metadata indicates that the donor shard still owns
                // the chunk, so no migration changes were written to the config server metadata.
                return {applyOpsStatus.code(),
                        str::stream() << "Migration was not committed, applyOps failed: "
                                      << causedBy(applyOpsStatus)};
            }

            ChunkVersion refreshedCollectionVersion = refreshedMetadata->getCollVersion();
            if (!refreshedCollectionVersion.equals(nextVersion)) {
                // The refreshed collection metadata's collection version does not match the
                // control chunk's updated collection version, which should now be the highest.
                // The control chunk was not committed, but the migrated chunk was. This state is
                // not recoverable.
                fassertStatusOK(40138,
                                {applyOpsStatus.code(),
                                 str::stream() << "Migration was partially committed, state is "
                                               << "unrecoverable. applyOps error: "
                                               << causedBy(applyOpsStatus)});
            }
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    scopedGuard.Dismiss();
    _cleanup(txn);

    grid.catalogClient(txn)->logChange(
        txn,
        "moveChunk.commit",
        _args.getNss().ns(),
        BSON("min" << _args.getMinKey() << "max" << _args.getMaxKey() << "from"
                   << _args.getFromShardId() << "to" << _args.getToShardId()));

    return Status::OK();
}
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {

    // Steps
    // 1. check basic config
    // 2. extract params from command
    // 3. fast check
    // 4. slow check (LOCKS)

    // step 1

    lastError.disableForCommand();
    ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

    // make sure we have the mongos id for writebacks
    if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) )
        return false;

    bool authoritative = cmdObj.getBoolField( "authoritative" );

    // check config server is ok or enable sharding
    if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
        return false;

    // check shard name/hosts are correct
    if ( cmdObj["shard"].type() == String ) {
        shardingState.gotShardName( cmdObj["shard"].String() );
    }

    // Handle initial shard connection
    if( cmdObj["version"].eoo() && cmdObj["init"].trueValue() ){

        result.append( "initialized", true );

        // Send back wire version to let mongos know what protocol we can speak
        result.append( "minWireVersion", minWireVersion );
        result.append( "maxWireVersion", maxWireVersion );

        return true;
    }

    // we can run on a slave up to here
    if ( ! isMaster( "admin" ) ) {
        result.append( "errmsg" , "not master" );
        result.append( "note" , "from post init in setShardVersion" );
        return false;
    }

    // step 2
    string ns = cmdObj["setShardVersion"].valuestrsafe();
    if ( ns.size() == 0 ) {
        errmsg = "need to specify namespace";
        return false;
    }

    if( ! ChunkVersion::canParseBSON( cmdObj, "version" ) ){
        errmsg = "need to specify version";
        return false;
    }

    const ChunkVersion version = ChunkVersion::fromBSON( cmdObj, "version" );

    // step 3

    const ChunkVersion oldVersion = info->getVersion(ns);
    const ChunkVersion globalVersion = shardingState.getVersion(ns);

    oldVersion.addToBSON( result, "oldVersion" );

    if ( globalVersion.isSet() && version.isSet() ) {
        // this means there is no reset going on on either side
        // so it's safe to make some assumptions

        if ( version.isWriteCompatibleWith( globalVersion ) ) {
            // mongos and mongod agree!
            if ( ! oldVersion.isWriteCompatibleWith( version ) ) {
                if ( oldVersion < globalVersion &&
                     oldVersion.hasCompatibleEpoch( globalVersion ) ) {
                    info->setVersion( ns , version );
                }
                else if ( authoritative ) {
                    // this means there was a drop and our version is reset
                    info->setVersion( ns , version );
                }
                else {
                    result.append( "ns" , ns );
                    result.appendBool( "need_authoritative" , true );
                    errmsg = "verifying drop on '" + ns + "'";
                    return false;
                }
            }
            return true;
        }
    }

    // step 4

    // this is because of a weird segfault I saw and I can't see why this should ever be set
    massert( 13647 ,
             str::stream() << "context should be empty here, is: " << cc().getContext()->ns() ,
             cc().getContext() == 0 );

    if ( oldVersion.isSet() && ! globalVersion.isSet() ) {
        // this had been reset
        info->setVersion( ns , ChunkVersion( 0, OID() ) );
    }

    if ( ! version.isSet() && ! globalVersion.isSet() ) {
        // this connection is cleaning itself
        info->setVersion( ns , ChunkVersion( 0, OID() ) );
        return true;
    }

    // Cases below all either return OR fall-through to remote metadata reload.
    if ( version.isSet() || !globalVersion.isSet() ) {

        // Not Dropping

        // TODO: Refactor all of this
        if ( version < oldVersion && version.hasCompatibleEpoch( oldVersion ) ) {
            errmsg = "this connection already had a newer version of collection '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "newVersion" );
            globalVersion.addToBSON( result, "globalVersion" );
            return false;
        }

        // TODO: Refactor all of this
        if ( version < globalVersion && version.hasCompatibleEpoch( globalVersion ) ) {
            while ( shardingState.inCriticalMigrateSection() ) {
                log() << "waiting till out of critical section" << endl;
                shardingState.waitTillNotInCriticalSection( 10 );
            }
            errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
            result.append( "ns" , ns );
            version.addToBSON( result, "version" );
            globalVersion.addToBSON( result, "globalVersion" );
            result.appendBool( "reloadConfig" , true );
            return false;
        }

        if ( ! globalVersion.isSet() && ! authoritative ) {
            // Needed b/c when the last chunk is moved off a shard, the version gets reset to zero,
            // which should require a reload.
            while ( shardingState.inCriticalMigrateSection() ) {
                log() << "waiting till out of critical section" << endl;
                shardingState.waitTillNotInCriticalSection( 10 );
            }

            // need authoritative for first look
            result.append( "ns" , ns );
            result.appendBool( "need_authoritative" , true );
            errmsg = "first time for collection '" + ns + "'";
            return false;
        }

        // Fall through to metadata reload below
    }
    else {

        // Dropping

        if ( ! authoritative ) {
            result.appendBool( "need_authoritative" , true );
            result.append( "ns" , ns );
            globalVersion.addToBSON( result, "globalVersion" );
            errmsg = "dropping needs to be authoritative";
            return false;
        }

        // Fall through to metadata reload below
    }

    ChunkVersion currVersion;
    Status status = shardingState.refreshMetadataIfNeeded( ns, version, &currVersion );

    if (!status.isOK()) {

        // The reload itself was interrupted or confused here

        errmsg = str::stream() << "could not refresh metadata for " << ns
                               << " with requested shard version " << version.toString()
                               << ", stored shard version is " << currVersion.toString()
                               << causedBy( status.reason() );

        warning() << errmsg << endl;

        result.append( "ns" , ns );
        version.addToBSON( result, "version" );
        currVersion.addToBSON( result, "globalVersion" );
        result.appendBool( "reloadConfig", true );

        return false;
    }
    else if ( !version.isWriteCompatibleWith( currVersion ) ) {

        // We reloaded a version that doesn't match the version mongos was trying to
        // set.

        errmsg = str::stream() << "requested shard version differs from"
                               << " config shard version for " << ns
                               << ", requested version is " << version.toString()
                               << " but found version " << currVersion.toString();

        OCCASIONALLY warning() << errmsg << endl;

        // WARNING: the exact fields below are important for compatibility with mongos
        // version reload.

        result.append( "ns" , ns );
        currVersion.addToBSON( result, "globalVersion" );

        // If this was a reset of a collection or the last chunk moved out, inform mongos to
        // do a full reload.
        if ( currVersion.epoch() != version.epoch() || !currVersion.isSet() ) {
            result.appendBool( "reloadConfig", true );
            // Zero-version also needed to trigger full mongos reload, sadly
            // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
            ChunkVersion( 0, 0, OID() ).addToBSON( result, "version" );
            // For debugging
            version.addToBSON( result, "origVersion" );
        }
        else {
            version.addToBSON( result, "version" );
        }

        return false;
    }

    info->setVersion( ns , version );
    return true;
}
bool WriteBatchExecutor::doWrite( const string& ns,
                                  const BatchItemRef& itemRef,
                                  CurOp* currentOp,
                                  WriteStats* stats,
                                  BSONObj* upsertedID,
                                  BatchedErrorDetail* error ) {

    const BatchedCommandRequest& request = *itemRef.getRequest();
    int index = itemRef.getItemIndex();

    //
    // Check our shard version if we need to (must be in the write lock)
    //

    if ( shardingState.enabled() && request.isShardVersionSet()
         && !ChunkVersion::isIgnoredVersion( request.getShardVersion() ) ) {

        Lock::assertWriteLocked( ns );
        CollectionMetadataPtr metadata = shardingState.getCollectionMetadata( ns );
        ChunkVersion shardVersion =
            metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

        if ( !request.getShardVersion().isWriteCompatibleWith( shardVersion ) ) {

            // Write stale error to results
            error->setErrCode( ErrorCodes::StaleShardVersion );

            BSONObjBuilder infoB;
            shardVersion.addToBSON( infoB, "vWanted" );
            error->setErrInfo( infoB.obj() );

            string errMsg = mongoutils::str::stream()
                << "stale shard version detected before write, received "
                << request.getShardVersion().toString()
                << " but local version is " << shardVersion.toString();
            error->setErrMessage( errMsg );

            return false;
        }
    }

    //
    // Not stale, do the actual write
    //

    if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) {

        // Insert
        return doInsert( ns,
                         request.getInsertRequest()->getDocumentsAt( index ),
                         currentOp,
                         stats,
                         error );
    }
    else if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) {

        // Update
        return doUpdate( ns,
                         *request.getUpdateRequest()->getUpdatesAt( index ),
                         currentOp,
                         stats,
                         upsertedID,
                         error );
    }
    else {
        dassert( request.getBatchType() == BatchedCommandRequest::BatchType_Delete );

        // Delete
        return doDelete( ns,
                         *request.getDeleteRequest()->getDeletesAt( index ),
                         currentOp,
                         stats,
                         error );
    }
}
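// Illustrative sketch only: when the version check above fails, the BatchedErrorDetail
// handed back to mongos carries roughly the following (values are hypothetical and the
// exact ChunkVersion string format is whatever toString() produces):
//
//   errCode:    ErrorCodes::StaleShardVersion
//   errInfo:    { vWanted: <local shard version> }
//   errMessage: "stale shard version detected before write, received <requested version>
//                but local version is <local shard version>"
//
// buildStaleError() earlier in this section produces the same shape from a
// (received, wanted) pair of ChunkVersions.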