Status onShardVersionMismatch(OperationContext* opCtx,
                              const NamespaceString& nss,
                              ChunkVersion shardVersionReceived,
                              bool forceRefreshFromThisThread) noexcept {
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());

    auto const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());

    LOG(2) << "Metadata refresh requested for " << nss.ns() << " at shard version "
           << shardVersionReceived;

    ShardingStatistics::get(opCtx).countStaleConfigErrors.addAndFetch(1);

    // Ensure any ongoing migrations have completed before trying to do the refresh. This wait is
    // just an optimization so that MongoS does not exhaust its maximum number of StaleConfig retry
    // attempts while the migration is being committed.
    try {
        auto& oss = OperationShardingState::get(opCtx);
        oss.waitForMigrationCriticalSectionSignal(opCtx);
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto currentShardVersion = [&] {
        AutoGetCollection autoColl(opCtx, nss, MODE_IS);
        const auto currentMetadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
        if (currentMetadata) {
            return currentMetadata->getShardVersion();
        }

        return ChunkVersion::UNSHARDED();
    }();

    if (currentShardVersion.epoch() == shardVersionReceived.epoch() &&
        currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) {
        // Don't need to remotely reload if we're in the same epoch and the requested version is
        // smaller than the one we know about. This means that the remote side is behind.
        return Status::OK();
    }

    try {
        forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread);
        return Status::OK();
    } catch (const DBException& ex) {
        log() << "Failed to refresh metadata for collection" << nss << causedBy(redact(ex));
        return ex.toStatus();
    }
}
TEST_F(MergeChunkTests, CompoundMerge) {
    const NamespaceString nss("foo.bar");
    const BSONObj kp = BSON("x" << 1 << "y" << 1);
    const OID epoch = OID::gen();
    vector<KeyRange> ranges;

    // Setup chunk metadata
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 0 << "y" << 1), BSON("x" << 1 << "y" << 0), kp));
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 1 << "y" << 0), BSON("x" << 2 << "y" << 1), kp));
    storeCollectionRanges(nss, shardName(), ranges, ChunkVersion(1, 0, epoch));

    // Get latest version
    ChunkVersion latestVersion;
    ShardingState::get(getGlobalServiceContext())
        ->refreshMetadataNow(&_txn, nss.ns(), &latestVersion);
    ShardingState::get(getGlobalServiceContext())->resetMetadata(nss.ns());

    // Do merge
    string errMsg;
    bool result = mergeChunks(
        &_txn, nss, BSON("x" << 0 << "y" << 1), BSON("x" << 2 << "y" << 1), epoch, &errMsg);
    ASSERT_EQUALS(errMsg, "");
    ASSERT(result);

    // Verify result
    CollectionMetadataPtr metadata =
        ShardingState::get(getGlobalServiceContext())->getCollectionMetadata(nss.ns());

    ChunkType chunk;
    ASSERT(metadata->getNextChunk(BSON("x" << 0 << "y" << 1), &chunk));
    ASSERT(chunk.getMin().woCompare(BSON("x" << 0 << "y" << 1)) == 0);
    ASSERT(chunk.getMax().woCompare(BSON("x" << 2 << "y" << 1)) == 0);
    ASSERT_EQUALS(metadata->getNumChunks(), 1u);

    ASSERT_EQUALS(metadata->getShardVersion().majorVersion(), latestVersion.majorVersion());
    ASSERT_GREATER_THAN(metadata->getShardVersion().minorVersion(), latestVersion.minorVersion());

    assertWrittenAsMerged(ranges);
}
MigrationSourceManager::MigrationSourceManager(OperationContext* txn, MoveChunkRequest request)
    : _args(std::move(request)), _startTime() {
    invariant(!txn->lockState()->isLocked());

    const auto& oss = OperationShardingState::get(txn);
    if (!oss.hasShardVersion()) {
        uasserted(ErrorCodes::InvalidOptions, "collection version is missing");
    }

    // Even though the moveChunk command transmits a value in the operation's shardVersion field,
    // this value does not actually contain the shard version, but the global collection version.
    const ChunkVersion expectedCollectionVersion = oss.getShardVersion(_args.getNss());

    log() << "Starting chunk migration for "
          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
          << " with expected collection version " << expectedCollectionVersion;

    // Now that the collection is locked, snapshot the metadata and fetch the latest versions
    ShardingState* const shardingState = ShardingState::get(txn);

    ChunkVersion shardVersion;

    Status refreshStatus =
        shardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
    if (!refreshStatus.isOK()) {
        uasserted(refreshStatus.code(),
                  str::stream() << "cannot start migrate of chunk "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " due to "
                                << refreshStatus.toString());
    }

    if (shardVersion.majorVersion() == 0) {
        // If the major version is zero, this means we do not have any chunks locally to migrate in
        // the first place
        uasserted(ErrorCodes::IncompatibleShardingMetadata,
                  str::stream() << "cannot start migrate of chunk "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " with zero shard version");
    }

    // Snapshot the committed metadata from the time the migration starts
    {
        ScopedTransaction scopedXact(txn, MODE_IS);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

        auto css = CollectionShardingState::get(txn, _args.getNss());
        _committedMetadata = css->getMetadata();
    }

    const ChunkVersion collectionVersion = _committedMetadata->getCollVersion();

    if (expectedCollectionVersion.epoch() != collectionVersion.epoch()) {
        throw SendStaleConfigException(
            _args.getNss().ns(),
            str::stream() << "cannot move chunk "
                          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                          << " because collection may have been dropped. "
                          << "current epoch: "
                          << collectionVersion.epoch()
                          << ", cmd epoch: "
                          << expectedCollectionVersion.epoch(),
            expectedCollectionVersion,
            collectionVersion);
    }

    // With nonzero shard version, we must have a coll version >= our shard version
    invariant(collectionVersion >= shardVersion);

    // With nonzero shard version, we must have a shard key
    invariant(!_committedMetadata->getKeyPattern().isEmpty());

    ChunkType origChunk;
    if (!_committedMetadata->getNextChunk(_args.getMinKey(), &origChunk)) {
        // If this assertion is hit, it means that whoever called the shard moveChunk command
        // (mongos or the CSRS balancer) did not check whether the chunk actually belongs to this
        // shard. It is a benign error and does not indicate data corruption.
        uasserted(40145,
                  str::stream() << "Chunk with bounds "
                                << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                                << " is not owned by this shard.");
    }

    uassert(40146,
            str::stream() << "Unable to find chunk with the exact bounds "
                          << ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString()
                          << " at collection version "
                          << collectionVersion.toString()
                          << ". This indicates corrupted metadata.",
            origChunk.getMin().woCompare(_args.getMinKey()) == 0 &&
                origChunk.getMax().woCompare(_args.getMaxKey()) == 0);
}
Exemple #4
0
    /**
     * @ return true if not in sharded mode
                     or if version for this client is ok
     */
    bool shardVersionOk( const string& ns , string& errmsg, ChunkVersion& received, ChunkVersion& wanted ) {

        if ( ! shardingState.enabled() )
            return true;

        if ( ! isMasterNs( ns.c_str() ) )  {
            // right now connections to secondaries aren't versioned at all
            return true;
        }

        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );

        if ( ! info ) {
            // this means the client has nothing sharded
            // so this allows direct connections to do whatever they want
            // which i think is the correct behavior
            return true;
        }

        if ( info->inForceVersionOkMode() ) {
            return true;
        }

        // TODO : all collections at some point, be sharded or not, will have a version
        //  (and a CollectionMetadata)
        received = info->getVersion( ns );
        wanted = shardingState.getVersion( ns );

        if( received.isWriteCompatibleWith( wanted ) ) return true;

        //
        // Figure out exactly why not compatible, send appropriate error message
        // The versions themselves are returned in the error, so not needed in messages here
        //

        // Check epoch first, to send more meaningful message, since other parameters probably
        // won't match either
        if( ! wanted.hasCompatibleEpoch( received ) ){
            errmsg = str::stream() << "version epoch mismatch detected for " << ns << ", "
                                   << "the collection may have been dropped and recreated";
            return false;
        }

        if( ! wanted.isSet() && received.isSet() ){
            errmsg = str::stream() << "this shard no longer contains chunks for " << ns << ", "
                                   << "the collection may have been dropped";
            return false;
        }

        if( wanted.isSet() && ! received.isSet() ){
            errmsg = str::stream() << "this shard contains versioned chunks for " << ns << ", "
                                   << "but no version set in request";
            return false;
        }

        if( wanted.majorVersion() != received.majorVersion() ){

            //
            // Could be > or < - wanted is > if this is the source of a migration,
            // wanted < if this is the target of a migration
            //

            errmsg = str::stream() << "version mismatch detected for " << ns << ", "
                                   << "stored major version " << wanted.majorVersion()
                                   << " does not match received " << received.majorVersion();
            return false;
        }

        // Those are all the reasons the versions can mismatch
        verify( false );

        return false;

    }