void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       ChunkVersion chunkVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    chunkVersion.appendToCommand(builder);  // 3.4 shard compatibility
    builder->append(kEpoch, chunkVersion.epoch());
    // config connection string is included for 3.4 shard compatibility
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, false);
}
void ChunkManagerTargeter::noteStaleResponse(const ShardEndpoint& endpoint,
                                             const StaleConfigInfo& staleInfo) {
    dassert(!_needsTargetingRefresh);

    ChunkVersion remoteShardVersion;
    if (!staleInfo.getVersionWanted()) {
        // If we don't have a vWanted sent, assume the version is higher than our current version.
        remoteShardVersion = getShardVersion(*_routingInfo, endpoint.shardName);
        remoteShardVersion.incMajor();
    } else {
        remoteShardVersion = *staleInfo.getVersionWanted();
    }

    ShardVersionMap::iterator it = _remoteShardVersions.find(endpoint.shardName);
    if (it == _remoteShardVersions.end()) {
        _remoteShardVersions.insert(std::make_pair(endpoint.shardName, remoteShardVersion));
    } else {
        ChunkVersion& previouslyNotedVersion = it->second;
        if (previouslyNotedVersion.epoch() == remoteShardVersion.epoch()) {
            if (previouslyNotedVersion.isOlderThan(remoteShardVersion)) {
                previouslyNotedVersion = remoteShardVersion;
            }
        } else {
            // Epoch changed midway while applying the batch so set the version to something
            // unique
            // and non-existent to force a reload when refreshIsNeeded is called.
            previouslyNotedVersion = ChunkVersion::IGNORED();
        }
    }
}
Example #3
0
static bool checkShardVersion( ShardingState* shardingState,
                               const BatchedCommandRequest& request,
                               WriteErrorDetail** error ) {

    const NamespaceString nss( request.getTargetingNS() );
    Lock::assertWriteLocked( nss.ns() );

    ChunkVersion requestShardVersion =
        request.isMetadataSet() && request.getMetadata()->isShardVersionSet() ?
        request.getMetadata()->getShardVersion() : ChunkVersion::IGNORED();

    if ( shardingState->enabled() ) {

        CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );

        if ( !ChunkVersion::isIgnoredVersion( requestShardVersion ) ) {

            ChunkVersion shardVersion =
                metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

            if ( !requestShardVersion.isWriteCompatibleWith( shardVersion ) ) {
                *error = new WriteErrorDetail;
                buildStaleError( requestShardVersion, shardVersion, *error );
                return false;
            }
        }
    }

    return true;
}
Example #4
0
    static bool checkShardVersion(OperationContext* txn,
                                  ShardingState* shardingState,
                                  const BatchedCommandRequest& request,
                                  WriteOpResult* result) {

        const NamespaceString nss( request.getTargetingNS() );
        txn->lockState()->assertWriteLocked( nss.ns() );

        ChunkVersion requestShardVersion =
            request.isMetadataSet() && request.getMetadata()->isShardVersionSet() ?
                request.getMetadata()->getShardVersion() : ChunkVersion::IGNORED();

        if ( shardingState->enabled() ) {

            CollectionMetadataPtr metadata = shardingState->getCollectionMetadata( nss.ns() );

            if ( !ChunkVersion::isIgnoredVersion( requestShardVersion ) ) {

                ChunkVersion shardVersion =
                    metadata ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

                if ( !requestShardVersion.isWriteCompatibleWith( shardVersion ) ) {
                    result->setError(new WriteErrorDetail);
                    buildStaleError(requestShardVersion, shardVersion, result->getError());
                    return false;
                }
            }
        }

        return true;
    }
    void ChunkManagerTargeter::noteStaleResponse( const ShardEndpoint& endpoint,
                                                  const BSONObj& staleInfo ) {
        dassert( !_needsTargetingRefresh );

        ChunkVersion remoteShardVersion;
        if ( staleInfo["vWanted"].eoo() ) {
            // If we don't have a vWanted sent, assume the version is higher than our current
            // version.
            remoteShardVersion = getShardVersion( endpoint.shardName, _manager, _primary );
            remoteShardVersion.incMajor();
        }
        else {
            remoteShardVersion = ChunkVersion::fromBSON( staleInfo, "vWanted" );
        }

        ShardVersionMap::iterator it = _remoteShardVersions.find( endpoint.shardName );
        if ( it == _remoteShardVersions.end() ) {
            _remoteShardVersions.insert( make_pair( endpoint.shardName, remoteShardVersion ) );
        }
        else {
            ChunkVersion& previouslyNotedVersion = it->second;
            if ( previouslyNotedVersion.hasCompatibleEpoch( remoteShardVersion )) {
                if ( previouslyNotedVersion.isOlderThan( remoteShardVersion )) {
                    remoteShardVersion.cloneTo( &previouslyNotedVersion );
                }
            }
            else {
                // Epoch changed midway while applying the batch so set the version to
                // something unique and non-existent to force a reload when
                // refreshIsNeeded is called.
                ChunkVersion::IGNORED().cloneTo( &previouslyNotedVersion );
            }
        }
    }
Example #6
0
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       ChunkVersion collectionVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       ChunkVersion chunkVersion,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete,
                                       bool takeDistLock) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    collectionVersion.appendForCommands(builder);
    builder->append(kEpoch, collectionVersion.epoch());
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    chunkVersion.appendWithFieldForCommands(builder, kChunkVersion);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, takeDistLock);
}
Status onShardVersionMismatch(OperationContext* opCtx,
                              const NamespaceString& nss,
                              ChunkVersion shardVersionReceived,
                              bool forceRefreshFromThisThread) noexcept {
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());

    auto const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());

    LOG(2) << "Metadata refresh requested for " << nss.ns() << " at shard version "
           << shardVersionReceived;

    ShardingStatistics::get(opCtx).countStaleConfigErrors.addAndFetch(1);

    // Ensure any ongoing migrations have completed before trying to do the refresh. This wait is
    // just an optimization so that MongoS does not exhaust its maximum number of StaleConfig retry
    // attempts while the migration is being committed.
    try {
        auto& oss = OperationShardingState::get(opCtx);
        oss.waitForMigrationCriticalSectionSignal(opCtx);
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto currentShardVersion = [&] {
        AutoGetCollection autoColl(opCtx, nss, MODE_IS);
        const auto currentMetadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
        if (currentMetadata) {
            return currentMetadata->getShardVersion();
        }

        return ChunkVersion::UNSHARDED();
    }();

    if (currentShardVersion.epoch() == shardVersionReceived.epoch() &&
        currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) {
        // Don't need to remotely reload if we're in the same epoch and the requested version is
        // smaller than the one we know about. This means that the remote side is behind.
        return Status::OK();
    }

    try {
        forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread);
        return Status::OK();
    } catch (const DBException& ex) {
        log() << "Failed to refresh metadata for collection" << nss << causedBy(redact(ex));
        return ex.toStatus();
    }
}
Example #8
0
CollectionMetadata::CollectionMetadata(const BSONObj& keyPattern, ChunkVersion collectionVersion)
    : _collVersion(collectionVersion),
      _shardVersion(ChunkVersion(0, 0, collectionVersion.epoch())),
      _keyPattern(keyPattern.getOwned()),
      _pendingMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()),
      _chunksMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()),
      _rangesMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()) {}
Example #9
0
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       const ChunkVersion& shardVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const std::string& fromShardId,
                                       const std::string& toShardId,
                                       const BSONObj& chunkMinKey,
                                       const BSONObj& chunkMaxKey,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    shardVersion.appendForCommands(builder);
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId);
    builder->append(kToShardId, toShardId);
    builder->append(kChunkMinKey, chunkMinKey);
    builder->append(kChunkMaxKey, chunkMaxKey);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
}
Example #10
0
    bool setShardVersion(DBClientBase& conn,
                         const string& ns,
                         const string& configServerPrimary,
                         ChunkVersion version,
                         ChunkManager* manager,
                         bool authoritative,
                         BSONObj& result) {

        BSONObjBuilder cmdBuilder;
        cmdBuilder.append("setShardVersion", ns);
        cmdBuilder.append("configdb", configServerPrimary);

        Shard s = Shard::make(conn.getServerAddress());
        cmdBuilder.append("shard", s.getName());
        cmdBuilder.append("shardHost", s.getConnString());

        if (ns.size() > 0) {
            version.addToBSON(cmdBuilder);
        }
        else {
            cmdBuilder.append("init", true);
        }

        if (authoritative) {
            cmdBuilder.appendBool("authoritative", 1);
        }

        BSONObj cmd = cmdBuilder.obj();

        LOG(1) << "    setShardVersion  " << s.getName() << " " << conn.getServerAddress()
               << "  " << ns << "  " << cmd
               << (manager ? string(str::stream() << " " << manager->getSequenceNumber()) : "");

        return conn.runCommand("admin", cmd, result, 0);
    }
Example #11
0
    static void buildStaleError( const ChunkVersion& shardVersionRecvd,
                                 const ChunkVersion& shardVersionWanted,
                                 BatchedErrorDetail* error ) {

        // Write stale error to results
        error->setErrCode( ErrorCodes::StaleShardVersion );

        BSONObjBuilder infoB;
        shardVersionWanted.addToBSON( infoB, "vWanted" );
        error->setErrInfo( infoB.obj() );

        string errMsg = stream() << "stale shard version detected before write, received "
                                 << shardVersionRecvd.toString() << " but local version is "
                                 << shardVersionWanted.toString();
        error->setErrMessage( errMsg );
    }
Example #12
0
void Chunk::serialize(BSONObjBuilder& to, ChunkVersion myLastMod) {
    to.append("_id", genID(_manager->getns(), _min));

    if (myLastMod.isSet()) {
        myLastMod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else if (_lastmod.isSet()) {
        _lastmod.addToBSON(to, ChunkType::DEPRECATED_lastmod());
    } else {
        verify(0);
    }

    to << ChunkType::ns(_manager->getns());
    to << ChunkType::min(_min);
    to << ChunkType::max(_max);
    to << ChunkType::shard(_shardId);
}
Example #13
0
BSONObj buildMergeLogEntry(const std::vector<ChunkType>& chunksToMerge,
                           const ChunkVersion& currShardVersion,
                           const ChunkVersion& newMergedVersion) {
    BSONObjBuilder logDetailB;

    BSONArrayBuilder mergedB(logDetailB.subarrayStart("merged"));

    for (const ChunkType& chunkToMerge : chunksToMerge) {
        mergedB.append(chunkToMerge.toBSON());
    }

    mergedB.done();

    currShardVersion.addToBSON(logDetailB, "prevShardVersion");
    newMergedVersion.addToBSON(logDetailB, "mergedVersion");

    return logDetailB.obj();
}
unique_ptr<CollectionMetadata> CollectionMetadata::clonePlusChunk(
    const BSONObj& minKey, const BSONObj& maxKey, const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion.isSet());
    invariant(minKey.woCompare(maxKey) < 0);
    invariant(!rangeMapOverlaps(_chunksMap, minKey, maxKey));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_chunksMap.insert(make_pair(minKey.getOwned(), maxKey.getOwned()));
    metadata->_shardVersion = newShardVersion;
    metadata->_collVersion = newShardVersion > _collVersion ? newShardVersion : _collVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return metadata;
}
Example #15
0
void ChunkManagerTargeter::noteStaleResponse( const ShardEndpoint& endpoint,
        const BSONObj& staleInfo ) {
    dassert( !_needsTargetingRefresh );

    ChunkVersion remoteShardVersion;
    if ( staleInfo["vWanted"].eoo() ) {
        // If we don't have a vWanted sent, assume the version is higher than our current
        // version.
        remoteShardVersion = getShardVersion( endpoint.shardName, _manager, _primary );
        remoteShardVersion.incMajor();
    }
    else {
        remoteShardVersion = ChunkVersion::fromBSON( staleInfo, "vWanted" );
    }

    // We assume here that we can't have more than one stale config per-shard
    dassert( _remoteShardVersions.find( endpoint.shardName ) == _remoteShardVersions.end() );

    _remoteShardVersions.insert( make_pair( endpoint.shardName, remoteShardVersion ) );
}
Example #16
0
TEST_F(MergeChunkTests, CompoundMerge) {
    const NamespaceString nss("foo.bar");
    const BSONObj kp = BSON("x" << 1 << "y" << 1);
    const OID epoch = OID::gen();
    vector<KeyRange> ranges;

    // Setup chunk metadata
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 0 << "y" << 1), BSON("x" << 1 << "y" << 0), kp));
    ranges.push_back(
        KeyRange(nss.ns(), BSON("x" << 1 << "y" << 0), BSON("x" << 2 << "y" << 1), kp));
    storeCollectionRanges(nss, shardName(), ranges, ChunkVersion(1, 0, epoch));

    // Get latest version
    ChunkVersion latestVersion;
    ShardingState::get(getGlobalServiceContext())
        ->refreshMetadataNow(&_txn, nss.ns(), &latestVersion);
    ShardingState::get(getGlobalServiceContext())->resetMetadata(nss.ns());

    // Do merge
    string errMsg;
    bool result = mergeChunks(
        &_txn, nss, BSON("x" << 0 << "y" << 1), BSON("x" << 2 << "y" << 1), epoch, &errMsg);
    ASSERT_EQUALS(errMsg, "");
    ASSERT(result);

    // Verify result
    CollectionMetadataPtr metadata =
        ShardingState::get(getGlobalServiceContext())->getCollectionMetadata(nss.ns());

    ChunkType chunk;
    ASSERT(metadata->getNextChunk(BSON("x" << 0 << "y" << 1), &chunk));
    ASSERT(chunk.getMin().woCompare(BSON("x" << 0 << "y" << 1)) == 0);
    ASSERT(chunk.getMax().woCompare(BSON("x" << 2 << "y" << 1)) == 0);
    ASSERT_EQUALS(metadata->getNumChunks(), 1u);

    ASSERT_EQUALS(metadata->getShardVersion().majorVersion(), latestVersion.majorVersion());
    ASSERT_GREATER_THAN(metadata->getShardVersion().minorVersion(), latestVersion.minorVersion());

    assertWrittenAsMerged(ranges);
}
Example #17
0
    BSONObj buildMergeLogEntry( const OwnedPointerVector<ChunkType>& chunksToMerge,
                                const ChunkVersion& currShardVersion,
                                const ChunkVersion& newMergedVersion ) {

        BSONObjBuilder logDetailB;

        BSONArrayBuilder mergedB( logDetailB.subarrayStart( "merged" ) );

        for ( OwnedPointerVector<ChunkType>::const_iterator it = chunksToMerge.begin();
                it != chunksToMerge.end(); ++it ) {
            ChunkType* chunkToMerge = *it;
            mergedB.append( chunkToMerge->toBSON() );
        }

        mergedB.done();

        currShardVersion.addToBSON( logDetailB, "prevShardVersion" );
        newMergedVersion.addToBSON( logDetailB, "mergedVersion" );

        return logDetailB.obj();
    }
Example #18
0
        /**
         * Stores ranges for a particular collection and shard starting from some version
         */
        void storeCollectionRanges( const NamespaceString& nss,
                                    const string& shardName,
                                    const vector<KeyRange>& ranges,
                                    const ChunkVersion& startVersion ) {

            // Get key pattern from first range
            ASSERT_GREATER_THAN( ranges.size(), 0u );

            CollectionType coll;
            coll.setNS( nss.ns() );
            coll.setKeyPattern( ranges.begin()->keyPattern );
            coll.setEpoch( startVersion.epoch() );
            coll.setUpdatedAt( 1ULL );
            string errMsg;
            ASSERT( coll.isValid( &errMsg ) );

            DBDirectClient client(&_txn);

            client.update( CollectionType::ConfigNS,
                           BSON( CollectionType::ns( coll.getNS() ) ),
                           coll.toBSON(), true, false );

            ChunkVersion nextVersion = startVersion;
            for ( vector<KeyRange>::const_iterator it = ranges.begin(); it != ranges.end(); ++it ) {

                ChunkType chunk;
                // TODO: We should not rely on the serialized ns, minkey being unique in the future,
                // causes problems since it links string serialization to correctness.
                chunk.setName( Chunk::genID( nss, it->minKey ) );
                chunk.setShard( shardName );
                chunk.setNS( nss.ns() );
                chunk.setVersion( nextVersion );
                chunk.setMin( it->minKey );
                chunk.setMax( it->maxKey );
                nextVersion.incMajor();

                client.insert( ChunkType::ConfigNS, chunk.toBSON() );
            }
        }
std::unique_ptr<CollectionMetadata> CollectionMetadata::cloneMigrate(
    const ChunkType& chunk, const ChunkVersion& newCollectionVersion) const {
    invariant(newCollectionVersion.epoch() == _collVersion.epoch());
    invariant(newCollectionVersion > _collVersion);
    invariant(rangeMapContains(_chunksMap, chunk.getMin(), chunk.getMax()));

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_chunksMap.erase(chunk.getMin());

    metadata->_shardVersion =
        (metadata->_chunksMap.empty() ? ChunkVersion(0, 0, newCollectionVersion.epoch())
                                      : newCollectionVersion);
    metadata->_collVersion = newCollectionVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return metadata;
}
Example #20
0
    BSONObj buildOpPrecond( const string& ns,
                            const string& shardName,
                            const ChunkVersion& shardVersion ) {
        BSONObjBuilder condB;
        condB.append( "ns", ChunkType::ConfigNS );
        condB.append( "q", BSON( "query" << BSON( ChunkType::ns( ns ) )
                              << "orderby" << BSON( ChunkType::DEPRECATED_lastmod() << -1 ) ) );
        {
            BSONObjBuilder resB( condB.subobjStart( "res" ) );
            shardVersion.addToBSON( resB, ChunkType::DEPRECATED_lastmod() );
            resB.done();
        }

        return condB.obj();
    }
Example #21
0
void ChunkManager::createFirstChunks(OperationContext* txn,
                                     const ShardId& primaryShardId,
                                     const vector<BSONObj>* initPoints,
                                     const set<ShardId>* initShardIds) {
    // TODO distlock?
    // TODO: Race condition if we shard the collection and insert data while we split across
    // the non-primary shard.

    vector<BSONObj> splitPoints;
    vector<ShardId> shardIds;
    calcInitSplitsAndShards(txn, primaryShardId, initPoints, initShardIds, &splitPoints, &shardIds);


    // this is the first chunk; start the versioning from scratch
    ChunkVersion version;
    version.incEpoch();
    version.incMajor();

    log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << _ns
          << " using new epoch " << version.epoch();

    for (unsigned i = 0; i <= splitPoints.size(); i++) {
        BSONObj min = i == 0 ? _keyPattern.getKeyPattern().globalMin() : splitPoints[i - 1];
        BSONObj max =
            i < splitPoints.size() ? splitPoints[i] : _keyPattern.getKeyPattern().globalMax();

        Chunk temp(this, min, max, shardIds[i % shardIds.size()], version);

        BSONObjBuilder chunkBuilder;
        temp.serialize(chunkBuilder);

        BSONObj chunkObj = chunkBuilder.obj();

        Status result = grid.catalogManager(txn)->update(txn,
                                                         ChunkType::ConfigNS,
                                                         BSON(ChunkType::name(temp.genID())),
                                                         chunkObj,
                                                         true,
                                                         false,
                                                         NULL);

        version.incMinor();

        if (!result.isOK()) {
            string ss = str::stream()
                << "creating first chunks failed. result: " << result.reason();
            error() << ss;
            msgasserted(15903, ss);
        }
    }

    _version = ChunkVersion(0, 0, version.epoch());
}
Example #22
0
void ShardingTestFixture::expectSetShardVersion(const HostAndPort& expectedHost,
                                                const ShardType& expectedShard,
                                                const NamespaceString& expectedNs,
                                                const ChunkVersion& expectedChunkVersion) {
    onCommand([&](const RemoteCommandRequest& request) {
        ASSERT_EQ(expectedHost, request.target);
        ASSERT_EQUALS(rpc::makeEmptyMetadata(), request.metadata);

        SetShardVersionRequest ssv =
            assertGet(SetShardVersionRequest::parseFromBSON(request.cmdObj));

        ASSERT(!ssv.isInit());
        ASSERT(ssv.isAuthoritative());
        ASSERT_EQ(grid.shardRegistry()->getConfigServerConnectionString().toString(),
                  ssv.getConfigServer().toString());
        ASSERT_EQ(expectedShard.getHost(), ssv.getShardConnectionString().toString());
        ASSERT_EQ(expectedNs.toString(), ssv.getNS().ns());
        ASSERT_EQ(expectedChunkVersion.toString(), ssv.getNSVersion().toString());

        return BSON("ok" << true);
    });
}
Example #23
0
int ConfigDiffTracker<ValType, ShardType>::calculateConfigDiff(
    const std::vector<ChunkType>& chunks) {
    _assertAttached();

    // Apply the chunk changes to the ranges and versions
    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //      per-collection, and remove any conflicting chunks from the ranges.
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on
    //      the shard for mongod) add them to the ranges.

    std::vector<ChunkType> newTracked;

    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;

    for (const ChunkType& chunk : chunks) {
        ChunkVersion chunkVersion =
            ChunkVersion::fromBSON(chunk.toBSON(), ChunkType::DEPRECATED_lastmod());

        if (!chunkVersion.isSet() || !chunkVersion.hasEqualEpoch(currEpoch)) {
            warning() << "got invalid chunk version " << chunkVersion << " in document "
                      << chunk.toString() << " when trying to load differing chunks at version "
                      << ChunkVersion(
                          _maxVersion->majorVersion(), _maxVersion->minorVersion(), currEpoch);

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if (chunkVersion > *_maxVersion) {
            *_maxVersion = chunkVersion;
        }

        // Chunk version changes
        ShardType shard = shardFor(chunk.getShard());

        typename MaxChunkVersionMap::const_iterator shardVersionIt = _maxShardVersions->find(shard);
        if (shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion) {
            (*_maxShardVersions)[shard] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking because of this
        // chunk's changes
        removeOverlapping(chunk.getMin(), chunk.getMax());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore
        // or unbuffer.
        if (isTracked(chunk)) {
            newTracked.push_back(chunk);
        }
    }

    LOG(3) << "found " << _validDiffs << " new chunks for collection " << _ns << " (tracking "
           << newTracked.size() << "), new version is " << *_maxVersion;

    for (const ChunkType& chunk : newTracked) {
        // Invariant enforced by sharding - it's possible to read inconsistent state due to
        // getMore and yielding, so we want to detect it as early as possible.
        //
        // TODO: This checks for overlap, we also should check for holes here iff we're
        // tracking all chunks.
        if (isOverlapping(chunk.getMin(), chunk.getMax())) {
            return -1;
        }

        _currMap->insert(rangeFor(chunk));
    }

    return _validDiffs;
}
Example #24
0
Status ChunkMoveOperationState::commitMigration() {
    invariant(_distLockStatus.is_initialized());
    invariant(_distLockStatus->isOK());

    log() << "About to enter migrate critical section";

    // We're under the collection distributed lock here, so no other migrate can change maxVersion
    // or CollectionMetadata state.
    ShardingState* const shardingState = ShardingState::get(_txn);

    Status startStatus = ShardingStateRecovery::startMetadataOp(_txn);
    if (!startStatus.isOK())
        return startStatus;

    shardingState->migrationSourceManager()->setInCriticalSection(true);

    const ChunkVersion originalCollVersion = getCollMetadata()->getCollVersion();

    ChunkVersion myVersion = originalCollVersion;
    myVersion.incMajor();

    {
        ScopedTransaction transaction(_txn, MODE_IX);
        Lock::DBLock lk(_txn->lockState(), _nss.db(), MODE_IX);
        Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

        invariant(myVersion > shardingState->getVersion(_nss.ns()));

        // Bump the metadata's version up and "forget" about the chunk being moved. This is
        // not the commit point, but in practice the state in this shard won't change until
        // the commit it done.
        shardingState->donateChunk(_txn, _nss.ns(), _minKey, _maxKey, myVersion);
    }

    log() << "moveChunk setting version to: " << myVersion << migrateLog;

    // We're under the collection lock here, too, so we can undo the chunk donation because
    // no other state change could be ongoing
    BSONObj res;
    Status recvChunkCommitStatus{ErrorCodes::InternalError, "status not set"};

    try {
        ScopedDbConnection connTo(_toShardCS, 35.0);
        connTo->runCommand("admin", BSON("_recvChunkCommit" << 1), res);
        connTo.done();
        recvChunkCommitStatus = getStatusFromCommandResult(res);
    } catch (const DBException& e) {
        const string msg = stream() << "moveChunk could not contact to shard " << _toShard
                                    << " to commit transfer" << causedBy(e);
        warning() << msg;
        recvChunkCommitStatus = Status(e.toStatus().code(), msg);
    }

    if (MONGO_FAIL_POINT(failMigrationCommit) && recvChunkCommitStatus.isOK()) {
        recvChunkCommitStatus =
            Status(ErrorCodes::InternalError, "Failing _recvChunkCommit due to failpoint.");
    }

    if (!recvChunkCommitStatus.isOK()) {
        log() << "moveChunk migrate commit not accepted by TO-shard: " << res
              << " resetting shard version to: " << getShardVersion() << migrateLog;

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            log() << "moveChunk collection lock acquired to reset shard version from "
                     "failed migration";

            // Revert the chunk manager back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        return Status(recvChunkCommitStatus.code(),
                      stream() << "_recvChunkCommit failed: " << causedBy(recvChunkCommitStatus));
    }

    log() << "moveChunk migrate commit accepted by TO-shard: " << res << migrateLog;

    BSONArrayBuilder updates;

    {
        // Update for the chunk being moved
        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);  // No upserting
        op.append("ns", ChunkType::ConfigNS);

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        myVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), _minKey);
        n.append(ChunkType::max(), _maxKey);
        n.append(ChunkType::shard(), _toShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), _minKey));
        q.done();

        updates.append(op.obj());
    }

    // Version at which the next highest lastmod will be set. If the chunk being moved is the last
    // in the shard, nextVersion is that chunk's lastmod otherwise the highest version is from the
    // chunk being bumped on the FROM-shard.
    ChunkVersion nextVersion = myVersion;

    // If we have chunks left on the FROM shard, update the version of one of them as well. We can
    // figure that out by grabbing the metadata as it has been changed.
    const std::shared_ptr<CollectionMetadata> bumpedCollMetadata(
        shardingState->getCollectionMetadata(_nss.ns()));
    if (bumpedCollMetadata->getNumChunks() > 0) {
        // get another chunk on that shard
        ChunkType bumpChunk;
        invariant(bumpedCollMetadata->getNextChunk(bumpedCollMetadata->getMinKey(), &bumpChunk));

        BSONObj bumpMin = bumpChunk.getMin();
        BSONObj bumpMax = bumpChunk.getMax();

        dassert(bumpMin.woCompare(_minKey) != 0);

        BSONObjBuilder op;
        op.append("op", "u");
        op.appendBool("b", false);
        op.append("ns", ChunkType::ConfigNS);

        nextVersion.incMinor();  // same as used on donateChunk

        BSONObjBuilder n(op.subobjStart("o"));
        n.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        nextVersion.addToBSON(n, ChunkType::DEPRECATED_lastmod());
        n.append(ChunkType::ns(), _nss.ns());
        n.append(ChunkType::min(), bumpMin);
        n.append(ChunkType::max(), bumpMax);
        n.append(ChunkType::shard(), _fromShard);
        n.done();

        BSONObjBuilder q(op.subobjStart("o2"));
        q.append(ChunkType::name(), Chunk::genID(_nss.ns(), bumpMin));
        q.done();

        updates.append(op.obj());

        log() << "moveChunk updating self version to: " << nextVersion << " through " << bumpMin
              << " -> " << bumpMax << " for collection '" << _nss.ns() << "'" << migrateLog;
    } else {
        log() << "moveChunk moved last chunk out for collection '" << _nss.ns() << "'"
              << migrateLog;
    }

    BSONArrayBuilder preCond;
    {
        BSONObjBuilder b;
        b.append("ns", ChunkType::ConfigNS);
        b.append("q",
                 BSON("query" << BSON(ChunkType::ns(_nss.ns())) << "orderby"
                              << BSON(ChunkType::DEPRECATED_lastmod() << -1)));
        {
            BSONObjBuilder bb(b.subobjStart("res"));

            // TODO: For backwards compatibility, we can't yet require an epoch here
            bb.appendTimestamp(ChunkType::DEPRECATED_lastmod(), originalCollVersion.toLong());
            bb.done();
        }

        preCond.append(b.obj());
    }

    Status applyOpsStatus{Status::OK()};
    try {
        // For testing migration failures
        if (MONGO_FAIL_POINT(failMigrationConfigWritePrepare)) {
            throw DBException("mock migration failure before config write",
                              ErrorCodes::PrepareConfigsFailed);
        }

        applyOpsStatus =
            grid.catalogManager(_txn)->applyChunkOpsDeprecated(_txn, updates.arr(), preCond.arr());

        if (MONGO_FAIL_POINT(failMigrationApplyOps)) {
            throw SocketException(SocketException::RECV_ERROR,
                                  shardingState->getConfigServer(_txn).toString());
        }
    } catch (const DBException& ex) {
        warning() << ex << migrateLog;
        applyOpsStatus = ex.toStatus();
    }

    if (applyOpsStatus == ErrorCodes::PrepareConfigsFailed) {
        // In the process of issuing the migrate commit, the SyncClusterConnection checks that
        // the config servers are reachable. If they are not, we are sure that the applyOps
        // command was not sent to any of the configs, so we can safely back out of the
        // migration here, by resetting the shard version that we bumped up to in the
        // donateChunk() call above.
        log() << "About to acquire moveChunk coll lock to reset shard version from "
              << "failed migration";

        {
            ScopedTransaction transaction(_txn, MODE_IX);
            Lock::DBLock dbLock(_txn->lockState(), _nss.db(), MODE_IX);
            Lock::CollectionLock collLock(_txn->lockState(), _nss.ns(), MODE_X);

            // Revert the metadata back to the state before "forgetting" about the chunk
            shardingState->undoDonateChunk(_txn, _nss.ns(), getCollMetadata());
        }

        log() << "Shard version successfully reset to clean up failed migration";

        const string msg = stream() << "Failed to send migrate commit to configs "
                                    << causedBy(applyOpsStatus);
        return Status(applyOpsStatus.code(), msg);
    } else if (!applyOpsStatus.isOK()) {
        // This could be a blip in the connectivity. Wait out a few seconds and check if the
        // commit request made it.
        //
        // If the commit made it to the config, we'll see the chunk in the new shard and
        // there's no further action to be done.
        //
        // If the commit did not make it, currently the only way to fix this state is to
        // bounce the mongod so that the old state (before migrating) is brought in.

        warning() << "moveChunk commit outcome ongoing" << migrateLog;
        sleepsecs(10);

        // Look for the chunk in this shard whose version got bumped. We assume that if that
        // mod made it to the config server, then applyOps was successful.
        try {
            std::vector<ChunkType> newestChunk;
            Status status =
                grid.catalogManager(_txn)->getChunks(_txn,
                                                     BSON(ChunkType::ns(_nss.ns())),
                                                     BSON(ChunkType::DEPRECATED_lastmod() << -1),
                                                     1,
                                                     &newestChunk,
                                                     nullptr);
            uassertStatusOK(status);

            ChunkVersion checkVersion;
            if (!newestChunk.empty()) {
                invariant(newestChunk.size() == 1);
                checkVersion = newestChunk[0].getVersion();
            }

            if (checkVersion.equals(nextVersion)) {
                log() << "moveChunk commit confirmed" << migrateLog;
            } else {
                error() << "moveChunk commit failed: version is at " << checkVersion
                        << " instead of " << nextVersion << migrateLog;
                error() << "TERMINATING" << migrateLog;

                dbexit(EXIT_SHARDING_ERROR);
            }
        } catch (...) {
            error() << "moveChunk failed to get confirmation of commit" << migrateLog;
            error() << "TERMINATING" << migrateLog;

            dbexit(EXIT_SHARDING_ERROR);
        }
    }

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);

    shardingState->migrationSourceManager()->setInCriticalSection(false);
    ShardingStateRecovery::endMetadataOp(_txn);

    // Migration is done, just log some diagnostics information
    BSONObj chunkInfo =
        BSON("min" << _minKey << "max" << _maxKey << "from" << _fromShard << "to" << _toShard);

    BSONObjBuilder commitInfo;
    commitInfo.appendElements(chunkInfo);
    if (res["counts"].type() == Object) {
        commitInfo.appendElements(res["counts"].Obj());
    }

    grid.catalogManager(_txn)->logChange(_txn, "moveChunk.commit", _nss.ns(), commitInfo.obj());

    shardingState->migrationSourceManager()->done(_txn);
    _isRunning = false;

    return Status::OK();
}
StatusWith<std::unique_ptr<CollectionMetadata>> CollectionMetadata::cloneMerge(
    const BSONObj& minKey, const BSONObj& maxKey, const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion > _shardVersion);

    RangeVector overlap;
    getRangeMapOverlap(_chunksMap, minKey, maxKey, &overlap);

    if (overlap.empty() || overlap.size() == 1) {
        return {ErrorCodes::IllegalOperation,
                stream() << "cannot merge range " << rangeToString(minKey, maxKey)
                         << (overlap.empty() ? ", no chunks found in this range"
                                             : ", only one chunk found in this range")};
    }

    bool validStartEnd = true;
    bool validNoHoles = true;

    if (overlap.begin()->first.woCompare(minKey) != 0) {
        // First chunk doesn't start with minKey
        validStartEnd = false;
    } else if (overlap.rbegin()->second.woCompare(maxKey) != 0) {
        // Last chunk doesn't end with maxKey
        validStartEnd = false;
    } else {
        // Check that there are no holes
        BSONObj prevMaxKey = minKey;
        for (RangeVector::iterator it = overlap.begin(); it != overlap.end(); ++it) {
            if (it->first.woCompare(prevMaxKey) != 0) {
                validNoHoles = false;
                break;
            }
            prevMaxKey = it->second;
        }
    }

    if (!validStartEnd || !validNoHoles) {
        return {ErrorCodes::IllegalOperation,
                stream() << "cannot merge range " << rangeToString(minKey, maxKey)
                         << ", overlapping chunks " << overlapToString(overlap)
                         << (!validStartEnd ? " do not have the same min and max key"
                                            : " are not all adjacent")};
    }

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_rangesMap = _rangesMap;
    metadata->_shardVersion = newShardVersion;
    metadata->_collVersion = newShardVersion > _collVersion ? newShardVersion : this->_collVersion;

    for (RangeVector::iterator it = overlap.begin(); it != overlap.end(); ++it) {
        metadata->_chunksMap.erase(it->first);
    }

    metadata->_chunksMap.insert(make_pair(minKey, maxKey));

    invariant(metadata->isValid());
    return std::move(metadata);
}
Example #26
0
void ChunkType::setVersion(const ChunkVersion& version) {
    invariant(version.isSet());
    _version = version;
}
Example #27
0
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
        OperationContext* txn,
        const ShardId& shardId,
        const NamespaceString& nss,
        const ShardKeyPattern& shardKeyPattern,
        ChunkVersion collectionVersion,
        const BSONObj& minKey,
        const BSONObj& maxKey,
const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());
    invariant(minKey.woCompare(maxKey) < 0);

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                << " parts at a time."};
    }

    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    cmd.append(kMinKey, minKey);
    cmd.append(kMaxKey, maxKey);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shard = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shard) {
        status =
            Status(ErrorCodes::ShardNotFound, str::stream() << "shard " << shardId << " not found");
    } else {
        auto cmdStatus = shard->runCommand(txn,
                                           ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                           "admin",
                                           cmdObj,
                                           Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
                << "Chunk migration will be skipped because splitChunk returned invalid response: "
                << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
                << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
int ConfigDiffTracker<ValType,ShardType>::
calculateConfigDiff( DBClientCursorInterface& diffCursor )
{
    verifyAttached();

    // Apply the chunk changes to the ranges and versions

    //
    // Overall idea here is to work in two steps :
    // 1. For all the new chunks we find, increment the maximum version per-shard and
    //    per-collection, and remove any conflicting chunks from the ranges
    // 2. For all the new chunks we're interested in (all of them for mongos, just chunks on the
    //    shard for mongod) add them to the ranges
    //

    vector<BSONObj> newTracked;
    // Store epoch now so it doesn't change when we change max
    OID currEpoch = _maxVersion->epoch();

    _validDiffs = 0;
    while( diffCursor.more() ) {

        BSONObj diffChunkDoc = diffCursor.next();

        ChunkVersion chunkVersion = ChunkVersion::fromBSON(diffChunkDoc, ChunkType::DEPRECATED_lastmod());

        if( diffChunkDoc[ChunkType::min()].type() != Object ||
                diffChunkDoc[ChunkType::max()].type() != Object ||
                diffChunkDoc[ChunkType::shard()].type() != String )
        {
            warning() << "got invalid chunk document " << diffChunkDoc
                      << " when trying to load differing chunks" << endl;
            continue;
        }

        if( ! chunkVersion.isSet() || ! chunkVersion.hasCompatibleEpoch( currEpoch ) ) {

            warning() << "got invalid chunk version " << chunkVersion << " in document " << diffChunkDoc
                      << " when trying to load differing chunks at version "
                      << ChunkVersion( _maxVersion->toLong(), currEpoch ) << endl;

            // Don't keep loading, since we know we'll be broken here
            return -1;
        }

        _validDiffs++;

        // Get max changed version and chunk version
        if( chunkVersion > *_maxVersion ) *_maxVersion = chunkVersion;

        // Chunk version changes
        ShardType shard = shardFor( diffChunkDoc[ChunkType::shard()].String() );
        typename map<ShardType, ChunkVersion>::iterator shardVersionIt = _maxShardVersions->find( shard );
        if( shardVersionIt == _maxShardVersions->end() || shardVersionIt->second < chunkVersion ) {
            (*_maxShardVersions)[ shard ] = chunkVersion;
        }

        // See if we need to remove any chunks we are currently tracking b/c of this chunk's changes
        removeOverlapping(diffChunkDoc[ChunkType::min()].Obj(),
                          diffChunkDoc[ChunkType::max()].Obj());

        // Figure out which of the new chunks we need to track
        // Important - we need to actually own this doc, in case the cursor decides to getMore or unbuffer
        if( isTracked( diffChunkDoc ) ) newTracked.push_back( diffChunkDoc.getOwned() );
    }

    LOG(3) << "found " << _validDiffs
           << " new chunks for collection " << _ns
           << " (tracking " << newTracked.size()
           << "), new version is " << *_maxVersion
           << endl;

    for( vector<BSONObj>::iterator it = newTracked.begin(); it != newTracked.end(); it++ ) {

        BSONObj chunkDoc = *it;

        // Important - we need to make sure we actually own the min and max here
        BSONObj min = chunkDoc[ChunkType::min()].Obj().getOwned();
        BSONObj max = chunkDoc[ChunkType::max()].Obj().getOwned();

        // Invariant enforced by sharding
        // It's possible to read inconsistent state b/c of getMore() and yielding, so we want
        // to detect as early as possible.
        // TODO: This checks for overlap, we also should check for holes here iff we're tracking
        // all chunks
        if( isOverlapping( min, max ) ) return -1;

        _currMap->insert( rangeFor( chunkDoc, min, max ) );
    }

    return _validDiffs;
}
StatusWith<std::unique_ptr<CollectionMetadata>> CollectionMetadata::cloneSplit(
    const BSONObj& minKey,
    const BSONObj& maxKey,
    const std::vector<BSONObj>& splitKeys,
    const ChunkVersion& newShardVersion) const {
    invariant(newShardVersion.epoch() == _shardVersion.epoch());
    invariant(newShardVersion > _shardVersion);

    // The version required in both resulting chunks could be simply an increment in the
    // minor portion of the current version.  However, we are enforcing uniqueness over the
    // attributes <ns, version> of the configdb collection 'chunks'.  So in practice, a
    // migrate somewhere may force this split to pick up a version that has the major
    // portion higher than the one that this shard has been using.
    //
    // TODO drop the uniqueness constraint and tighten the check below so that only the
    // minor portion of version changes

    // Check that we have the exact chunk that will be subtracted.
    if (!rangeMapContains(_chunksMap, minKey, maxKey)) {
        stream errMsg;
        errMsg << "cannot split chunk " << rangeToString(minKey, maxKey)
               << ", this shard does not contain the chunk";

        if (rangeMapOverlaps(_chunksMap, minKey, maxKey)) {
            RangeVector overlap;
            getRangeMapOverlap(_chunksMap, minKey, maxKey, &overlap);

            errMsg << " and it overlaps " << overlapToString(overlap);
        }

        return {ErrorCodes::IllegalOperation, errMsg};
    }

    unique_ptr<CollectionMetadata> metadata(stdx::make_unique<CollectionMetadata>());
    metadata->_keyPattern = _keyPattern.getOwned();
    metadata->fillKeyPatternFields();
    metadata->_pendingMap = _pendingMap;
    metadata->_chunksMap = _chunksMap;
    metadata->_shardVersion = newShardVersion;  // will increment 2nd, 3rd,... chunks below

    BSONObj startKey = minKey;
    for (const auto& split : splitKeys) {
        // Check that the split key is valid
        if (!rangeContains(minKey, maxKey, split)) {
            return {ErrorCodes::IllegalOperation,
                    stream() << "cannot split chunk " << rangeToString(minKey, maxKey) << " at key "
                             << split};
        }

        // Check that the split keys are in order
        if (split.woCompare(startKey) <= 0) {
            // The split keys came in out of order, this probably indicates a bug, so fail the
            // operation. Re-iterate splitKeys to build a useful error message including the array
            // of splitKeys in the order received.
            str::stream errMsg;
            errMsg << "Invalid input to splitChunk, split keys must be in order, got: [";
            for (auto it2 = splitKeys.cbegin(); it2 != splitKeys.cend(); ++it2) {
                if (it2 != splitKeys.begin()) {
                    errMsg << ", ";
                }
                errMsg << it2->toString();
            }
            errMsg << "]";
            return {ErrorCodes::IllegalOperation, errMsg};
        }

        metadata->_chunksMap[startKey] = split.getOwned();
        metadata->_chunksMap.insert(make_pair(split.getOwned(), maxKey.getOwned()));
        metadata->_shardVersion.incMinor();
        startKey = split;
    }

    metadata->_collVersion =
        metadata->_shardVersion > _collVersion ? metadata->_shardVersion : _collVersion;
    metadata->fillRanges();

    invariant(metadata->isValid());
    return std::move(metadata);
}
Example #30
0
    bool mergeChunks( OperationContext* txn,
                      const NamespaceString& nss,
                      const BSONObj& minKey,
                      const BSONObj& maxKey,
                      const OID& epoch,
                      string* errMsg ) {

        //
        // Get sharding state up-to-date
        //

        ConnectionString configLoc = ConnectionString::parse( shardingState.getConfigServer(),
                                                              *errMsg );
        if ( !configLoc.isValid() ){
            warning() << *errMsg << endl;
            return false;
        }

        //
        // Get the distributed lock
        //

        ScopedDistributedLock collLock( configLoc, nss.ns() );
        collLock.setLockMessage( stream() << "merging chunks in " << nss.ns() << " from "
                                          << minKey << " to " << maxKey );

        Status acquisitionStatus = collLock.tryAcquire();
        if (!acquisitionStatus.isOK()) {
            *errMsg = stream() << "could not acquire collection lock for " << nss.ns()
                               << " to merge chunks in [" << minKey << "," << maxKey << ")"
                               << causedBy(acquisitionStatus);

            warning() << *errMsg << endl;
            return false;
        }

        //
        // We now have the collection lock, refresh metadata to latest version and sanity check
        //

        ChunkVersion shardVersion;
        Status status = shardingState.refreshMetadataNow(txn, nss.ns(), &shardVersion);

        if ( !status.isOK() ) {

            *errMsg = str::stream() << "could not merge chunks, failed to refresh metadata for "
                                    << nss.ns() << causedBy( status.reason() );

            warning() << *errMsg << endl;
            return false;
        }

        if ( epoch.isSet() && shardVersion.epoch() != epoch ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " has changed" << " since merge was sent" << "(sent epoch : "
                               << epoch.toString()
                               << ", current epoch : " << shardVersion.epoch().toString() << ")";

            warning() << *errMsg << endl;
            return false;
        }

        CollectionMetadataPtr metadata = shardingState.getCollectionMetadata( nss.ns() );

        if ( !metadata || metadata->getKeyPattern().isEmpty() ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " is not sharded";

            warning() << *errMsg << endl;
            return false;
        }

        dassert( metadata->getShardVersion().equals( shardVersion ) );

        if ( !metadata->isValidKey( minKey ) || !metadata->isValidKey( maxKey ) ) {

            *errMsg = stream() << "could not merge chunks, the range "
                               << rangeToString( minKey, maxKey ) << " is not valid"
                               << " for collection " << nss.ns() << " with key pattern "
                               << metadata->getKeyPattern();

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Get merged chunk information
        //

        ChunkVersion mergeVersion = metadata->getCollVersion();
        mergeVersion.incMinor();

        OwnedPointerVector<ChunkType> chunksToMerge;

        ChunkType itChunk;
        itChunk.setMin( minKey );
        itChunk.setMax( minKey );
        itChunk.setNS( nss.ns() );
        itChunk.setShard( shardingState.getShardName() );

        while ( itChunk.getMax().woCompare( maxKey ) < 0 &&
                metadata->getNextChunk( itChunk.getMax(), &itChunk ) ) {
            auto_ptr<ChunkType> saved( new ChunkType );
            itChunk.cloneTo( saved.get() );
            chunksToMerge.mutableVector().push_back( saved.release() );
        }

        if ( chunksToMerge.empty() ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " range starting at " << minKey
                               << " and ending at " << maxKey
                               << " does not belong to shard " << shardingState.getShardName();

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Validate the range starts and ends at chunks and has no holes, error if not valid
        //

        BSONObj firstDocMin = ( *chunksToMerge.begin() )->getMin();
        BSONObj firstDocMax = ( *chunksToMerge.begin() )->getMax();
        // minKey is inclusive
        bool minKeyInRange = rangeContains( firstDocMin, firstDocMax, minKey );

        if ( !minKeyInRange ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " range starting at " << minKey
                               << " does not belong to shard " << shardingState.getShardName();

            warning() << *errMsg << endl;
            return false;
        }

        BSONObj lastDocMin = ( *chunksToMerge.rbegin() )->getMin();
        BSONObj lastDocMax = ( *chunksToMerge.rbegin() )->getMax();
        // maxKey is exclusive
        bool maxKeyInRange = lastDocMin.woCompare( maxKey ) < 0 &&
                lastDocMax.woCompare( maxKey ) >= 0;

        if ( !maxKeyInRange ) {
            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " range ending at " << maxKey
                               << " does not belong to shard " << shardingState.getShardName();

            warning() << *errMsg << endl;
            return false;
        }

        bool validRangeStartKey = firstDocMin.woCompare( minKey ) == 0;
        bool validRangeEndKey = lastDocMax.woCompare( maxKey ) == 0;

        if ( !validRangeStartKey || !validRangeEndKey ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " does not contain a chunk "
                               << ( !validRangeStartKey ? "starting at " + minKey.toString() : "" )
                               << ( !validRangeStartKey && !validRangeEndKey ? " or " : "" )
                               << ( !validRangeEndKey ? "ending at " + maxKey.toString() : "" );

            warning() << *errMsg << endl;
            return false;
        }

        if ( chunksToMerge.size() == 1 ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " already contains chunk for " << rangeToString( minKey, maxKey );

            warning() << *errMsg << endl;
            return false;
        }

        bool holeInRange = false;

        // Look for hole in range
        ChunkType* prevChunk = *chunksToMerge.begin();
        ChunkType* nextChunk = NULL;
        for ( OwnedPointerVector<ChunkType>::const_iterator it = chunksToMerge.begin();
                it != chunksToMerge.end(); ++it ) {
            if ( it == chunksToMerge.begin() ) continue;

            nextChunk = *it;
            if ( prevChunk->getMax().woCompare( nextChunk->getMin() ) != 0 ) {
                holeInRange = true;
                break;
            }
            prevChunk = nextChunk;
        }

        if ( holeInRange ) {

            dassert( NULL != nextChunk );
            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " has a hole in the range " << rangeToString( minKey, maxKey )
                               << " at " << rangeToString( prevChunk->getMax(),
                                                           nextChunk->getMin() );

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Run apply ops command
        //

        BSONObj applyOpsCmd = buildApplyOpsCmd( chunksToMerge,
                                                shardVersion,
                                                mergeVersion );

        bool ok;
        BSONObj result;
        try {
            ScopedDbConnection conn( configLoc, 30.0 );
            ok = conn->runCommand( "config", applyOpsCmd, result );
            if ( !ok ) *errMsg = result.toString();
            conn.done();
        }
        catch( const DBException& ex ) {
            ok = false;
            *errMsg = ex.toString();
        }

        if ( !ok ) {
            *errMsg = stream() << "could not merge chunks for " << nss.ns()
                               << ", writing to config failed" << causedBy( errMsg );

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Install merged chunk metadata
        //

        {
            Lock::DBLock writeLk(txn->lockState(), nss.db(), newlm::MODE_X);
            shardingState.mergeChunks(txn, nss.ns(), minKey, maxKey, mergeVersion);
        }

        //
        // Log change
        //

        BSONObj mergeLogEntry = buildMergeLogEntry( chunksToMerge,
                                                    shardVersion,
                                                    mergeVersion );

        configServer.logChange( "merge", nss.ns(), mergeLogEntry );

        return true;
    }