예제 #1
0
MigrateInfo MigrationType::toMigrateInfo() const {
    ChunkType chunk;
    chunk.setNS(_nss);
    chunk.setShard(_fromShard);
    chunk.setMin(_min);
    chunk.setMax(_max);
    chunk.setVersion(_chunkVersion);

    return MigrateInfo(_toShard, chunk);
}
예제 #2
0
Status ChunkManager::createFirstChunks(OperationContext* txn,
                                       const ShardId& primaryShardId,
                                       const vector<BSONObj>* initPoints,
                                       const set<ShardId>* initShardIds) {
    // TODO distlock?
    // TODO: Race condition if we shard the collection and insert data while we split across
    // the non-primary shard.

    vector<BSONObj> splitPoints;
    vector<ShardId> shardIds;
    calcInitSplitsAndShards(txn, primaryShardId, initPoints, initShardIds, &splitPoints, &shardIds);

    // this is the first chunk; start the versioning from scratch
    ChunkVersion version(1, 0, OID::gen());

    log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << _ns
          << " using new epoch " << version.epoch();

    for (unsigned i = 0; i <= splitPoints.size(); i++) {
        BSONObj min = i == 0 ? _keyPattern.getKeyPattern().globalMin() : splitPoints[i - 1];
        BSONObj max =
            i < splitPoints.size() ? splitPoints[i] : _keyPattern.getKeyPattern().globalMax();

        ChunkType chunk;
        chunk.setName(Chunk::genID(_ns, min));
        chunk.setNS(_ns);
        chunk.setMin(min);
        chunk.setMax(max);
        chunk.setShard(shardIds[i % shardIds.size()]);
        chunk.setVersion(version);

        Status status = grid.catalogManager(txn)
                        ->insertConfigDocument(txn, ChunkType::ConfigNS, chunk.toBSON());
        if (!status.isOK()) {
            const string errMsg = str::stream()
                                  << "Creating first chunks failed: " << status.reason();
            error() << errMsg;
            return Status(status.code(), errMsg);
        }

        version.incMinor();
    }

    _version = ChunkVersion(0, 0, version.epoch());

    return Status::OK();
}
예제 #3
0
        /**
         * Stores ranges for a particular collection and shard starting from some version
         */
        void storeCollectionRanges( const NamespaceString& nss,
                                    const string& shardName,
                                    const vector<KeyRange>& ranges,
                                    const ChunkVersion& startVersion ) {

            // Get key pattern from first range
            ASSERT_GREATER_THAN( ranges.size(), 0u );

            CollectionType coll;
            coll.setNS( nss.ns() );
            coll.setKeyPattern( ranges.begin()->keyPattern );
            coll.setEpoch( startVersion.epoch() );
            coll.setUpdatedAt( 1ULL );
            string errMsg;
            ASSERT( coll.isValid( &errMsg ) );

            DBDirectClient client(&_txn);

            client.update( CollectionType::ConfigNS,
                           BSON( CollectionType::ns( coll.getNS() ) ),
                           coll.toBSON(), true, false );

            ChunkVersion nextVersion = startVersion;
            for ( vector<KeyRange>::const_iterator it = ranges.begin(); it != ranges.end(); ++it ) {

                ChunkType chunk;
                // TODO: We should not rely on the serialized ns, minkey being unique in the future,
                // causes problems since it links string serialization to correctness.
                chunk.setName( Chunk::genID( nss, it->minKey ) );
                chunk.setShard( shardName );
                chunk.setNS( nss.ns() );
                chunk.setVersion( nextVersion );
                chunk.setMin( it->minKey );
                chunk.setMax( it->maxKey );
                nextVersion.incMajor();

                client.insert( ChunkType::ConfigNS, chunk.toBSON() );
            }
        }
예제 #4
0
void DistributionStatus::populateShardToChunksMap(const ShardStatisticsVector& allShards,
                                                  const ChunkManager& chunkMgr,
                                                  ShardToChunksMap* shardToChunksMap) {
    // Makes sure there is an entry in shardToChunksMap for every shard.
    for (const auto& stat : allShards) {
        (*shardToChunksMap)[stat.shardId];
    }

    const ChunkMap& chunkMap = chunkMgr.getChunkMap();
    for (ChunkMap::const_iterator it = chunkMap.begin(); it != chunkMap.end(); ++it) {
        const ChunkPtr chunkPtr = it->second;

        ChunkType chunk;
        chunk.setNS(chunkMgr.getns());
        chunk.setMin(chunkPtr->getMin().getOwned());
        chunk.setMax(chunkPtr->getMax().getOwned());
        chunk.setJumbo(chunkPtr->isJumbo());  // TODO: is this reliable?

        const string shardName(chunkPtr->getShardId());
        chunk.setShard(shardName);

        (*shardToChunksMap)[shardName].push_back(chunk);
    }
}
예제 #5
0
void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* opCtx,
                                           ChunkManager* manager,
                                           Chunk* chunk,
                                           long dataWritten) {
    // Disable lastError tracking so that any errors, which occur during auto-split do not get
    // bubbled up on the client connection doing a write.
    LastError::Disabled d(&LastError::get(cc()));

    const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration();

    const bool minIsInf =
        (0 == manager->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk->getMin()));
    const bool maxIsInf =
        (0 == manager->getShardKeyPattern().getKeyPattern().globalMax().woCompare(chunk->getMax()));

    const uint64_t chunkBytesWritten = chunk->addBytesWritten(dataWritten);

    const uint64_t desiredChunkSize =
        calculateDesiredChunkSize(balancerConfig->getMaxChunkSizeBytes(), manager->numChunks());

    if (!chunk->shouldSplit(desiredChunkSize, minIsInf, maxIsInf)) {
        return;
    }

    const NamespaceString nss(manager->getns());

    if (!manager->_autoSplitThrottle._splitTickets.tryAcquire()) {
        LOG(1) << "won't auto split because not enough tickets: " << nss;
        return;
    }

    TicketHolderReleaser releaser(&(manager->_autoSplitThrottle._splitTickets));

    const ChunkRange chunkRange(chunk->getMin(), chunk->getMax());

    try {
        // Ensure we have the most up-to-date balancer configuration
        uassertStatusOK(balancerConfig->refreshAndCheck(opCtx));

        if (!balancerConfig->getShouldAutoSplit()) {
            return;
        }

        LOG(1) << "about to initiate autosplit: " << redact(chunk->toString())
               << " dataWritten: " << chunkBytesWritten
               << " desiredChunkSize: " << desiredChunkSize;

        const uint64_t chunkSizeToUse = [&]() {
            const uint64_t estNumSplitPoints = chunkBytesWritten / desiredChunkSize * 2;

            if (estNumSplitPoints >= kTooManySplitPoints) {
                // The current desired chunk size will split the chunk into lots of small chunk and
                // at the worst case this can result into thousands of chunks. So check and see if a
                // bigger value can be used.
                return std::min(chunkBytesWritten, balancerConfig->getMaxChunkSizeBytes());
            } else {
                return desiredChunkSize;
            }
        }();

        auto splitPoints =
            uassertStatusOK(shardutil::selectChunkSplitPoints(opCtx,
                                                              chunk->getShardId(),
                                                              nss,
                                                              manager->getShardKeyPattern(),
                                                              chunkRange,
                                                              chunkSizeToUse,
                                                              boost::none));

        if (splitPoints.size() <= 1) {
            // No split points means there isn't enough data to split on; 1 split point means we
            // have
            // between half the chunk size to full chunk size so there is no need to split yet
            chunk->clearBytesWritten();
            return;
        }

        if (minIsInf || maxIsInf) {
            // We don't want to reset _dataWritten since we want to check the other side right away
        } else {
            // We're splitting, so should wait a bit
            chunk->clearBytesWritten();
        }

        // We assume that if the chunk being split is the first (or last) one on the collection,
        // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the
        // very first (or last) key as a split point.
        //
        // This heuristic is skipped for "special" shard key patterns that are not likely to produce
        // monotonically increasing or decreasing values (e.g. hashed shard keys).
        if (KeyPattern::isOrderedKeyPattern(manager->getShardKeyPattern().toBSON())) {
            if (minIsInf) {
                BSONObj key = findExtremeKeyForShard(
                    opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), true);
                if (!key.isEmpty()) {
                    splitPoints.front() = key.getOwned();
                }
            } else if (maxIsInf) {
                BSONObj key = findExtremeKeyForShard(
                    opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), false);
                if (!key.isEmpty()) {
                    splitPoints.back() = key.getOwned();
                }
            }
        }

        const auto suggestedMigrateChunk =
            uassertStatusOK(shardutil::splitChunkAtMultiplePoints(opCtx,
                                                                  chunk->getShardId(),
                                                                  nss,
                                                                  manager->getShardKeyPattern(),
                                                                  manager->getVersion(),
                                                                  chunkRange,
                                                                  splitPoints));

        // Balance the resulting chunks if the option is enabled and if the shard suggested a chunk
        // to balance
        const bool shouldBalance = [&]() {
            if (!balancerConfig->shouldBalanceForAutoSplit())
                return false;

            auto collStatus =
                Grid::get(opCtx)->catalogClient()->getCollection(opCtx, manager->getns());
            if (!collStatus.isOK()) {
                log() << "Auto-split for " << nss << " failed to load collection metadata"
                      << causedBy(redact(collStatus.getStatus()));
                return false;
            }

            return collStatus.getValue().value.getAllowBalance();
        }();

        log() << "autosplitted " << nss << " chunk: " << redact(chunk->toString()) << " into "
              << (splitPoints.size() + 1) << " parts (desiredChunkSize " << desiredChunkSize << ")"
              << (suggestedMigrateChunk ? "" : (std::string) " (migrate suggested" +
                          (shouldBalance ? ")" : ", but no migrations allowed)"));

        // Reload the chunk manager after the split
        auto routingInfo = uassertStatusOK(
            Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx,
                                                                                         nss));

        if (!shouldBalance || !suggestedMigrateChunk) {
            return;
        }

        // Top chunk optimization - try to move the top chunk out of this shard to prevent the hot
        // spot from staying on a single shard. This is based on the assumption that succeeding
        // inserts will fall on the top chunk.

        // We need to use the latest chunk manager (after the split) in order to have the most
        // up-to-date view of the chunk we are about to move
        auto suggestedChunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation(
            suggestedMigrateChunk->getMin());

        ChunkType chunkToMove;
        chunkToMove.setNS(nss.ns());
        chunkToMove.setShard(suggestedChunk->getShardId());
        chunkToMove.setMin(suggestedChunk->getMin());
        chunkToMove.setMax(suggestedChunk->getMax());
        chunkToMove.setVersion(suggestedChunk->getLastmod());

        uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, chunkToMove));

        // Ensure the collection gets reloaded because of the move
        Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss);
    } catch (const DBException& ex) {
        chunk->clearBytesWritten();

        if (ErrorCodes::isStaleShardingError(ErrorCodes::Error(ex.getCode()))) {
            log() << "Unable to auto-split chunk " << redact(chunkRange.toString()) << causedBy(ex)
                  << ", going to invalidate routing table entry for " << nss;
            Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss);
        }
    }
}
예제 #6
0
파일: chunk.cpp 프로젝트: alabid/mongo
bool Chunk::splitIfShould(long dataWritten) const {
    dassert(ShouldAutoSplit);
    LastError::Disabled d(&LastError::get(cc()));

    try {
        _dataWritten += dataWritten;
        int splitThreshold = getManager()->getCurrentDesiredChunkSize();
        if (_minIsInf() || _maxIsInf()) {
            splitThreshold = (int)((double)splitThreshold * .9);
        }

        if (_dataWritten < splitThreshold / ChunkManager::SplitHeuristics::splitTestFactor)
            return false;

        if (!getManager()->_splitHeuristics._splitTickets.tryAcquire()) {
            LOG(1) << "won't auto split because not enough tickets: " << getManager()->getns();
            return false;
        }

        TicketHolderReleaser releaser(&(getManager()->_splitHeuristics._splitTickets));

        // this is a bit ugly
        // we need it so that mongos blocks for the writes to actually be committed
        // this does mean mongos has more back pressure than mongod alone
        // since it nots 100% tcp queue bound
        // this was implicit before since we did a splitVector on the same socket
        ShardConnection::sync();

        LOG(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten
               << " splitThreshold: " << splitThreshold;

        BSONObj res;
        size_t splitCount = 0;
        Status status = split(Chunk::autoSplitInternal, &splitCount, &res);
        if (!status.isOK()) {
            // Split would have issued a message if we got here. This means there wasn't enough
            // data to split, so don't want to try again until considerable more data
            _dataWritten = 0;
            return false;
        }

        if (_maxIsInf() || _minIsInf()) {
            // we don't want to reset _dataWritten since we kind of want to check the other side
            // right away
        } else {
            // we're splitting, so should wait a bit
            _dataWritten = 0;
        }

        bool shouldBalance = grid.getConfigShouldBalance();
        if (shouldBalance) {
            auto status = grid.catalogManager()->getCollection(_manager->getns());
            if (!status.isOK()) {
                log() << "Auto-split for " << _manager->getns()
                      << " failed to load collection metadata due to " << status.getStatus();
                return false;
            }

            shouldBalance = status.getValue().getAllowBalance();
        }

        log() << "autosplitted " << _manager->getns() << " shard: " << toString() << " into "
              << (splitCount + 1) << " (splitThreshold " << splitThreshold << ")"
              << (res["shouldMigrate"].eoo() ? "" : (string) " (migrate suggested" +
                          (shouldBalance ? ")" : ", but no migrations allowed)"));

        // Top chunk optimization - try to move the top chunk out of this shard
        // to prevent the hot spot from staying on a single shard. This is based on
        // the assumption that succeeding inserts will fall on the top chunk.
        BSONElement shouldMigrate = res["shouldMigrate"];  // not in mongod < 1.9.1 but that is ok
        if (!shouldMigrate.eoo() && shouldBalance) {
            BSONObj range = shouldMigrate.embeddedObject();

            ChunkType chunkToMove;
            {
                const auto shard = grid.shardRegistry()->getShard(getShardId());
                chunkToMove.setShard(shard->toString());
            }
            chunkToMove.setMin(range["min"].embeddedObject());
            chunkToMove.setMax(range["max"].embeddedObject());

            tryMoveToOtherShard(*_manager, chunkToMove);
        }

        return true;

    } catch (DBException& e) {
        // TODO: Make this better - there are lots of reasons a split could fail
        // Random so that we don't sync up with other failed splits
        _dataWritten = mkDataWritten();

        // if the collection lock is taken (e.g. we're migrating), it is fine for the split to fail.
        warning() << "could not autosplit collection " << _manager->getns() << causedBy(e);
        return false;
    }
}
예제 #7
0
    bool mergeChunks( OperationContext* txn,
                      const NamespaceString& nss,
                      const BSONObj& minKey,
                      const BSONObj& maxKey,
                      const OID& epoch,
                      string* errMsg ) {

        //
        // Get sharding state up-to-date
        //

        ConnectionString configLoc = ConnectionString::parse( shardingState.getConfigServer(),
                                                              *errMsg );
        if ( !configLoc.isValid() ){
            warning() << *errMsg << endl;
            return false;
        }

        //
        // Get the distributed lock
        //

        ScopedDistributedLock collLock( configLoc, nss.ns() );
        collLock.setLockMessage( stream() << "merging chunks in " << nss.ns() << " from "
                                          << minKey << " to " << maxKey );

        Status acquisitionStatus = collLock.tryAcquire();
        if (!acquisitionStatus.isOK()) {
            *errMsg = stream() << "could not acquire collection lock for " << nss.ns()
                               << " to merge chunks in [" << minKey << "," << maxKey << ")"
                               << causedBy(acquisitionStatus);

            warning() << *errMsg << endl;
            return false;
        }

        //
        // We now have the collection lock, refresh metadata to latest version and sanity check
        //

        ChunkVersion shardVersion;
        Status status = shardingState.refreshMetadataNow(txn, nss.ns(), &shardVersion);

        if ( !status.isOK() ) {

            *errMsg = str::stream() << "could not merge chunks, failed to refresh metadata for "
                                    << nss.ns() << causedBy( status.reason() );

            warning() << *errMsg << endl;
            return false;
        }

        if ( epoch.isSet() && shardVersion.epoch() != epoch ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " has changed" << " since merge was sent" << "(sent epoch : "
                               << epoch.toString()
                               << ", current epoch : " << shardVersion.epoch().toString() << ")";

            warning() << *errMsg << endl;
            return false;
        }

        CollectionMetadataPtr metadata = shardingState.getCollectionMetadata( nss.ns() );

        if ( !metadata || metadata->getKeyPattern().isEmpty() ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " is not sharded";

            warning() << *errMsg << endl;
            return false;
        }

        dassert( metadata->getShardVersion().equals( shardVersion ) );

        if ( !metadata->isValidKey( minKey ) || !metadata->isValidKey( maxKey ) ) {

            *errMsg = stream() << "could not merge chunks, the range "
                               << rangeToString( minKey, maxKey ) << " is not valid"
                               << " for collection " << nss.ns() << " with key pattern "
                               << metadata->getKeyPattern();

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Get merged chunk information
        //

        ChunkVersion mergeVersion = metadata->getCollVersion();
        mergeVersion.incMinor();

        OwnedPointerVector<ChunkType> chunksToMerge;

        ChunkType itChunk;
        itChunk.setMin( minKey );
        itChunk.setMax( minKey );
        itChunk.setNS( nss.ns() );
        itChunk.setShard( shardingState.getShardName() );

        while ( itChunk.getMax().woCompare( maxKey ) < 0 &&
                metadata->getNextChunk( itChunk.getMax(), &itChunk ) ) {
            auto_ptr<ChunkType> saved( new ChunkType );
            itChunk.cloneTo( saved.get() );
            chunksToMerge.mutableVector().push_back( saved.release() );
        }

        if ( chunksToMerge.empty() ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " range starting at " << minKey
                               << " and ending at " << maxKey
                               << " does not belong to shard " << shardingState.getShardName();

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Validate the range starts and ends at chunks and has no holes, error if not valid
        //

        BSONObj firstDocMin = ( *chunksToMerge.begin() )->getMin();
        BSONObj firstDocMax = ( *chunksToMerge.begin() )->getMax();
        // minKey is inclusive
        bool minKeyInRange = rangeContains( firstDocMin, firstDocMax, minKey );

        if ( !minKeyInRange ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " range starting at " << minKey
                               << " does not belong to shard " << shardingState.getShardName();

            warning() << *errMsg << endl;
            return false;
        }

        BSONObj lastDocMin = ( *chunksToMerge.rbegin() )->getMin();
        BSONObj lastDocMax = ( *chunksToMerge.rbegin() )->getMax();
        // maxKey is exclusive
        bool maxKeyInRange = lastDocMin.woCompare( maxKey ) < 0 &&
                lastDocMax.woCompare( maxKey ) >= 0;

        if ( !maxKeyInRange ) {
            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " range ending at " << maxKey
                               << " does not belong to shard " << shardingState.getShardName();

            warning() << *errMsg << endl;
            return false;
        }

        bool validRangeStartKey = firstDocMin.woCompare( minKey ) == 0;
        bool validRangeEndKey = lastDocMax.woCompare( maxKey ) == 0;

        if ( !validRangeStartKey || !validRangeEndKey ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " does not contain a chunk "
                               << ( !validRangeStartKey ? "starting at " + minKey.toString() : "" )
                               << ( !validRangeStartKey && !validRangeEndKey ? " or " : "" )
                               << ( !validRangeEndKey ? "ending at " + maxKey.toString() : "" );

            warning() << *errMsg << endl;
            return false;
        }

        if ( chunksToMerge.size() == 1 ) {

            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " already contains chunk for " << rangeToString( minKey, maxKey );

            warning() << *errMsg << endl;
            return false;
        }

        bool holeInRange = false;

        // Look for hole in range
        ChunkType* prevChunk = *chunksToMerge.begin();
        ChunkType* nextChunk = NULL;
        for ( OwnedPointerVector<ChunkType>::const_iterator it = chunksToMerge.begin();
                it != chunksToMerge.end(); ++it ) {
            if ( it == chunksToMerge.begin() ) continue;

            nextChunk = *it;
            if ( prevChunk->getMax().woCompare( nextChunk->getMin() ) != 0 ) {
                holeInRange = true;
                break;
            }
            prevChunk = nextChunk;
        }

        if ( holeInRange ) {

            dassert( NULL != nextChunk );
            *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                               << " has a hole in the range " << rangeToString( minKey, maxKey )
                               << " at " << rangeToString( prevChunk->getMax(),
                                                           nextChunk->getMin() );

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Run apply ops command
        //

        BSONObj applyOpsCmd = buildApplyOpsCmd( chunksToMerge,
                                                shardVersion,
                                                mergeVersion );

        bool ok;
        BSONObj result;
        try {
            ScopedDbConnection conn( configLoc, 30.0 );
            ok = conn->runCommand( "config", applyOpsCmd, result );
            if ( !ok ) *errMsg = result.toString();
            conn.done();
        }
        catch( const DBException& ex ) {
            ok = false;
            *errMsg = ex.toString();
        }

        if ( !ok ) {
            *errMsg = stream() << "could not merge chunks for " << nss.ns()
                               << ", writing to config failed" << causedBy( errMsg );

            warning() << *errMsg << endl;
            return false;
        }

        //
        // Install merged chunk metadata
        //

        {
            Lock::DBLock writeLk(txn->lockState(), nss.db(), newlm::MODE_X);
            shardingState.mergeChunks(txn, nss.ns(), minKey, maxKey, mergeVersion);
        }

        //
        // Log change
        //

        BSONObj mergeLogEntry = buildMergeLogEntry( chunksToMerge,
                                                    shardVersion,
                                                    mergeVersion );

        configServer.logChange( "merge", nss.ns(), mergeLogEntry );

        return true;
    }
예제 #8
0
bool Chunk::splitIfShould(OperationContext* txn, long dataWritten) {
    LastError::Disabled d(&LastError::get(cc()));

    try {
        _dataWritten += dataWritten;
        uint64_t splitThreshold = _manager->getCurrentDesiredChunkSize();
        if (_minIsInf() || _maxIsInf()) {
            splitThreshold = static_cast<uint64_t>((double)splitThreshold * 0.9);
        }

        if (_dataWritten < splitThreshold / ChunkManager::SplitHeuristics::splitTestFactor) {
            return false;
        }

        if (!_manager->_splitHeuristics._splitTickets.tryAcquire()) {
            LOG(1) << "won't auto split because not enough tickets: " << _manager->getns();
            return false;
        }

        TicketHolderReleaser releaser(&(_manager->_splitHeuristics._splitTickets));

        const auto balancerConfig = Grid::get(txn)->getBalancerConfiguration();

        Status refreshStatus = balancerConfig->refreshAndCheck(txn);
        if (!refreshStatus.isOK()) {
            warning() << "Unable to refresh balancer settings" << causedBy(refreshStatus);
            return false;
        }

        bool shouldAutoSplit = balancerConfig->getShouldAutoSplit();
        if (!shouldAutoSplit) {
            return false;
        }

        LOG(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten
               << " splitThreshold: " << splitThreshold;

        size_t splitCount = 0;
        auto splitStatus = split(txn, Chunk::autoSplitInternal, &splitCount);
        if (!splitStatus.isOK()) {
            // Split would have issued a message if we got here. This means there wasn't enough
            // data to split, so don't want to try again until considerable more data
            _dataWritten = 0;
            return false;
        }

        if (_maxIsInf() || _minIsInf()) {
            // we don't want to reset _dataWritten since we kind of want to check the other side
            // right away
        } else {
            // we're splitting, so should wait a bit
            _dataWritten = 0;
        }

        bool shouldBalance = balancerConfig->shouldBalanceForAutoSplit();

        if (shouldBalance) {
            auto collStatus = grid.catalogClient(txn)->getCollection(txn, _manager->getns());
            if (!collStatus.isOK()) {
                warning() << "Auto-split for " << _manager->getns()
                          << " failed to load collection metadata"
                          << causedBy(collStatus.getStatus());
                return false;
            }

            shouldBalance = collStatus.getValue().value.getAllowBalance();
        }

        const auto suggestedMigrateChunk = std::move(splitStatus.getValue());

        log() << "autosplitted " << _manager->getns() << " shard: " << toString() << " into "
              << (splitCount + 1) << " (splitThreshold " << splitThreshold << ")"
              << (suggestedMigrateChunk ? "" : (string) " (migrate suggested" +
                          (shouldBalance ? ")" : ", but no migrations allowed)"));

        // Top chunk optimization - try to move the top chunk out of this shard to prevent the hot
        // spot from staying on a single shard. This is based on the assumption that succeeding
        // inserts will fall on the top chunk.
        if (suggestedMigrateChunk && shouldBalance) {
            const NamespaceString nss(_manager->getns());

            // We need to use the latest chunk manager (after the split) in order to have the most
            // up-to-date view of the chunk we are about to move
            auto scopedCM = uassertStatusOK(ScopedChunkManager::getExisting(txn, nss));
            auto suggestedChunk =
                scopedCM.cm()->findIntersectingChunk(txn, suggestedMigrateChunk->getMin());

            ChunkType chunkToMove;
            chunkToMove.setNS(nss.ns());
            chunkToMove.setShard(suggestedChunk->getShardId());
            chunkToMove.setMin(suggestedChunk->getMin());
            chunkToMove.setMax(suggestedChunk->getMax());
            chunkToMove.setVersion(suggestedChunk->getLastmod());

            Status rebalanceStatus = Balancer::get(txn)->rebalanceSingleChunk(txn, chunkToMove);
            if (!rebalanceStatus.isOK()) {
                msgassertedNoTraceWithStatus(10412, rebalanceStatus);
            }

            _manager->reload(txn);
        }

        return true;
    } catch (const DBException& e) {
        // TODO: Make this better - there are lots of reasons a split could fail
        // Random so that we don't sync up with other failed splits
        _dataWritten = mkDataWritten();

        // if the collection lock is taken (e.g. we're migrating), it is fine for the split to fail.
        warning() << "could not autosplit collection " << _manager->getns() << causedBy(e);
        return false;
    }
}
예제 #9
0
파일: d_merge.cpp 프로젝트: NikolaySt/mongo
bool mergeChunks(OperationContext* txn,
                 const NamespaceString& nss,
                 const BSONObj& minKey,
                 const BSONObj& maxKey,
                 const OID& epoch,
                 string* errMsg) {
    // Get the distributed lock
    string whyMessage = stream() << "merging chunks in " << nss.ns() << " from " << minKey << " to "
                                 << maxKey;
    auto scopedDistLock = grid.catalogManager(txn)->distLock(
        txn, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout);

    if (!scopedDistLock.isOK()) {
        *errMsg = stream() << "could not acquire collection lock for " << nss.ns()
                           << " to merge chunks in [" << minKey << "," << maxKey << ")"
                           << causedBy(scopedDistLock.getStatus());

        warning() << *errMsg;
        return false;
    }

    ShardingState* shardingState = ShardingState::get(txn);

    //
    // We now have the collection lock, refresh metadata to latest version and sanity check
    //

    ChunkVersion shardVersion;
    Status status = shardingState->refreshMetadataNow(txn, nss.ns(), &shardVersion);

    if (!status.isOK()) {
        *errMsg = str::stream() << "could not merge chunks, failed to refresh metadata for "
                                << nss.ns() << causedBy(status.reason());

        warning() << *errMsg;
        return false;
    }

    if (epoch.isSet() && shardVersion.epoch() != epoch) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns() << " has changed"
                           << " since merge was sent"
                           << "(sent epoch : " << epoch.toString()
                           << ", current epoch : " << shardVersion.epoch().toString() << ")";

        warning() << *errMsg;
        return false;
    }

    shared_ptr<CollectionMetadata> metadata = shardingState->getCollectionMetadata(nss.ns());

    if (!metadata || metadata->getKeyPattern().isEmpty()) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " is not sharded";

        warning() << *errMsg;
        return false;
    }

    dassert(metadata->getShardVersion().equals(shardVersion));

    if (!metadata->isValidKey(minKey) || !metadata->isValidKey(maxKey)) {
        *errMsg = stream() << "could not merge chunks, the range " << rangeToString(minKey, maxKey)
                           << " is not valid"
                           << " for collection " << nss.ns() << " with key pattern "
                           << metadata->getKeyPattern();

        warning() << *errMsg;
        return false;
    }

    //
    // Get merged chunk information
    //

    ChunkVersion mergeVersion = metadata->getCollVersion();
    mergeVersion.incMinor();

    std::vector<ChunkType> chunksToMerge;

    ChunkType itChunk;
    itChunk.setMin(minKey);
    itChunk.setMax(minKey);
    itChunk.setNS(nss.ns());
    itChunk.setShard(shardingState->getShardName());

    while (itChunk.getMax().woCompare(maxKey) < 0 &&
           metadata->getNextChunk(itChunk.getMax(), &itChunk)) {
        chunksToMerge.push_back(itChunk);
    }

    if (chunksToMerge.empty()) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " and ending at " << maxKey
                           << " does not belong to shard " << shardingState->getShardName();

        warning() << *errMsg;
        return false;
    }

    //
    // Validate the range starts and ends at chunks and has no holes, error if not valid
    //

    BSONObj firstDocMin = chunksToMerge.front().getMin();
    BSONObj firstDocMax = chunksToMerge.front().getMax();
    // minKey is inclusive
    bool minKeyInRange = rangeContains(firstDocMin, firstDocMax, minKey);

    if (!minKeyInRange) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range starting at " << minKey << " does not belong to shard "
                           << shardingState->getShardName();

        warning() << *errMsg;
        return false;
    }

    BSONObj lastDocMin = chunksToMerge.back().getMin();
    BSONObj lastDocMax = chunksToMerge.back().getMax();
    // maxKey is exclusive
    bool maxKeyInRange = lastDocMin.woCompare(maxKey) < 0 && lastDocMax.woCompare(maxKey) >= 0;

    if (!maxKeyInRange) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " range ending at " << maxKey << " does not belong to shard "
                           << shardingState->getShardName();

        warning() << *errMsg;
        return false;
    }

    bool validRangeStartKey = firstDocMin.woCompare(minKey) == 0;
    bool validRangeEndKey = lastDocMax.woCompare(maxKey) == 0;

    if (!validRangeStartKey || !validRangeEndKey) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " does not contain a chunk "
                           << (!validRangeStartKey ? "starting at " + minKey.toString() : "")
                           << (!validRangeStartKey && !validRangeEndKey ? " or " : "")
                           << (!validRangeEndKey ? "ending at " + maxKey.toString() : "");

        warning() << *errMsg;
        return false;
    }

    if (chunksToMerge.size() == 1) {
        *errMsg = stream() << "could not merge chunks, collection " << nss.ns()
                           << " already contains chunk for " << rangeToString(minKey, maxKey);

        warning() << *errMsg;
        return false;
    }

    // Look for hole in range
    for (size_t i = 1; i < chunksToMerge.size(); ++i) {
        if (chunksToMerge[i - 1].getMax().woCompare(chunksToMerge[i].getMin()) != 0) {
            *errMsg =
                stream() << "could not merge chunks, collection " << nss.ns()
                         << " has a hole in the range " << rangeToString(minKey, maxKey) << " at "
                         << rangeToString(chunksToMerge[i - 1].getMax(), chunksToMerge[i].getMin());

            warning() << *errMsg;
            return false;
        }
    }

    //
    // Run apply ops command
    //
    Status applyOpsStatus = runApplyOpsCmd(txn, chunksToMerge, shardVersion, mergeVersion);
    if (!applyOpsStatus.isOK()) {
        warning() << applyOpsStatus;
        return false;
    }

    //
    // Install merged chunk metadata
    //

    {
        ScopedTransaction transaction(txn, MODE_IX);
        Lock::DBLock writeLk(txn->lockState(), nss.db(), MODE_IX);
        Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_X);

        shardingState->mergeChunks(txn, nss.ns(), minKey, maxKey, mergeVersion);
    }

    //
    // Log change
    //

    BSONObj mergeLogEntry = buildMergeLogEntry(chunksToMerge, shardVersion, mergeVersion);

    grid.catalogManager(txn)->logChange(txn, "merge", nss.ns(), mergeLogEntry);

    return true;
}