Пример #1
0
    Status _findAllCollections(const ConnectionString& configLoc,
                               bool optionalEpochs,
                               OwnedPointerMap<string, CollectionType>* collections)
    {
        scoped_ptr<ScopedDbConnection> connPtr;

        try {
            connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));

            ScopedDbConnection& conn = *connPtr;
            scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(CollectionType::ConfigNS,
                                                                      Query())));

            while (cursor->more()) {

                BSONObj collDoc = cursor->nextSafe();

                CollectionType* coll = new CollectionType();
                string errMsg;
                coll->parseBSON(collDoc, &errMsg);

                // Needed for the v3 to v4 upgrade
                bool epochNotSet = !coll->isEpochSet() || !coll->getEpoch().isSet();
                if (optionalEpochs && epochNotSet) {
                    // Set our epoch to something here, just to allow
                    coll->setEpoch(OID::gen());
                }

                if (errMsg != "" || !coll->isValid(&errMsg)) {
                    return Status(ErrorCodes::UnsupportedFormat,
                                  stream() << "invalid collection " << collDoc
                                           << " read from the config server" << causedBy(errMsg));
                }

                if (coll->isDroppedSet() && coll->getDropped()) {
                    continue;
                }

                if (optionalEpochs && epochNotSet) {
                    coll->setEpoch(OID());
                }

                collections->mutableMap().insert(make_pair(coll->getNS(), coll));
            }
        }
        catch (const DBException& e) {
            return e.toStatus();
        }

        connPtr->done();
        return Status::OK();
    }
Пример #2
0
void CollectionInfo::save(OperationContext* txn, const string& ns) {
    CollectionType coll;
    coll.setNs(NamespaceString{ns});

    if (_cm) {
        invariant(!_dropped);
        coll.setEpoch(_cm->getVersion().epoch());
        // TODO(schwerin): The following isn't really a date, but is stored as one in-memory and
        // in config.collections, as a historical oddity.
        coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(_cm->getVersion().toLong()));
        coll.setKeyPattern(_cm->getShardKeyPattern().toBSON());
        coll.setDefaultCollation(_cm->getDefaultCollation());
        coll.setUnique(_cm->isUnique());
    } else {
        invariant(_dropped);
        coll.setDropped(true);
        coll.setEpoch(ChunkVersion::DROPPED().epoch());
        coll.setUpdatedAt(Date_t::now());
    }

    uassertStatusOK(grid.catalogClient(txn)->updateCollection(txn, ns, coll));
    _dirty = false;
}
Пример #3
0
        /**
         * Stores ranges for a particular collection and shard starting from some version
         */
        void storeCollectionRanges( const NamespaceString& nss,
                                    const string& shardName,
                                    const vector<KeyRange>& ranges,
                                    const ChunkVersion& startVersion ) {

            // Get key pattern from first range
            ASSERT_GREATER_THAN( ranges.size(), 0u );

            CollectionType coll;
            coll.setNS( nss.ns() );
            coll.setKeyPattern( ranges.begin()->keyPattern );
            coll.setEpoch( startVersion.epoch() );
            coll.setUpdatedAt( 1ULL );
            string errMsg;
            ASSERT( coll.isValid( &errMsg ) );

            DBDirectClient client(&_txn);

            client.update( CollectionType::ConfigNS,
                           BSON( CollectionType::ns( coll.getNS() ) ),
                           coll.toBSON(), true, false );

            ChunkVersion nextVersion = startVersion;
            for ( vector<KeyRange>::const_iterator it = ranges.begin(); it != ranges.end(); ++it ) {

                ChunkType chunk;
                // TODO: We should not rely on the serialized ns, minkey being unique in the future,
                // causes problems since it links string serialization to correctness.
                chunk.setName( Chunk::genID( nss, it->minKey ) );
                chunk.setShard( shardName );
                chunk.setNS( nss.ns() );
                chunk.setVersion( nextVersion );
                chunk.setMin( it->minKey );
                chunk.setMax( it->maxKey );
                nextVersion.incMajor();

                client.insert( ChunkType::ConfigNS, chunk.toBSON() );
            }
        }
void ShardingCatalogManager::shardCollection(OperationContext* opCtx,
                                             const NamespaceString& nss,
                                             const boost::optional<UUID> uuid,
                                             const ShardKeyPattern& fieldsAndOrder,
                                             const BSONObj& defaultCollation,
                                             bool unique,
                                             const vector<BSONObj>& splitPoints,
                                             bool isFromMapReduce,
                                             const ShardId& dbPrimaryShardId) {
    const auto shardRegistry = Grid::get(opCtx)->shardRegistry();

    const auto primaryShard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));

    // Fail if there are partially written chunks from a previous failed shardCollection.
    checkForExistingChunks(opCtx, nss);

    // Prior to 4.0.5, zones cannot be taken into account at collection sharding time, so ignore
    // them and let the balancer apply them later
    const std::vector<TagsType> treatAsNoZonesDefined;

    // Map/reduce with output to sharded collection ignores consistency checks and requires the
    // initial chunks to be spread across shards unconditionally
    const bool treatAsEmpty = isFromMapReduce;

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", nss.ns());
        if (uuid)
            uuid->appendToBuilder(&collectionDetail, "uuid");
        collectionDetail.append("empty", treatAsEmpty);
        collectionDetail.append("fromMapReduce", isFromMapReduce);
        collectionDetail.append("primary", primaryShard->toString());
        collectionDetail.append("numChunks", static_cast<int>(splitPoints.size() + 1));
        uassertStatusOK(ShardingLogging::get(opCtx)->logChangeChecked(
            opCtx,
            "shardCollection.start",
            nss.ns(),
            collectionDetail.obj(),
            ShardingCatalogClient::kMajorityWriteConcern));
    }

    // Construct the collection default collator.
    std::unique_ptr<CollatorInterface> defaultCollator;
    if (!defaultCollation.isEmpty()) {
        defaultCollator = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext())
                                              ->makeFromBSON(defaultCollation));
    }

    const auto initialChunks = InitialSplitPolicy::createFirstChunks(opCtx,
                                                                     nss,
                                                                     fieldsAndOrder,
                                                                     dbPrimaryShardId,
                                                                     splitPoints,
                                                                     treatAsNoZonesDefined,
                                                                     treatAsEmpty);

    InitialSplitPolicy::writeFirstChunksToConfig(opCtx, initialChunks);

    {
        CollectionType coll;
        coll.setNs(nss);
        if (uuid)
            coll.setUUID(*uuid);
        coll.setEpoch(initialChunks.collVersion().epoch());
        coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(initialChunks.collVersion().toLong()));
        coll.setKeyPattern(fieldsAndOrder.toBSON());
        coll.setDefaultCollation(defaultCollator ? defaultCollator->getSpec().toBSON() : BSONObj());
        coll.setUnique(unique);

        uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
            opCtx, nss, coll, true /*upsert*/));
    }

    auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));
    invariant(!shard->isConfig());

    // Tell the primary mongod to refresh its data
    SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
        shardRegistry->getConfigServerConnectionString(),
        dbPrimaryShardId,
        primaryShard->getConnString(),
        nss,
        initialChunks.collVersion(),
        true /* isAuthoritative */,
        true /* forceRefresh */);

    auto ssvResponse =
        shard->runCommandWithFixedRetryAttempts(opCtx,
                                                ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                                "admin",
                                                ssv.toBSON(),
                                                Shard::RetryPolicy::kIdempotent);
    auto status = ssvResponse.isOK() ? std::move(ssvResponse.getValue().commandStatus)
                                     : std::move(ssvResponse.getStatus());
    if (!status.isOK()) {
        warning() << "could not update initial version of " << nss.ns() << " on shard primary "
                  << dbPrimaryShardId << causedBy(redact(status));
    }

    ShardingLogging::get(opCtx)->logChange(
        opCtx,
        "shardCollection.end",
        nss.ns(),
        BSON("version" << initialChunks.collVersion().toString()),
        ShardingCatalogClient::kMajorityWriteConcern);
}
Status ShardingCatalogManager::dropCollection(OperationContext* opCtx, const NamespaceString& nss) {
    const Status logStatus =
        ShardingLogging::get(opCtx)->logChangeChecked(opCtx,
                                                      "dropCollection.start",
                                                      nss.ns(),
                                                      BSONObj(),
                                                      ShardingCatalogClient::kMajorityWriteConcern);
    if (!logStatus.isOK()) {
        return logStatus;
    }

    const auto catalogClient = Grid::get(opCtx)->catalogClient();
    const auto shardsStatus =
        catalogClient->getAllShards(opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!shardsStatus.isOK()) {
        return shardsStatus.getStatus();
    }
    vector<ShardType> allShards = std::move(shardsStatus.getValue().value);

    LOG(1) << "dropCollection " << nss.ns() << " started";

    const auto dropCommandBSON = [opCtx, &nss] {
        BSONObjBuilder builder;
        builder.append("drop", nss.coll());

        if (!opCtx->getWriteConcern().usedDefault) {
            builder.append(WriteConcernOptions::kWriteConcernField,
                           opCtx->getWriteConcern().toBSON());
        }

        return builder.obj();
    }();

    std::map<std::string, BSONObj> errors;
    auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
        if (!swShard.isOK()) {
            return swShard.getStatus();
        }

        const auto& shard = swShard.getValue();

        auto swDropResult = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            nss.db().toString(),
            dropCommandBSON,
            Shard::RetryPolicy::kIdempotent);

        if (!swDropResult.isOK()) {
            return swDropResult.getStatus().withContext(
                str::stream() << "Error dropping collection on shard " << shardEntry.getName());
        }

        auto& dropResult = swDropResult.getValue();

        auto dropStatus = std::move(dropResult.commandStatus);
        auto wcStatus = std::move(dropResult.writeConcernStatus);
        if (!dropStatus.isOK() || !wcStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound && wcStatus.isOK()) {
                // Generally getting NamespaceNotFound is okay to ignore as it simply means that
                // the collection has already been dropped or doesn't exist on this shard.
                // If, however, we get NamespaceNotFound but also have a write concern error then we
                // can't confirm whether the fact that the namespace doesn't exist is actually
                // committed.  Thus we must still fail on NamespaceNotFound if there is also a write
                // concern error. This can happen if we call drop, it succeeds but with a write
                // concern error, then we retry the drop.
                continue;
            }

            errors.emplace(shardEntry.getHost(), std::move(dropResult.response));
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << nss.ns() << " shard data deleted";

    // Remove chunk data
    Status result =
        catalogClient->removeConfigDocuments(opCtx,
                                             ChunkType::ConfigNS,
                                             BSON(ChunkType::ns(nss.ns())),
                                             ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " chunk data deleted";

    // Remove tag data
    result = catalogClient->removeConfigDocuments(opCtx,
                                                  TagsType::ConfigNS,
                                                  BSON(TagsType::ns(nss.ns())),
                                                  ShardingCatalogClient::kMajorityWriteConcern);

    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " tag data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(nss);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(Grid::get(opCtx)->getNetwork()->now());

    const bool upsert = false;
    result = ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
        opCtx, nss, coll, upsert);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " collection marked as dropped";

    for (const auto& shardEntry : allShards) {
        auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
        if (!swShard.isOK()) {
            return swShard.getStatus();
        }

        const auto& shard = swShard.getValue();

        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            shardRegistry->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassert(28781, ConnectionString::parse(shardEntry.getHost())),
            nss,
            ChunkVersion::DROPPED(),
            true /* isAuthoritative */,
            true /* forceRefresh */);

        auto ssvResult = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            ssv.toBSON(),
            Shard::RetryPolicy::kIdempotent);

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = std::move(ssvResult.getValue().commandStatus);
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            BSON("unsetSharding" << 1),
            Shard::RetryPolicy::kIdempotent);

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = std::move(unsetShardingStatus.getValue().commandStatus);
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << nss.ns() << " completed";

    ShardingLogging::get(opCtx)->logChange(
        opCtx, "dropCollection", nss.ns(), BSONObj(), ShardingCatalogClient::kMajorityWriteConcern);

    return Status::OK();
}
Пример #6
0
    /**
     * Upgrade v3 to v4 described here.
     *
     * This upgrade takes a config server without collection epochs (potentially) and adds
     * epochs to all mongo processes.
     *
     */
    bool doUpgradeV3ToV4(const ConnectionString& configLoc,
                         const VersionType& lastVersionInfo,
                         string* errMsg)
    {
        string dummy;
        if (!errMsg) errMsg = &dummy;

        verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_NoEpochVersion);

        if (lastVersionInfo.isUpgradeIdSet() && lastVersionInfo.getUpgradeId().isSet()) {

            //
            // Another upgrade failed, so cleanup may be necessary
            //

            BSONObj lastUpgradeState = lastVersionInfo.getUpgradeState();

            bool inCriticalSection;
            if (!FieldParser::extract(lastUpgradeState,
                                      inCriticalSectionField,
                                      &inCriticalSection,
                                      errMsg))
            {

                *errMsg = stream() << "problem reading previous upgrade state" << causedBy(errMsg);

                return false;
            }

            if (inCriticalSection) {

                // Manual intervention is needed here.  Somehow our upgrade didn't get applied
                // consistently across config servers.

                *errMsg = cannotCleanupMessage;

                return false;
            }

            if (!_cleanupUpgradeState(configLoc, lastVersionInfo.getUpgradeId(), errMsg)) {
                
                // If we can't cleanup the old upgrade state, the user might have done it for us,
                // not a fatal problem (we'll just end up with extra collections).
                
                warning() << "could not cleanup previous upgrade state" << causedBy(errMsg) << endl;
                *errMsg = "";
            }
        }

        //
        // Check the versions of other mongo processes in the cluster before upgrade.
        // We can't upgrade if there are active pre-v2.2 processes in the cluster
        //

        Status mongoVersionStatus = checkClusterMongoVersions(configLoc,
                                                              string(minMongoProcessVersion));

        if (!mongoVersionStatus.isOK()) {

            *errMsg = stream() << "cannot upgrade with pre-v" << minMongoProcessVersion
                               << " mongo processes active in the cluster"
                               << causedBy(mongoVersionStatus);

            return false;
        }

        VersionType newVersionInfo;
        lastVersionInfo.cloneTo(&newVersionInfo);

        // Set our upgrade id and state
        OID upgradeId = OID::gen();
        newVersionInfo.setUpgradeId(upgradeId);
        newVersionInfo.setUpgradeState(BSONObj());

        // Write our upgrade id and state
        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                *errMsg = stream() << "could not initialize version info for upgrade"
                                   << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // First lock all collection namespaces that exist
        //

        OwnedPointerMap<string, CollectionType> ownedCollections;
        const map<string, CollectionType*>& collections = ownedCollections.map();

        Status findCollectionsStatus = findAllCollectionsV3(configLoc, &ownedCollections);

        if (!findCollectionsStatus.isOK()) {

            *errMsg = stream() << "could not read collections from config server"
                               << causedBy(findCollectionsStatus);

            return false;
        }

        //
        // Acquire locks for all sharded collections
        // Something that didn't involve getting thousands of locks would be better.
        //

        OwnedPointerVector<ScopedDistributedLock> collectionLocks;

        log() << "acquiring locks for " << collections.size() << " sharded collections..." << endl;
        
        // WARNING - this string is used programmatically when forcing locks, be careful when
        // changing!
        // TODO: Add programmatic "why" field to lock collection
        string lockMessage = str::stream() << "ensuring epochs for config upgrade"
                                           << " (" << upgradeId.toString() << ")";
        
        if (!_acquireAllCollectionLocks(configLoc,
                                        collections,
                                        lockMessage,
                                        20 * 60 * 1000,
                                        &collectionLocks,
                                        errMsg))
        {

            *errMsg = stream() << "could not acquire all namespace locks for upgrade" 
                               << " (" << upgradeId.toString() << ")"
                               << causedBy(errMsg);

            return false;
        }

        // We are now preventing all splits and migrates for all sharded collections

        // Get working and backup suffixes
        string workingSuffix = genWorkingSuffix(upgradeId);
        string backupSuffix = genBackupSuffix(upgradeId);

        log() << "copying collection and chunk metadata to working and backup collections..."
              << endl;

        // Get a backup and working copy of the config.collections and config.chunks collections

        Status copyStatus = copyFrozenCollection(configLoc,
                                                 CollectionType::ConfigNS,
                                                 CollectionType::ConfigNS + workingSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                               << (CollectionType::ConfigNS + workingSuffix)
                               << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          CollectionType::ConfigNS,
                                          CollectionType::ConfigNS + backupSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                               << (CollectionType::ConfigNS + backupSuffix) << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          ChunkType::ConfigNS,
                                          ChunkType::ConfigNS + workingSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                               << (ChunkType::ConfigNS + workingSuffix) << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          ChunkType::ConfigNS,
                                          ChunkType::ConfigNS + backupSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                               << (ChunkType::ConfigNS + backupSuffix) << causedBy(copyStatus);

            return false;
        }

        //
        // Go through sharded collections one-by-one and add epochs where missing
        //

        for (map<string, CollectionType*>::const_iterator it = collections.begin();
                it != collections.end(); ++it)
        {
            // Create a copy so that we can change the epoch later
            CollectionType collection;
            it->second->cloneTo(&collection);

            log() << "checking epochs for " << collection.getNS() << " collection..." << endl;

            OID epoch = collection.getEpoch();

            //
            // Go through chunks to find epoch if we haven't found it or to verify epoch is the same
            //

            OwnedPointerVector<ChunkType> ownedChunks;
            const vector<ChunkType*>& chunks = ownedChunks.vector();

            Status findChunksStatus = findAllChunks(configLoc, collection.getNS(), &ownedChunks);

            if (!findChunksStatus.isOK()) {

                *errMsg = stream() << "could not read chunks from config server"
                                   << causedBy(findChunksStatus);

                return false;
            }

            for (vector<ChunkType*>::const_iterator chunkIt = chunks.begin();
                    chunkIt != chunks.end(); ++chunkIt)
            {
                const ChunkType& chunk = *(*chunkIt);

                // If our chunk epoch is set and doesn't match
                if (epoch.isSet() && chunk.getVersion().epoch().isSet()
                    && chunk.getVersion().epoch() != epoch)
                {

                    *errMsg = stream() << "chunk epoch for " << chunk.toString() << " in "
                                       << collection.getNS() << " does not match found epoch "
                                       << epoch;

                    return false;
                }
                else if (!epoch.isSet() && chunk.getVersion().epoch().isSet()) {
                    epoch = chunk.getVersion().epoch();
                }
            }

            //
            // Write collection epoch if needed
            //

            if (!collection.getEpoch().isSet()) {

                OID newEpoch = OID::gen();

                log() << "writing new epoch " << newEpoch << " for " << collection.getNS()
                      << " collection..." << endl;

                scoped_ptr<ScopedDbConnection> connPtr;

                try {
                    connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                    ScopedDbConnection& conn = *connPtr;

                    conn->update(CollectionType::ConfigNS + workingSuffix,
                                 BSON(CollectionType::ns(collection.getNS())),
                                 BSON("$set" << BSON(CollectionType::DEPRECATED_lastmodEpoch(newEpoch))));
                    _checkGLE(conn);
                }
                catch (const DBException& e) {

                    *errMsg = stream() << "could not write a new epoch for " << collection.getNS()
                                       << causedBy(e);

                    return false;
                }

                connPtr->done();
                collection.setEpoch(newEpoch);
            }

            epoch = collection.getEpoch();
            verify(epoch.isSet());

            //
            // Now write verified epoch to all chunks
            //

            log() << "writing epoch " << epoch << " for " << chunks.size() << " chunks in "
                  << collection.getNS() << " collection..." << endl;

            {
                scoped_ptr<ScopedDbConnection> connPtr;

                try {
                    connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                    ScopedDbConnection& conn = *connPtr;

                    // Multi-update of all chunks
                    conn->update(ChunkType::ConfigNS + workingSuffix,
                                 BSON(ChunkType::ns(collection.getNS())),
                                 BSON("$set" << BSON(ChunkType::DEPRECATED_epoch(epoch))),
                                 false,
                                 true); // multi
                    _checkGLE(conn);
                }
                catch (const DBException& e) {

                    *errMsg = stream() << "could not write a new epoch " << epoch.toString()
                                       << " for chunks in " << collection.getNS() << causedBy(e);

                    return false;
                }

                connPtr->done();
            }
        }

        //
        // Paranoid verify the collection writes
        //

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                // Find collections with no epochs
                BSONObj emptyDoc =
                        conn->findOne(CollectionType::ConfigNS + workingSuffix,
                                      BSON("$unset" << BSON(CollectionType::DEPRECATED_lastmodEpoch() << 1)));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "collection " << emptyDoc
                                       << " is still missing epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find collections with empty epochs
                emptyDoc = conn->findOne(CollectionType::ConfigNS + workingSuffix,
                                         BSON(CollectionType::DEPRECATED_lastmodEpoch(OID())));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "collection " << emptyDoc
                                       << " still has empty epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find chunks with no epochs
                emptyDoc =
                        conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                      BSON("$unset" << BSON(ChunkType::DEPRECATED_epoch() << 1)));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "chunk " << emptyDoc
                                       << " is still missing epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find chunks with empty epochs
                emptyDoc = conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                         BSON(ChunkType::DEPRECATED_epoch(OID())));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "chunk " << emptyDoc
                                       << " still has empty epoch after config upgrade";

                    connPtr->done();
                    return false;
                }
            }
            catch (const DBException& e) {

                *errMsg = stream() << "could not verify epoch writes" << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // Double check that our collections haven't changed
        //

        Status idCheckStatus = checkIdsTheSame(configLoc,
                                               CollectionType::ConfigNS,
                                               CollectionType::ConfigNS + workingSuffix);

        if (!idCheckStatus.isOK()) {

            *errMsg = stream() << CollectionType::ConfigNS
                               << " was modified while working on upgrade"
                               << causedBy(idCheckStatus);

            return false;
        }

        idCheckStatus = checkIdsTheSame(configLoc,
                                        ChunkType::ConfigNS,
                                        ChunkType::ConfigNS + workingSuffix);

        if (!idCheckStatus.isOK()) {

            *errMsg = stream() << ChunkType::ConfigNS << " was modified while working on upgrade"
                               << causedBy(idCheckStatus);

            return false;
        }

        //
        // ENTER CRITICAL SECTION
        //

        newVersionInfo.setUpgradeState(BSON(inCriticalSectionField(true)));

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                // No cleanup message here since we're not sure if we wrote or not, and
                // not dangerous either way except to prevent further updates (at which point
                // the message is printed)
                *errMsg = stream()
                        << "could not update version info to enter critical update section"
                        << causedBy(e);

                return false;
            }

            // AT THIS POINT ANY FAILURE REQUIRES MANUAL INTERVENTION!
            connPtr->done();
        }

        log() << "entered critical section for config upgrade" << endl;

        Status overwriteStatus = overwriteCollection(configLoc,
                                                     CollectionType::ConfigNS + workingSuffix,
                                                     CollectionType::ConfigNS);

        if (!overwriteStatus.isOK()) {

            error() << cleanupMessage << endl;
            *errMsg = stream() << "could not overwrite collection " << CollectionType::ConfigNS
                               << " with working collection "
                               << (CollectionType::ConfigNS + workingSuffix)
                               << causedBy(overwriteStatus);

            return false;
        }

        overwriteStatus = overwriteCollection(configLoc,
                                              ChunkType::ConfigNS + workingSuffix,
                                              ChunkType::ConfigNS);

        if (!overwriteStatus.isOK()) {

            error() << cleanupMessage << endl;
            *errMsg = stream() << "could not overwrite collection " << ChunkType::ConfigNS
                               << " with working collection "
                               << (ChunkType::ConfigNS + workingSuffix)
                               << causedBy(overwriteStatus);

            return false;
        }

        //
        // Finally update the version to latest and add clusterId to version
        //

        OID newClusterId = OID::gen();

        // Note: hardcoded versions, since this is a very particular upgrade
        // Note: DO NOT CLEAR the config version unless bumping the minCompatibleVersion,
        // we want to save the excludes that were set.

        newVersionInfo.setMinCompatibleVersion(UpgradeHistory_NoEpochVersion);
        newVersionInfo.setCurrentVersion(UpgradeHistory_MandatoryEpochVersion);
        newVersionInfo.setClusterId(newClusterId);

        // Leave critical section
        newVersionInfo.unsetUpgradeId();
        newVersionInfo.unsetUpgradeState();

        log() << "writing new version info and clusterId " << newClusterId << "..." << endl;

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(UpgradeHistory_NoEpochVersion)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                error() << cleanupMessage << endl;
                *errMsg = stream() << "could not write new version info "
                                   << "and exit critical upgrade section" << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // END CRITICAL SECTION
        //

        return true;
    }
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> allShards;
    Status status = getAllShards(txn, &allShards);
    if (!status.isOK()) {
        return status;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run
    stdx::chrono::seconds waitFor(2);
    MONGO_FAIL_POINT_BLOCK(setDropCollDistLockWait, customWait) {
        const BSONObj& data = customWait.getData();
        waitFor = stdx::chrono::seconds(data["waitForSecs"].numberInt());
    }
    const stdx::chrono::milliseconds lockTryInterval(500);
    auto scopedDistLock = getDistLockManager()->lock(ns.ns(), "drop", waitFor, lockTryInterval);
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    std::map<string, BSONObj> errors;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto dropResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));

        if (!dropResult.isOK()) {
            return dropResult.getStatus();
        }

        auto dropStatus = getStatusFromCommandResult(dropResult.getValue());
        if (!dropStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound) {
                continue;
            }

            errors.emplace(shardEntry.getHost(), dropResult.getValue());
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status result = remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(ns);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    result = updateCollection(txn, ns.ns(), coll);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    for (const auto& shardEntry : allShards) {
        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            grid.shardRegistry()->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
            ns,
            ChunkVersion::DROPPED(),
            true);

        auto ssvResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", ssv.toBSON());

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = getStatusFromCommandResult(ssvResult.getValue());
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = getStatusFromCommandResult(unsetShardingStatus.getValue());
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> allShards;
    Status status = getAllShards(txn, &allShards);
    if (!status.isOK()) {
        return status;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run
    auto scopedDistLock = getDistLockManager()->lock(ns.ns(), "drop");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    std::map<string, BSONObj> errors;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto dropResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));

        if (!dropResult.isOK()) {
            return dropResult.getStatus();
        }

        auto dropStatus = getStatusFromCommandResult(dropResult.getValue());
        if (!dropStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound) {
                continue;
            }

            errors.emplace(shardEntry.getHost(), dropResult.getValue());
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status result = remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(ns);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    result = updateCollection(txn, ns.ns(), coll);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    // We just called updateCollection above and this would have advanced the config op time, so use
    // the latest value. On the MongoD side, we need to load the latest config metadata, which
    // indicates that the collection was dropped.
    const ChunkVersionAndOpTime droppedVersion(ChunkVersion::DROPPED(),
                                               grid.shardRegistry()->getConfigOpTime());

    for (const auto& shardEntry : allShards) {
        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            grid.shardRegistry()->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
            ns,
            droppedVersion,
            true);

        auto ssvResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", ssv.toBSON());

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = getStatusFromCommandResult(ssvResult.getValue());
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = getStatusFromCommandResult(unsetShardingStatus.getValue());
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}