Ejemplo n.º 1
0
    /**
     * Runs a single balancing round and collects the chunk moves it recommends.
     *
     * The round proceeds in three steps:
     *   1. Read CollectionType::ConfigNS through 'conn' and keep every collection that has a
     *      shard key pattern and does not have the noBalance flag set.
     *   2. Load the shard metadata via DistributionStatus::populateShardInfoMap().
     *   3. For each kept collection, read its chunks and tag ranges, split at most one chunk
     *      that straddles a tag range minimum, and otherwise ask the balancing policy for a
     *      candidate chunk to move.
     *
     * The function returns early (leaving whatever was already appended in place) when a
     * config query cannot be issued, when there is nothing to balance, when shard metadata
     * cannot be loaded, when fewer than two shards are known, or when a chunk document fails
     * to parse.
     *
     * @param conn connection used for all config collection queries in this round.
     * @param candidateChunks output vector, must not be NULL (checked with verify()). At most
     *        one entry per collection is appended.
     *
     * Note: an invalid tag range trips uassert(16356).
     */
    void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) {
        verify( candidateChunks );

        //
        // 1. Check whether there is any sharded collection to be balanced by querying
        // the CollectionType::ConfigNS collection
        //

        auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj());

        if ( NULL == cursor.get() ) {
            warning() << "could not query " << CollectionType::ConfigNS
                      << " while trying to balance" << endl;
            return;
        }

        vector< string > collections;
        while ( cursor->more() ) {
            BSONObj col = cursor->nextSafe();

            // sharded collections will have a shard "key".
            if ( ! col[CollectionType::keyPattern()].eoo() &&
                 ! col[CollectionType::noBalance()].trueValue() ){
                collections.push_back( col[CollectionType::ns()].String() );
            }
            else if( col[CollectionType::noBalance()].trueValue() ){
                LOG(1) << "not balancing collection " << col[CollectionType::ns()].String()
                       << ", explicitly disabled" << endl;
            }

        }
        cursor.reset();

        if ( collections.empty() ) {
            LOG(1) << "no collections to balance" << endl;
            return;
        }

        //
        // 2. Get a list of all the shards that are participating in this balance round
        // along with any maximum allowed quotas and current utilization. We get the
        // latter by issuing db.serverStatus() (mem.mapped) to all shards.
        //
        // TODO: skip unresponsive shards and mark information as stale.
        //

        ShardInfoMap shardInfo;
        Status loadStatus = DistributionStatus::populateShardInfoMap(&shardInfo);

        if (!loadStatus.isOK()) {
            warning() << "failed to load shard metadata" << causedBy(loadStatus) << endl;
            return;
        }

        // Moving chunks needs at least a source and a destination shard.
        if (shardInfo.size() < 2) {
            LOG(1) << "can't balance without more active shards" << endl;
            return;
        }

        OCCASIONALLY warnOnMultiVersion( shardInfo );

        //
        // 3. For each collection, check if the balancing policy recommends moving anything around.
        //

        for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) {
            const string& ns = *it;

            // Chunks of this collection grouped by the shard that currently holds them.
            OwnedPointerMap<string, OwnedPointerVector<ChunkType> > shardToChunksMap;
            cursor = conn.query(ChunkType::ConfigNS,
                                QUERY(ChunkType::ns(ns)).sort(ChunkType::min()));

            // Same guard as for the first query above: a failed query yields a NULL cursor,
            // which must not be dereferenced.
            if ( NULL == cursor.get() ) {
                warning() << "could not query " << ChunkType::ConfigNS
                          << " while trying to balance " << ns << endl;
                return;
            }

            // Lower bounds of all chunks; used below to detect tag ranges that start in the
            // middle of a chunk.
            set<BSONObj> allChunkMinimums;

            while ( cursor->more() ) {
                BSONObj chunkDoc = cursor->nextSafe().getOwned();

                auto_ptr<ChunkType> chunk(new ChunkType());
                string errmsg;
                if (!chunk->parseBSON(chunkDoc, &errmsg)) {
                    error() << "bad chunk format for " << chunkDoc
                            << ": " << errmsg << endl;
                    return;
                }

                allChunkMinimums.insert(chunk->getMin().getOwned());
                OwnedPointerVector<ChunkType>*& chunkList =
                        shardToChunksMap.mutableMap()[chunk->getShard()];

                if (chunkList == NULL) {
                    chunkList = new OwnedPointerVector<ChunkType>();
                }

                // Ownership of the parsed chunk moves into the per-shard list.
                chunkList->mutableVector().push_back(chunk.release());
            }
            cursor.reset();

            if (shardToChunksMap.map().empty()) {
                LOG(1) << "skipping empty collection (" << ns << ")";
                continue;
            }

            for (ShardInfoMap::const_iterator i = shardInfo.begin(); i != shardInfo.end(); ++i) {
                // this just makes sure there is an entry in shardToChunksMap for every shard
                OwnedPointerVector<ChunkType>*& chunkList =
                        shardToChunksMap.mutableMap()[i->first];

                if (chunkList == NULL) {
                    chunkList = new OwnedPointerVector<ChunkType>();
                }
            }

            DistributionStatus status(shardInfo, shardToChunksMap.map());

            // load tags
            Status result = clusterCreateIndex(TagsType::ConfigNS,
                                               BSON(TagsType::ns() << 1 << TagsType::min() << 1),
                                               true, // unique
                                               WriteConcernOptions::AllConfigs,
                                               NULL);

            if ( !result.isOK() ) {
                warning() << "could not create index tags_1_min_1: " << result.reason() << endl;
                continue;
            }

            cursor = conn.query(TagsType::ConfigNS,
                                QUERY(TagsType::ns(ns)).sort(TagsType::min()));

            // Guard against a failed query before iterating the cursor.
            if ( NULL == cursor.get() ) {
                warning() << "could not query " << TagsType::ConfigNS
                          << " while trying to balance " << ns << endl;
                return;
            }

            vector<TagRange> ranges;

            while ( cursor->more() ) {
                BSONObj tag = cursor->nextSafe();
                TagRange tr(tag[TagsType::min()].Obj().getOwned(),
                            tag[TagsType::max()].Obj().getOwned(),
                            tag[TagsType::tag()].String());
                ranges.push_back(tr);
                uassert(16356,
                        str::stream() << "tag ranges not valid for: " << ns,
                        status.addTagRange(tr) );

            }
            cursor.reset();

            DBConfigPtr cfg = grid.getDBConfig( ns );
            if ( !cfg ) {
                warning() << "could not load db config to balance " << ns << " collection" << endl;
                continue;
            }

            // This line reloads the chunk manager once if this process doesn't know the collection
            // is sharded yet.
            ChunkManagerPtr cm = cfg->getChunkManagerIfExists( ns, true );
            if ( !cm ) {
                warning() << "could not load chunks to balance " << ns << " collection" << endl;
                continue;
            }

            // Loop through the tag ranges to make sure no chunk spans a tag boundary. The first
            // range minimum that is not already a chunk minimum triggers one split attempt, after
            // which the loop stops and this collection is left for the next round.
            bool didAnySplits = false;
            for ( unsigned i = 0; i < ranges.size(); i++ ) {
                BSONObj min = ranges[i].min;

                min = cm->getShardKey().extendRangeBound( min, false );

                if ( allChunkMinimums.count( min ) > 0 )
                    continue;

                // Set before the split is attempted, so a failed split also defers balancing
                // of this collection to the next round.
                didAnySplits = true;

                log() << "ns: " << ns << " need to split on "
                      << min << " because there is a range there" << endl;

                ChunkPtr c = cm->findIntersectingChunk( min );

                vector<BSONObj> splitPoints;
                splitPoints.push_back( min );

                BSONObj res;
                if ( !c->multiSplit( splitPoints, res ) ) {
                    error() << "split failed: " << res << endl;
                }
                else {
                    LOG(1) << "split worked: " << res << endl;
                }
                break;
            }

            if ( didAnySplits ) {
                // state change, just wait till next round
                continue;
            }

            // The policy returns NULL when it has nothing to recommend for this collection.
            CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime );
            if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) );
        }
    }
Ejemplo n.º 2
0
/**
 * Replaces the document stored at 'oldLocation' with 'newDoc' and keeps the indexes and the
 * op observer in sync.
 *
 * @param txn operation context; supplies the lock state and the recovery unit.
 * @param oldLocation record id of the document being replaced.
 * @param oldDoc the current document together with the snapshot id it was read at; that id
 *        must equal the recovery unit's current snapshot id (invariant).
 * @param newDoc the replacement document.
 * @param enforceQuota forwarded (through _enforceQuota) to the record store update.
 * @param indexesAffected when false, index validation and per-index updates are skipped.
 * @param debug optional; when non-null, nmoved / keyUpdates are maintained on it.
 * @param args passed to the op observer's onUpdate() after its 'ns' field has been filled in.
 *
 * @return the record id of the updated document (it differs from 'oldLocation' if the record
 *         store moved the record), or a non-OK status from document validation, the _id check,
 *         the capped-collection size check, index validation/update, or the record store.
 */
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto newDocValidation = checkValidation(txn, newDoc);
        if (!newDocValidation.isOK()) {
            // Strict level: an invalid new document is always rejected.
            // Otherwise (moderate): reject only a good -> bad transition, i.e. when the old
            // document passed validation. bad -> bad is tolerated.
            if (_validationLevel == STRICT_V || checkValidation(txn, oldDoc.value()).isOK()) {
                return newDocValidation;
            }
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    if (_needCappedLock) {
        // Take the metadata resource of this capped collection in MODE_X so that it stays
        // X-locked until the end of the WUOW; this keeps the primary from executing with more
        // concurrency than secondaries. See SERVER-21646.
        // NOTE(review): the ResourceLock is an unnamed temporary, so its destructor runs at the
        // end of this statement. Presumably the locker defers the actual release while a write
        // unit of work is open - confirm before touching this line.
        Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X};
    }

    // Remembered so we can assert below that the snapshot did not change underneath us.
    SnapshotId snapshotAtStart = txn->recoveryUnit()->getSnapshotId();

    // An update must never change the _id of an existing document.
    BSONElement previousId = oldDoc.value()["_id"];
    if (!previousId.eoo() && (previousId != newDoc["_id"])) {
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);
    }

    // MMAPv1 implements capped collections in a way that does not allow records to grow beyond
    // their original size. If MMAPv1 is part of a replica set together with storage engines
    // that lack this limitation, replication could result in errors, so a uniform rule is
    // needed here. Forbidding only growth is not sufficient, because growth also happens when
    // secondaries roll back an update that shrunk a record. Exactly replicating the legacy
    // MMAPv1 behavior would require padding shrunk documents on all storage engines; instead,
    // all size changes are forbidden.
    const auto previousSize = oldDoc.value().objsize();
    if (_recordStore->isCapped() && previousSize != newDoc.objsize()) {
        return {ErrorCodes::CannotGrowDocumentInCappedNamespace,
                str::stream() << "Cannot change the size of a document in a capped collection: "
                              << previousSize << " != " << newDoc.objsize()};
    }

    // Phase 1: build one UpdateTicket per index describing the index changes implied by going
    // from oldDoc to newDoc. The tickets are owned by the map.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        for (IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
             indexIt.more();) {
            IndexDescriptor* descriptor = indexIt.next();
            IndexCatalogEntry* catalogEntry = indexIt.catalogEntry(descriptor);
            IndexAccessMethod* accessMethod = indexIt.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            // Duplicates are allowed unless this is the _id index or a unique index; the
            // replication coordinator may additionally ask for uniqueness to be ignored.
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);

            // Hand the ticket to the owning map before it is filled in.
            UpdateTicket* ticket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = ticket;

            Status validated = accessMethod->validateUpdate(txn,
                                                            oldDoc.value(),
                                                            newDoc,
                                                            oldLocation,
                                                            options,
                                                            ticket,
                                                            catalogEntry->getFilterExpression());
            if (!validated.isOK()) {
                return StatusWith<RecordId>(validated);
            }
        }
    }

    // Phase 2: write the new document. This can call back into
    // Collection::recordStoreGoingToMove; if that happens, the old object is removed from all
    // indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // Phase 3: bring the indexes up to date. Whether the old object is still indexed at this
    // point depends on whether the record moved.
    if (newLocation.getValue() != oldLocation) {
        // The record moved: index the document under its new location.
        if (debug) {
            // nmoved defaults to -1 rather than 0
            if (debug->nmoved != -1) {
                debug->nmoved += 1;
            } else {
                debug->nmoved = 1;
            }
        }

        BsonRecord movedRecord = {newLocation.getValue(), &newDoc};
        std::vector<BsonRecord> movedRecords;
        movedRecords.push_back(movedRecord);

        Status indexed = _indexCatalog.indexRecords(txn, movedRecords);
        if (!indexed.isOK()) {
            return StatusWith<RecordId>(indexed);
        }
    } else {
        // The record stayed in place: apply each index's previously computed UpdateTicket.
        if (debug) {
            debug->keyUpdates = 0;
        }

        if (indexesAffected) {
            for (IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
                 indexIt.more();) {
                IndexDescriptor* descriptor = indexIt.next();
                IndexAccessMethod* accessMethod = indexIt.accessMethod(descriptor);

                int64_t numKeysUpdated;
                Status applied = accessMethod->update(
                    txn, *updateTickets.mutableMap()[descriptor], &numKeysUpdated);
                if (!applied.isOK()) {
                    return StatusWith<RecordId>(applied);
                }
                if (debug) {
                    debug->keyUpdates += numKeysUpdated;
                }
            }
        }
    }

    // Common tail for both paths: the snapshot must be unchanged, then notify the op observer.
    invariant(snapshotAtStart == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Ejemplo n.º 3
0
    /**
     * Upgrade v3 to v4 described here.
     *
     * This upgrade takes a config server without collection epochs (potentially) and adds
     * epochs to all mongo processes.
     *
     */
    bool doUpgradeV3ToV4(const ConnectionString& configLoc,
                         const VersionType& lastVersionInfo,
                         string* errMsg)
    {
        string dummy;
        if (!errMsg) errMsg = &dummy;

        verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_NoEpochVersion);

        if (lastVersionInfo.isUpgradeIdSet() && lastVersionInfo.getUpgradeId().isSet()) {

            //
            // Another upgrade failed, so cleanup may be necessary
            //

            BSONObj lastUpgradeState = lastVersionInfo.getUpgradeState();

            bool inCriticalSection;
            if (!FieldParser::extract(lastUpgradeState,
                                      inCriticalSectionField,
                                      &inCriticalSection,
                                      errMsg))
            {

                *errMsg = stream() << "problem reading previous upgrade state" << causedBy(errMsg);

                return false;
            }

            if (inCriticalSection) {

                // Manual intervention is needed here.  Somehow our upgrade didn't get applied
                // consistently across config servers.

                *errMsg = cannotCleanupMessage;

                return false;
            }

            if (!_cleanupUpgradeState(configLoc, lastVersionInfo.getUpgradeId(), errMsg)) {
                
                // If we can't cleanup the old upgrade state, the user might have done it for us,
                // not a fatal problem (we'll just end up with extra collections).
                
                warning() << "could not cleanup previous upgrade state" << causedBy(errMsg) << endl;
                *errMsg = "";
            }
        }

        //
        // Check the versions of other mongo processes in the cluster before upgrade.
        // We can't upgrade if there are active pre-v2.2 processes in the cluster
        //

        Status mongoVersionStatus = checkClusterMongoVersions(configLoc,
                                                              string(minMongoProcessVersion));

        if (!mongoVersionStatus.isOK()) {

            *errMsg = stream() << "cannot upgrade with pre-v" << minMongoProcessVersion
                               << " mongo processes active in the cluster"
                               << causedBy(mongoVersionStatus);

            return false;
        }

        VersionType newVersionInfo;
        lastVersionInfo.cloneTo(&newVersionInfo);

        // Set our upgrade id and state
        OID upgradeId = OID::gen();
        newVersionInfo.setUpgradeId(upgradeId);
        newVersionInfo.setUpgradeState(BSONObj());

        // Write our upgrade id and state
        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                *errMsg = stream() << "could not initialize version info for upgrade"
                                   << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // First lock all collection namespaces that exist
        //

        OwnedPointerMap<string, CollectionType> ownedCollections;
        const map<string, CollectionType*>& collections = ownedCollections.map();

        Status findCollectionsStatus = findAllCollectionsV3(configLoc, &ownedCollections);

        if (!findCollectionsStatus.isOK()) {

            *errMsg = stream() << "could not read collections from config server"
                               << causedBy(findCollectionsStatus);

            return false;
        }

        //
        // Acquire locks for all sharded collections
        // Something that didn't involve getting thousands of locks would be better.
        //

        OwnedPointerVector<ScopedDistributedLock> collectionLocks;

        log() << "acquiring locks for " << collections.size() << " sharded collections..." << endl;
        
        // WARNING - this string is used programmatically when forcing locks, be careful when
        // changing!
        // TODO: Add programmatic "why" field to lock collection
        string lockMessage = str::stream() << "ensuring epochs for config upgrade"
                                           << " (" << upgradeId.toString() << ")";
        
        if (!_acquireAllCollectionLocks(configLoc,
                                        collections,
                                        lockMessage,
                                        20 * 60 * 1000,
                                        &collectionLocks,
                                        errMsg))
        {

            *errMsg = stream() << "could not acquire all namespace locks for upgrade" 
                               << " (" << upgradeId.toString() << ")"
                               << causedBy(errMsg);

            return false;
        }

        // We are now preventing all splits and migrates for all sharded collections

        // Get working and backup suffixes
        string workingSuffix = genWorkingSuffix(upgradeId);
        string backupSuffix = genBackupSuffix(upgradeId);

        log() << "copying collection and chunk metadata to working and backup collections..."
              << endl;

        // Get a backup and working copy of the config.collections and config.chunks collections

        Status copyStatus = copyFrozenCollection(configLoc,
                                                 CollectionType::ConfigNS,
                                                 CollectionType::ConfigNS + workingSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                               << (CollectionType::ConfigNS + workingSuffix)
                               << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          CollectionType::ConfigNS,
                                          CollectionType::ConfigNS + backupSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                               << (CollectionType::ConfigNS + backupSuffix) << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          ChunkType::ConfigNS,
                                          ChunkType::ConfigNS + workingSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                               << (ChunkType::ConfigNS + workingSuffix) << causedBy(copyStatus);

            return false;
        }

        copyStatus = copyFrozenCollection(configLoc,
                                          ChunkType::ConfigNS,
                                          ChunkType::ConfigNS + backupSuffix);

        if (!copyStatus.isOK()) {

            *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                               << (ChunkType::ConfigNS + backupSuffix) << causedBy(copyStatus);

            return false;
        }

        //
        // Go through sharded collections one-by-one and add epochs where missing
        //

        for (map<string, CollectionType*>::const_iterator it = collections.begin();
                it != collections.end(); ++it)
        {
            // Create a copy so that we can change the epoch later
            CollectionType collection;
            it->second->cloneTo(&collection);

            log() << "checking epochs for " << collection.getNS() << " collection..." << endl;

            OID epoch = collection.getEpoch();

            //
            // Go through chunks to find epoch if we haven't found it or to verify epoch is the same
            //

            OwnedPointerVector<ChunkType> ownedChunks;
            const vector<ChunkType*>& chunks = ownedChunks.vector();

            Status findChunksStatus = findAllChunks(configLoc, collection.getNS(), &ownedChunks);

            if (!findChunksStatus.isOK()) {

                *errMsg = stream() << "could not read chunks from config server"
                                   << causedBy(findChunksStatus);

                return false;
            }

            for (vector<ChunkType*>::const_iterator chunkIt = chunks.begin();
                    chunkIt != chunks.end(); ++chunkIt)
            {
                const ChunkType& chunk = *(*chunkIt);

                // If our chunk epoch is set and doesn't match
                if (epoch.isSet() && chunk.getVersion().epoch().isSet()
                    && chunk.getVersion().epoch() != epoch)
                {

                    *errMsg = stream() << "chunk epoch for " << chunk.toString() << " in "
                                       << collection.getNS() << " does not match found epoch "
                                       << epoch;

                    return false;
                }
                else if (!epoch.isSet() && chunk.getVersion().epoch().isSet()) {
                    epoch = chunk.getVersion().epoch();
                }
            }

            //
            // Write collection epoch if needed
            //

            if (!collection.getEpoch().isSet()) {

                OID newEpoch = OID::gen();

                log() << "writing new epoch " << newEpoch << " for " << collection.getNS()
                      << " collection..." << endl;

                scoped_ptr<ScopedDbConnection> connPtr;

                try {
                    connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                    ScopedDbConnection& conn = *connPtr;

                    conn->update(CollectionType::ConfigNS + workingSuffix,
                                 BSON(CollectionType::ns(collection.getNS())),
                                 BSON("$set" << BSON(CollectionType::DEPRECATED_lastmodEpoch(newEpoch))));
                    _checkGLE(conn);
                }
                catch (const DBException& e) {

                    *errMsg = stream() << "could not write a new epoch for " << collection.getNS()
                                       << causedBy(e);

                    return false;
                }

                connPtr->done();
                collection.setEpoch(newEpoch);
            }

            epoch = collection.getEpoch();
            verify(epoch.isSet());

            //
            // Now write verified epoch to all chunks
            //

            log() << "writing epoch " << epoch << " for " << chunks.size() << " chunks in "
                  << collection.getNS() << " collection..." << endl;

            {
                scoped_ptr<ScopedDbConnection> connPtr;

                try {
                    connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                    ScopedDbConnection& conn = *connPtr;

                    // Multi-update of all chunks
                    conn->update(ChunkType::ConfigNS + workingSuffix,
                                 BSON(ChunkType::ns(collection.getNS())),
                                 BSON("$set" << BSON(ChunkType::DEPRECATED_epoch(epoch))),
                                 false,
                                 true); // multi
                    _checkGLE(conn);
                }
                catch (const DBException& e) {

                    *errMsg = stream() << "could not write a new epoch " << epoch.toString()
                                       << " for chunks in " << collection.getNS() << causedBy(e);

                    return false;
                }

                connPtr->done();
            }
        }

        //
        // Paranoid verify the collection writes
        //

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                // Find collections with no epochs
                BSONObj emptyDoc =
                        conn->findOne(CollectionType::ConfigNS + workingSuffix,
                                      BSON("$unset" << BSON(CollectionType::DEPRECATED_lastmodEpoch() << 1)));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "collection " << emptyDoc
                                       << " is still missing epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find collections with empty epochs
                emptyDoc = conn->findOne(CollectionType::ConfigNS + workingSuffix,
                                         BSON(CollectionType::DEPRECATED_lastmodEpoch(OID())));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "collection " << emptyDoc
                                       << " still has empty epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find chunks with no epochs
                emptyDoc =
                        conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                      BSON("$unset" << BSON(ChunkType::DEPRECATED_epoch() << 1)));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "chunk " << emptyDoc
                                       << " is still missing epoch after config upgrade";

                    connPtr->done();
                    return false;
                }

                // Find chunks with empty epochs
                emptyDoc = conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                         BSON(ChunkType::DEPRECATED_epoch(OID())));

                if (!emptyDoc.isEmpty()) {

                    *errMsg = stream() << "chunk " << emptyDoc
                                       << " still has empty epoch after config upgrade";

                    connPtr->done();
                    return false;
                }
            }
            catch (const DBException& e) {

                *errMsg = stream() << "could not verify epoch writes" << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // Double check that our collections haven't changed
        //

        Status idCheckStatus = checkIdsTheSame(configLoc,
                                               CollectionType::ConfigNS,
                                               CollectionType::ConfigNS + workingSuffix);

        if (!idCheckStatus.isOK()) {

            *errMsg = stream() << CollectionType::ConfigNS
                               << " was modified while working on upgrade"
                               << causedBy(idCheckStatus);

            return false;
        }

        idCheckStatus = checkIdsTheSame(configLoc,
                                        ChunkType::ConfigNS,
                                        ChunkType::ConfigNS + workingSuffix);

        if (!idCheckStatus.isOK()) {

            *errMsg = stream() << ChunkType::ConfigNS << " was modified while working on upgrade"
                               << causedBy(idCheckStatus);

            return false;
        }

        //
        // ENTER CRITICAL SECTION
        //

        newVersionInfo.setUpgradeState(BSON(inCriticalSectionField(true)));

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                // No cleanup message here since we're not sure if we wrote or not, and
                // not dangerous either way except to prevent further updates (at which point
                // the message is printed)
                *errMsg = stream()
                        << "could not update version info to enter critical update section"
                        << causedBy(e);

                return false;
            }

            // AT THIS POINT ANY FAILURE REQUIRES MANUAL INTERVENTION!
            connPtr->done();
        }

        log() << "entered critical section for config upgrade" << endl;

        Status overwriteStatus = overwriteCollection(configLoc,
                                                     CollectionType::ConfigNS + workingSuffix,
                                                     CollectionType::ConfigNS);

        if (!overwriteStatus.isOK()) {

            error() << cleanupMessage << endl;
            *errMsg = stream() << "could not overwrite collection " << CollectionType::ConfigNS
                               << " with working collection "
                               << (CollectionType::ConfigNS + workingSuffix)
                               << causedBy(overwriteStatus);

            return false;
        }

        overwriteStatus = overwriteCollection(configLoc,
                                              ChunkType::ConfigNS + workingSuffix,
                                              ChunkType::ConfigNS);

        if (!overwriteStatus.isOK()) {

            error() << cleanupMessage << endl;
            *errMsg = stream() << "could not overwrite collection " << ChunkType::ConfigNS
                               << " with working collection "
                               << (ChunkType::ConfigNS + workingSuffix)
                               << causedBy(overwriteStatus);

            return false;
        }

        //
        // Finally update the version to latest and add clusterId to version
        //

        OID newClusterId = OID::gen();

        // Note: hardcoded versions, since this is a very particular upgrade
        // Note: DO NOT CLEAR the config version unless bumping the minCompatibleVersion,
        // we want to save the excludes that were set.

        newVersionInfo.setMinCompatibleVersion(UpgradeHistory_NoEpochVersion);
        newVersionInfo.setCurrentVersion(UpgradeHistory_MandatoryEpochVersion);
        newVersionInfo.setClusterId(newClusterId);

        // Leave critical section
        newVersionInfo.unsetUpgradeId();
        newVersionInfo.unsetUpgradeState();

        log() << "writing new version info and clusterId " << newClusterId << "..." << endl;

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                verify(newVersionInfo.isValid(NULL));

                conn->update(VersionType::ConfigNS,
                             BSON("_id" << 1 << VersionType::version_DEPRECATED(UpgradeHistory_NoEpochVersion)),
                             newVersionInfo.toBSON());
                _checkGLE(conn);
            }
            catch (const DBException& e) {

                error() << cleanupMessage << endl;
                *errMsg = stream() << "could not write new version info "
                                   << "and exit critical upgrade section" << causedBy(e);

                return false;
            }

            connPtr->done();
        }

        //
        // END CRITICAL SECTION
        //

        return true;
    }
Ejemplo n.º 4
0
/**
 * Replaces the document stored at 'oldLocation' (whose current contents are 'oldDoc') with
 * 'newDoc' and brings the indexes in line with the new contents.
 *
 * 'newDoc' is first run through checkValidation(). If it fails, the update is refused when the
 * validation level is STRICT_V, or when the old document itself passes validation; an update
 * from an invalid document to another invalid document is let through.
 *
 * If the old document has an "_id" that differs from the one in 'newDoc', the call fails with
 * ErrorCodes::InternalError (13596). Any non-OK status from index validation, the record store
 * or the index writes is returned unchanged.
 *
 * On success returns the location of the document after the update, which differs from
 * 'oldLocation' when the record store relocated the record. 'indexesAffected' gates the
 * per-index validate/update passes, 'debug' (if non-null) has its nmoved / keyUpdates counters
 * adjusted, and 'args' gets its 'ns' filled in before being handed to the op observer.
 */
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto newDocValidation = checkValidation(txn, newDoc);
        if (!newDocValidation.isOK()) {
            // Strict level: every invalid document is rejected. Otherwise (moderate) the old
            // document is consulted, and only a good -> bad transition is rejected; bad -> bad
            // is allowed. The short-circuit keeps the old document unchecked in strict mode.
            const bool rejectUpdate = (_validationLevel == STRICT_V) ||
                checkValidation(txn, oldDoc.value()).isOK();
            if (rejectUpdate) {
                return newDocValidation;
            }
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    // Remembered so we can assert below that the snapshot did not change underneath us.
    SnapshotId snapshotAtEntry = txn->recoveryUnit()->getSnapshotId();

    // An existing _id may never be changed by an update.
    BSONElement previousId = oldDoc.value()["_id"];
    if (!previousId.eoo()) {
        if (previousId != newDoc["_id"]) {
            return StatusWith<RecordId>(
                ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);
        }
    }

    // Phase 1: build one UpdateTicket per index describing the index changes implied by going
    // from oldDoc to newDoc. The map owns the tickets.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> ticketsByIndex;
    if (indexesAffected) {
        for (IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
             indexIt.more();) {
            IndexDescriptor* desc = indexIt.next();
            IndexCatalogEntry* catalogEntry = indexIt.catalogEntry(desc);
            IndexAccessMethod* accessMethod = indexIt.accessMethod(desc);

            InsertDeleteOptions opts;
            opts.logIfError = false;

            // Duplicates are permitted unless this is the _id index or a unique index, and
            // even then when the replication coordinator says to ignore uniqueness.
            const bool uniquenessEnforced =
                KeyPattern::isIdKeyPattern(desc->keyPattern()) || desc->unique();
            opts.dupsAllowed = !uniquenessEnforced ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(desc);

            UpdateTicket* ticket = new UpdateTicket();
            ticketsByIndex.mutableMap()[desc] = ticket;

            Status validated = accessMethod->validateUpdate(txn,
                                                            oldDoc.value(),
                                                            newDoc,
                                                            oldLocation,
                                                            opts,
                                                            ticket,
                                                            catalogEntry->getFilterExpression());
            if (!validated.isOK()) {
                return StatusWith<RecordId>(validated);
            }
        }
    }

    // Phase 2: write the record. This can call back into Collection::recordStoreGoingToMove;
    // if that happens, the old object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);
    if (!newLocation.isOK()) {
        return newLocation;
    }

    // Phase 3: fix up the indexes. Whether the old object is still indexed at this point
    // depends on whether the record moved.
    if (newLocation.getValue() != oldLocation) {
        // Moved: index the document afresh under its new location.
        if (debug) {
            // nmoved defaults to -1 rather than 0
            debug->nmoved = (debug->nmoved == -1) ? 1 : debug->nmoved + 1;
        }

        Status indexed = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!indexed.isOK()) {
            return StatusWith<RecordId>(indexed);
        }
    } else {
        // Not moved: apply each index's UpdateTicket.
        if (debug) {
            debug->keyUpdates = 0;
        }

        if (indexesAffected) {
            for (IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
                 indexIt.more();) {
                IndexDescriptor* desc = indexIt.next();
                IndexAccessMethod* accessMethod = indexIt.accessMethod(desc);

                int64_t keysChanged;
                Status applied =
                    accessMethod->update(txn, *ticketsByIndex.mutableMap()[desc], &keysChanged);
                if (!applied.isOK()) {
                    return StatusWith<RecordId>(applied);
                }
                if (debug) {
                    debug->keyUpdates += keysChanged;
                }
            }
        }
    }

    // Common tail for both the moved and the in-place case: notify the op observer.
    invariant(snapshotAtEntry == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Ejemplo n.º 5
0
    /**
     * Overwrites the document stored at 'oldLocation' with 'objNew' and keeps the indexes
     * consistent with the new contents.
     *
     * Fails with ErrorCodes::InternalError (13596) if the stored document has an "_id" that
     * differs from the one in 'objNew', and with the parser's status if this collection is
     * "system.users" and 'objNew' is not a valid user document. A non-OK status from index
     * validation, from the record store, or from an index update is returned as-is.
     *
     * On success returns the document's location after the update; it differs from
     * 'oldLocation' when the record store relocated the record. 'debug', if non-null, has
     * its nmoved / keyUpdates counters adjusted.
     */
    StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn,
                                                    const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj storedDoc = _recordStore->dataFor( txn, oldLocation ).toBson();

        // An existing _id may never be changed by an update.
        if ( storedDoc.hasElement( "_id" ) ) {
            BSONElement storedId = storedDoc["_id"];
            BSONElement incomingId = objNew["_id"];
            if ( storedId != incomingId ) {
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
            }
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser userDocParser;
            Status userDocStatus = userDocParser.checkValidUserDocument( objNew );
            if ( !userDocStatus.isOK() ) {
                return StatusWith<DiskLoc>( userDocStatus );
            }
        }

        // Duplicate key check. The btree is descended twice: once here for validation and
        // once further below for the actual index writes. That is suboptimal, but doing it
        // the other way without rollbacks is pretty complicated.
        //
        // The map owns one UpdateTicket per index.
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> ticketsByIndex;
        IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator( txn, true );
        for ( ; indexIt.more(); ) {
            IndexDescriptor* desc = indexIt.next();
            IndexAccessMethod* accessMethod = _indexCatalog.getIndex( desc );

            InsertDeleteOptions opts;
            opts.logIfError = false;

            // Duplicates are permitted unless this is the _id index or a unique index, and
            // even then when the replication coordinator says to ignore uniqueness.
            const bool uniquenessEnforced =
                KeyPattern::isIdKeyPattern( desc->keyPattern() ) || desc->unique();
            opts.dupsAllowed = !uniquenessEnforced
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex( desc );

            UpdateTicket* ticket = new UpdateTicket();
            ticketsByIndex.mutableMap()[desc] = ticket;

            Status validated = accessMethod->validateUpdate( txn,
                                                             storedDoc,
                                                             objNew,
                                                             oldLocation,
                                                             opts,
                                                             ticket );
            if ( !validated.isOK() ) {
                return StatusWith<DiskLoc>( validated );
            }
        }

        // this can callback into Collection::recordStoreGoingToMove
        StatusWith<DiskLoc> newLocation =
            _recordStore->updateRecord( txn,
                                        oldLocation,
                                        objNew.objdata(),
                                        objNew.objsize(),
                                        _enforceQuota( enforceQuota ),
                                        this );
        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        _infoCache.notifyOfWriteOp();

        // Record was relocated: index the document under its new location and finish.
        if ( newLocation.getValue() != oldLocation ) {

            if ( debug ) {
                // nmoved defaults to -1 rather than 0
                debug->nmoved = ( debug->nmoved == -1 ) ? 1 : debug->nmoved + 1;
            }

            _indexCatalog.indexRecord( txn, objNew, newLocation.getValue() );

            return newLocation;
        }

        // Record stayed put: apply each index's ticket.
        if ( debug ) {
            debug->keyUpdates = 0;
        }

        indexIt = _indexCatalog.getIndexIterator( txn, true );
        for ( ; indexIt.more(); ) {
            IndexDescriptor* desc = indexIt.next();
            IndexAccessMethod* accessMethod = _indexCatalog.getIndex( desc );

            int64_t keysChanged;
            Status applied = accessMethod->update( txn,
                                                   *ticketsByIndex.mutableMap()[desc],
                                                   &keysChanged );
            if ( !applied.isOK() ) {
                return StatusWith<DiskLoc>( applied );
            }
            if ( debug ) {
                debug->keyUpdates += keysChanged;
            }
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument( oldLocation, INVALIDATION_MUTATION );

        return newLocation;
    }
Ejemplo n.º 6
0
    /**
     * Replaces the document stored at 'oldLocation' with 'objNew'.
     *
     * If the existing record is large enough (netLength() >= objNew.objsize()) the indexes
     * are updated and the new bytes are copied over the old record; 'oldLocation' is returned.
     * Otherwise the old document is unindexed, 'objNew' is inserted via _insertDocument() and
     * that call's result is returned; the old record is deleted only if the insert succeeded,
     * and is re-indexed if it failed.
     *
     * Errors:
     *  - ErrorCodes::InternalError (13596) if the stored document has an "_id" that differs
     *    from the one in 'objNew'.
     *  - the parser's status if the collection is "system.users" and 'objNew' is not a valid
     *    user document.
     *  - ErrorCodes::InternalError (10003) if the document would have to move and the
     *    collection is capped.
     *  - any non-OK status from index validation or from an index update.
     *
     * 'debug', if non-null, has its nmoved / keyUpdates counters adjusted.
     */
    StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        // View the currently stored record as a BSON object.
        Record* oldRecord = _recordStore->recordFor( oldLocation );
        BSONObj objOld( oldRecord->accessed()->data() );

        // An existing _id may never be changed by an update.
        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further
           below.  that is suboptimal, but it's pretty complicated to do it the other way without rollbacks...
        */
        // One UpdateTicket per index is created here and owned by the map; the tickets are
        // consumed by the in-place update loop further below.
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            // Duplicates are allowed unless this is the _id index or a unique index, and even
            // then when ignoreUniqueIndex() says so.
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || ignoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options, updateTicket );
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

        if ( oldRecord->netLength() < objNew.objsize() ) {
            // doesn't fit, have to move to new location

            if ( _details->isCapped() )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "failing update: objects in a capped ns cannot grow",
                                            10003 );

            // Bookkeeping for the move.
            // NOTE(review): paddingTooSmall() presumably adjusts the collection's padding
            // heuristic - confirm against its definition.
            moveCounter.increment();
            _details->paddingTooSmall();

            // unindex old record, don't delete
            // this way, if inserting new doc fails, we can re-index this one
            _cursorCache.invalidateDocument(oldLocation, INVALIDATION_DELETION);
            _indexCatalog.unindexRecord( objOld, oldLocation, true );

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            StatusWith<DiskLoc> loc = _insertDocument( objNew, enforceQuota );

            if ( loc.isOK() ) {
                // insert successful, now lets deallocate the old location
                // remember its already unindexed
                _recordStore->deleteRecord( oldLocation );
            }
            else {
                // new doc insert failed, so lets re-index the old document and location
                _indexCatalog.indexRecord( objOld, oldLocation );
            }

            // Either the new location or the insert failure is handed back to the caller.
            return loc;
        }

        // From here on the new document fits into the existing record.
        _infoCache.notifyOfWriteOp();
        _details->paddingFits();

        if ( debug )
            debug->keyUpdates = 0;

        // Apply the ticket prepared for each index. This happens before the record bytes are
        // overwritten; a failing index update returns without touching the record.
        ii = _indexCatalog.getIndexIterator( true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(*updateTickets.mutableMap()[descriptor], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

        //  update in place
        // NOTE(review): getDur().writingPtr() presumably registers the byte range with the
        // durability layer before it is written - confirm.
        int sz = objNew.objsize();
        memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz);

        return StatusWith<DiskLoc>( oldLocation );
    }
Ejemplo n.º 7
0
    /**
     * Overwrites the document stored at 'oldLocation' with 'objNew' and keeps the indexes
     * consistent with the new contents.
     *
     * Fails with ErrorCodes::InternalError (13596) if the stored document has an "_id" that
     * differs from the one in 'objNew'. A non-OK status from index validation, from the
     * record store, from re-indexing a moved document, or from an index update is returned
     * as-is.
     *
     * On success returns the document's location after the update; it differs from
     * 'oldLocation' when the record store relocated the record. 'debug', if non-null, has
     * its nmoved / keyUpdates counters adjusted.
     */
    StatusWith<RecordId> Collection::updateDocument( OperationContext* txn,
                                                    const RecordId& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj storedDoc = _recordStore->dataFor( txn, oldLocation ).releaseToBson();

        // An existing _id may never be changed by an update.
        if ( storedDoc.hasElement( "_id" ) ) {
            BSONElement storedId = storedDoc["_id"];
            BSONElement incomingId = objNew["_id"];
            if ( storedId != incomingId ) {
                return StatusWith<RecordId>( ErrorCodes::InternalError,
                                             "in Collection::updateDocument _id mismatch",
                                             13596 );
            }
        }

        // Duplicate key check. The btree is descended twice: once here for validation and
        // once further below for the actual index writes. That is suboptimal, but doing it
        // the other way without rollbacks is pretty complicated.
        //
        // When this loop is done the map holds (and owns) one UpdateTicket per index, each
        // describing the index changes implied by going from the stored document to objNew.
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> ticketsByIndex;
        IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator( txn, true );
        for ( ; indexIt.more(); ) {
            IndexDescriptor* desc = indexIt.next();
            IndexAccessMethod* accessMethod = _indexCatalog.getIndex( desc );

            InsertDeleteOptions opts;
            opts.logIfError = false;

            // Duplicates are permitted unless this is the _id index or a unique index, and
            // even then when the replication coordinator says to ignore uniqueness.
            const bool uniquenessEnforced =
                KeyPattern::isIdKeyPattern( desc->keyPattern() ) || desc->unique();
            opts.dupsAllowed = !uniquenessEnforced
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex( desc );

            UpdateTicket* ticket = new UpdateTicket();
            ticketsByIndex.mutableMap()[desc] = ticket;

            Status validated = accessMethod->validateUpdate( txn,
                                                             storedDoc,
                                                             objNew,
                                                             oldLocation,
                                                             opts,
                                                             ticket );
            if ( !validated.isOK() ) {
                return StatusWith<RecordId>( validated );
            }
        }

        // This can call back into Collection::recordStoreGoingToMove.  If that happens, the
        // old object is removed from all indexes.
        StatusWith<RecordId> newLocation =
            _recordStore->updateRecord( txn,
                                        oldLocation,
                                        objNew.objdata(),
                                        objNew.objsize(),
                                        _enforceQuota( enforceQuota ),
                                        this );
        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        // Whether the old object is still indexed at this point depends on whether the
        // record moved.

        _infoCache.notifyOfWriteOp();

        // Record was relocated: add the new location to all indexes and finish.
        if ( newLocation.getValue() != oldLocation ) {

            if ( debug ) {
                // nmoved defaults to -1 rather than 0
                debug->nmoved = ( debug->nmoved == -1 ) ? 1 : debug->nmoved + 1;
            }

            Status indexed = _indexCatalog.indexRecord( txn, objNew, newLocation.getValue() );
            if ( !indexed.isOK() ) {
                return StatusWith<RecordId>( indexed );
            }

            return newLocation;
        }

        // Record stayed put: apply each index's UpdateTicket.
        if ( debug ) {
            debug->keyUpdates = 0;
        }

        indexIt = _indexCatalog.getIndexIterator( txn, true );
        for ( ; indexIt.more(); ) {
            IndexDescriptor* desc = indexIt.next();
            IndexAccessMethod* accessMethod = _indexCatalog.getIndex( desc );

            int64_t keysChanged;
            Status applied = accessMethod->update( txn,
                                                   *ticketsByIndex.mutableMap()[desc],
                                                   &keysChanged );
            if ( !applied.isOK() ) {
                return StatusWith<RecordId>( applied );
            }
            if ( debug ) {
                debug->keyUpdates += keysChanged;
            }
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument( txn, oldLocation, INVALIDATION_MUTATION );

        return newLocation;
    }