void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ) { verify( candidateChunks ); // // 1. Check whether there is any sharded collection to be balanced by querying // the ShardsNS::collections collection // auto_ptr<DBClientCursor> cursor = conn.query(CollectionType::ConfigNS, BSONObj()); if ( NULL == cursor.get() ) { warning() << "could not query " << CollectionType::ConfigNS << " while trying to balance" << endl; return; } vector< string > collections; while ( cursor->more() ) { BSONObj col = cursor->nextSafe(); // sharded collections will have a shard "key". if ( ! col[CollectionType::keyPattern()].eoo() && ! col[CollectionType::noBalance()].trueValue() ){ collections.push_back( col[CollectionType::ns()].String() ); } else if( col[CollectionType::noBalance()].trueValue() ){ LOG(1) << "not balancing collection " << col[CollectionType::ns()].String() << ", explicitly disabled" << endl; } } cursor.reset(); if ( collections.empty() ) { LOG(1) << "no collections to balance" << endl; return; } // // 2. Get a list of all the shards that are participating in this balance round // along with any maximum allowed quotas and current utilization. We get the // latter by issuing db.serverStatus() (mem.mapped) to all shards. // // TODO: skip unresponsive shards and mark information as stale. // ShardInfoMap shardInfo; Status loadStatus = DistributionStatus::populateShardInfoMap(&shardInfo); if (!loadStatus.isOK()) { warning() << "failed to load shard metadata" << causedBy(loadStatus) << endl; return; } if (shardInfo.size() < 2) { LOG(1) << "can't balance without more active shards" << endl; return; } OCCASIONALLY warnOnMultiVersion( shardInfo ); // // 3. For each collection, check if the balancing policy recommends moving anything around. // for (vector<string>::const_iterator it = collections.begin(); it != collections.end(); ++it ) { const string& ns = *it; OwnedPointerMap<string, OwnedPointerVector<ChunkType> > shardToChunksMap; cursor = conn.query(ChunkType::ConfigNS, QUERY(ChunkType::ns(ns)).sort(ChunkType::min())); set<BSONObj> allChunkMinimums; while ( cursor->more() ) { BSONObj chunkDoc = cursor->nextSafe().getOwned(); auto_ptr<ChunkType> chunk(new ChunkType()); string errmsg; if (!chunk->parseBSON(chunkDoc, &errmsg)) { error() << "bad chunk format for " << chunkDoc << ": " << errmsg << endl; return; } allChunkMinimums.insert(chunk->getMin().getOwned()); OwnedPointerVector<ChunkType>*& chunkList = shardToChunksMap.mutableMap()[chunk->getShard()]; if (chunkList == NULL) { chunkList = new OwnedPointerVector<ChunkType>(); } chunkList->mutableVector().push_back(chunk.release()); } cursor.reset(); if (shardToChunksMap.map().empty()) { LOG(1) << "skipping empty collection (" << ns << ")"; continue; } for (ShardInfoMap::const_iterator i = shardInfo.begin(); i != shardInfo.end(); ++i) { // this just makes sure there is an entry in shardToChunksMap for every shard OwnedPointerVector<ChunkType>*& chunkList = shardToChunksMap.mutableMap()[i->first]; if (chunkList == NULL) { chunkList = new OwnedPointerVector<ChunkType>(); } } DistributionStatus status(shardInfo, shardToChunksMap.map()); // load tags Status result = clusterCreateIndex(TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::min() << 1), true, // unique WriteConcernOptions::AllConfigs, NULL); if ( !result.isOK() ) { warning() << "could not create index tags_1_min_1: " << result.reason() << endl; continue; } cursor = conn.query(TagsType::ConfigNS, QUERY(TagsType::ns(ns)).sort(TagsType::min())); vector<TagRange> ranges; while ( cursor->more() ) { BSONObj tag = cursor->nextSafe(); TagRange tr(tag[TagsType::min()].Obj().getOwned(), tag[TagsType::max()].Obj().getOwned(), tag[TagsType::tag()].String()); ranges.push_back(tr); uassert(16356, str::stream() << "tag ranges not valid for: " << ns, status.addTagRange(tr) ); } cursor.reset(); DBConfigPtr cfg = grid.getDBConfig( ns ); if ( !cfg ) { warning() << "could not load db config to balance " << ns << " collection" << endl; continue; } // This line reloads the chunk manager once if this process doesn't know the collection // is sharded yet. ChunkManagerPtr cm = cfg->getChunkManagerIfExists( ns, true ); if ( !cm ) { warning() << "could not load chunks to balance " << ns << " collection" << endl; continue; } // loop through tags to make sure no chunk spans tags; splits on tag min. for all chunks bool didAnySplits = false; for ( unsigned i = 0; i < ranges.size(); i++ ) { BSONObj min = ranges[i].min; min = cm->getShardKey().extendRangeBound( min, false ); if ( allChunkMinimums.count( min ) > 0 ) continue; didAnySplits = true; log() << "ns: " << ns << " need to split on " << min << " because there is a range there" << endl; ChunkPtr c = cm->findIntersectingChunk( min ); vector<BSONObj> splitPoints; splitPoints.push_back( min ); BSONObj res; if ( !c->multiSplit( splitPoints, res ) ) { error() << "split failed: " << res << endl; } else { LOG(1) << "split worked: " << res << endl; } break; } if ( didAnySplits ) { // state change, just wait till next round continue; } CandidateChunk* p = _policy->balance( ns, status, _balancedLastTime ); if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) ); } }
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn, const RecordId& oldLocation, const Snapshotted<BSONObj>& oldDoc, const BSONObj& newDoc, bool enforceQuota, bool indexesAffected, OpDebug* debug, oplogUpdateEntryArgs& args) { { auto status = checkValidation(txn, newDoc); if (!status.isOK()) { if (_validationLevel == STRICT_V) { return status; } // moderate means we have to check the old doc auto oldDocStatus = checkValidation(txn, oldDoc.value()); if (oldDocStatus.isOK()) { // transitioning from good -> bad is not ok return status; } // bad -> bad is ok in moderate mode } } dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX)); invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId()); if (_needCappedLock) { // X-lock the metadata resource for this capped collection until the end of the WUOW. This // prevents the primary from executing with more concurrency than secondaries. // See SERVER-21646. Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X}; } SnapshotId sid = txn->recoveryUnit()->getSnapshotId(); BSONElement oldId = oldDoc.value()["_id"]; if (!oldId.eoo() && (oldId != newDoc["_id"])) return StatusWith<RecordId>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596); // The MMAPv1 storage engine implements capped collections in a way that does not allow records // to grow beyond their original size. If MMAPv1 part of a replicaset with storage engines that // do not have this limitation, replication could result in errors, so it is necessary to set a // uniform rule here. Similarly, it is not sufficient to disallow growing records, because this // happens when secondaries roll back an update shrunk a record. Exactly replicating legacy // MMAPv1 behavior would require padding shrunk documents on all storage engines. Instead forbid // all size changes. const auto oldSize = oldDoc.value().objsize(); if (_recordStore->isCapped() && oldSize != newDoc.objsize()) return {ErrorCodes::CannotGrowDocumentInCappedNamespace, str::stream() << "Cannot change the size of a document in a capped collection: " << oldSize << " != " << newDoc.objsize()}; // At the end of this step, we will have a map of UpdateTickets, one per index, which // represent the index updates needed to be done, based on the changes between oldDoc and // newDoc. OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexCatalogEntry* entry = ii.catalogEntry(descriptor); IndexAccessMethod* iam = ii.accessMethod(descriptor); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, oldDoc.value(), newDoc, oldLocation, options, updateTicket, entry->getFilterExpression()); if (!ret.isOK()) { return StatusWith<RecordId>(ret); } } } // This can call back into Collection::recordStoreGoingToMove. If that happens, the old // object is removed from all indexes. StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this); if (!newLocation.isOK()) { return newLocation; } // At this point, the old object may or may not still be indexed, depending on if it was // moved. If the object did move, we need to add the new location to all indexes. if (newLocation.getValue() != oldLocation) { if (debug) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } std::vector<BsonRecord> bsonRecords; BsonRecord bsonRecord = {newLocation.getValue(), &newDoc}; bsonRecords.push_back(bsonRecord); Status s = _indexCatalog.indexRecords(txn, bsonRecords); if (!s.isOK()) return StatusWith<RecordId>(s); invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; } // Object did not move. We update each index with each respective UpdateTicket. if (debug) debug->keyUpdates = 0; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = ii.accessMethod(descriptor); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if (!ret.isOK()) return StatusWith<RecordId>(ret); if (debug) debug->keyUpdates += updatedKeys; } } invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; }
/** * Upgrade v3 to v4 described here. * * This upgrade takes a config server without collection epochs (potentially) and adds * epochs to all mongo processes. * */ bool doUpgradeV3ToV4(const ConnectionString& configLoc, const VersionType& lastVersionInfo, string* errMsg) { string dummy; if (!errMsg) errMsg = &dummy; verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_NoEpochVersion); if (lastVersionInfo.isUpgradeIdSet() && lastVersionInfo.getUpgradeId().isSet()) { // // Another upgrade failed, so cleanup may be necessary // BSONObj lastUpgradeState = lastVersionInfo.getUpgradeState(); bool inCriticalSection; if (!FieldParser::extract(lastUpgradeState, inCriticalSectionField, &inCriticalSection, errMsg)) { *errMsg = stream() << "problem reading previous upgrade state" << causedBy(errMsg); return false; } if (inCriticalSection) { // Manual intervention is needed here. Somehow our upgrade didn't get applied // consistently across config servers. *errMsg = cannotCleanupMessage; return false; } if (!_cleanupUpgradeState(configLoc, lastVersionInfo.getUpgradeId(), errMsg)) { // If we can't cleanup the old upgrade state, the user might have done it for us, // not a fatal problem (we'll just end up with extra collections). warning() << "could not cleanup previous upgrade state" << causedBy(errMsg) << endl; *errMsg = ""; } } // // Check the versions of other mongo processes in the cluster before upgrade. // We can't upgrade if there are active pre-v2.2 processes in the cluster // Status mongoVersionStatus = checkClusterMongoVersions(configLoc, string(minMongoProcessVersion)); if (!mongoVersionStatus.isOK()) { *errMsg = stream() << "cannot upgrade with pre-v" << minMongoProcessVersion << " mongo processes active in the cluster" << causedBy(mongoVersionStatus); return false; } VersionType newVersionInfo; lastVersionInfo.cloneTo(&newVersionInfo); // Set our upgrade id and state OID upgradeId = OID::gen(); newVersionInfo.setUpgradeId(upgradeId); newVersionInfo.setUpgradeState(BSONObj()); // Write our upgrade id and state { scoped_ptr<ScopedDbConnection> connPtr; try { connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30)); ScopedDbConnection& conn = *connPtr; verify(newVersionInfo.isValid(NULL)); conn->update(VersionType::ConfigNS, BSON("_id" << 1 << VersionType::version_DEPRECATED(3)), newVersionInfo.toBSON()); _checkGLE(conn); } catch (const DBException& e) { *errMsg = stream() << "could not initialize version info for upgrade" << causedBy(e); return false; } connPtr->done(); } // // First lock all collection namespaces that exist // OwnedPointerMap<string, CollectionType> ownedCollections; const map<string, CollectionType*>& collections = ownedCollections.map(); Status findCollectionsStatus = findAllCollectionsV3(configLoc, &ownedCollections); if (!findCollectionsStatus.isOK()) { *errMsg = stream() << "could not read collections from config server" << causedBy(findCollectionsStatus); return false; } // // Acquire locks for all sharded collections // Something that didn't involve getting thousands of locks would be better. // OwnedPointerVector<ScopedDistributedLock> collectionLocks; log() << "acquiring locks for " << collections.size() << " sharded collections..." << endl; // WARNING - this string is used programmatically when forcing locks, be careful when // changing! // TODO: Add programmatic "why" field to lock collection string lockMessage = str::stream() << "ensuring epochs for config upgrade" << " (" << upgradeId.toString() << ")"; if (!_acquireAllCollectionLocks(configLoc, collections, lockMessage, 20 * 60 * 1000, &collectionLocks, errMsg)) { *errMsg = stream() << "could not acquire all namespace locks for upgrade" << " (" << upgradeId.toString() << ")" << causedBy(errMsg); return false; } // We are now preventing all splits and migrates for all sharded collections // Get working and backup suffixes string workingSuffix = genWorkingSuffix(upgradeId); string backupSuffix = genBackupSuffix(upgradeId); log() << "copying collection and chunk metadata to working and backup collections..." << endl; // Get a backup and working copy of the config.collections and config.chunks collections Status copyStatus = copyFrozenCollection(configLoc, CollectionType::ConfigNS, CollectionType::ConfigNS + workingSuffix); if (!copyStatus.isOK()) { *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to " << (CollectionType::ConfigNS + workingSuffix) << causedBy(copyStatus); return false; } copyStatus = copyFrozenCollection(configLoc, CollectionType::ConfigNS, CollectionType::ConfigNS + backupSuffix); if (!copyStatus.isOK()) { *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to " << (CollectionType::ConfigNS + backupSuffix) << causedBy(copyStatus); return false; } copyStatus = copyFrozenCollection(configLoc, ChunkType::ConfigNS, ChunkType::ConfigNS + workingSuffix); if (!copyStatus.isOK()) { *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to " << (ChunkType::ConfigNS + workingSuffix) << causedBy(copyStatus); return false; } copyStatus = copyFrozenCollection(configLoc, ChunkType::ConfigNS, ChunkType::ConfigNS + backupSuffix); if (!copyStatus.isOK()) { *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to " << (ChunkType::ConfigNS + backupSuffix) << causedBy(copyStatus); return false; } // // Go through sharded collections one-by-one and add epochs where missing // for (map<string, CollectionType*>::const_iterator it = collections.begin(); it != collections.end(); ++it) { // Create a copy so that we can change the epoch later CollectionType collection; it->second->cloneTo(&collection); log() << "checking epochs for " << collection.getNS() << " collection..." << endl; OID epoch = collection.getEpoch(); // // Go through chunks to find epoch if we haven't found it or to verify epoch is the same // OwnedPointerVector<ChunkType> ownedChunks; const vector<ChunkType*>& chunks = ownedChunks.vector(); Status findChunksStatus = findAllChunks(configLoc, collection.getNS(), &ownedChunks); if (!findChunksStatus.isOK()) { *errMsg = stream() << "could not read chunks from config server" << causedBy(findChunksStatus); return false; } for (vector<ChunkType*>::const_iterator chunkIt = chunks.begin(); chunkIt != chunks.end(); ++chunkIt) { const ChunkType& chunk = *(*chunkIt); // If our chunk epoch is set and doesn't match if (epoch.isSet() && chunk.getVersion().epoch().isSet() && chunk.getVersion().epoch() != epoch) { *errMsg = stream() << "chunk epoch for " << chunk.toString() << " in " << collection.getNS() << " does not match found epoch " << epoch; return false; } else if (!epoch.isSet() && chunk.getVersion().epoch().isSet()) { epoch = chunk.getVersion().epoch(); } } // // Write collection epoch if needed // if (!collection.getEpoch().isSet()) { OID newEpoch = OID::gen(); log() << "writing new epoch " << newEpoch << " for " << collection.getNS() << " collection..." << endl; scoped_ptr<ScopedDbConnection> connPtr; try { connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30)); ScopedDbConnection& conn = *connPtr; conn->update(CollectionType::ConfigNS + workingSuffix, BSON(CollectionType::ns(collection.getNS())), BSON("$set" << BSON(CollectionType::DEPRECATED_lastmodEpoch(newEpoch)))); _checkGLE(conn); } catch (const DBException& e) { *errMsg = stream() << "could not write a new epoch for " << collection.getNS() << causedBy(e); return false; } connPtr->done(); collection.setEpoch(newEpoch); } epoch = collection.getEpoch(); verify(epoch.isSet()); // // Now write verified epoch to all chunks // log() << "writing epoch " << epoch << " for " << chunks.size() << " chunks in " << collection.getNS() << " collection..." << endl; { scoped_ptr<ScopedDbConnection> connPtr; try { connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30)); ScopedDbConnection& conn = *connPtr; // Multi-update of all chunks conn->update(ChunkType::ConfigNS + workingSuffix, BSON(ChunkType::ns(collection.getNS())), BSON("$set" << BSON(ChunkType::DEPRECATED_epoch(epoch))), false, true); // multi _checkGLE(conn); } catch (const DBException& e) { *errMsg = stream() << "could not write a new epoch " << epoch.toString() << " for chunks in " << collection.getNS() << causedBy(e); return false; } connPtr->done(); } } // // Paranoid verify the collection writes // { scoped_ptr<ScopedDbConnection> connPtr; try { connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30)); ScopedDbConnection& conn = *connPtr; // Find collections with no epochs BSONObj emptyDoc = conn->findOne(CollectionType::ConfigNS + workingSuffix, BSON("$unset" << BSON(CollectionType::DEPRECATED_lastmodEpoch() << 1))); if (!emptyDoc.isEmpty()) { *errMsg = stream() << "collection " << emptyDoc << " is still missing epoch after config upgrade"; connPtr->done(); return false; } // Find collections with empty epochs emptyDoc = conn->findOne(CollectionType::ConfigNS + workingSuffix, BSON(CollectionType::DEPRECATED_lastmodEpoch(OID()))); if (!emptyDoc.isEmpty()) { *errMsg = stream() << "collection " << emptyDoc << " still has empty epoch after config upgrade"; connPtr->done(); return false; } // Find chunks with no epochs emptyDoc = conn->findOne(ChunkType::ConfigNS + workingSuffix, BSON("$unset" << BSON(ChunkType::DEPRECATED_epoch() << 1))); if (!emptyDoc.isEmpty()) { *errMsg = stream() << "chunk " << emptyDoc << " is still missing epoch after config upgrade"; connPtr->done(); return false; } // Find chunks with empty epochs emptyDoc = conn->findOne(ChunkType::ConfigNS + workingSuffix, BSON(ChunkType::DEPRECATED_epoch(OID()))); if (!emptyDoc.isEmpty()) { *errMsg = stream() << "chunk " << emptyDoc << " still has empty epoch after config upgrade"; connPtr->done(); return false; } } catch (const DBException& e) { *errMsg = stream() << "could not verify epoch writes" << causedBy(e); return false; } connPtr->done(); } // // Double check that our collections haven't changed // Status idCheckStatus = checkIdsTheSame(configLoc, CollectionType::ConfigNS, CollectionType::ConfigNS + workingSuffix); if (!idCheckStatus.isOK()) { *errMsg = stream() << CollectionType::ConfigNS << " was modified while working on upgrade" << causedBy(idCheckStatus); return false; } idCheckStatus = checkIdsTheSame(configLoc, ChunkType::ConfigNS, ChunkType::ConfigNS + workingSuffix); if (!idCheckStatus.isOK()) { *errMsg = stream() << ChunkType::ConfigNS << " was modified while working on upgrade" << causedBy(idCheckStatus); return false; } // // ENTER CRITICAL SECTION // newVersionInfo.setUpgradeState(BSON(inCriticalSectionField(true))); { scoped_ptr<ScopedDbConnection> connPtr; try { connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30)); ScopedDbConnection& conn = *connPtr; verify(newVersionInfo.isValid(NULL)); conn->update(VersionType::ConfigNS, BSON("_id" << 1 << VersionType::version_DEPRECATED(3)), newVersionInfo.toBSON()); _checkGLE(conn); } catch (const DBException& e) { // No cleanup message here since we're not sure if we wrote or not, and // not dangerous either way except to prevent further updates (at which point // the message is printed) *errMsg = stream() << "could not update version info to enter critical update section" << causedBy(e); return false; } // AT THIS POINT ANY FAILURE REQUIRES MANUAL INTERVENTION! connPtr->done(); } log() << "entered critical section for config upgrade" << endl; Status overwriteStatus = overwriteCollection(configLoc, CollectionType::ConfigNS + workingSuffix, CollectionType::ConfigNS); if (!overwriteStatus.isOK()) { error() << cleanupMessage << endl; *errMsg = stream() << "could not overwrite collection " << CollectionType::ConfigNS << " with working collection " << (CollectionType::ConfigNS + workingSuffix) << causedBy(overwriteStatus); return false; } overwriteStatus = overwriteCollection(configLoc, ChunkType::ConfigNS + workingSuffix, ChunkType::ConfigNS); if (!overwriteStatus.isOK()) { error() << cleanupMessage << endl; *errMsg = stream() << "could not overwrite collection " << ChunkType::ConfigNS << " with working collection " << (ChunkType::ConfigNS + workingSuffix) << causedBy(overwriteStatus); return false; } // // Finally update the version to latest and add clusterId to version // OID newClusterId = OID::gen(); // Note: hardcoded versions, since this is a very particular upgrade // Note: DO NOT CLEAR the config version unless bumping the minCompatibleVersion, // we want to save the excludes that were set. newVersionInfo.setMinCompatibleVersion(UpgradeHistory_NoEpochVersion); newVersionInfo.setCurrentVersion(UpgradeHistory_MandatoryEpochVersion); newVersionInfo.setClusterId(newClusterId); // Leave critical section newVersionInfo.unsetUpgradeId(); newVersionInfo.unsetUpgradeState(); log() << "writing new version info and clusterId " << newClusterId << "..." << endl; { scoped_ptr<ScopedDbConnection> connPtr; try { connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30)); ScopedDbConnection& conn = *connPtr; verify(newVersionInfo.isValid(NULL)); conn->update(VersionType::ConfigNS, BSON("_id" << 1 << VersionType::version_DEPRECATED(UpgradeHistory_NoEpochVersion)), newVersionInfo.toBSON()); _checkGLE(conn); } catch (const DBException& e) { error() << cleanupMessage << endl; *errMsg = stream() << "could not write new version info " << "and exit critical upgrade section" << causedBy(e); return false; } connPtr->done(); } // // END CRITICAL SECTION // return true; }
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn, const RecordId& oldLocation, const Snapshotted<BSONObj>& oldDoc, const BSONObj& newDoc, bool enforceQuota, bool indexesAffected, OpDebug* debug, oplogUpdateEntryArgs& args) { { auto status = checkValidation(txn, newDoc); if (!status.isOK()) { if (_validationLevel == STRICT_V) { return status; } // moderate means we have to check the old doc auto oldDocStatus = checkValidation(txn, oldDoc.value()); if (oldDocStatus.isOK()) { // transitioning from good -> bad is not ok return status; } // bad -> bad is ok in moderate mode } } dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX)); invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId()); SnapshotId sid = txn->recoveryUnit()->getSnapshotId(); BSONElement oldId = oldDoc.value()["_id"]; if (!oldId.eoo() && (oldId != newDoc["_id"])) return StatusWith<RecordId>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596); // At the end of this step, we will have a map of UpdateTickets, one per index, which // represent the index updates needed to be done, based on the changes between oldDoc and // newDoc. OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexCatalogEntry* entry = ii.catalogEntry(descriptor); IndexAccessMethod* iam = ii.accessMethod(descriptor); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, oldDoc.value(), newDoc, oldLocation, options, updateTicket, entry->getFilterExpression()); if (!ret.isOK()) { return StatusWith<RecordId>(ret); } } } // This can call back into Collection::recordStoreGoingToMove. If that happens, the old // object is removed from all indexes. StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this); if (!newLocation.isOK()) { return newLocation; } // At this point, the old object may or may not still be indexed, depending on if it was // moved. If the object did move, we need to add the new location to all indexes. if (newLocation.getValue() != oldLocation) { if (debug) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue()); if (!s.isOK()) return StatusWith<RecordId>(s); invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; } // Object did not move. We update each index with each respective UpdateTicket. if (debug) debug->keyUpdates = 0; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = ii.accessMethod(descriptor); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if (!ret.isOK()) return StatusWith<RecordId>(ret); if (debug) debug->keyUpdates += updatedKeys; } } invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; }
StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn, const DiskLoc& oldLocation, const BSONObj& objNew, bool enforceQuota, OpDebug* debug ) { BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson(); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; BSONElement newId = objNew["_id"]; if ( oldId != newId ) return StatusWith<DiskLoc>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596 ); } if ( ns().coll() == "system.users" ) { // XXX - andy and spencer think this should go away now V2UserDocumentParser parser; Status s = parser.checkValidUserDocument(objNew); if ( !s.isOK() ) return StatusWith<DiskLoc>( s ); } /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket ); if ( !ret.isOK() ) { return StatusWith<DiskLoc>( ret ); } } // this can callback into Collection::recordStoreGoingToMove StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn, oldLocation, objNew.objdata(), objNew.objsize(), _enforceQuota( enforceQuota ), this ); if ( !newLocation.isOK() ) { return newLocation; } _infoCache.notifyOfWriteOp(); if ( newLocation.getValue() != oldLocation ) { if ( debug ) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } _indexCatalog.indexRecord(txn, objNew, newLocation.getValue()); return newLocation; } if ( debug ) debug->keyUpdates = 0; ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if ( !ret.isOK() ) return StatusWith<DiskLoc>( ret ); if ( debug ) debug->keyUpdates += updatedKeys; } // Broadcast the mutation so that query results stay correct. _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION); return newLocation; }
StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation, const BSONObj& objNew, bool enforceQuota, OpDebug* debug ) { Record* oldRecord = _recordStore->recordFor( oldLocation ); BSONObj objOld( oldRecord->accessed()->data() ); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; BSONElement newId = objNew["_id"]; if ( oldId != newId ) return StatusWith<DiskLoc>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596 ); } if ( ns().coll() == "system.users" ) { // XXX - andy and spencer think this should go away now V2UserDocumentParser parser; Status s = parser.checkValidUserDocument(objNew); if ( !s.isOK() ) return StatusWith<DiskLoc>( s ); } /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || ignoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options, updateTicket ); if ( !ret.isOK() ) { return StatusWith<DiskLoc>( ret ); } } if ( oldRecord->netLength() < objNew.objsize() ) { // doesn't fit, have to move to new location if ( _details->isCapped() ) return StatusWith<DiskLoc>( ErrorCodes::InternalError, "failing update: objects in a capped ns cannot grow", 10003 ); moveCounter.increment(); _details->paddingTooSmall(); // unindex old record, don't delete // this way, if inserting new doc fails, we can re-index this one _cursorCache.invalidateDocument(oldLocation, INVALIDATION_DELETION); _indexCatalog.unindexRecord( objOld, oldLocation, true ); if ( debug ) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } StatusWith<DiskLoc> loc = _insertDocument( objNew, enforceQuota ); if ( loc.isOK() ) { // insert successful, now lets deallocate the old location // remember its already unindexed _recordStore->deleteRecord( oldLocation ); } else { // new doc insert failed, so lets re-index the old document and location _indexCatalog.indexRecord( objOld, oldLocation ); } return loc; } _infoCache.notifyOfWriteOp(); _details->paddingFits(); if ( debug ) debug->keyUpdates = 0; ii = _indexCatalog.getIndexIterator( true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); int64_t updatedKeys; Status ret = iam->update(*updateTickets.mutableMap()[descriptor], &updatedKeys); if ( !ret.isOK() ) return StatusWith<DiskLoc>( ret ); if ( debug ) debug->keyUpdates += updatedKeys; } // Broadcast the mutation so that query results stay correct. _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION); // update in place int sz = objNew.objsize(); memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz); return StatusWith<DiskLoc>( oldLocation ); }
StatusWith<RecordId> Collection::updateDocument( OperationContext* txn, const RecordId& oldLocation, const BSONObj& objNew, bool enforceQuota, OpDebug* debug ) { BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).releaseToBson(); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; BSONElement newId = objNew["_id"]; if ( oldId != newId ) return StatusWith<RecordId>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596 ); } /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ // At the end of this step, we will have a map of UpdateTickets, one per index, which // represent the index updates needed to be done, based on the changes between objOld and // objNew. OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket ); if ( !ret.isOK() ) { return StatusWith<RecordId>( ret ); } } // This can call back into Collection::recordStoreGoingToMove. If that happens, the old // object is removed from all indexes. StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn, oldLocation, objNew.objdata(), objNew.objsize(), _enforceQuota( enforceQuota ), this ); if ( !newLocation.isOK() ) { return newLocation; } // At this point, the old object may or may not still be indexed, depending on if it was // moved. _infoCache.notifyOfWriteOp(); // If the object did move, we need to add the new location to all indexes. if ( newLocation.getValue() != oldLocation ) { if ( debug ) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } Status s = _indexCatalog.indexRecord(txn, objNew, newLocation.getValue()); if (!s.isOK()) return StatusWith<RecordId>(s); return newLocation; } // Object did not move. We update each index with each respective UpdateTicket. if ( debug ) debug->keyUpdates = 0; ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if ( !ret.isOK() ) return StatusWith<RecordId>( ret ); if ( debug ) debug->keyUpdates += updatedKeys; } // Broadcast the mutation so that query results stay correct. _cursorCache.invalidateDocument(txn, oldLocation, INVALIDATION_MUTATION); return newLocation; }