Status _findAllCollections(const ConnectionString& configLoc,
                           bool optionalEpochs,
                           OwnedPointerMap<string, CollectionType>* collections) {
    scoped_ptr<ScopedDbConnection> connPtr;

    try {
        connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
        ScopedDbConnection& conn = *connPtr;

        scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(CollectionType::ConfigNS,
                                                                  Query())));

        while (cursor->more()) {
            BSONObj collDoc = cursor->nextSafe();

            CollectionType* coll = new CollectionType();
            string errMsg;
            coll->parseBSON(collDoc, &errMsg);

            // Needed for the v3 to v4 upgrade
            bool epochNotSet = !coll->isEpochSet() || !coll->getEpoch().isSet();
            if (optionalEpochs && epochNotSet) {
                // Set our epoch to something here, just to allow
                coll->setEpoch(OID::gen());
            }

            if (errMsg != "" || !coll->isValid(&errMsg)) {
                return Status(ErrorCodes::UnsupportedFormat,
                              stream() << "invalid collection " << collDoc
                                       << " read from the config server" << causedBy(errMsg));
            }

            if (coll->isDroppedSet() && coll->getDropped()) {
                continue;
            }

            if (optionalEpochs && epochNotSet) {
                coll->setEpoch(OID());
            }

            collections->mutableMap().insert(make_pair(coll->getNS(), coll));
        }
    } catch (const DBException& e) {
        return e.toStatus();
    }

    connPtr->done();
    return Status::OK();
}
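// A hypothetical caller sketch (not taken from the source): load all sharded collections with
// optional epochs, as the v3-to-v4 upgrade path does, and iterate the owned map. The helper
// name exampleListCollectionsV3 is an assumption; log(), warning(), and causedBy() are used as
// in the surrounding functions.
void exampleListCollectionsV3(const ConnectionString& configLoc) {
    OwnedPointerMap<string, CollectionType> ownedCollections;

    Status status = _findAllCollections(configLoc, true /* optionalEpochs */, &ownedCollections);
    if (!status.isOK()) {
        warning() << "could not load sharded collections" << causedBy(status) << endl;
        return;
    }

    const map<string, CollectionType*>& collections = ownedCollections.map();
    for (map<string, CollectionType*>::const_iterator it = collections.begin();
         it != collections.end();
         ++it) {
        log() << "found sharded collection " << it->first << endl;
    }
}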
void CollectionInfo::save(OperationContext* txn, const string& ns) {
    CollectionType coll;
    coll.setNs(NamespaceString{ns});

    if (_cm) {
        invariant(!_dropped);
        coll.setEpoch(_cm->getVersion().epoch());
        // TODO(schwerin): The following isn't really a date, but is stored as one in-memory and
        // in config.collections, as a historical oddity.
        coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(_cm->getVersion().toLong()));
        coll.setKeyPattern(_cm->getShardKeyPattern().toBSON());
        coll.setDefaultCollation(_cm->getDefaultCollation());
        coll.setUnique(_cm->isUnique());
    } else {
        invariant(_dropped);
        coll.setDropped(true);
        coll.setEpoch(ChunkVersion::DROPPED().epoch());
        coll.setUpdatedAt(Date_t::now());
    }

    uassertStatusOK(grid.catalogClient(txn)->updateCollection(txn, ns, coll));
    _dirty = false;
}
/**
 * Stores ranges for a particular collection and shard starting from some version
 */
void storeCollectionRanges( const NamespaceString& nss,
                            const string& shardName,
                            const vector<KeyRange>& ranges,
                            const ChunkVersion& startVersion ) {
    // Get key pattern from first range
    ASSERT_GREATER_THAN( ranges.size(), 0u );

    CollectionType coll;
    coll.setNS( nss.ns() );
    coll.setKeyPattern( ranges.begin()->keyPattern );
    coll.setEpoch( startVersion.epoch() );
    coll.setUpdatedAt( 1ULL );

    string errMsg;
    ASSERT( coll.isValid( &errMsg ) );

    DBDirectClient client(&_txn);

    client.update( CollectionType::ConfigNS,
                   BSON( CollectionType::ns( coll.getNS() ) ),
                   coll.toBSON(),
                   true,
                   false );

    ChunkVersion nextVersion = startVersion;
    for ( vector<KeyRange>::const_iterator it = ranges.begin(); it != ranges.end(); ++it ) {
        ChunkType chunk;
        // TODO: We should not rely on the serialized ns, minkey being unique in the future,
        // causes problems since it links string serialization to correctness.
        chunk.setName( Chunk::genID( nss, it->minKey ) );
        chunk.setShard( shardName );
        chunk.setNS( nss.ns() );
        chunk.setVersion( nextVersion );
        chunk.setMin( it->minKey );
        chunk.setMax( it->maxKey );
        nextVersion.incMajor();

        client.insert( ChunkType::ConfigNS, chunk.toBSON() );
    }
}
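// A hypothetical test usage sketch for storeCollectionRanges above (not from the source): seed
// two adjacent ranges on one shard, starting at version 1|0 with a freshly generated epoch.
// The helper name exampleStoreTwoRanges is an assumption, and KeyRange is assumed to take
// (ns, minKey, maxKey, keyPattern), matching the fields consumed above.
void exampleStoreTwoRanges() {
    const NamespaceString nss( "foo.bar" );
    const BSONObj keyPattern = BSON( "a" << 1 );

    vector<KeyRange> ranges;
    ranges.push_back( KeyRange( nss.ns(), BSON( "a" << MINKEY ), BSON( "a" << 0 ), keyPattern ) );
    ranges.push_back( KeyRange( nss.ns(), BSON( "a" << 0 ), BSON( "a" << MAXKEY ), keyPattern ) );

    // Produces two chunks for "shard0000" with versions 1|0 and 2|0, since the loop above
    // calls incMajor() after each insert
    storeCollectionRanges( nss, "shard0000", ranges, ChunkVersion( 1, 0, OID::gen() ) );
}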
void ShardingCatalogManager::shardCollection(OperationContext* opCtx,
                                             const NamespaceString& nss,
                                             const boost::optional<UUID> uuid,
                                             const ShardKeyPattern& fieldsAndOrder,
                                             const BSONObj& defaultCollation,
                                             bool unique,
                                             const vector<BSONObj>& splitPoints,
                                             bool isFromMapReduce,
                                             const ShardId& dbPrimaryShardId) {
    const auto shardRegistry = Grid::get(opCtx)->shardRegistry();

    const auto primaryShard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));

    // Fail if there are partially written chunks from a previous failed shardCollection.
    checkForExistingChunks(opCtx, nss);

    // Prior to 4.0.5, zones cannot be taken into account at collection sharding time, so ignore
    // them and let the balancer apply them later
    const std::vector<TagsType> treatAsNoZonesDefined;

    // Map/reduce with output to sharded collection ignores consistency checks and requires the
    // initial chunks to be spread across shards unconditionally
    const bool treatAsEmpty = isFromMapReduce;

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", nss.ns());
        if (uuid)
            uuid->appendToBuilder(&collectionDetail, "uuid");
        collectionDetail.append("empty", treatAsEmpty);
        collectionDetail.append("fromMapReduce", isFromMapReduce);
        collectionDetail.append("primary", primaryShard->toString());
        collectionDetail.append("numChunks", static_cast<int>(splitPoints.size() + 1));
        uassertStatusOK(ShardingLogging::get(opCtx)->logChangeChecked(
            opCtx,
            "shardCollection.start",
            nss.ns(),
            collectionDetail.obj(),
            ShardingCatalogClient::kMajorityWriteConcern));
    }

    // Construct the collection default collator.
    std::unique_ptr<CollatorInterface> defaultCollator;
    if (!defaultCollation.isEmpty()) {
        defaultCollator = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext())
                                              ->makeFromBSON(defaultCollation));
    }

    const auto initialChunks = InitialSplitPolicy::createFirstChunks(opCtx,
                                                                     nss,
                                                                     fieldsAndOrder,
                                                                     dbPrimaryShardId,
                                                                     splitPoints,
                                                                     treatAsNoZonesDefined,
                                                                     treatAsEmpty);

    InitialSplitPolicy::writeFirstChunksToConfig(opCtx, initialChunks);

    {
        CollectionType coll;
        coll.setNs(nss);
        if (uuid)
            coll.setUUID(*uuid);
        coll.setEpoch(initialChunks.collVersion().epoch());
        coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(initialChunks.collVersion().toLong()));
        coll.setKeyPattern(fieldsAndOrder.toBSON());
        coll.setDefaultCollation(defaultCollator ? defaultCollator->getSpec().toBSON() : BSONObj());
        coll.setUnique(unique);

        uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
            opCtx, nss, coll, true /*upsert*/));
    }

    auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));
    invariant(!shard->isConfig());

    // Tell the primary mongod to refresh its data
    SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
        shardRegistry->getConfigServerConnectionString(),
        dbPrimaryShardId,
        primaryShard->getConnString(),
        nss,
        initialChunks.collVersion(),
        true /* isAuthoritative */,
        true /* forceRefresh */);

    auto ssvResponse =
        shard->runCommandWithFixedRetryAttempts(opCtx,
                                                ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                                "admin",
                                                ssv.toBSON(),
                                                Shard::RetryPolicy::kIdempotent);
    auto status = ssvResponse.isOK() ? std::move(ssvResponse.getValue().commandStatus)
                                     : std::move(ssvResponse.getStatus());
    if (!status.isOK()) {
        warning() << "could not update initial version of " << nss.ns() << " on shard primary "
                  << dbPrimaryShardId << causedBy(redact(status));
    }

    ShardingLogging::get(opCtx)->logChange(
        opCtx,
        "shardCollection.end",
        nss.ns(),
        BSON("version" << initialChunks.collVersion().toString()),
        ShardingCatalogClient::kMajorityWriteConcern);
}
Status ShardingCatalogManager::dropCollection(OperationContext* opCtx, const NamespaceString& nss) {
    const Status logStatus =
        ShardingLogging::get(opCtx)->logChangeChecked(opCtx,
                                                      "dropCollection.start",
                                                      nss.ns(),
                                                      BSONObj(),
                                                      ShardingCatalogClient::kMajorityWriteConcern);
    if (!logStatus.isOK()) {
        return logStatus;
    }

    const auto catalogClient = Grid::get(opCtx)->catalogClient();
    const auto shardsStatus =
        catalogClient->getAllShards(opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!shardsStatus.isOK()) {
        return shardsStatus.getStatus();
    }
    vector<ShardType> allShards = std::move(shardsStatus.getValue().value);

    LOG(1) << "dropCollection " << nss.ns() << " started";

    const auto dropCommandBSON = [opCtx, &nss] {
        BSONObjBuilder builder;
        builder.append("drop", nss.coll());

        if (!opCtx->getWriteConcern().usedDefault) {
            builder.append(WriteConcernOptions::kWriteConcernField,
                           opCtx->getWriteConcern().toBSON());
        }

        return builder.obj();
    }();

    std::map<std::string, BSONObj> errors;
    auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
        if (!swShard.isOK()) {
            return swShard.getStatus();
        }

        const auto& shard = swShard.getValue();

        auto swDropResult = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            nss.db().toString(),
            dropCommandBSON,
            Shard::RetryPolicy::kIdempotent);

        if (!swDropResult.isOK()) {
            return swDropResult.getStatus().withContext(
                str::stream() << "Error dropping collection on shard " << shardEntry.getName());
        }

        auto& dropResult = swDropResult.getValue();

        auto dropStatus = std::move(dropResult.commandStatus);
        auto wcStatus = std::move(dropResult.writeConcernStatus);
        if (!dropStatus.isOK() || !wcStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound && wcStatus.isOK()) {
                // Generally getting NamespaceNotFound is okay to ignore as it simply means that
                // the collection has already been dropped or doesn't exist on this shard.
                // If, however, we get NamespaceNotFound but also have a write concern error then we
                // can't confirm whether the fact that the namespace doesn't exist is actually
                // committed. Thus we must still fail on NamespaceNotFound if there is also a write
                // concern error. This can happen if we call drop, it succeeds but with a write
                // concern error, then we retry the drop.
                continue;
            }

            errors.emplace(shardEntry.getHost(), std::move(dropResult.response));
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << nss.ns() << " shard data deleted";

    // Remove chunk data
    Status result =
        catalogClient->removeConfigDocuments(opCtx,
                                             ChunkType::ConfigNS,
                                             BSON(ChunkType::ns(nss.ns())),
                                             ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " chunk data deleted";

    // Remove tag data
    result = catalogClient->removeConfigDocuments(opCtx,
                                                  TagsType::ConfigNS,
                                                  BSON(TagsType::ns(nss.ns())),
                                                  ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " tag data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(nss);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(Grid::get(opCtx)->getNetwork()->now());

    const bool upsert = false;
    result = ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
        opCtx, nss, coll, upsert);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " collection marked as dropped";

    for (const auto& shardEntry : allShards) {
        auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
        if (!swShard.isOK()) {
            return swShard.getStatus();
        }

        const auto& shard = swShard.getValue();

        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            shardRegistry->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassert(28781, ConnectionString::parse(shardEntry.getHost())),
            nss,
            ChunkVersion::DROPPED(),
            true /* isAuthoritative */,
            true /* forceRefresh */);

        auto ssvResult = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            ssv.toBSON(),
            Shard::RetryPolicy::kIdempotent);

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = std::move(ssvResult.getValue().commandStatus);
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            BSON("unsetSharding" << 1),
            Shard::RetryPolicy::kIdempotent);

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = std::move(unsetShardingStatus.getValue().commandStatus);
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << nss.ns() << " completed";

    ShardingLogging::get(opCtx)->logChange(opCtx,
                                           "dropCollection",
                                           nss.ns(),
                                           BSONObj(),
                                           ShardingCatalogClient::kMajorityWriteConcern);

    return Status::OK();
}
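// A hypothetical config-server caller sketch (not from the source): invoke the dropCollection
// step above and surface failures to the command layer. The helper name and the sample
// namespace "test.foo" are assumptions; ShardingCatalogManager::get(opCtx) is assumed to be the
// usual accessor in this code base.
void exampleDropShardedCollection(OperationContext* opCtx) {
    const NamespaceString nss("test.foo");
    uassertStatusOK(ShardingCatalogManager::get(opCtx)->dropCollection(opCtx, nss));
}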
/**
 * Upgrade v3 to v4 described here.
 *
 * This upgrade takes a config server without collection epochs (potentially) and adds
 * epochs to all mongo processes.
 *
 */
bool doUpgradeV3ToV4(const ConnectionString& configLoc,
                     const VersionType& lastVersionInfo,
                     string* errMsg) {
    string dummy;
    if (!errMsg) errMsg = &dummy;

    verify(lastVersionInfo.getCurrentVersion() == UpgradeHistory_NoEpochVersion);

    if (lastVersionInfo.isUpgradeIdSet() && lastVersionInfo.getUpgradeId().isSet()) {

        //
        // Another upgrade failed, so cleanup may be necessary
        //

        BSONObj lastUpgradeState = lastVersionInfo.getUpgradeState();

        bool inCriticalSection;
        if (!FieldParser::extract(lastUpgradeState,
                                  inCriticalSectionField,
                                  &inCriticalSection,
                                  errMsg)) {
            *errMsg = stream() << "problem reading previous upgrade state" << causedBy(errMsg);
            return false;
        }

        if (inCriticalSection) {
            // Manual intervention is needed here. Somehow our upgrade didn't get applied
            // consistently across config servers.
            *errMsg = cannotCleanupMessage;
            return false;
        }

        if (!_cleanupUpgradeState(configLoc, lastVersionInfo.getUpgradeId(), errMsg)) {
            // If we can't cleanup the old upgrade state, the user might have done it for us,
            // not a fatal problem (we'll just end up with extra collections).
            warning() << "could not cleanup previous upgrade state" << causedBy(errMsg) << endl;
            *errMsg = "";
        }
    }

    //
    // Check the versions of other mongo processes in the cluster before upgrade.
    // We can't upgrade if there are active pre-v2.2 processes in the cluster
    //

    Status mongoVersionStatus = checkClusterMongoVersions(configLoc,
                                                          string(minMongoProcessVersion));

    if (!mongoVersionStatus.isOK()) {
        *errMsg = stream() << "cannot upgrade with pre-v" << minMongoProcessVersion
                           << " mongo processes active in the cluster"
                           << causedBy(mongoVersionStatus);
        return false;
    }

    VersionType newVersionInfo;
    lastVersionInfo.cloneTo(&newVersionInfo);

    // Set our upgrade id and state
    OID upgradeId = OID::gen();
    newVersionInfo.setUpgradeId(upgradeId);
    newVersionInfo.setUpgradeState(BSONObj());

    // Write our upgrade id and state
    {
        scoped_ptr<ScopedDbConnection> connPtr;

        try {
            connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
            ScopedDbConnection& conn = *connPtr;

            verify(newVersionInfo.isValid(NULL));

            conn->update(VersionType::ConfigNS,
                         BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                         newVersionInfo.toBSON());
            _checkGLE(conn);
        }
        catch (const DBException& e) {
            *errMsg = stream() << "could not initialize version info for upgrade"
                               << causedBy(e);
            return false;
        }

        connPtr->done();
    }

    //
    // First lock all collection namespaces that exist
    //

    OwnedPointerMap<string, CollectionType> ownedCollections;
    const map<string, CollectionType*>& collections = ownedCollections.map();

    Status findCollectionsStatus = findAllCollectionsV3(configLoc, &ownedCollections);

    if (!findCollectionsStatus.isOK()) {
        *errMsg = stream() << "could not read collections from config server"
                           << causedBy(findCollectionsStatus);
        return false;
    }

    //
    // Acquire locks for all sharded collections
    // Something that didn't involve getting thousands of locks would be better.
    //

    OwnedPointerVector<ScopedDistributedLock> collectionLocks;

    log() << "acquiring locks for " << collections.size() << " sharded collections..." << endl;

    // WARNING - this string is used programmatically when forcing locks, be careful when
    // changing!
    // TODO: Add programmatic "why" field to lock collection
    string lockMessage = str::stream() << "ensuring epochs for config upgrade"
                                       << " (" << upgradeId.toString() << ")";

    if (!_acquireAllCollectionLocks(configLoc,
                                    collections,
                                    lockMessage,
                                    20 * 60 * 1000,
                                    &collectionLocks,
                                    errMsg)) {

        *errMsg = stream() << "could not acquire all namespace locks for upgrade"
                           << " (" << upgradeId.toString() << ")"
                           << causedBy(errMsg);

        return false;
    }

    // We are now preventing all splits and migrates for all sharded collections

    // Get working and backup suffixes
    string workingSuffix = genWorkingSuffix(upgradeId);
    string backupSuffix = genBackupSuffix(upgradeId);

    log() << "copying collection and chunk metadata to working and backup collections..." << endl;

    // Get a backup and working copy of the config.collections and config.chunks collections

    Status copyStatus = copyFrozenCollection(configLoc,
                                             CollectionType::ConfigNS,
                                             CollectionType::ConfigNS + workingSuffix);

    if (!copyStatus.isOK()) {
        *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                           << (CollectionType::ConfigNS + workingSuffix)
                           << causedBy(copyStatus);
        return false;
    }

    copyStatus = copyFrozenCollection(configLoc,
                                      CollectionType::ConfigNS,
                                      CollectionType::ConfigNS + backupSuffix);

    if (!copyStatus.isOK()) {
        *errMsg = stream() << "could not copy " << CollectionType::ConfigNS << " to "
                           << (CollectionType::ConfigNS + backupSuffix)
                           << causedBy(copyStatus);
        return false;
    }

    copyStatus = copyFrozenCollection(configLoc,
                                      ChunkType::ConfigNS,
                                      ChunkType::ConfigNS + workingSuffix);

    if (!copyStatus.isOK()) {
        *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                           << (ChunkType::ConfigNS + workingSuffix)
                           << causedBy(copyStatus);
        return false;
    }

    copyStatus = copyFrozenCollection(configLoc,
                                      ChunkType::ConfigNS,
                                      ChunkType::ConfigNS + backupSuffix);

    if (!copyStatus.isOK()) {
        *errMsg = stream() << "could not copy " << ChunkType::ConfigNS << " to "
                           << (ChunkType::ConfigNS + backupSuffix)
                           << causedBy(copyStatus);
        return false;
    }

    //
    // Go through sharded collections one-by-one and add epochs where missing
    //

    for (map<string, CollectionType*>::const_iterator it = collections.begin();
         it != collections.end();
         ++it) {

        // Create a copy so that we can change the epoch later
        CollectionType collection;
        it->second->cloneTo(&collection);

        log() << "checking epochs for " << collection.getNS() << " collection..." << endl;

        OID epoch = collection.getEpoch();

        //
        // Go through chunks to find epoch if we haven't found it or to verify epoch is the same
        //

        OwnedPointerVector<ChunkType> ownedChunks;
        const vector<ChunkType*>& chunks = ownedChunks.vector();

        Status findChunksStatus = findAllChunks(configLoc, collection.getNS(), &ownedChunks);

        if (!findChunksStatus.isOK()) {
            *errMsg = stream() << "could not read chunks from config server"
                               << causedBy(findChunksStatus);
            return false;
        }

        for (vector<ChunkType*>::const_iterator chunkIt = chunks.begin();
             chunkIt != chunks.end();
             ++chunkIt) {

            const ChunkType& chunk = *(*chunkIt);

            // If our chunk epoch is set and doesn't match
            if (epoch.isSet() && chunk.getVersion().epoch().isSet()
                && chunk.getVersion().epoch() != epoch) {

                *errMsg = stream() << "chunk epoch for " << chunk.toString() << " in "
                                   << collection.getNS() << " does not match found epoch "
                                   << epoch;

                return false;
            }
            else if (!epoch.isSet() && chunk.getVersion().epoch().isSet()) {
                epoch = chunk.getVersion().epoch();
            }
        }

        //
        // Write collection epoch if needed
        //

        if (!collection.getEpoch().isSet()) {

            OID newEpoch = OID::gen();

            log() << "writing new epoch " << newEpoch << " for " << collection.getNS()
                  << " collection..." << endl;

            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                conn->update(CollectionType::ConfigNS + workingSuffix,
                             BSON(CollectionType::ns(collection.getNS())),
                             BSON("$set"
                                  << BSON(CollectionType::DEPRECATED_lastmodEpoch(newEpoch))));
                _checkGLE(conn);
            }
            catch (const DBException& e) {
                *errMsg = stream() << "could not write a new epoch for " << collection.getNS()
                                   << causedBy(e);
                return false;
            }

            connPtr->done();
            collection.setEpoch(newEpoch);
        }

        epoch = collection.getEpoch();
        verify(epoch.isSet());

        //
        // Now write verified epoch to all chunks
        //

        log() << "writing epoch " << epoch << " for " << chunks.size() << " chunks in "
              << collection.getNS() << " collection..." << endl;

        {
            scoped_ptr<ScopedDbConnection> connPtr;

            try {
                connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
                ScopedDbConnection& conn = *connPtr;

                // Multi-update of all chunks
                conn->update(ChunkType::ConfigNS + workingSuffix,
                             BSON(ChunkType::ns(collection.getNS())),
                             BSON("$set" << BSON(ChunkType::DEPRECATED_epoch(epoch))),
                             false,
                             true); // multi
                _checkGLE(conn);
            }
            catch (const DBException& e) {
                *errMsg = stream() << "could not write a new epoch " << epoch.toString()
                                   << " for chunks in " << collection.getNS()
                                   << causedBy(e);
                return false;
            }

            connPtr->done();
        }
    }

    //
    // Paranoid verify the collection writes
    //

    {
        scoped_ptr<ScopedDbConnection> connPtr;

        try {
            connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
            ScopedDbConnection& conn = *connPtr;

            // Find collections with no epochs
            BSONObj emptyDoc =
                conn->findOne(CollectionType::ConfigNS + workingSuffix,
                              BSON("$unset"
                                   << BSON(CollectionType::DEPRECATED_lastmodEpoch() << 1)));

            if (!emptyDoc.isEmpty()) {
                *errMsg = stream() << "collection " << emptyDoc
                                   << " is still missing epoch after config upgrade";
                connPtr->done();
                return false;
            }

            // Find collections with empty epochs
            emptyDoc = conn->findOne(CollectionType::ConfigNS + workingSuffix,
                                     BSON(CollectionType::DEPRECATED_lastmodEpoch(OID())));

            if (!emptyDoc.isEmpty()) {
                *errMsg = stream() << "collection " << emptyDoc
                                   << " still has empty epoch after config upgrade";
                connPtr->done();
                return false;
            }

            // Find chunks with no epochs
            emptyDoc = conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                     BSON("$unset" << BSON(ChunkType::DEPRECATED_epoch() << 1)));

            if (!emptyDoc.isEmpty()) {
                *errMsg = stream() << "chunk " << emptyDoc
                                   << " is still missing epoch after config upgrade";
                connPtr->done();
                return false;
            }

            // Find chunks with empty epochs
            emptyDoc = conn->findOne(ChunkType::ConfigNS + workingSuffix,
                                     BSON(ChunkType::DEPRECATED_epoch(OID())));

            if (!emptyDoc.isEmpty()) {
                *errMsg = stream() << "chunk " << emptyDoc
                                   << " still has empty epoch after config upgrade";
                connPtr->done();
                return false;
            }
        }
        catch (const DBException& e) {
            *errMsg = stream() << "could not verify epoch writes" << causedBy(e);
            return false;
        }

        connPtr->done();
    }

    //
    // Double check that our collections haven't changed
    //

    Status idCheckStatus = checkIdsTheSame(configLoc,
                                           CollectionType::ConfigNS,
                                           CollectionType::ConfigNS + workingSuffix);

    if (!idCheckStatus.isOK()) {
        *errMsg = stream() << CollectionType::ConfigNS
                           << " was modified while working on upgrade"
                           << causedBy(idCheckStatus);
        return false;
    }

    idCheckStatus = checkIdsTheSame(configLoc,
                                    ChunkType::ConfigNS,
                                    ChunkType::ConfigNS + workingSuffix);

    if (!idCheckStatus.isOK()) {
        *errMsg = stream() << ChunkType::ConfigNS << " was modified while working on upgrade"
                           << causedBy(idCheckStatus);
        return false;
    }

    //
    // ENTER CRITICAL SECTION
    //

    newVersionInfo.setUpgradeState(BSON(inCriticalSectionField(true)));

    {
        scoped_ptr<ScopedDbConnection> connPtr;

        try {
            connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
            ScopedDbConnection& conn = *connPtr;

            verify(newVersionInfo.isValid(NULL));

            conn->update(VersionType::ConfigNS,
                         BSON("_id" << 1 << VersionType::version_DEPRECATED(3)),
                         newVersionInfo.toBSON());
            _checkGLE(conn);
        }
        catch (const DBException& e) {
            // No cleanup message here since we're not sure if we wrote or not, and
            // not dangerous either way except to prevent further updates (at which point
            // the message is printed)
            *errMsg = stream() << "could not update version info to enter critical update section"
                               << causedBy(e);
            return false;
        }

        // AT THIS POINT ANY FAILURE REQUIRES MANUAL INTERVENTION!
        connPtr->done();
    }

    log() << "entered critical section for config upgrade" << endl;

    Status overwriteStatus = overwriteCollection(configLoc,
                                                 CollectionType::ConfigNS + workingSuffix,
                                                 CollectionType::ConfigNS);

    if (!overwriteStatus.isOK()) {
        error() << cleanupMessage << endl;
        *errMsg = stream() << "could not overwrite collection " << CollectionType::ConfigNS
                           << " with working collection "
                           << (CollectionType::ConfigNS + workingSuffix)
                           << causedBy(overwriteStatus);
        return false;
    }

    overwriteStatus = overwriteCollection(configLoc,
                                          ChunkType::ConfigNS + workingSuffix,
                                          ChunkType::ConfigNS);

    if (!overwriteStatus.isOK()) {
        error() << cleanupMessage << endl;
        *errMsg = stream() << "could not overwrite collection " << ChunkType::ConfigNS
                           << " with working collection "
                           << (ChunkType::ConfigNS + workingSuffix)
                           << causedBy(overwriteStatus);
        return false;
    }

    //
    // Finally update the version to latest and add clusterId to version
    //

    OID newClusterId = OID::gen();

    // Note: hardcoded versions, since this is a very particular upgrade
    // Note: DO NOT CLEAR the config version unless bumping the minCompatibleVersion,
    // we want to save the excludes that were set.

    newVersionInfo.setMinCompatibleVersion(UpgradeHistory_NoEpochVersion);
    newVersionInfo.setCurrentVersion(UpgradeHistory_MandatoryEpochVersion);
    newVersionInfo.setClusterId(newClusterId);

    // Leave critical section
    newVersionInfo.unsetUpgradeId();
    newVersionInfo.unsetUpgradeState();

    log() << "writing new version info and clusterId " << newClusterId << "..." << endl;

    {
        scoped_ptr<ScopedDbConnection> connPtr;

        try {
            connPtr.reset(ScopedDbConnection::getInternalScopedDbConnection(configLoc, 30));
            ScopedDbConnection& conn = *connPtr;

            verify(newVersionInfo.isValid(NULL));

            conn->update(VersionType::ConfigNS,
                         BSON("_id" << 1
                              << VersionType::version_DEPRECATED(UpgradeHistory_NoEpochVersion)),
                         newVersionInfo.toBSON());
            _checkGLE(conn);
        }
        catch (const DBException& e) {
            error() << cleanupMessage << endl;
            *errMsg = stream() << "could not write new version info "
                               << "and exit critical upgrade section" << causedBy(e);
            return false;
        }

        connPtr->done();
    }

    //
    // END CRITICAL SECTION
    //

    return true;
}
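// A hypothetical caller sketch for the v3-to-v4 step above (not from the source): run the
// upgrade against the config server location and report any failure through the same logging
// helpers used above. The wrapper name exampleRunV3ToV4 is an assumption.
bool exampleRunV3ToV4(const ConnectionString& configLoc, const VersionType& lastVersionInfo) {
    string errMsg;
    if (!doUpgradeV3ToV4(configLoc, lastVersionInfo, &errMsg)) {
        error() << "config upgrade from v3 to v4 failed" << causedBy(errMsg) << endl;
        return false;
    }
    return true;
}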
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> allShards;
    Status status = getAllShards(txn, &allShards);
    if (!status.isOK()) {
        return status;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run
    stdx::chrono::seconds waitFor(2);
    MONGO_FAIL_POINT_BLOCK(setDropCollDistLockWait, customWait) {
        const BSONObj& data = customWait.getData();
        waitFor = stdx::chrono::seconds(data["waitForSecs"].numberInt());
    }
    const stdx::chrono::milliseconds lockTryInterval(500);
    auto scopedDistLock = getDistLockManager()->lock(ns.ns(), "drop", waitFor, lockTryInterval);
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    std::map<string, BSONObj> errors;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto dropResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));

        if (!dropResult.isOK()) {
            return dropResult.getStatus();
        }

        auto dropStatus = getStatusFromCommandResult(dropResult.getValue());
        if (!dropStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound) {
                continue;
            }

            errors.emplace(shardEntry.getHost(), dropResult.getValue());
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status result = remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(ns);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    result = updateCollection(txn, ns.ns(), coll);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    for (const auto& shardEntry : allShards) {
        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            grid.shardRegistry()->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
            ns,
            ChunkVersion::DROPPED(),
            true);

        auto ssvResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", ssv.toBSON());

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = getStatusFromCommandResult(ssvResult.getValue());
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = getStatusFromCommandResult(unsetShardingStatus.getValue());
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> allShards;
    Status status = getAllShards(txn, &allShards);
    if (!status.isOK()) {
        return status;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run
    auto scopedDistLock = getDistLockManager()->lock(ns.ns(), "drop");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    std::map<string, BSONObj> errors;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto dropResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));

        if (!dropResult.isOK()) {
            return dropResult.getStatus();
        }

        auto dropStatus = getStatusFromCommandResult(dropResult.getValue());
        if (!dropStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound) {
                continue;
            }

            errors.emplace(shardEntry.getHost(), dropResult.getValue());
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status result = remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(ns);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    result = updateCollection(txn, ns.ns(), coll);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    // We just called updateCollection above and this would have advanced the config op time, so
    // use the latest value. On the MongoD side, we need to load the latest config metadata, which
    // indicates that the collection was dropped.
    const ChunkVersionAndOpTime droppedVersion(ChunkVersion::DROPPED(),
                                               grid.shardRegistry()->getConfigOpTime());

    for (const auto& shardEntry : allShards) {
        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            grid.shardRegistry()->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
            ns,
            droppedVersion,
            true);

        auto ssvResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", ssv.toBSON());

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = getStatusFromCommandResult(ssvResult.getValue());
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = getStatusFromCommandResult(unsetShardingStatus.getValue());
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}