void forceDatabaseRefresh(OperationContext* opCtx, const StringData dbName) {
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());

    auto const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());

    const auto refreshedDbVersion =
        uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabaseWithRefresh(opCtx, dbName))
            .databaseVersion();

    // First, check under a shared lock if another thread already updated the cached version.
    // This is a best-effort optimization to make as few threads as possible to convoy on the
    // exclusive lock below.
    auto databaseHolder = DatabaseHolder::get(opCtx);
    {
        // Take the DBLock directly rather than using AutoGetDb, to prevent a recursive call
        // into checkDbVersion().
        Lock::DBLock dbLock(opCtx, dbName, MODE_IS);
        auto db = databaseHolder->getDb(opCtx, dbName);
        if (!db) {
            log() << "Database " << dbName
                  << " has been dropped; not caching the refreshed databaseVersion";
            return;
        }

        auto& dss = DatabaseShardingState::get(db);
        auto dssLock = DatabaseShardingState::DSSLock::lock(opCtx, &dss);

        const auto cachedDbVersion = dss.getDbVersion(opCtx, dssLock);
        if (cachedDbVersion && cachedDbVersion->getUuid() == refreshedDbVersion.getUuid() &&
            cachedDbVersion->getLastMod() >= refreshedDbVersion.getLastMod()) {
            LOG(2) << "Skipping setting cached databaseVersion for " << dbName
                   << " to refreshed version " << refreshedDbVersion.toBSON()
                   << " because current cached databaseVersion is already "
                   << cachedDbVersion->toBSON();
            return;
        }
    }

    // The cached version is older than the refreshed version; update the cached version.
    Lock::DBLock dbLock(opCtx, dbName, MODE_X);
    auto db = databaseHolder->getDb(opCtx, dbName);
    if (!db) {
        log() << "Database " << dbName
              << " has been dropped; not caching the refreshed databaseVersion";
        return;
    }

    auto& dss = DatabaseShardingState::get(db);
    auto dssLock = DatabaseShardingState::DSSLock::lockExclusive(opCtx, &dss);

    dss.setDbVersion(opCtx, std::move(refreshedDbVersion), dssLock);
}
Status ShardingStateRecovery::recover(OperationContext* txn) {
    BSONObj recoveryDocBSON;

    try {
        AutoGetCollection autoColl(txn, NamespaceString::kConfigCollectionNamespace, MODE_IS);
        if (!Helpers::findOne(
                txn, autoColl.getCollection(), RecoveryDocument::getQuery(), recoveryDocBSON)) {
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto recoveryDocStatus = RecoveryDocument::fromBSON(recoveryDocBSON);
    if (!recoveryDocStatus.isOK())
        return recoveryDocStatus.getStatus();

    const auto recoveryDoc = std::move(recoveryDocStatus.getValue());

    log() << "Sharding state recovery process found document " << recoveryDoc.toBSON();

    // Make sure the sharding state is initialized
    ShardingState* const shardingState = ShardingState::get(txn);

    shardingState->initialize(txn, recoveryDoc.getConfigsvr().toString());
    shardingState->setShardName(recoveryDoc.getShardName());

    if (!recoveryDoc.getMinOpTimeUpdaters()) {
        // Treat the minOpTime as up-to-date
        grid.shardRegistry()->advanceConfigOpTime(recoveryDoc.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were "
          << recoveryDoc.getMinOpTimeUpdaters()
          << " metadata change operations in flight. Contacting the config server primary in "
             "order to retrieve the most recent opTime.";

    // Need to fetch the latest opTime from the config server, so do a logging write
    Status status =
        grid.catalogManager(txn)->logChange(txn,
                                            "Sharding recovery thread",
                                            "Sharding minOpTime recovery",
                                            NamespaceString::kConfigCollectionNamespace.ns(),
                                            recoveryDocBSON);
    if (!status.isOK())
        return status;

    log() << "Sharding state recovered. New config server opTime is "
          << grid.shardRegistry()->getConfigOpTime();

    // Finally, clear the recovery document so next time we don't need to recover
    status = modifyRecoveryDocument(txn, RecoveryDocument::Clear, kMajorityWriteConcern);
    if (!status.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << status;
    }

    return Status::OK();
}
Status ShardingStateRecovery::recover(OperationContext* opCtx) {
    if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) {
        return Status::OK();
    }

    BSONObj recoveryDocBSON;

    try {
        AutoGetCollection autoColl(opCtx, NamespaceString::kConfigCollectionNamespace, MODE_IS);
        if (!Helpers::findOne(
                opCtx, autoColl.getCollection(), RecoveryDocument::getQuery(), recoveryDocBSON)) {
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto recoveryDocStatus = RecoveryDocument::fromBSON(recoveryDocBSON);
    if (!recoveryDocStatus.isOK())
        return recoveryDocStatus.getStatus();

    const auto recoveryDoc = std::move(recoveryDocStatus.getValue());

    log() << "Sharding state recovery process found document " << redact(recoveryDoc.toBSON());

    ShardingState* const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->enabled());

    if (!recoveryDoc.getMinOpTimeUpdaters()) {
        // Treat the minOpTime as up-to-date
        grid.advanceConfigOpTime(recoveryDoc.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were "
          << recoveryDoc.getMinOpTimeUpdaters()
          << " metadata change operations in flight. Contacting the config server primary in "
             "order to retrieve the most recent opTime.";

    // Need to fetch the latest opTime from the config server, so do a logging write
    Status status =
        grid.catalogClient(opCtx)->logChange(opCtx,
                                             "Sharding minOpTime recovery",
                                             NamespaceString::kConfigCollectionNamespace.ns(),
                                             recoveryDocBSON,
                                             ShardingCatalogClient::kMajorityWriteConcern);
    if (!status.isOK())
        return status;

    log() << "Sharding state recovered. New config server opTime is " << grid.configOpTime();

    // Finally, clear the recovery document so next time we don't need to recover
    status = modifyRecoveryDocument(opCtx, RecoveryDocument::Clear, kLocalWriteConcern);
    if (!status.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << redact(status);
    }

    return Status::OK();
}
void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx,
                                                 Timestamp oplogApplicationStartPoint,
                                                 Timestamp topOfOplog) {
    invariant(!oplogApplicationStartPoint.isNull());
    invariant(!topOfOplog.isNull());

    // Check if we have any unapplied ops in our oplog. It is important that this is done after
    // deleting the ragged end of the oplog.
    if (oplogApplicationStartPoint == topOfOplog) {
        log() << "No oplog entries to apply for recovery. appliedThrough is at the top of the "
                 "oplog.";
        return;  // We've applied all the valid oplog we have.
    } else if (oplogApplicationStartPoint > topOfOplog) {
        severe() << "Applied op " << oplogApplicationStartPoint.toBSON()
                 << " not found. Top of oplog is " << topOfOplog.toBSON() << '.';
        fassertFailedNoTrace(40313);
    }

    log() << "Replaying stored operations from " << oplogApplicationStartPoint.toBSON()
          << " (exclusive) to " << topOfOplog.toBSON() << " (inclusive).";

    DBDirectClient db(opCtx);
    auto cursor = db.query(NamespaceString::kRsOplogNamespace.ns(),
                           QUERY("ts" << BSON("$gte" << oplogApplicationStartPoint)),
                           /*batchSize*/ 0,
                           /*skip*/ 0,
                           /*projection*/ nullptr,
                           QueryOption_OplogReplay);

    // Check that the first document matches our appliedThrough point then skip it since it's
    // already been applied.
    if (!cursor->more()) {
        // This should really be impossible because we check above that the top of the oplog is
        // strictly > appliedThrough. If this fails it represents a serious bug in either the
        // storage engine or query's implementation of OplogReplay.
        severe() << "Couldn't find any entries in the oplog >= "
                 << oplogApplicationStartPoint.toBSON() << " which should be impossible.";
        fassertFailedNoTrace(40293);
    }

    auto firstTimestampFound =
        fassertStatusOK(40291, OpTime::parseFromOplogEntry(cursor->nextSafe())).getTimestamp();
    if (firstTimestampFound != oplogApplicationStartPoint) {
        severe() << "Oplog entry at " << oplogApplicationStartPoint.toBSON()
                 << " is missing; actual entry found is " << firstTimestampFound.toBSON();
        fassertFailedNoTrace(40292);
    }

    // Apply remaining ops one at a time, but don't log them because they are already logged.
    UnreplicatedWritesBlock uwb(opCtx);

    while (cursor->more()) {
        auto entry = cursor->nextSafe();

        fassertStatusOK(40294,
                        SyncTail::syncApply(opCtx, entry, OplogApplication::Mode::kRecovering));
        _consistencyMarkers->setAppliedThrough(
            opCtx, fassertStatusOK(40295, OpTime::parseFromOplogEntry(entry)));
    }
}
StatusWith<OplogApplier::Operations> OplogApplier::getNextApplierBatch(
    OperationContext* opCtx, const BatchLimits& batchLimits) {
    if (batchLimits.ops == 0) {
        return Status(ErrorCodes::InvalidOptions, "Batch size must be greater than 0.");
    }

    std::uint32_t totalBytes = 0;
    Operations ops;
    BSONObj op;
    while (_oplogBuffer->peek(opCtx, &op)) {
        auto entry = OplogEntry(op);

        // Check for oplog version change. If it is absent, its value is one.
        if (entry.getVersion() != OplogEntry::kOplogVersion) {
            std::string message = str::stream()
                << "expected oplog version " << OplogEntry::kOplogVersion << " but found version "
                << entry.getVersion() << " in oplog entry: " << redact(entry.toBSON());
            severe() << message;
            return {ErrorCodes::BadValue, message};
        }

        // Commands must be processed one at a time. The only exception to this is applyOps because
        // applyOps oplog entries are effectively containers for CRUD operations. Therefore, it is
        // safe to batch applyOps commands with CRUD operations when reading from the oplog buffer.
        if (entry.isCommand() && (entry.getCommandType() != OplogEntry::CommandType::kApplyOps ||
                                  entry.shouldPrepare())) {
            if (ops.empty()) {
                // Apply commands one-at-a-time.
                ops.push_back(std::move(entry));
                BSONObj opToPopAndDiscard;
                invariant(_oplogBuffer->tryPop(opCtx, &opToPopAndDiscard));
                dassert(ops.back() == OplogEntry(opToPopAndDiscard));
            }

            // Otherwise, apply what we have so far and come back for the command.
            return std::move(ops);
        }

        // Apply replication batch limits.
        if (ops.size() >= batchLimits.ops) {
            return std::move(ops);
        }

        // Never return an empty batch if there are operations left.
        if ((totalBytes + entry.getRawObjSizeBytes() >= batchLimits.bytes) && (ops.size() > 0)) {
            return std::move(ops);
        }

        // Add op to buffer.
        totalBytes += entry.getRawObjSizeBytes();
        ops.push_back(std::move(entry));
        BSONObj opToPopAndDiscard;
        invariant(_oplogBuffer->tryPop(opCtx, &opToPopAndDiscard));
        dassert(ops.back() == OplogEntry(opToPopAndDiscard));
    }
    return std::move(ops);
}
void RollbackResyncsCollectionOptionsTest::resyncCollectionOptionsTest(
    CollectionOptions localCollOptions,
    BSONObj remoteCollOptionsObj,
    BSONObj collModCmd,
    std::string collName) {
    createOplog(_opCtx.get());

    auto dbName = "test";
    auto nss = NamespaceString(dbName, collName);

    auto coll = _createCollection(_opCtx.get(), nss.toString(), localCollOptions);

    auto commonOpUuid = unittest::assertGet(UUID::parse("f005ba11-cafe-bead-f00d-123456789abc"));
    auto commonOpBson = BSON("ts" << Timestamp(1, 1) << "t" << 1LL << "op"
                                  << "n"
                                  << "o" << BSONObj() << "ns"
                                  << "rollback_test.test"
                                  << "ui" << commonOpUuid);

    auto commonOperation = std::make_pair(commonOpBson, RecordId(1));

    auto collectionModificationOperation =
        makeCommandOp(Timestamp(Seconds(2), 0), coll->uuid(), nss.toString(), collModCmd, 2);

    RollbackSourceWithCollectionOptions rollbackSource(
        std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation})),
        remoteCollOptionsObj);

    ASSERT_OK(syncRollback(_opCtx.get(),
                           OplogInterfaceMock({collectionModificationOperation, commonOperation}),
                           rollbackSource,
                           {},
                           _coordinator,
                           _replicationProcess.get()));

    // Make sure the collection options are correct.
    AutoGetCollectionForReadCommand autoColl(_opCtx.get(), NamespaceString(nss.toString()));
    auto collAfterRollbackOptions =
        autoColl.getCollection()->getCatalogEntry()->getCollectionOptions(_opCtx.get());

    BSONObjBuilder expectedOptionsBob;
    if (localCollOptions.uuid) {
        localCollOptions.uuid.get().appendToBuilder(&expectedOptionsBob, "uuid");
    }
    expectedOptionsBob.appendElements(remoteCollOptionsObj);

    ASSERT_BSONOBJ_EQ(expectedOptionsBob.obj(), collAfterRollbackOptions.toBSON());
}
std::string LocksType::toString() const { return toBSON().toString(); }
void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try {
    if (_consistencyMarkers->getInitialSyncFlag(opCtx)) {
        log() << "No recovery needed. Initial sync flag set.";
        return;  // Initial Sync will take over so no cleanup is needed.
    }

    const auto truncateAfterPoint = _consistencyMarkers->getOplogTruncateAfterPoint(opCtx);
    const auto appliedThrough = _consistencyMarkers->getAppliedThrough(opCtx);
    if (!truncateAfterPoint.isNull()) {
        log() << "Removing unapplied entries starting at: " << truncateAfterPoint.toBSON();
        _truncateOplogTo(opCtx, truncateAfterPoint);
    }

    // Clear the truncateAfterPoint so that we don't truncate the next batch of oplog entries
    // erroneously.
    _consistencyMarkers->setOplogTruncateAfterPoint(opCtx, {});

    // TODO (SERVER-30556): Delete this line since the old oplog delete from point cannot exist.
    _consistencyMarkers->removeOldOplogDeleteFromPointField(opCtx);

    auto topOfOplogSW = _getLastAppliedOpTime(opCtx);
    boost::optional<OpTime> topOfOplog = boost::none;
    if (topOfOplogSW.getStatus() != ErrorCodes::CollectionIsEmpty &&
        topOfOplogSW.getStatus() != ErrorCodes::NamespaceNotFound) {
        fassertStatusOK(40290, topOfOplogSW);
        topOfOplog = topOfOplogSW.getValue();
    }

    // If we have a checkpoint timestamp, then we recovered to a timestamp and should set the
    // initial data timestamp to that. Otherwise, we simply recovered the data on disk so we should
    // set the initial data timestamp to the top OpTime in the oplog once the data is consistent
    // there. If there is nothing in the oplog, then we do not set the initial data timestamp.
    auto checkpointTimestamp = _consistencyMarkers->getCheckpointTimestamp(opCtx);
    if (!checkpointTimestamp.isNull()) {
        // If we have a checkpoint timestamp, we set the initial data timestamp now so that
        // the operations we apply below can be given the proper timestamps.
        _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(),
                                                   SnapshotName(checkpointTimestamp));
    }

    // Oplog is empty. There are no oplog entries to apply, so we exit recovery. If there was a
    // checkpointTimestamp then we already set the initial data timestamp. Otherwise, there is
    // nothing to set it to.
    if (!topOfOplog) {
        log() << "No oplog entries to apply for recovery. Oplog is empty.";
        return;
    }

    if (auto startPoint = _getOplogApplicationStartPoint(checkpointTimestamp, appliedThrough)) {
        _applyToEndOfOplog(opCtx, startPoint.get(), topOfOplog->getTimestamp());
    }

    // If we don't have a checkpoint timestamp, then we are either not running a storage engine
    // that supports "recover to stable timestamp" or we just upgraded from a version that didn't.
    // In both cases, the data on disk is not consistent until we have applied all oplog entries to
    // the end of the oplog, since we do not know which ones actually got applied before shutdown.
    // As a result, we do not set the initial data timestamp until after we have applied to the end
    // of the oplog.
    if (checkpointTimestamp.isNull()) {
        _storageInterface->setInitialDataTimestamp(opCtx->getServiceContext(),
                                                   SnapshotName(topOfOplog->getTimestamp()));
    }
} catch (...) {
    severe() << "Caught exception during replication recovery: " << exceptionToStatus();
    std::terminate();
}
Status createCollectionForApplyOps(OperationContext* opCtx,
                                   const std::string& dbName,
                                   const BSONElement& ui,
                                   const BSONObj& cmdObj,
                                   const BSONObj& idIndex) {
    invariant(opCtx->lockState()->isDbLockedForMode(dbName, MODE_X));
    auto db = dbHolder().get(opCtx, dbName);
    const NamespaceString newCollName(Command::parseNsCollectionRequired(dbName, cmdObj));
    auto newCmd = cmdObj;

    // If a UUID is given, see if we need to rename a collection out of the way, and whether the
    // collection already exists under a different name. If so, rename it into place. As this is
    // done during replay of the oplog, the operations do not need to be atomic, just idempotent.
    // We need to do the renaming part in a separate transaction, as we cannot transactionally
    // create a database on MMAPv1, which could result in createCollection failing if the database
    // does not yet exist.
    if (ui.ok()) {
        // Return an optional, indicating whether we need to early return (if the collection
        // already exists, or in case of an error).
        using Result = boost::optional<Status>;
        auto result =
            writeConflictRetry(opCtx, "createCollectionForApplyOps", newCollName.ns(), [&] {
                WriteUnitOfWork wunit(opCtx);
                // Options need the field to be named "uuid", so parse/recreate.
                auto uuid = uassertStatusOK(UUID::parse(ui));
                uassert(ErrorCodes::InvalidUUID,
                        "Invalid UUID in applyOps create command: " + uuid.toString(),
                        uuid.isRFC4122v4());

                auto& catalog = UUIDCatalog::get(opCtx);
                auto currentName = catalog.lookupNSSByUUID(uuid);
                OpObserver* opObserver = getGlobalServiceContext()->getOpObserver();
                if (currentName == newCollName)
                    return Result(Status::OK());

                // In the case of oplog replay, a future command may have created or renamed a
                // collection with that same name. In that case, renaming this future collection to
                // a random temporary name is correct: once all entries are replayed no temporary
                // names will remain. On MMAPv1 the rename can result in index names that are too
                // long. However this should only happen for initial sync and "resync collection"
                // for rollback, so we can let the error propagate resulting in an abort and
                // restart of the initial sync or result in rollback to fassert, requiring a resync
                // of that node.
                const bool stayTemp = true;
                if (auto futureColl = db ? db->getCollection(opCtx, newCollName) : nullptr) {
                    auto tmpNameResult = db->makeUniqueCollectionNamespace(opCtx, "tmp%%%%%");
                    if (!tmpNameResult.isOK()) {
                        return Result(Status(tmpNameResult.getStatus().code(),
                                             str::stream() << "Cannot generate temporary "
                                                              "collection namespace for applyOps "
                                                              "create command: collection: "
                                                           << newCollName.ns()
                                                           << ". error: "
                                                           << tmpNameResult.getStatus().reason()));
                    }
                    const auto& tmpName = tmpNameResult.getValue();
                    Status status =
                        db->renameCollection(opCtx, newCollName.ns(), tmpName.ns(), stayTemp);
                    if (!status.isOK())
                        return Result(status);
                    opObserver->onRenameCollection(opCtx,
                                                   newCollName,
                                                   tmpName,
                                                   futureColl->uuid(),
                                                   /*dropTarget*/ false,
                                                   /*dropTargetUUID*/ {},
                                                   stayTemp);
                }

                // If the collection with the requested UUID already exists, but with a different
                // name, just rename it to 'newCollName'.
                if (catalog.lookupCollectionByUUID(uuid)) {
                    Status status =
                        db->renameCollection(opCtx, currentName.ns(), newCollName.ns(), stayTemp);
                    if (!status.isOK())
                        return Result(status);
                    opObserver->onRenameCollection(opCtx,
                                                   currentName,
                                                   newCollName,
                                                   uuid,
                                                   /*dropTarget*/ false,
                                                   /*dropTargetUUID*/ {},
                                                   stayTemp);

                    wunit.commit();
                    return Result(Status::OK());
                }

                // A new collection with the specific UUID must be created, so add the UUID to the
                // creation options. Regular user collection creation commands cannot do this.
                auto uuidObj = uuid.toBSON();
                newCmd = cmdObj.addField(uuidObj.firstElement());
                wunit.commit();

                return Result(boost::none);
            });

        if (result) {
            return *result;
        }
    }

    return createCollection(
        opCtx, newCollName, newCmd, idIndex, CollectionOptions::parseForStorage);
}
std::string ReadPreferenceSetting::toString() const { return toBSON().toString(); }
std::string CollectionBulkLoaderImpl::toString() const { return toBSON().toString(); }
std::string ChunkType::toString() const { return toBSON().toString(); }
void ChunkVersion::appendForCommands(BSONObjBuilder* builder) const {
    builder->appendArray(kShardVersionField, toBSON());
}
std::string SettingsType::toString() const { return toBSON().toString(); }
void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx,
                                               boost::optional<Timestamp> stableTimestamp) try {
    if (_consistencyMarkers->getInitialSyncFlag(opCtx)) {
        log() << "No recovery needed. Initial sync flag set.";
        return;  // Initial Sync will take over so no cleanup is needed.
    }

    const auto serviceCtx = getGlobalServiceContext();
    inReplicationRecovery(serviceCtx) = true;
    ON_BLOCK_EXIT([serviceCtx] {
        invariant(
            inReplicationRecovery(serviceCtx),
            "replication recovery flag is unexpectedly unset when exiting recoverFromOplog()");
        inReplicationRecovery(serviceCtx) = false;
    });

    const auto truncateAfterPoint = _consistencyMarkers->getOplogTruncateAfterPoint(opCtx);
    if (!truncateAfterPoint.isNull()) {
        log() << "Removing unapplied entries starting at: " << truncateAfterPoint.toBSON();
        _truncateOplogTo(opCtx, truncateAfterPoint);

        // Clear the truncateAfterPoint so that we don't truncate the next batch of oplog entries
        // erroneously.
        _consistencyMarkers->setOplogTruncateAfterPoint(opCtx, {});
        opCtx->recoveryUnit()->waitUntilDurable();
    }

    auto topOfOplogSW = _getTopOfOplog(opCtx);
    if (topOfOplogSW.getStatus() == ErrorCodes::CollectionIsEmpty ||
        topOfOplogSW.getStatus() == ErrorCodes::NamespaceNotFound) {
        // Oplog is empty. There are no oplog entries to apply, so we exit recovery and go into
        // initial sync.
        log() << "No oplog entries to apply for recovery. Oplog is empty.";
        return;
    }
    fassert(40290, topOfOplogSW);
    const auto topOfOplog = topOfOplogSW.getValue();

    // If we were passed in a stable timestamp, we are in rollback recovery and should recover from
    // that stable timestamp. Otherwise, we're recovering at startup. If this storage engine
    // supports recover to stable timestamp or enableMajorityReadConcern=false, we ask it for the
    // recovery timestamp. If the storage engine returns a timestamp, we recover from that point.
    // However, if the storage engine returns "none", the storage engine does not have a stable
    // checkpoint and we must recover from an unstable checkpoint instead.
    const bool supportsRecoveryTimestamp =
        _storageInterface->supportsRecoveryTimestamp(opCtx->getServiceContext());
    if (!stableTimestamp && supportsRecoveryTimestamp) {
        stableTimestamp = _storageInterface->getRecoveryTimestamp(opCtx->getServiceContext());
    }

    const auto appliedThrough = _consistencyMarkers->getAppliedThrough(opCtx);
    invariant(!stableTimestamp || stableTimestamp->isNull() || appliedThrough.isNull() ||
                  *stableTimestamp == appliedThrough.getTimestamp(),
              str::stream() << "Stable timestamp " << stableTimestamp->toString()
                            << " does not equal appliedThrough timestamp "
                            << appliedThrough.toString());

    if (stableTimestamp) {
        invariant(supportsRecoveryTimestamp);
        _recoverFromStableTimestamp(opCtx, *stableTimestamp, appliedThrough, topOfOplog);
    } else {
        _recoverFromUnstableCheckpoint(opCtx, appliedThrough, topOfOplog);
    }

    _reconstructPreparedTransactions(opCtx);
} catch (...) {
    severe() << "Caught exception during replication recovery: " << exceptionToStatus();
    std::terminate();
}
std::string CollectionCloner::Stats::toString() const { return toBSON().toString(); }
Status ShardingStateRecovery::recover(OperationContext* txn) {
    if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) {
        return Status::OK();
    }

    BSONObj recoveryDocBSON;

    try {
        AutoGetCollection autoColl(txn, NamespaceString::kConfigCollectionNamespace, MODE_IS);
        if (!Helpers::findOne(
                txn, autoColl.getCollection(), RecoveryDocument::getQuery(), recoveryDocBSON)) {
            return Status::OK();
        }
    } catch (const DBException& ex) {
        return ex.toStatus();
    }

    const auto recoveryDocStatus = RecoveryDocument::fromBSON(recoveryDocBSON);
    if (!recoveryDocStatus.isOK())
        return recoveryDocStatus.getStatus();

    const auto recoveryDoc = std::move(recoveryDocStatus.getValue());

    log() << "Sharding state recovery process found document " << recoveryDoc.toBSON();

    // Make sure the sharding state is initialized
    ShardingState* const shardingState = ShardingState::get(txn);

    // For backwards compatibility. Shards added by a v3.4 cluster should have been initialized by
    // the shard identity document.
    // TODO(SERVER-25276): Remove this after 3.4 since 3.4 shards should always have ShardingState
    // initialized by this point.
    if (!shardingState->enabled()) {
        shardingState->initializeFromConfigConnString(txn, recoveryDoc.getConfigsvr().toString());
        shardingState->setShardName(recoveryDoc.getShardName());
    }

    if (!recoveryDoc.getMinOpTimeUpdaters()) {
        // Treat the minOpTime as up-to-date
        grid.advanceConfigOpTime(recoveryDoc.getMinOpTime());
        return Status::OK();
    }

    log() << "Sharding state recovery document indicates there were "
          << recoveryDoc.getMinOpTimeUpdaters()
          << " metadata change operations in flight. Contacting the config server primary in "
             "order to retrieve the most recent opTime.";

    // Need to fetch the latest opTime from the config server, so do a logging write
    Status status =
        grid.catalogClient(txn)->logChange(txn,
                                           "Sharding minOpTime recovery",
                                           NamespaceString::kConfigCollectionNamespace.ns(),
                                           recoveryDocBSON,
                                           ShardingCatalogClient::kMajorityWriteConcern);
    if (!status.isOK())
        return status;

    log() << "Sharding state recovered. New config server opTime is " << grid.configOpTime();

    // Finally, clear the recovery document so next time we don't need to recover
    status = modifyRecoveryDocument(txn, RecoveryDocument::Clear, kLocalWriteConcern);
    if (!status.isOK()) {
        warning() << "Failed to reset sharding state recovery document due to " << status;
    }

    return Status::OK();
}
std::string MongosType::toString() const { return toBSON().toString(); }
std::string ReadConcernArgs::toString() const { return toBSON().toString(); }
std::vector<BSONObj> MongoProcessCommon::getCurrentOps(
    const boost::intrusive_ptr<ExpressionContext>& expCtx,
    CurrentOpConnectionsMode connMode,
    CurrentOpSessionsMode sessionMode,
    CurrentOpUserMode userMode,
    CurrentOpTruncateMode truncateMode,
    CurrentOpCursorMode cursorMode) const {
    OperationContext* opCtx = expCtx->opCtx;
    AuthorizationSession* ctxAuth = AuthorizationSession::get(opCtx->getClient());

    std::vector<BSONObj> ops;

    for (ServiceContext::LockedClientsCursor cursor(opCtx->getClient()->getServiceContext());
         Client* client = cursor.next();) {
        invariant(client);

        stdx::lock_guard<Client> lk(*client);

        // If auth is disabled, ignore the allUsers parameter.
        if (ctxAuth->getAuthorizationManager().isAuthEnabled() &&
            userMode == CurrentOpUserMode::kExcludeOthers &&
            !ctxAuth->isCoauthorizedWithClient(client)) {
            continue;
        }

        // Ignore inactive connections unless 'idleConnections' is true.
        if (!client->getOperationContext() && connMode == CurrentOpConnectionsMode::kExcludeIdle) {
            continue;
        }

        // Delegate to the mongoD- or mongoS-specific implementation of _reportCurrentOpForClient.
        ops.emplace_back(_reportCurrentOpForClient(opCtx, client, truncateMode));
    }

    // If 'cursorMode' is set to include idle cursors, retrieve them and add them to ops.
    if (cursorMode == CurrentOpCursorMode::kIncludeCursors) {
        for (auto&& cursor : getIdleCursors(expCtx, userMode)) {
            BSONObjBuilder cursorObj;
            auto ns = cursor.getNs();
            auto lsid = cursor.getLsid();
            cursorObj.append("type", "idleCursor");
            cursorObj.append("host", getHostNameCached());
            cursorObj.append("ns", ns->toString());

            // If in legacy read mode, lsid is not present.
            if (lsid) {
                cursorObj.append("lsid", lsid->toBSON());
            }
            cursor.setNs(boost::none);
            cursor.setLsid(boost::none);

            // On mongos, planSummary is not present.
            auto planSummaryData = cursor.getPlanSummary();
            if (planSummaryData) {
                auto planSummaryText = planSummaryData->toString();

                // Plan summary has to appear in the top level object, not the cursor object.
                // We remove it, create the op, then put it back.
                cursor.setPlanSummary(boost::none);
                cursorObj.append("planSummary", planSummaryText);
                cursorObj.append("cursor", cursor.toBSON());
                cursor.setPlanSummary(StringData(planSummaryText));
            } else {
                cursorObj.append("cursor", cursor.toBSON());
            }

            ops.emplace_back(cursorObj.obj());
            cursor.setNs(ns);
            cursor.setLsid(lsid);
        }
    }

    // If we need to report on idle Sessions, defer to the mongoD or mongoS implementations.
    if (sessionMode == CurrentOpSessionsMode::kIncludeIdle) {
        _reportCurrentOpsForIdleSessions(opCtx, userMode, &ops);
    }

    return ops;
}
std::string DatabasesCloner::Stats::toString() const { return toBSON().toString(); }
mongo::Status mongo::cloneCollectionAsCapped(OperationContext* opCtx,
                                             Database* db,
                                             const std::string& shortFrom,
                                             const std::string& shortTo,
                                             long long size,
                                             bool temp) {
    NamespaceString fromNss(db->name(), shortFrom);
    NamespaceString toNss(db->name(), shortTo);

    Collection* fromCollection = db->getCollection(opCtx, fromNss);
    if (!fromCollection) {
        if (db->getViewCatalog()->lookup(opCtx, fromNss.ns())) {
            return Status(ErrorCodes::CommandNotSupportedOnView,
                          str::stream() << "cloneCollectionAsCapped not supported for views: "
                                        << fromNss.ns());
        }
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "source collection " << fromNss.ns() << " does not exist");
    }

    if (fromNss.isDropPendingNamespace()) {
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "source collection " << fromNss.ns()
                                    << " is currently in a drop-pending state.");
    }

    if (db->getCollection(opCtx, toNss)) {
        return Status(ErrorCodes::NamespaceExists,
                      str::stream() << "cloneCollectionAsCapped failed - destination collection "
                                    << toNss.ns() << " already exists. source collection: "
                                    << fromNss.ns());
    }

    // create new collection
    {
        auto options = fromCollection->getCatalogEntry()->getCollectionOptions(opCtx);
        // The capped collection will get its own new unique id, as the conversion isn't
        // reversible, so it can't be rolled back.
        options.uuid.reset();
        options.capped = true;
        options.cappedSize = size;
        if (temp)
            options.temp = true;

        BSONObjBuilder cmd;
        cmd.append("create", toNss.coll());
        cmd.appendElements(options.toBSON());
        Status status = createCollection(opCtx, toNss.db().toString(), cmd.done());
        if (!status.isOK())
            return status;
    }

    Collection* toCollection = db->getCollection(opCtx, toNss);
    invariant(toCollection);  // we created above

    // how much data to ignore because it won't fit anyway
    // datasize and extentSize can't be compared exactly, so add some padding to 'size'
    long long allocatedSpaceGuess =
        std::max(static_cast<long long>(size * 2),
                 static_cast<long long>(toCollection->getRecordStore()->storageSize(opCtx) * 2));

    long long excessSize = fromCollection->dataSize(opCtx) - allocatedSpaceGuess;

    auto exec = InternalPlanner::collectionScan(opCtx,
                                                fromNss.ns(),
                                                fromCollection,
                                                PlanExecutor::WRITE_CONFLICT_RETRY_ONLY,
                                                InternalPlanner::FORWARD);

    Snapshotted<BSONObj> objToClone;
    RecordId loc;
    PlanExecutor::ExecState state = PlanExecutor::FAILURE;  // suppress uninitialized warnings

    DisableDocumentValidation validationDisabler(opCtx);

    int retries = 0;  // non-zero when retrying our last document.
    while (true) {
        if (!retries) {
            state = exec->getNextSnapshotted(&objToClone, &loc);
        }

        switch (state) {
            case PlanExecutor::IS_EOF:
                return Status::OK();
            case PlanExecutor::ADVANCED: {
                if (excessSize > 0) {
                    // 4x is for padding, power of 2, etc...
                    excessSize -= (4 * objToClone.value().objsize());
                    continue;
                }
                break;
            }
            default:
                // Unreachable as:
                // 1) We require a read lock (at a minimum) on the "from" collection
                //    and won't yield, preventing collection drop and PlanExecutor::DEAD
                // 2) PlanExecutor::FAILURE is only returned on PlanStage::FAILURE. The
                //    CollectionScan PlanStage does not have a FAILURE scenario.
                // 3) All other PlanExecutor states are handled above
                MONGO_UNREACHABLE;
        }

        try {
            // Make sure we are working with the latest version of the document.
            if (objToClone.snapshotId() != opCtx->recoveryUnit()->getSnapshotId() &&
                !fromCollection->findDoc(opCtx, loc, &objToClone)) {
                // doc was deleted so don't clone it.
                retries = 0;
                continue;
            }

            WriteUnitOfWork wunit(opCtx);
            OpDebug* const nullOpDebug = nullptr;
            uassertStatusOK(toCollection->insertDocument(
                opCtx, InsertStatement(objToClone.value()), nullOpDebug, true));
            wunit.commit();

            // Go to the next document
            retries = 0;
        } catch (const WriteConflictException&) {
            CurOp::get(opCtx)->debug().additiveMetrics.incrementWriteConflicts(1);
            retries++;  // logAndBackoff expects this to be 1 on first call.
            WriteConflictException::logAndBackoff(
                retries, "cloneCollectionAsCapped", fromNss.ns());

            // Can't use writeConflictRetry since we need to save/restore exec around call to
            // abandonSnapshot.
            exec->saveState();
            opCtx->recoveryUnit()->abandonSnapshot();
            auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
            if (!restoreStatus.isOK()) {
                return restoreStatus;
            }
        }
    }

    MONGO_UNREACHABLE;
}
std::string ChangeLogType::toString() const { return toBSON().toString(); }
void ChunkVersion::appendWithFieldForCommands(BSONObjBuilder* builder, StringData field) const {
    builder->appendArray(field, toBSON());
}
std::string ShardType::toString() const { return toBSON().toString(); }
std::string ShardCollectionType::toString() const { return toBSON().toString(); }