Document Document::fromBsonWithMetaData(const BSONObj& bson) {
    MutableDocument md;

    BSONObjIterator it(bson);
    while (it.more()) {
        BSONElement elem(it.next());
        auto fieldName = elem.fieldNameStringData();
        if (fieldName[0] == '$') {
            if (fieldName == metaFieldTextScore) {
                md.setTextScore(elem.Double());
                continue;
            } else if (fieldName == metaFieldRandVal) {
                md.setRandMetaField(elem.Double());
                continue;
            } else if (fieldName == metaFieldSortKey) {
                md.setSortKeyMetaField(elem.Obj());
                continue;
            }
        }

        // Note: this will not parse out metadata in embedded documents.
        md.addField(fieldName, Value(elem));
    }

    return md.freeze();
}
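// Illustrative sketch (not part of the original source; the "$textScore" spelling of
// 'metaFieldTextScore' and the metadata getters are assumptions): given the BSON
//     {a: 1, $textScore: 2.5}
// fromBsonWithMetaData produces a Document whose only field is {a: 1}, with the text
// score moved onto the document's metadata rather than kept as a user-visible field:
//
//     BSONObj obj = BSON("a" << 1 << "$textScore" << 2.5);
//     Document doc = Document::fromBsonWithMetaData(obj);
//     invariant(doc.hasTextScore() && doc.getTextScore() == 2.5);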
Document DocumentSourceChangeStreamTransform::applyTransformation(const Document& input) {
    // If we're executing a change stream pipeline that was forwarded from mongos, then we expect
    // it to "need merge"---we expect to be executing the shards part of a split pipeline. It is
    // never correct for mongos to pass through the change stream without splitting it into a
    // merging part executed on mongos and a shards part.
    //
    // This is necessary so that mongos can correctly handle "invalidate" and "retryNeeded" change
    // notifications. See SERVER-31978 for an example of why the pipeline must be split.
    //
    // We have to check this invariant at run-time of the change stream rather than parse time,
    // since a mongos may forward a change stream in an invalid position (e.g. in a nested $lookup
    // or $facet pipeline). In this case, mongod is responsible for parsing the pipeline and
    // throwing an error without ever executing the change stream.
    if (pExpCtx->fromMongos) {
        invariant(pExpCtx->needsMerge);
    }

    MutableDocument doc;

    // Extract the fields we need.
    checkValueType(input[repl::OplogEntry::kOpTypeFieldName],
                   repl::OplogEntry::kOpTypeFieldName,
                   BSONType::String);
    string op = input[repl::OplogEntry::kOpTypeFieldName].getString();
    Value ts = input[repl::OplogEntry::kTimestampFieldName];
    Value ns = input[repl::OplogEntry::kNssFieldName];
    checkValueType(ns, repl::OplogEntry::kNssFieldName, BSONType::String);
    Value uuid = input[repl::OplogEntry::kUuidFieldName];
    std::vector<FieldPath> documentKeyFields;

    // Deal with CRUD operations and commands.
    auto opType = repl::OpType_parse(IDLParserErrorContext("ChangeStreamEntry.op"), op);

    NamespaceString nss(ns.getString());

    // Ignore commands in the oplog when looking up the document key fields since a command
    // implies that the change stream is about to be invalidated (e.g. collection drop).
    if (!uuid.missing() && opType != repl::OpTypeEnum::kCommand) {
        checkValueType(uuid, repl::OplogEntry::kUuidFieldName, BSONType::BinData);
        // We need to retrieve the document key fields if our cache does not have an entry for
        // this UUID or if the cache entry is not definitively final, indicating that the
        // collection was unsharded when the entry was last populated.
        auto it = _documentKeyCache.find(uuid.getUuid());
        if (it == _documentKeyCache.end() || !it->second.isFinal) {
            auto docKeyFields =
                pExpCtx->mongoProcessInterface->collectDocumentKeyFieldsForHostedCollection(
                    pExpCtx->opCtx, nss, uuid.getUuid());
            if (it == _documentKeyCache.end() || docKeyFields.second) {
                _documentKeyCache[uuid.getUuid()] = DocumentKeyCacheEntry(docKeyFields);
            }
        }

        documentKeyFields = _documentKeyCache.find(uuid.getUuid())->second.documentKeyFields;
    }

    Value id = input.getNestedField("o._id");
    // Non-replace updates have the _id in field "o2".
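    // Illustrative note (assumption, not from the original source): for a collection
    // sharded on {region: 1}, the collectDocumentKeyFieldsForHostedCollection call above
    // would return the paths {"region", "_id"} with its 'final' flag set, so later events
    // for this UUID are served straight from '_documentKeyCache'. For an unsharded
    // collection it would return just {"_id"} with the flag unset, and the lookup is
    // retried on the next event in case the collection has since been sharded.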
    StringData operationType;
    Value fullDocument;
    Value updateDescription;
    Value documentKey;

    switch (opType) {
        case repl::OpTypeEnum::kInsert: {
            operationType = DocumentSourceChangeStream::kInsertOpType;
            fullDocument = input[repl::OplogEntry::kObjectFieldName];
            documentKey = Value(document_path_support::extractPathsFromDoc(
                fullDocument.getDocument(), documentKeyFields));
            break;
        }
        case repl::OpTypeEnum::kDelete: {
            operationType = DocumentSourceChangeStream::kDeleteOpType;
            documentKey = input[repl::OplogEntry::kObjectFieldName];
            break;
        }
        case repl::OpTypeEnum::kUpdate: {
            if (id.missing()) {
                operationType = DocumentSourceChangeStream::kUpdateOpType;
                checkValueType(input[repl::OplogEntry::kObjectFieldName],
                               repl::OplogEntry::kObjectFieldName,
                               BSONType::Object);
                Document opObject = input[repl::OplogEntry::kObjectFieldName].getDocument();
                Value updatedFields = opObject["$set"];
                Value removedFields = opObject["$unset"];

                // Extract the field names of $unset document.
                vector<Value> removedFieldsVector;
                if (removedFields.getType() == BSONType::Object) {
                    auto iter = removedFields.getDocument().fieldIterator();
                    while (iter.more()) {
                        removedFieldsVector.push_back(Value(iter.next().first));
                    }
                }
                updateDescription = Value(Document{
                    {"updatedFields", updatedFields.missing() ? Value(Document()) : updatedFields},
                    {"removedFields", removedFieldsVector}});
            } else {
                operationType = DocumentSourceChangeStream::kReplaceOpType;
                fullDocument = input[repl::OplogEntry::kObjectFieldName];
            }
            documentKey = input[repl::OplogEntry::kObject2FieldName];
            break;
        }
        case repl::OpTypeEnum::kCommand: {
            if (!input.getNestedField("o.drop").missing()) {
                operationType = DocumentSourceChangeStream::kDropCollectionOpType;

                // The "o.drop" field will contain the actual collection name.
                nss = NamespaceString(nss.db(), input.getNestedField("o.drop").getString());
            } else if (!input.getNestedField("o.renameCollection").missing()) {
                operationType = DocumentSourceChangeStream::kRenameCollectionOpType;

                // The "o.renameCollection" field contains the namespace of the original
                // collection.
                nss = NamespaceString(input.getNestedField("o.renameCollection").getString());

                // The "o.to" field contains the target namespace for the rename.
                const auto renameTargetNss =
                    NamespaceString(input.getNestedField("o.to").getString());
                doc.addField(DocumentSourceChangeStream::kRenameTargetNssField,
                             Value(Document{{"db", renameTargetNss.db()},
                                            {"coll", renameTargetNss.coll()}}));
            } else if (!input.getNestedField("o.dropDatabase").missing()) {
                operationType = DocumentSourceChangeStream::kDropDatabaseOpType;

                // Extract the database name from the namespace field and leave the collection
                // name empty.
                nss = NamespaceString(nss.db());
            } else {
                // All other commands will invalidate the stream.
                operationType = DocumentSourceChangeStream::kInvalidateOpType;
            }

            // Make sure the result doesn't have a document key.
            documentKey = Value();
            break;
        }
        case repl::OpTypeEnum::kNoop: {
            operationType = DocumentSourceChangeStream::kNewShardDetectedOpType;
            // Generate a fake document Id for NewShardDetected operation so that we can resume
            // after this operation.
            documentKey = Value(Document{{DocumentSourceChangeStream::kIdField,
                                          input[repl::OplogEntry::kObject2FieldName]}});
            break;
        }
        default: { MONGO_UNREACHABLE; }
    }
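    // Worked example (illustrative, not from the original source): an oplog update entry
    // whose 'o' field is {$set: {a: 1}, $unset: {b: true}} has no "o._id", so 'id' is
    // missing and the kUpdate branch above produces
    //     updateDescription = {updatedFields: {a: 1}, removedFields: ["b"]}
    // whereas a full-document replacement carries "o._id", so 'id' is present and the
    // entry is reported as a "replace" with 'fullDocument' set instead.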
    // UUID should always be present except for invalidate and dropDatabase entries.
    if (operationType != DocumentSourceChangeStream::kInvalidateOpType &&
        operationType != DocumentSourceChangeStream::kDropDatabaseOpType) {
        invariant(!uuid.missing(), "Saw a CRUD op without a UUID");
    }

    // Note that 'documentKey' and/or 'uuid' might be missing, in which case they will not
    // appear in the output.
    auto resumeTokenData = getResumeToken(ts, uuid, documentKey);
    auto resumeToken = ResumeToken(resumeTokenData).toDocument();

    // Add some additional fields only relevant to transactions.
    if (_txnContext) {
        doc.addField(DocumentSourceChangeStream::kTxnNumberField,
                     Value(static_cast<long long>(_txnContext->txnNumber)));
        doc.addField(DocumentSourceChangeStream::kLsidField, Value(_txnContext->lsid));
    }

    doc.addField(DocumentSourceChangeStream::kIdField, Value(resumeToken));
    doc.addField(DocumentSourceChangeStream::kOperationTypeField, Value(operationType));
    doc.addField(DocumentSourceChangeStream::kClusterTimeField,
                 Value(resumeTokenData.clusterTime));

    // We set the resume token as the document's sort key in both the sharded and non-sharded
    // cases, since we will subsequently rely upon it to generate a correct
    // postBatchResumeToken.
    // TODO SERVER-38539: when returning results for merging, we first check whether
    // 'mergeByPBRT' has been set. If not, then the request was sent from an older mongoS which
    // cannot merge by raw resume tokens, and we must use the old sort key format. This check,
    // and the 'mergeByPBRT' flag, are no longer necessary in 4.4; all change streams will be
    // merged by resume token.
    if (pExpCtx->needsMerge && !pExpCtx->mergeByPBRT) {
        doc.setSortKeyMetaField(BSON("" << ts << "" << uuid << "" << documentKey));
    } else {
        doc.setSortKeyMetaField(resumeToken.toBson());
    }

    // "invalidate" and "newShardDetected" entries have fewer fields.
    if (operationType == DocumentSourceChangeStream::kInvalidateOpType ||
        operationType == DocumentSourceChangeStream::kNewShardDetectedOpType) {
        return doc.freeze();
    }

    doc.addField(DocumentSourceChangeStream::kFullDocumentField, fullDocument);
    doc.addField(DocumentSourceChangeStream::kNamespaceField,
                 operationType == DocumentSourceChangeStream::kDropDatabaseOpType
                     ? Value(Document{{"db", nss.db()}})
                     : Value(Document{{"db", nss.db()}, {"coll", nss.coll()}}));
    doc.addField(DocumentSourceChangeStream::kDocumentKeyField, documentKey);

    // Note that 'updateDescription' might be the 'missing' value, in which case it will not be
    // serialized.
    doc.addField("updateDescription", updateDescription);
    return doc.freeze();
}
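// Illustrative sketch (assumption, not part of the original source): the shape of the
// transformed event for a simple insert into "test.coll", with fields in the order the
// code above adds them:
//
//     {
//         _id: <resume token>,
//         operationType: "insert",
//         clusterTime: <oplog "ts">,
//         fullDocument: {_id: ..., ...},
//         ns: {db: "test", coll: "coll"},
//         documentKey: {_id: ...}
//     }
//
// For "invalidate" and "newShardDetected" events the function returns early, so only
// '_id', 'operationType', and 'clusterTime' (plus the transaction fields, if any) are
// emitted.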