Value DocumentSourceBucketAuto::extractKey(const Document& doc) {
    if (!_groupByExpression) {
        return Value(BSONNULL);
    }

    _variables->setRoot(doc);
    Value key = _groupByExpression->evaluate(_variables.get());

    if (_granularityRounder) {
        uassert(40258,
                str::stream() << "$bucketAuto can specify a 'granularity' with numeric boundaries "
                                 "only, but found a value with type: "
                              << typeName(key.getType()),
                key.numeric());

        double keyValue = key.coerceToDouble();
        uassert(
            40259,
            "$bucketAuto can specify a 'granularity' with numeric boundaries only, but found a NaN",
            !std::isnan(keyValue));

        uassert(40260,
                "$bucketAuto can specify a 'granularity' with non-negative numbers only, but found "
                "a negative number",
                keyValue >= 0.0);
    }

    // To be consistent with the $group stage, we consider "missing" to be equivalent to null when
    // grouping values into buckets.
    return key.missing() ? Value(BSONNULL) : std::move(key);
}
Document applyTransformation(Document input) final {
    // Extract the subdocument in the form of a Value.
    _variables->setRoot(input);
    Value newRoot = _newRoot->evaluate(_variables.get());

    // The newRoot expression must evaluate to a valid Value.
    uassert(40232,
            str::stream() << "'newRoot' argument to $replaceRoot stage must be able to be "
                             "evaluated by the document "
                          << input.toString()
                          << "; try ensuring that your field path(s) exist by prepending a "
                          << "$match: {<path>: $exists} aggregation stage.",
            !newRoot.missing());

    // The newRoot expression, if it exists, must evaluate to an object.
    uassert(40228,
            str::stream() << "'newRoot' argument to $replaceRoot stage must evaluate to an "
                             "object, but got "
                          << typeName(newRoot.getType())
                          << "; try ensuring that it evaluates to an object by prepending a "
                          << "$match: {<path>: {$type: 'object'}} aggregation stage.",
            newRoot.getType() == Object);

    // Turn the value into a document.
    return newRoot.getDocument();
}
ResumeTokenData DocumentSourceChangeStreamTransform::getResumeToken(Value ts,
                                                                    Value uuid,
                                                                    Value documentKey) {
    ResumeTokenData resumeTokenData;
    if (_txnContext) {
        // We're in the middle of unwinding an 'applyOps', so use the clusterTime from the
        // higher-level applyOps entry.
        resumeTokenData.clusterTime = _txnContext->clusterTime;

        // 'pos' points to the _next_ applyOps index, so we must subtract one to get the index of
        // the entry being examined right now.
        invariant(_txnContext->pos >= 1);
        resumeTokenData.applyOpsIndex = _txnContext->pos - 1;
    } else {
        resumeTokenData.clusterTime = ts.getTimestamp();
        resumeTokenData.applyOpsIndex = 0;
    }

    resumeTokenData.documentKey = documentKey;
    if (!uuid.missing())
        resumeTokenData.uuid = uuid.getUuid();

    if (_fcv < ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
        resumeTokenData.version = 0;
    }

    return resumeTokenData;
}
BSONObj DocumentSourceLookUp::queryForInput(const Document& input) const {
    Value localFieldVal = input.getNestedField(_localField);

    // Missing values are treated as null.
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }

    return BSON(_foreignFieldFieldName << localFieldVal);
}
Value DocumentSourceRedact::redactValue(const Value& in) {
    const BSONType valueType = in.getType();
    if (valueType == Object) {
        _variables->setValue(_currentId, in);
        const boost::optional<Document> result = redactObject();
        if (result) {
            return Value(*result);
        } else {
            return Value();
        }
    } else if (valueType == Array) {
        // TODO: don't copy if possible.
        vector<Value> newArr;
        const vector<Value>& arr = in.getArray();
        for (size_t i = 0; i < arr.size(); i++) {
            if (arr[i].getType() == Object || arr[i].getType() == Array) {
                const Value toAdd = redactValue(arr[i]);
                if (!toAdd.missing()) {
                    newArr.push_back(toAdd);
                }
            } else {
                newArr.push_back(arr[i]);
            }
        }
        return Value(std::move(newArr));
    } else {
        return in;
    }
}
void DocumentSource::serializeToArray(vector<Value>& array,
                                      boost::optional<ExplainOptions::Verbosity> explain) const {
    Value entry = serialize(explain);
    if (!entry.missing()) {
        array.push_back(entry);
    }
}
boost::optional<Document> DocumentSourceRedact::redactObject() {
    const Value expressionResult = _expression->evaluate(_variables.get());

    if (expressionResult == keepVal) {
        return _variables->getDocument(_currentId);
    } else if (expressionResult == pruneVal) {
        return boost::optional<Document>();
    } else if (expressionResult == descendVal) {
        const Document in = _variables->getDocument(_currentId);
        MutableDocument out;
        out.copyMetaDataFrom(in);
        FieldIterator fields(in);
        while (fields.more()) {
            const Document::FieldPair field(fields.next());

            // This changes CURRENT, so don't read from _variables after this.
            const Value val = redactValue(field.second);
            if (!val.missing()) {
                out.addField(field.first, val);
            }
        }
        return out.freeze();
    } else {
        uasserted(17053,
                  str::stream() << "$redact's expression should not return anything "
                                << "aside from the variables $$KEEP, $$DESCEND, and "
                                << "$$PRUNE, but returned "
                                << expressionResult.toString());
    }
}
ResumeTokenData DocumentSourceChangeStreamTransform::getResumeToken(Value ts,
                                                                    Value uuid,
                                                                    Value documentKey) {
    ResumeTokenData resumeTokenData;
    if (_txnContext) {
        // We're in the middle of unwinding an 'applyOps', so use the clusterTime from the
        // higher-level applyOps entry.
        resumeTokenData.clusterTime = _txnContext->clusterTime;

        // 'pos' points to the _next_ applyOps index, so we must subtract one to get the index of
        // the entry being examined right now.
        invariant(_txnContext->pos >= 1);
        resumeTokenData.applyOpsIndex = _txnContext->pos - 1;
    } else {
        resumeTokenData.clusterTime = ts.getTimestamp();
        resumeTokenData.applyOpsIndex = 0;
    }

    resumeTokenData.documentKey = documentKey;
    if (!uuid.missing())
        resumeTokenData.uuid = uuid.getUuid();

    // If 'needsMerge' is true, 'mergeByPBRT' is false, and the FCV is less than 4.2, then we are
    // running on a sharded cluster that is mid-upgrade, and so we generate v0 resume tokens.
    // Otherwise, we always generate v1 resume tokens, whether the FCV is 4.0 or 4.2.
    if (pExpCtx->needsMerge && !pExpCtx->mergeByPBRT &&
        _fcv < ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
        resumeTokenData.version = 0;
    }

    return resumeTokenData;
}
boost::optional<Document> DocumentSourceRedact::redactObject(const Variables& in) {
    const Value expressionResult = _expression->evaluate(in);

    if (expressionResult == keepVal) {
        return in.current.getDocument();
    } else if (expressionResult == pruneVal) {
        return boost::optional<Document>();
    } else if (expressionResult == descendVal) {
        MutableDocument out;
        FieldIterator fields(in.current.getDocument());
        while (fields.more()) {
            const Document::FieldPair field(fields.next());
            const Value val = redactValue(in, field.second);
            if (!val.missing()) {
                out.addField(field.first, val);
            }
        }
        return out.freeze();
    } else {
        uasserted(17053,
                  str::stream() << "$redact's expression should not return anything "
                                << "aside from the variables $$KEEP, $$DESCEND, and "
                                << "$$PRUNE, but returned "
                                << expressionResult.toString());
    }
}
Value DocumentSourceRedact::redactValue(const Variables& vars, const Value& in) {
    const BSONType valueType = in.getType();
    if (valueType == Object) {
        Variables recurse = vars;
        recurse.current = in;
        const boost::optional<Document> result = redactObject(recurse);
        if (result) {
            return Value(*result);
        } else {
            return Value();
        }
    } else if (valueType == Array) {
        // TODO: don't copy if possible.
        vector<Value> newArr;
        const vector<Value>& arr = in.getArray();
        for (size_t i = 0; i < arr.size(); i++) {
            // Only object and array elements are recursed into and kept; in this version, scalar
            // array elements are not copied to the output.
            if (arr[i].getType() == Object || arr[i].getType() == Array) {
                const Value toAdd = redactValue(vars, arr[i]);
                if (!toAdd.missing()) {
                    newArr.push_back(toAdd);
                }
            }
        }
        return Value::consume(newArr);
    } else {
        return in;
    }
}
Document DocumentSource::documentFromBsonWithDeps(const BSONObj& bson,
                                                  const ParsedDeps& neededFields) {
    MutableDocument md(neededFields.size());

    BSONObjIterator it(bson);
    while (it.more()) {
        BSONElement bsonElement(it.next());
        StringData fieldName = bsonElement.fieldNameStringData();
        Value isNeeded = neededFields[fieldName];

        if (isNeeded.missing())
            continue;

        if (isNeeded.getType() == Bool) {
            md.addField(fieldName, Value(bsonElement));
            continue;
        }

        dassert(isNeeded.getType() == Object);

        if (bsonElement.type() == Object) {
            Document sub =
                documentFromBsonWithDeps(bsonElement.embeddedObject(), isNeeded.getDocument());
            md.addField(fieldName, Value(sub));
        }

        if (bsonElement.type() == Array) {
            md.addField(fieldName,
                        arrayHelper(bsonElement.embeddedObject(), isNeeded.getDocument()));
        }
    }

    return md.freeze();
}
BSONObj DocumentSourceLookUp::queryForInput(const Document& input,
                                            const FieldPath& localFieldPath,
                                            const std::string& foreignFieldName,
                                            const BSONObj& additionalFilter) {
    Value localFieldVal = input.getNestedField(localFieldPath);

    // Missing values are treated as null.
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }

    // We are constructing a query of one of the following forms:
    //   {$and: [{<foreignFieldName>: {$eq: <localFieldVal>}}, <additionalFilter>]}
    //   {$and: [{<foreignFieldName>: {$in: [<value>, <value>, ...]}}, <additionalFilter>]}
    //   {$and: [{$or: [{<foreignFieldName>: {$eq: <value>}},
    //                  {<foreignFieldName>: {$eq: <value>}}, ...]},
    //           <additionalFilter>]}
    BSONObjBuilder query;

    BSONArrayBuilder andObj(query.subarrayStart("$and"));
    BSONObjBuilder joiningObj(andObj.subobjStart());

    if (localFieldVal.isArray()) {
        // Assume an array value logically corresponds to many documents, rather than logically
        // corresponding to one document with an array value.
        const vector<Value>& localArray = localFieldVal.getArray();
        const bool containsRegex = std::any_of(
            localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; });

        if (containsRegex) {
            // A regex inside of an $in will not be treated as an equality comparison, so use an
            // $or.
            BSONObj orQuery = buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray());
            joiningObj.appendElements(orQuery);
        } else {
            // { _foreignFieldFieldName : { "$in" : localFieldValue } }
            BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
            subObj << "$in" << localFieldVal;
            subObj.doneFast();
        }
    } else {
        // { _foreignFieldFieldName : { "$eq" : localFieldValue } }
        BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
        subObj << "$eq" << localFieldVal;
        subObj.doneFast();
    }

    joiningObj.doneFast();

    BSONObjBuilder additionalFilterObj(andObj.subobjStart());
    additionalFilterObj.appendElements(additionalFilter);
    additionalFilterObj.doneFast();

    andObj.doneFast();
    return query.obj();
}
void AccumulatorAvg::processInternal(const Value& input, bool merging) {
    if (!merging) {
        Super::processInternal(input, merging);
    } else {
        // We expect an object that contains both a subtotal and a count. This is what
        // getValue(true) produced below.
        verify(input.getType() == Object);

        Value subTotal = input[subTotalName];
        verify(!subTotal.missing());
        doubleTotal += subTotal.getDouble();

        Value subCount = input[countName];
        verify(!subCount.missing());
        count += subCount.getLong();
    }
}
void DocumentSourceGroup::populate() {
    for (bool hasNext = !pSource->eof(); hasNext; hasNext = pSource->advance()) {
        Document pDocument(pSource->getCurrent());

        /* get the _id value */
        Value pId = pIdExpression->evaluate(pDocument);

        /* treat missing values the same as NULL SERVER-4674 */
        if (pId.missing())
            pId = Value(BSONNULL);

        /*
          Look for the _id value in the map; if it's not there, add a
          new entry with a blank accumulator.
        */
        vector<intrusive_ptr<Accumulator> >* pGroup;
        GroupsType::iterator it(groups.find(pId));
        if (it != groups.end()) {
            /* point at the existing accumulators */
            pGroup = &it->second;
        } else {
            /* insert a new group into the map */
            groups[pId] = vector<intrusive_ptr<Accumulator> >();

            /* find the accumulator vector (the map value) */
            it = groups.find(pId);
            pGroup = &it->second;

            /* add the accumulators */
            const size_t n = vpAccumulatorFactory.size();
            pGroup->reserve(n);
            for (size_t i = 0; i < n; ++i) {
                intrusive_ptr<Accumulator> pAccumulator((*vpAccumulatorFactory[i])(pExpCtx));
                pAccumulator->addOperand(vpExpression[i]);
                pGroup->push_back(pAccumulator);
            }
        }

        /* point at the existing key */
        // unneeded atm
        // pId = it.first;

        /* tickle all the accumulators for the group we found */
        const size_t n = pGroup->size();
        for (size_t i = 0; i < n; ++i)
            (*pGroup)[i]->evaluate(pDocument);
    }

    /* start the group iterator */
    groupsIterator = groups.begin();
    if (groupsIterator != groups.end())
        pCurrent = makeDocument(groupsIterator);
    populated = true;
}
bool DocumentSourceChangeStreamTransform::isDocumentRelevant(const Document& d) {
    invariant(
        d["op"].getType() == BSONType::String,
        str::stream()
            << "Unexpected format for entry within a transaction oplog entry: 'op' field was type "
            << typeName(d["op"].getType()));
    invariant(ValueComparator::kInstance.evaluate(d["op"] != Value("n"_sd)),
              "Unexpected noop entry within a transaction");

    Value nsField = d["ns"];
    invariant(!nsField.missing());
    return _nsRegex->PartialMatch(nsField.getString());
}
Value AccumulatorAvg::evaluate(const Document& pDocument) const {
    if (!pCtx->getDoingMerge()) {
        Super::evaluate(pDocument);
    } else {
        /*
          If we're in the router, we expect an object that contains
          both a subtotal and a count. This is what getValue() produced
          below.
        */
        Value shardOut = vpOperand[0]->evaluate(pDocument);
        verify(shardOut.getType() == Object);

        Value subTotal = shardOut[subTotalName];
        verify(!subTotal.missing());
        doubleTotal += subTotal.getDouble();

        Value subCount = shardOut[countName];
        verify(!subCount.missing());
        count += subCount.getLong();
    }

    return Value();
}
void DocumentSourceChangeStreamTransform::initializeTransactionContext(const Document& input) {
    // The only two commands we will see here are an applyOps or a commit, which both mean we
    // need to open a "transaction context" representing a group of updates that all occurred at
    // once as part of a transaction. If we already have a transaction context open, that would
    // mean we are looking at an applyOps or commit nested within an applyOps, which is not
    // allowed in the oplog.
    invariant(!_txnContext);

    Value lsid = input["lsid"];
    checkValueType(lsid, "lsid", BSONType::Object);

    Value txnNumber = input["txnNumber"];
    checkValueType(txnNumber, "txnNumber", BSONType::NumberLong);

    Value ts = input[repl::OplogEntry::kTimestampFieldName];
    Timestamp txnApplyTime = ts.getTimestamp();

    auto commandObj = input["o"].getDocument();
    Value applyOps = commandObj["applyOps"];

    if (!applyOps.missing()) {
        // An "applyOps" command represents an immediately-committed transaction. We place the
        // operations within the "applyOps" array directly into the transaction context.
        applyOps = input.getNestedField("o.applyOps");
    } else {
        invariant(!commandObj["commitTransaction"].missing());

        // A "commit" command is the second part of a transaction that has been split up into
        // two oplog entries. The lsid, txnNumber, and timestamp are in this entry, but the
        // "applyOps" array is in a previous entry, which we must look up.
        repl::OpTime opTime;
        uassertStatusOK(bsonExtractOpTimeField(input.toBson(), "prevOpTime", &opTime));

        auto applyOpsEntry =
            pExpCtx->mongoProcessInterface->lookUpOplogEntryByOpTime(pExpCtx->opCtx, opTime);
        invariant(applyOpsEntry.isCommand() &&
                  (repl::OplogEntry::CommandType::kApplyOps == applyOpsEntry.getCommandType()));
        invariant(applyOpsEntry.shouldPrepare());

        auto bsonOp = applyOpsEntry.getOperationToApply();
        invariant(BSONType::Array == bsonOp["applyOps"].type());
        applyOps = Value(bsonOp["applyOps"]);
    }

    checkValueType(applyOps, "applyOps", BSONType::Array);
    invariant(applyOps.getArrayLength() > 0);

    _txnContext.emplace(applyOps, txnApplyTime, lsid.getDocument(), txnNumber.getLong());
}
Document DocumentSourceBucketAuto::makeDocument(const Bucket& bucket) {
    const size_t nAccumulatedFields = _fieldNames.size();
    MutableDocument out(1 + nAccumulatedFields);

    out.addField("_id", Value{Document{{"min", bucket._min}, {"max", bucket._max}}});

    const bool mergingOutput = false;
    for (size_t i = 0; i < nAccumulatedFields; i++) {
        Value val = bucket._accums[i]->getValue(mergingOutput);

        // To be consistent with the $group stage, we consider "missing" to be equivalent to null
        // when evaluating accumulators.
        out.addField(_fieldNames[i], val.missing() ? Value(BSONNULL) : std::move(val));
    }
    return out.freeze();
}
bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long long depth) {
    Value _id = Value(result.getField("_id"));

    if (_visited.find(_id) != _visited.end()) {
        // We've already seen this object, don't repeat any work.
        return false;
    }

    // We have not seen this node before. If '_depthField' was specified, add the field to the
    // object.
    BSONObj fullObject =
        _depthField ? addDepthFieldToObject(_depthField->fullPath(), depth, result) : result;

    // Add the object to our '_visited' list.
    _visited[_id] = fullObject;

    // Update the size of '_visited' appropriately.
    _visitedUsageBytes += _id.getApproximateSize();
    _visitedUsageBytes += static_cast<size_t>(fullObject.objsize());

    // Add the 'connectFrom' field of 'result' into '_frontier'. If the 'connectFrom' field is an
    // array, we treat it as connecting to multiple values, so we must add each element to
    // '_frontier'.
    BSONElementSet recurseOnValues;
    dps::extractAllElementsAlongPath(result, _connectFromField.fullPath(), recurseOnValues);

    for (auto&& elem : recurseOnValues) {
        Value recurseOn = Value(elem);
        if (recurseOn.isArray()) {
            for (auto&& subElem : recurseOn.getArray()) {
                _frontier->insert(subElem);
                _frontierUsageBytes += subElem.getApproximateSize();
            }
        } else if (!recurseOn.missing()) {
            // Don't recurse on a missing value.
            _frontier->insert(recurseOn);
            _frontierUsageBytes += recurseOn.getApproximateSize();
        }
    }

    // We inserted into '_visited', so return true.
    return true;
}
void AccumulatorPush::processInternal(const Value& input, bool merging) {
    if (!merging) {
        if (!input.missing()) {
            vpValue.push_back(input);
            _memUsageBytes += input.getApproximateSize();
        }
    } else {
        // If we're merging, we need to take apart the arrays we receive and put their elements
        // into the array we are collecting. If we didn't, then we'd get an array of arrays, with
        // one array from each merge source.
        verify(input.getType() == Array);

        const vector<Value>& vec = input.getArray();
        vpValue.insert(vpValue.end(), vec.begin(), vec.end());

        for (size_t i = 0; i < vec.size(); i++) {
            _memUsageBytes += vec[i].getApproximateSize();
        }
    }
}
Document DocumentSourceGroup::makeDocument(const Value& id,
                                           const Accumulators& accums,
                                           bool mergeableOutput) {
    const size_t n = vFieldName.size();
    MutableDocument out(1 + n);

    /* add the _id field */
    out.addField("_id", id);

    /* add the rest of the fields */
    for (size_t i = 0; i < n; ++i) {
        Value val = accums[i]->getValue(mergeableOutput);
        if (val.missing()) {
            // we return null in this case so return objects are predictable
            out.addField(vFieldName[i], Value(BSONNULL));
        } else {
            out.addField(vFieldName[i], val);
        }
    }

    return out.freeze();
}
BSONObj DocumentSourceLookUp::queryForInput(const Document& input,
                                            const FieldPath& localFieldPath,
                                            const std::string& foreignFieldName) {
    Value localFieldVal = input.getNestedField(localFieldPath);

    // Missing values are treated as null.
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }

    BSONObjBuilder query;
    BSONObjBuilder subObj(query.subobjStart(foreignFieldName));

    if (localFieldVal.isArray()) {
        // Assume an array value logically corresponds to many documents, rather than logically
        // corresponding to one document with an array value.
        const vector<Value>& localArray = localFieldVal.getArray();
        const bool containsRegex = std::any_of(
            localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; });

        if (containsRegex) {
            // A regex inside of an $in will not be treated as an equality comparison, so use an
            // $or.
            return buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray());
        }

        // { _foreignFieldFieldName : { "$in" : localFieldValue } }
        subObj << "$in" << localFieldVal;
    } else {
        // { _foreignFieldFieldName : { "$eq" : localFieldValue } }
        subObj << "$eq" << localFieldVal;
    }

    subObj.doneFast();
    return query.obj();
}
void AccumulatorAddToSet::processInternal(const Value& input, bool merging) {
    if (!merging) {
        if (!input.missing()) {
            bool inserted = set.insert(input).second;
            if (inserted) {
                _memUsageBytes += input.getApproximateSize();
            }
        }
    } else {
        // If we're merging, we need to take apart the arrays we receive and put their elements
        // into the array we are collecting. If we didn't, then we'd get an array of arrays, with
        // one array from each merge source.
        verify(input.getType() == Array);

        const vector<Value>& array = input.getArray();
        for (size_t i = 0; i < array.size(); i++) {
            bool inserted = set.insert(array[i]).second;
            if (inserted) {
                _memUsageBytes += array[i].getApproximateSize();
            }
        }
    }
}
Document DocumentSourceChangeStreamTransform::applyTransformation(const Document& input) {
    // If we're executing a change stream pipeline that was forwarded from mongos, then we expect
    // it to "need merge"; that is, we expect to be executing the shards part of a split pipeline.
    // It is never correct for mongos to pass through the change stream without splitting it into
    // a merging part executed on mongos and a shards part.
    //
    // This is necessary so that mongos can correctly handle "invalidate" and "retryNeeded" change
    // notifications. See SERVER-31978 for an example of why the pipeline must be split.
    //
    // We have to check this invariant at run-time of the change stream rather than parse time,
    // since a mongos may forward a change stream in an invalid position (e.g. in a nested $lookup
    // or $facet pipeline). In this case, mongod is responsible for parsing the pipeline and
    // throwing an error without ever executing the change stream.
    if (pExpCtx->fromMongos) {
        invariant(pExpCtx->needsMerge);
    }

    MutableDocument doc;

    // Extract the fields we need.
    checkValueType(input[repl::OplogEntry::kOpTypeFieldName],
                   repl::OplogEntry::kOpTypeFieldName,
                   BSONType::String);
    string op = input[repl::OplogEntry::kOpTypeFieldName].getString();
    Value ts = input[repl::OplogEntry::kTimestampFieldName];
    Value ns = input[repl::OplogEntry::kNssFieldName];
    checkValueType(ns, repl::OplogEntry::kNssFieldName, BSONType::String);
    Value uuid = input[repl::OplogEntry::kUuidFieldName];
    std::vector<FieldPath> documentKeyFields;

    // Deal with CRUD operations and commands.
    auto opType = repl::OpType_parse(IDLParserErrorContext("ChangeStreamEntry.op"), op);

    NamespaceString nss(ns.getString());

    // Ignore commands in the oplog when looking up the document key fields since a command implies
    // that the change stream is about to be invalidated (e.g. collection drop).
    if (!uuid.missing() && opType != repl::OpTypeEnum::kCommand) {
        checkValueType(uuid, repl::OplogEntry::kUuidFieldName, BSONType::BinData);

        // We need to retrieve the document key fields if our cache does not have an entry for this
        // UUID or if the cache entry is not definitively final, indicating that the collection was
        // unsharded when the entry was last populated.
        auto it = _documentKeyCache.find(uuid.getUuid());
        if (it == _documentKeyCache.end() || !it->second.isFinal) {
            auto docKeyFields =
                pExpCtx->mongoProcessInterface->collectDocumentKeyFieldsForHostedCollection(
                    pExpCtx->opCtx, nss, uuid.getUuid());
            if (it == _documentKeyCache.end() || docKeyFields.second) {
                _documentKeyCache[uuid.getUuid()] = DocumentKeyCacheEntry(docKeyFields);
            }
        }

        documentKeyFields = _documentKeyCache.find(uuid.getUuid())->second.documentKeyFields;
    }

    Value id = input.getNestedField("o._id");
    // Non-replace updates have the _id in field "o2".
    StringData operationType;
    Value fullDocument;
    Value updateDescription;
    Value documentKey;

    switch (opType) {
        case repl::OpTypeEnum::kInsert: {
            operationType = DocumentSourceChangeStream::kInsertOpType;
            fullDocument = input[repl::OplogEntry::kObjectFieldName];
            documentKey = Value(document_path_support::extractPathsFromDoc(
                fullDocument.getDocument(), documentKeyFields));
            break;
        }
        case repl::OpTypeEnum::kDelete: {
            operationType = DocumentSourceChangeStream::kDeleteOpType;
            documentKey = input[repl::OplogEntry::kObjectFieldName];
            break;
        }
        case repl::OpTypeEnum::kUpdate: {
            if (id.missing()) {
                operationType = DocumentSourceChangeStream::kUpdateOpType;
                checkValueType(input[repl::OplogEntry::kObjectFieldName],
                               repl::OplogEntry::kObjectFieldName,
                               BSONType::Object);
                Document opObject = input[repl::OplogEntry::kObjectFieldName].getDocument();
                Value updatedFields = opObject["$set"];
                Value removedFields = opObject["$unset"];

                // Extract the field names of the $unset document.
                vector<Value> removedFieldsVector;
                if (removedFields.getType() == BSONType::Object) {
                    auto iter = removedFields.getDocument().fieldIterator();
                    while (iter.more()) {
                        removedFieldsVector.push_back(Value(iter.next().first));
                    }
                }
                updateDescription = Value(Document{
                    {"updatedFields", updatedFields.missing() ? Value(Document()) : updatedFields},
                    {"removedFields", removedFieldsVector}});
            } else {
                operationType = DocumentSourceChangeStream::kReplaceOpType;
                fullDocument = input[repl::OplogEntry::kObjectFieldName];
            }
            documentKey = input[repl::OplogEntry::kObject2FieldName];
            break;
        }
        case repl::OpTypeEnum::kCommand: {
            if (!input.getNestedField("o.drop").missing()) {
                operationType = DocumentSourceChangeStream::kDropCollectionOpType;

                // The "o.drop" field will contain the actual collection name.
                nss = NamespaceString(nss.db(), input.getNestedField("o.drop").getString());
            } else if (!input.getNestedField("o.renameCollection").missing()) {
                operationType = DocumentSourceChangeStream::kRenameCollectionOpType;

                // The "o.renameCollection" field contains the namespace of the original
                // collection.
                nss = NamespaceString(input.getNestedField("o.renameCollection").getString());

                // The "o.to" field contains the target namespace for the rename.
                const auto renameTargetNss =
                    NamespaceString(input.getNestedField("o.to").getString());
                doc.addField(
                    DocumentSourceChangeStream::kRenameTargetNssField,
                    Value(Document{{"db", renameTargetNss.db()}, {"coll", renameTargetNss.coll()}}));
            } else if (!input.getNestedField("o.dropDatabase").missing()) {
                operationType = DocumentSourceChangeStream::kDropDatabaseOpType;

                // Extract the database name from the namespace field and leave the collection name
                // empty.
                nss = NamespaceString(nss.db());
            } else {
                // All other commands will invalidate the stream.
                operationType = DocumentSourceChangeStream::kInvalidateOpType;
            }

            // Make sure the result doesn't have a document key.
            documentKey = Value();
            break;
        }
        case repl::OpTypeEnum::kNoop: {
            operationType = DocumentSourceChangeStream::kNewShardDetectedOpType;
            // Generate a fake document Id for NewShardDetected operation so that we can resume
            // after this operation.
            documentKey = Value(Document{{DocumentSourceChangeStream::kIdField,
                                          input[repl::OplogEntry::kObject2FieldName]}});
            break;
        }
        default: { MONGO_UNREACHABLE; }
    }

    // UUID should always be present except for invalidate and dropDatabase entries.
    if (operationType != DocumentSourceChangeStream::kInvalidateOpType &&
        operationType != DocumentSourceChangeStream::kDropDatabaseOpType) {
        invariant(!uuid.missing(), "Saw a CRUD op without a UUID");
    }

    // Note that 'documentKey' and/or 'uuid' might be missing, in which case they will not appear
    // in the output.
    auto resumeTokenData = getResumeToken(ts, uuid, documentKey);
    auto resumeToken = ResumeToken(resumeTokenData).toDocument();

    // Add some additional fields only relevant to transactions.
    if (_txnContext) {
        doc.addField(DocumentSourceChangeStream::kTxnNumberField,
                     Value(static_cast<long long>(_txnContext->txnNumber)));
        doc.addField(DocumentSourceChangeStream::kLsidField, Value(_txnContext->lsid));
    }

    doc.addField(DocumentSourceChangeStream::kIdField, Value(resumeToken));
    doc.addField(DocumentSourceChangeStream::kOperationTypeField, Value(operationType));
    doc.addField(DocumentSourceChangeStream::kClusterTimeField,
                 Value(resumeTokenData.clusterTime));

    // We set the resume token as the document's sort key in both the sharded and non-sharded
    // cases, since we will subsequently rely upon it to generate a correct postBatchResumeToken.
    // TODO SERVER-38539: when returning results for merging, we first check whether 'mergeByPBRT'
    // has been set. If not, then the request was sent from an older mongoS which cannot merge by
    // raw resume tokens, and we must use the old sort key format. This check, and the
    // 'mergeByPBRT' flag, are no longer necessary in 4.4; all change streams will be merged by
    // resume token.
    if (pExpCtx->needsMerge && !pExpCtx->mergeByPBRT) {
        doc.setSortKeyMetaField(BSON("" << ts << "" << uuid << "" << documentKey));
    } else {
        doc.setSortKeyMetaField(resumeToken.toBson());
    }

    // "invalidate" and "newShardDetected" entries have fewer fields.
    if (operationType == DocumentSourceChangeStream::kInvalidateOpType ||
        operationType == DocumentSourceChangeStream::kNewShardDetectedOpType) {
        return doc.freeze();
    }

    doc.addField(DocumentSourceChangeStream::kFullDocumentField, fullDocument);
    doc.addField(DocumentSourceChangeStream::kNamespaceField,
                 operationType == DocumentSourceChangeStream::kDropDatabaseOpType
                     ? Value(Document{{"db", nss.db()}})
                     : Value(Document{{"db", nss.db()}, {"coll", nss.coll()}}));
    doc.addField(DocumentSourceChangeStream::kDocumentKeyField, documentKey);

    // Note that 'updateDescription' might be the 'missing' value, in which case it will not be
    // serialized.
    doc.addField("updateDescription", updateDescription);
    return doc.freeze();
}
Document DocumentSourceChangeStream::Transformation::applyTransformation(const Document& input) {
    MutableDocument doc;

    // Extract the fields we need.
    checkValueType(input[repl::OplogEntry::kOpTypeFieldName],
                   repl::OplogEntry::kOpTypeFieldName,
                   BSONType::String);
    string op = input[repl::OplogEntry::kOpTypeFieldName].getString();
    Value ts = input[repl::OplogEntry::kTimestampFieldName];
    Value ns = input[repl::OplogEntry::kNamespaceFieldName];
    checkValueType(ns, repl::OplogEntry::kNamespaceFieldName, BSONType::String);
    NamespaceString nss(ns.getString());
    Value id = input.getNestedField("o._id");
    // Non-replace updates have the _id in field "o2".
    Value documentId = id.missing() ? input.getNestedField("o2._id") : id;
    StringData operationType;
    Value fullDocument;
    Value updateDescription;

    // Deal with CRUD operations and commands.
    auto opType = repl::OpType_parse(IDLParserErrorContext("ChangeStreamEntry.op"), op);
    switch (opType) {
        case repl::OpTypeEnum::kInsert: {
            operationType = kInsertOpType;
            fullDocument = input[repl::OplogEntry::kObjectFieldName];
            break;
        }
        case repl::OpTypeEnum::kDelete: {
            operationType = kDeleteOpType;
            break;
        }
        case repl::OpTypeEnum::kUpdate: {
            if (id.missing()) {
                operationType = kUpdateOpType;
                checkValueType(input[repl::OplogEntry::kObjectFieldName],
                               repl::OplogEntry::kObjectFieldName,
                               BSONType::Object);
                Document opObject = input[repl::OplogEntry::kObjectFieldName].getDocument();
                Value updatedFields = opObject["$set"];
                Value removedFields = opObject["$unset"];

                // Extract the field names of the $unset document.
                vector<Value> removedFieldsVector;
                if (removedFields.getType() == BSONType::Object) {
                    auto iter = removedFields.getDocument().fieldIterator();
                    while (iter.more()) {
                        removedFieldsVector.push_back(Value(iter.next().first));
                    }
                }
                updateDescription = Value(Document{
                    {"updatedFields", updatedFields.missing() ? Value(Document()) : updatedFields},
                    {"removedFields", removedFieldsVector}});
            } else {
                operationType = kReplaceOpType;
                fullDocument = input[repl::OplogEntry::kObjectFieldName];
            }
            break;
        }
        case repl::OpTypeEnum::kCommand: {
            operationType = kInvalidateOpType;
            // Make sure the result doesn't have a document id.
            documentId = Value();
            break;
        }
        default: { MONGO_UNREACHABLE; }
    }

    // Construct the result document.
    Value documentKey;
    if (!documentId.missing()) {
        documentKey = Value(Document{{kIdField, documentId}});
    }

    // Note that 'documentKey' might be missing, in which case it will not appear in the output.
    Document resumeToken{{kClusterTimeField, Document{{kTimestampField, ts}}},
                         {kNamespaceField, ns},
                         {kDocumentKeyField, documentKey}};
    doc.addField(kIdField, Value(resumeToken));
    doc.addField(kOperationTypeField, Value(operationType));
    doc.addField(kFullDocumentField, fullDocument);

    // The "invalidate" entry has fewer fields.
    if (opType == repl::OpTypeEnum::kCommand) {
        return doc.freeze();
    }

    doc.addField(kNamespaceField, Value(Document{{"db", nss.db()}, {"coll", nss.coll()}}));
    doc.addField(kDocumentKeyField, documentKey);

    // Note that 'updateDescription' might be the 'missing' value, in which case it will not be
    // serialized.
    doc.addField("updateDescription", updateDescription);
    return doc.freeze();
}
void DocumentSource::serializeToArray(vector<Value>& array, bool explain) const {
    Value entry = serialize(explain);
    if (!entry.missing()) {
        array.push_back(entry);
    }
}
BSONObj DocumentSourceLookUp::makeMatchStageFromInput(const Document& input,
                                                      const FieldPath& localFieldPath,
                                                      const std::string& foreignFieldName,
                                                      const BSONObj& additionalFilter) {
    Value localFieldVal = input.getNestedField(localFieldPath);

    // Missing values are treated as null.
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }

    // We construct a query of one of the following forms, depending on the contents of
    // 'localFieldVal'.
    //
    //   {$and: [{<foreignFieldName>: {$eq: <localFieldVal>}}, <additionalFilter>]}
    //     if 'localFieldVal' isn't an array value.
    //
    //   {$and: [{<foreignFieldName>: {$in: [<value>, <value>, ...]}}, <additionalFilter>]}
    //     if 'localFieldVal' is an array value but doesn't contain any elements that are regular
    //     expressions.
    //
    //   {$and: [{$or: [{<foreignFieldName>: {$eq: <value>}},
    //                  {<foreignFieldName>: {$eq: <value>}}, ...]},
    //           <additionalFilter>]}
    //     if 'localFieldVal' is an array value and it contains at least one element that is a
    //     regular expression.

    // We wrap the query in a $match so that it can be parsed into a DocumentSourceMatch when
    // constructing a pipeline to execute.
    BSONObjBuilder match;
    BSONObjBuilder query(match.subobjStart("$match"));

    BSONArrayBuilder andObj(query.subarrayStart("$and"));
    BSONObjBuilder joiningObj(andObj.subobjStart());

    if (localFieldVal.isArray()) {
        // A $lookup on an array value corresponds to finding documents in the foreign collection
        // that have a value of any of the elements in the array value, rather than finding
        // documents that have a value equal to the entire array value. These semantics are
        // automatically provided to us by using the $in query operator.
        const vector<Value>& localArray = localFieldVal.getArray();
        const bool containsRegex = std::any_of(
            localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; });

        if (containsRegex) {
            // A regular expression inside the $in query operator will perform pattern matching on
            // any string values. Since we want regular expressions to only match other RegEx
            // types, we write the query as a $or of equality comparisons instead.
            BSONObj orQuery = buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray());
            joiningObj.appendElements(orQuery);
        } else {
            // { <foreignFieldName> : { "$in" : <localFieldVal> } }
            BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
            subObj << "$in" << localFieldVal;
            subObj.doneFast();
        }
    } else {
        // { <foreignFieldName> : { "$eq" : <localFieldVal> } }
        BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
        subObj << "$eq" << localFieldVal;
        subObj.doneFast();
    }

    joiningObj.doneFast();

    BSONObjBuilder additionalFilterObj(andObj.subobjStart());
    additionalFilterObj.appendElements(additionalFilter);
    additionalFilterObj.doneFast();

    andObj.doneFast();
    query.doneFast();
    return match.obj();
}
void DocumentSourceGroup::populate() {
    const size_t numAccumulators = vpAccumulatorFactory.size();
    dassert(numAccumulators == vpExpression.size());

    // pushed to on spill()
    vector<shared_ptr<Sorter<Value, Value>::Iterator> > sortedFiles;
    int memoryUsageBytes = 0;

    // This loop consumes all input from pSource and buckets it based on pIdExpression.
    while (boost::optional<Document> input = pSource->getNext()) {
        if (memoryUsageBytes > _maxMemoryUsageBytes) {
            uassert(16945,
                    "Exceeded memory limit for $group, but didn't allow external sort."
                    " Pass allowDiskUse:true to opt in.",
                    _extSortAllowed);
            sortedFiles.push_back(spill());
            memoryUsageBytes = 0;
        }

        _variables->setRoot(*input);

        /* get the _id value */
        Value id = computeId(_variables.get());

        /* treat missing values the same as NULL SERVER-4674 */
        if (id.missing())
            id = Value(BSONNULL);

        /*
          Look for the _id value in the map; if it's not there, add a
          new entry with a blank accumulator.
        */
        const size_t oldSize = groups.size();
        vector<intrusive_ptr<Accumulator> >& group = groups[id];
        const bool inserted = groups.size() != oldSize;

        if (inserted) {
            memoryUsageBytes += id.getApproximateSize();

            // Add the accumulators.
            group.reserve(numAccumulators);
            for (size_t i = 0; i < numAccumulators; i++) {
                group.push_back(vpAccumulatorFactory[i]());
            }
        } else {
            for (size_t i = 0; i < numAccumulators; i++) {
                // Subtract old mem usage. New usage added back after processing.
                memoryUsageBytes -= group[i]->memUsageForSorter();
            }
        }

        /* tickle all the accumulators for the group we found */
        dassert(numAccumulators == group.size());
        for (size_t i = 0; i < numAccumulators; i++) {
            group[i]->process(vpExpression[i]->evaluate(_variables.get()), _doingMerge);
            memoryUsageBytes += group[i]->memUsageForSorter();
        }

        // We are done with the ROOT document so release it.
        _variables->clearRoot();

        DEV {
            // In debug mode, spill every time we have a duplicate id to stress merge logic.
            if (!inserted                   // is a dup
                && !pExpCtx->inRouter       // can't spill to disk in router
                && !_extSortAllowed         // don't change behavior when testing external sort
                && sortedFiles.size() < 20  // don't open too many FDs
                ) {
                sortedFiles.push_back(spill());
            }
        }
    }

    // These blocks do any final steps necessary to prepare to output results.
    if (!sortedFiles.empty()) {
        _spilled = true;
        if (!groups.empty()) {
            sortedFiles.push_back(spill());
        }

        // We won't be using groups again so free its memory.
        GroupsMap().swap(groups);

        _sorterIterator.reset(
            Sorter<Value, Value>::Iterator::merge(sortedFiles, SortOptions(), SorterComparator()));

        // prepare current to accumulate data
        _currentAccumulators.reserve(numAccumulators);
        for (size_t i = 0; i < numAccumulators; i++) {
            _currentAccumulators.push_back(vpAccumulatorFactory[i]());
        }

        verify(_sorterIterator->more());  // we put data in, we should get something out
        _firstPartOfNextGroup = _sorterIterator->next();
    } else {
        // start the group iterator
        groupsIterator = groups.begin();
    }

    populated = true;
}