Value DocumentSourceBucketAuto::extractKey(const Document& doc) {
    if (!_groupByExpression) {
        return Value(BSONNULL);
    }

    _variables->setRoot(doc);
    Value key = _groupByExpression->evaluate(_variables.get());

    if (_granularityRounder) {
        uassert(40258,
                str::stream() << "$bucketAuto can specify a 'granularity' with numeric boundaries "
                                 "only, but found a value with type: "
                              << typeName(key.getType()),
                key.numeric());

        double keyValue = key.coerceToDouble();
        uassert(
            40259,
            "$bucketAuto can specify a 'granularity' with numeric boundaries only, but found a NaN",
            !std::isnan(keyValue));

        uassert(40260,
                "$bucketAuto can specify a 'granularity' with non-negative numbers only, but found "
                "a negative number",
                keyValue >= 0.0);
    }

    // To be consistent with the $group stage, we consider "missing" to be equivalent to null when
    // grouping values into buckets.
    return key.missing() ? Value(BSONNULL) : std::move(key);
}
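A quick sketch of how the null-for-missing rule above behaves (stage spec and field names are hypothetical, not taken from the source):

// Hypothetical: {$bucketAuto: {groupBy: "$price", buckets: 2}}.
//   {price: 4}  -> key is Value(4)
//   {qty: 1}    -> "$price" is missing, so the key becomes Value(BSONNULL),
//                  exactly as $group treats a missing _id
// If 'granularity' is also set, a NaN key fails uassert 40259 and a negative
// key fails uassert 40260 before any boundary rounding happens.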
    Document applyTransformation(Document input) final {
        // Extract subdocument in the form of a Value.
        _variables->setRoot(input);
        Value newRoot = _newRoot->evaluate(_variables.get());

        // The newRoot expression must evaluate to a valid Value.
        uassert(
            40232,
            str::stream() << "'newRoot' argument to $replaceRoot stage must evaluate to a value "
                             "for the input document "
                          << input.toString()
                          << "; try ensuring that your field path(s) exist by prepending a "
                          << "{$match: {<path>: {$exists: true}}} aggregation stage.",
            !newRoot.missing());

        // The newRoot expression, if it exists, must evaluate to an object.
        uassert(
            40228,
            str::stream()
                << "'newRoot' argument to $replaceRoot stage must evaluate to an object, but got "
                << typeName(newRoot.getType())
                << "; try ensuring that it evaluates to an object by prepending a "
                << "{$match: {<path>: {$type: 'object'}}} aggregation stage.",
            newRoot.getType() == Object);

        // Turn the value into a document.
        return newRoot.getDocument();
    }
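A minimal illustration of the success path and the two failure modes above (documents are hypothetical):

// Hypothetical: {$replaceRoot: {newRoot: "$subdoc"}}.
//   {_id: 1, subdoc: {a: 1}}  -> output document {a: 1}
//   {_id: 2}                  -> newRoot is missing, uassert 40232
//   {_id: 3, subdoc: "str"}   -> newRoot is a string, uassert 40228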
ResumeTokenData DocumentSourceChangeStreamTransform::getResumeToken(Value ts,
                                                                    Value uuid,
                                                                    Value documentKey) {
    ResumeTokenData resumeTokenData;
    if (_txnContext) {
        // We're in the middle of unwinding an 'applyOps'.

        // Use the clusterTime from the higher-level applyOps entry.
        resumeTokenData.clusterTime = _txnContext->clusterTime;

        // 'pos' points to the _next_ applyOps index, so we must subtract one to get the index of
        // the entry being examined right now.
        invariant(_txnContext->pos >= 1);
        resumeTokenData.applyOpsIndex = _txnContext->pos - 1;
    } else {
        resumeTokenData.clusterTime = ts.getTimestamp();
        resumeTokenData.applyOpsIndex = 0;
    }

    resumeTokenData.documentKey = documentKey;
    if (!uuid.missing())
        resumeTokenData.uuid = uuid.getUuid();

    if (_fcv < ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
        resumeTokenData.version = 0;
    }

    return resumeTokenData;
}
BSONObj DocumentSourceLookUp::queryForInput(const Document& input) const {
    Value localFieldVal = input.getNestedField(_localField);
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }
    return BSON(_foreignFieldFieldName << localFieldVal);
}
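For intuition, the filter this builds under hypothetical values (assuming _localField is "a" and _foreignFieldFieldName is "b"):

// Hypothetical shapes produced by queryForInput:
//   input {a: 5}  -> {b: 5}
//   input {}      -> {b: null}, which in MongoDB matches foreign documents
//                    where "b" is null or absent entirely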
Value DocumentSourceRedact::redactValue(const Value& in) {
    const BSONType valueType = in.getType();
    if (valueType == Object) {
        _variables->setValue(_currentId, in);
        const boost::optional<Document> result = redactObject();
        if (result) {
            return Value(*result);
        } else {
            return Value();
        }
    } else if (valueType == Array) {
        // TODO: don't copy if possible
        vector<Value> newArr;
        const vector<Value>& arr = in.getArray();
        for (size_t i = 0; i < arr.size(); i++) {
            if (arr[i].getType() == Object || arr[i].getType() == Array) {
                const Value toAdd = redactValue(arr[i]);
                if (!toAdd.missing()) {
                    newArr.push_back(toAdd);
                }
            } else {
                newArr.push_back(arr[i]);
            }
        }
        return Value(std::move(newArr));
    } else {
        return in;
    }
}
void DocumentSource::serializeToArray(vector<Value>& array,
                                      boost::optional<ExplainOptions::Verbosity> explain) const {
    Value entry = serialize(explain);
    if (!entry.missing()) {
        array.push_back(entry);
    }
}
boost::optional<Document> DocumentSourceRedact::redactObject() {
    const Value expressionResult = _expression->evaluate(_variables.get());

    if (expressionResult == keepVal) {
        return _variables->getDocument(_currentId);
    } else if (expressionResult == pruneVal) {
        return boost::optional<Document>();
    } else if (expressionResult == descendVal) {
        const Document in = _variables->getDocument(_currentId);
        MutableDocument out;
        out.copyMetaDataFrom(in);
        FieldIterator fields(in);
        while (fields.more()) {
            const Document::FieldPair field(fields.next());

            // This changes CURRENT, so don't read from _variables after this point.
            const Value val = redactValue(field.second);
            if (!val.missing()) {
                out.addField(field.first, val);
            }
        }
        return out.freeze();
    } else {
        uasserted(17053,
                  str::stream() << "$redact's expression should not return anything "
                                << "aside from the variables $$KEEP, $$DESCEND, and "
                                << "$$PRUNE, but returned "
                                << expressionResult.toString());
    }
}
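A sketch of the three control values this dispatches on, under a hypothetical stage spec:

// Hypothetical: {$redact: {$cond: [{$eq: ["$level", 5]}, "$$PRUNE", "$$DESCEND"]}}.
//   $$KEEP    -> return the current (sub)document as-is; no recursion.
//   $$PRUNE   -> return boost::none; the caller drops the field entirely.
//   $$DESCEND -> copy scalar fields, but re-evaluate the expression against
//                every object/array value via redactValue().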
ResumeTokenData DocumentSourceChangeStreamTransform::getResumeToken(Value ts,
                                                                    Value uuid,
                                                                    Value documentKey) {
    ResumeTokenData resumeTokenData;
    if (_txnContext) {
        // We're in the middle of unwinding an 'applyOps'.

        // Use the clusterTime from the higher-level applyOps entry.
        resumeTokenData.clusterTime = _txnContext->clusterTime;

        // 'pos' points to the _next_ applyOps index, so we must subtract one to get the index of
        // the entry being examined right now.
        invariant(_txnContext->pos >= 1);
        resumeTokenData.applyOpsIndex = _txnContext->pos - 1;
    } else {
        resumeTokenData.clusterTime = ts.getTimestamp();
        resumeTokenData.applyOpsIndex = 0;
    }

    resumeTokenData.documentKey = documentKey;
    if (!uuid.missing())
        resumeTokenData.uuid = uuid.getUuid();

    // If 'needsMerge' is true, 'mergeByPBRT' is false, and FCV is less than 4.2, then we are
    // running on a sharded cluster that is mid-upgrade, and so we generate v0 resume tokens.
    // Otherwise, we always generate v1 resume tokens whether the FCV is 4.0 or 4.2.
    if (pExpCtx->needsMerge && !pExpCtx->mergeByPBRT &&
        _fcv < ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
        resumeTokenData.version = 0;
    }

    return resumeTokenData;
}
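A worked example of the applyOps bookkeeping above (transaction is hypothetical):

// Hypothetical: unwinding an applyOps entry with three operations at cluster
// time T. _txnContext->pos has already been advanced past each entry when it
// is returned, so while the k-th operation (1-based) is being examined,
// pos == k and the token records applyOpsIndex == k - 1; all three tokens
// share the single clusterTime T.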
boost::optional<Document> DocumentSourceRedact::redactObject(const Variables& in) {
    const Value expressionResult = _expression->evaluate(in);

    if (expressionResult == keepVal) {
        return in.current.getDocument();
    }
    else if (expressionResult == pruneVal) {
        return boost::optional<Document>();
    }
    else if (expressionResult == descendVal) {
        MutableDocument out;
        FieldIterator fields(in.current.getDocument());
        while (fields.more()) {
            const Document::FieldPair field(fields.next());
            const Value val = redactValue(in, field.second);
            if (!val.missing()) {
                out.addField(field.first, val);
            }
        }
        return out.freeze();
    }
    else {
        uasserted(17053, str::stream() << "$redact's expression should not return anything "
                  << "aside from the variables $$KEEP, $$DESCEND, and "
                  << "$$PRUNE, but returned "
                  << expressionResult.toString());
    }
}
Value DocumentSourceRedact::redactValue(const Variables& vars, const Value& in) {
    const BSONType valueType = in.getType();
    if (valueType == Object) {
        Variables recurse = vars;
        recurse.current = in;
        const boost::optional<Document> result = redactObject(recurse);
        if (result) {
            return Value(*result);
        }
        else {
            return Value();
        }
    }
    else if (valueType == Array) {
        // TODO: don't copy if possible
        vector<Value> newArr;
        const vector<Value>& arr = in.getArray();
        for (size_t i = 0; i < arr.size(); i++) {
            if (arr[i].getType() == Object || arr[i].getType() == Array) {
                const Value toAdd = redactValue(vars, arr[i]);
                if (!toAdd.missing()) {
                    newArr.push_back(toAdd);
                }
            }
            else {
                // Pass scalars through unchanged; without this branch they
                // would be silently dropped from the redacted array.
                newArr.push_back(arr[i]);
            }
        }
        return Value::consume(newArr);
    }
    else {
        return in;
    }
}
    Document DocumentSource::documentFromBsonWithDeps(const BSONObj& bson,
                                                      const ParsedDeps& neededFields) {
        MutableDocument md(neededFields.size());

        BSONObjIterator it(bson);
        while (it.more()) {
            BSONElement bsonElement(it.next());
            StringData fieldName = bsonElement.fieldNameStringData();
            Value isNeeded = neededFields[fieldName];

            if (isNeeded.missing())
                continue;

            if (isNeeded.getType() == Bool) {
                md.addField(fieldName, Value(bsonElement));
                continue;
            }

            dassert(isNeeded.getType() == Object);

            if (bsonElement.type() == Object) {
                Document sub = documentFromBsonWithDeps(bsonElement.embeddedObject(),
                                                        isNeeded.getDocument());
                md.addField(fieldName, Value(sub));
            }

            if (bsonElement.type() == Array) {
                md.addField(fieldName, arrayHelper(bsonElement.embeddedObject(),
                                                   isNeeded.getDocument()));
            }
        }

        return md.freeze();
    }
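A small worked example of the dependency-driven conversion (inputs are hypothetical):

// Hypothetical inputs:
//   bson         = {a: 1, b: {c: 2, d: 3}, e: 4}
//   neededFields = {a: true, b: {c: true}}
// Result: Document {a: 1, b: {c: 2}}. "e" is skipped because its lookup in
// neededFields is missing; "b" recurses with the sub-spec and keeps only "c".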
BSONObj DocumentSourceLookUp::queryForInput(const Document& input,
                                            const FieldPath& localFieldPath,
                                            const std::string& foreignFieldName,
                                            const BSONObj& additionalFilter) {
    Value localFieldVal = input.getNestedField(localFieldPath);

    // Missing values are treated as null.
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }

    // We are constructing a query of one of the following forms:
    // {$and: [{<foreignFieldName>: {$eq: <localFieldVal>}}, <additionalFilter>]}
    // {$and: [{<foreignFieldName>: {$in: [<value>, <value>, ...]}}, <additionalFilter>]}
    // {$and: [{$or: [{<foreignFieldName>: {$eq: <value>}},
    //                {<foreignFieldName>: {$eq: <value>}}, ...]},
    //         <additionalFilter>]}

    BSONObjBuilder query;

    BSONArrayBuilder andObj(query.subarrayStart("$and"));
    BSONObjBuilder joiningObj(andObj.subobjStart());

    if (localFieldVal.isArray()) {
        // Assume an array value logically corresponds to many documents, rather than logically
        // corresponding to one document with an array value.
        const vector<Value>& localArray = localFieldVal.getArray();
        const bool containsRegex = std::any_of(
            localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; });

        if (containsRegex) {
            // A regex inside of an $in will not be treated as an equality comparison, so use an
            // $or.
            BSONObj orQuery = buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray());
            joiningObj.appendElements(orQuery);
        } else {
            // { <foreignFieldName> : { "$in" : <localFieldVal> } }
            BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
            subObj << "$in" << localFieldVal;
            subObj.doneFast();
        }
    } else {
        // { <foreignFieldName> : { "$eq" : <localFieldVal> } }
        BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
        subObj << "$eq" << localFieldVal;
        subObj.doneFast();
    }

    joiningObj.doneFast();

    BSONObjBuilder additionalFilterObj(andObj.subobjStart());
    additionalFilterObj.appendElements(additionalFilter);
    additionalFilterObj.doneFast();

    andObj.doneFast();

    return query.obj();
}
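Concrete (hypothetical) instantiations of the three query forms listed in the comments above, assuming foreignFieldName is "b" and additionalFilter is {x: {$gt: 0}}:

// Hypothetical outputs of queryForInput:
//   local value 5:          {$and: [{b: {$eq: 5}}, {x: {$gt: 0}}]}
//   local value [1, 2]:     {$and: [{b: {$in: [1, 2]}}, {x: {$gt: 0}}]}
//   local value [1, /^a/]:  {$and: [{$or: [{b: {$eq: 1}}, {b: {$eq: /^a/}}]},
//                                   {x: {$gt: 0}}]}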
    void AccumulatorAvg::processInternal(const Value& input, bool merging) {
        if (!merging) {
            Super::processInternal(input, merging);
        }
        else {
            // We expect an object that contains both a subtotal and a count.
            // This is what getValue(true) produced below.
            verify(input.getType() == Object);

            Value subTotal = input[subTotalName];
            verify(!subTotal.missing());
            doubleTotal += subTotal.getDouble();
                
            Value subCount = input[countName];
            verify(!subCount.missing());
            count += subCount.getLong();
        }
    }
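A sketch of the shard/merge handshake this relies on (field values hypothetical):

// Hypothetical: two shards emit partials via getValue(true), e.g.
//   {subTotal: 30.0, count: 4} and {subTotal: 12.0, count: 2}.
// The merging accumulator above folds them into doubleTotal = 42.0 and
// count = 6, so the final average is 42.0 / 6 = 7.0.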
    void DocumentSourceGroup::populate() {
        for(bool hasNext = !pSource->eof(); hasNext;
                hasNext = pSource->advance()) {
            Document pDocument(pSource->getCurrent());

            /* get the _id value */
            Value pId = pIdExpression->evaluate(pDocument);

            /* treat missing values the same as NULL SERVER-4674 */
            if (pId.missing())
                pId = Value(BSONNULL);

            /*
              Look for the _id value in the map; if it's not there, add a
              new entry with a blank accumulator.
            */
            vector<intrusive_ptr<Accumulator> > *pGroup;
            GroupsType::iterator it(groups.find(pId));
            if (it != groups.end()) {
                /* point at the existing accumulators */
                pGroup = &it->second;
            }
            else {
                /* insert a new group into the map */
                groups[pId] = vector<intrusive_ptr<Accumulator> >();

                /* find the accumulator vector (the map value) */
                it = groups.find(pId);
                pGroup = &it->second;

                /* add the accumulators */
                const size_t n = vpAccumulatorFactory.size();
                pGroup->reserve(n);
                for(size_t i = 0; i < n; ++i) {
                    intrusive_ptr<Accumulator> pAccumulator(
                        (*vpAccumulatorFactory[i])(pExpCtx));
                    pAccumulator->addOperand(vpExpression[i]);
                    pGroup->push_back(pAccumulator);
                }
            }

            /* point at the existing key */
            // unneeded atm // pId = it.first;

            /* tickle all the accumulators for the group we found */
            const size_t n = pGroup->size();
            for(size_t i = 0; i < n; ++i)
                (*pGroup)[i]->evaluate(pDocument);
        }

        /* start the group iterator */
        groupsIterator = groups.begin();
        if (groupsIterator != groups.end())
            pCurrent = makeDocument(groupsIterator);
        populated = true;
    }
bool DocumentSourceChangeStreamTransform::isDocumentRelevant(const Document& d) {
    invariant(
        d["op"].getType() == BSONType::String,
        str::stream()
            << "Unexpected format for entry within a transaction oplog entry: 'op' field was type "
            << typeName(d["op"].getType()));
    invariant(ValueComparator::kInstance.evaluate(d["op"] != Value("n"_sd)),
              "Unexpected noop entry within a transaction");

    Value nsField = d["ns"];
    invariant(!nsField.missing());

    return _nsRegex->PartialMatch(nsField.getString());
}
    Value AccumulatorAvg::evaluate(const Document& pDocument) const {
        if (!pCtx->getDoingMerge()) {
            Super::evaluate(pDocument);
        }
        else {
            /*
              If we're in the router, we expect an object that contains
              both a subtotal and a count.  This is what getValue() produced
              below.
             */
            Value shardOut = vpOperand[0]->evaluate(pDocument);
            verify(shardOut.getType() == Object);

            Value subTotal = shardOut[subTotalName];
            verify(!subTotal.missing());
            doubleTotal += subTotal.getDouble();
                
            Value subCount = shardOut[countName];
            verify(!subCount.missing());
            count += subCount.getLong();
        }

        return Value();
    }
void DocumentSourceChangeStreamTransform::initializeTransactionContext(const Document& input) {
    // The only two commands we will see here are an applyOps or a commit, which both mean we
    // need to open a "transaction context" representing a group of updates that all occurred at
    // once as part of a transaction. If we already have a transaction context open, that would
    // mean we are looking at an applyOps or commit nested within an applyOps, which is not
    // allowed in the oplog.
    invariant(!_txnContext);

    Value lsid = input["lsid"];
    checkValueType(lsid, "lsid", BSONType::Object);

    Value txnNumber = input["txnNumber"];
    checkValueType(txnNumber, "txnNumber", BSONType::NumberLong);

    Value ts = input[repl::OplogEntry::kTimestampFieldName];
    Timestamp txnApplyTime = ts.getTimestamp();

    auto commandObj = input["o"].getDocument();
    Value applyOps = commandObj["applyOps"];
    if (!applyOps.missing()) {
        // An "applyOps" command represents an immediately-committed transaction. We place the
        // operations within the "applyOps" array directly into the transaction context.
        applyOps = input.getNestedField("o.applyOps");
    } else {
        invariant(!commandObj["commitTransaction"].missing());

        // A "commit" command is the second part of a transaction that has been split up into
        // two oplog entries. The lsid, txnNumber, and timestamp are in this entry, but the
        // "applyOps" array is in a previous entry, which we must look up.
        repl::OpTime opTime;
        uassertStatusOK(bsonExtractOpTimeField(input.toBson(), "prevOpTime", &opTime));

        auto applyOpsEntry =
            pExpCtx->mongoProcessInterface->lookUpOplogEntryByOpTime(pExpCtx->opCtx, opTime);
        invariant(applyOpsEntry.isCommand() &&
                  (repl::OplogEntry::CommandType::kApplyOps == applyOpsEntry.getCommandType()));
        invariant(applyOpsEntry.shouldPrepare());

        auto bsonOp = applyOpsEntry.getOperationToApply();
        invariant(BSONType::Array == bsonOp["applyOps"].type());
        applyOps = Value(bsonOp["applyOps"]);
    }

    checkValueType(applyOps, "applyOps", BSONType::Array);
    invariant(applyOps.getArrayLength() > 0);

    _txnContext.emplace(applyOps, txnApplyTime, lsid.getDocument(), txnNumber.getLong());
}
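The two oplog layouts this handles, sketched with hypothetical entries:

// Hypothetical oplog shapes handled above:
//   unprepared txn: {op: "c", lsid: {...}, txnNumber: 1, ts: T,
//                    o: {applyOps: [op1, op2]}}
//     -> the applyOps array is taken directly from this entry.
//   prepared txn:   {op: "c", lsid: {...}, txnNumber: 1, ts: T,
//                    o: {commitTransaction: 1}, prevOpTime: P}
//     -> the applyOps array is looked up from the earlier (prepare) oplog
//        entry at OpTime P.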
Document DocumentSourceBucketAuto::makeDocument(const Bucket& bucket) {
    const size_t nAccumulatedFields = _fieldNames.size();
    MutableDocument out(1 + nAccumulatedFields);

    out.addField("_id", Value{Document{{"min", bucket._min}, {"max", bucket._max}}});

    const bool mergingOutput = false;
    for (size_t i = 0; i < nAccumulatedFields; i++) {
        Value val = bucket._accums[i]->getValue(mergingOutput);

        // To be consistent with the $group stage, we consider "missing" to be equivalent to null
        // when evaluating accumulators.
        out.addField(_fieldNames[i], val.missing() ? Value(BSONNULL) : std::move(val));
    }
    return out.freeze();
}
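The output shape this produces, under a hypothetical stage spec:

// Hypothetical: {$bucketAuto: {groupBy: "$price", buckets: 2,
//                              output: {n: {$sum: 1}}}}.
// One emitted bucket might look like {_id: {min: 0, max: 10}, n: 3}.
// An accumulator that produced no value appears as null rather than being
// omitted, mirroring $group's output convention.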
bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long long depth) {
    Value _id = Value(result.getField("_id"));

    if (_visited.find(_id) != _visited.end()) {
        // We've already seen this object, don't repeat any work.
        return false;
    }

    // We have not seen this node before. If '_depthField' was specified, add the field to the
    // object.
    BSONObj fullObject =
        _depthField ? addDepthFieldToObject(_depthField->fullPath(), depth, result) : result;

    // Add the object to our '_visited' list.
    _visited[_id] = fullObject;

    // Update the size of '_visited' appropriately.
    _visitedUsageBytes += _id.getApproximateSize();
    _visitedUsageBytes += static_cast<size_t>(fullObject.objsize());

    // Add the 'connectFrom' field of 'result' into '_frontier'. If the 'connectFrom' field is an
    // array, we treat it as connecting to multiple values, so we must add each element to
    // '_frontier'.
    BSONElementSet recurseOnValues;
    dps::extractAllElementsAlongPath(result, _connectFromField.fullPath(), recurseOnValues);

    for (auto&& elem : recurseOnValues) {
        Value recurseOn = Value(elem);
        if (recurseOn.isArray()) {
            for (auto&& subElem : recurseOn.getArray()) {
                _frontier->insert(subElem);
                _frontierUsageBytes += subElem.getApproximateSize();
            }
        } else if (!recurseOn.missing()) {
            // Don't recurse on a missing value.
            _frontier->insert(recurseOn);
            _frontierUsageBytes += recurseOn.getApproximateSize();
        }
    }

    // We inserted into _visited, so return true.
    return true;
}
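A hypothetical traversal step showing the array fan-out:

// Hypothetical: _connectFromField is "reportsTo". Visiting
//   {_id: 1, reportsTo: ["a", "b"]}
// stores the object in _visited under Value(1), then inserts "a" and "b"
// individually into _frontier (an array connects to many values), charging
// each insertion against _visitedUsageBytes / _frontierUsageBytes.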
void AccumulatorPush::processInternal(const Value& input, bool merging) {
    if (!merging) {
        if (!input.missing()) {
            vpValue.push_back(input);
            _memUsageBytes += input.getApproximateSize();
        }
    } else {
        // If we're merging, we need to take apart the arrays we
        // receive and put their elements into the array we are collecting.
        // If we didn't, then we'd get an array of arrays, with one array
        // from each merge source.
        verify(input.getType() == Array);

        const vector<Value>& vec = input.getArray();
        vpValue.insert(vpValue.end(), vec.begin(), vec.end());

        for (size_t i = 0; i < vec.size(); i++) {
            _memUsageBytes += vec[i].getApproximateSize();
        }
    }
}
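Why the merge path flattens, with hypothetical partials:

// Hypothetical: two shards produce the partial arrays [1, 2] and [3].
// Inserting their elements yields [1, 2, 3]; simply pushing each partial
// would instead produce the wrong result [[1, 2], [3]].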
    Document DocumentSourceGroup::makeDocument(const Value& id,
                                               const Accumulators& accums,
                                               bool mergeableOutput) {
        const size_t n = vFieldName.size();
        MutableDocument out(1 + n);

        /* add the _id field */
        out.addField("_id", id);

        /* add the rest of the fields */
        for(size_t i = 0; i < n; ++i) {
            Value val = accums[i]->getValue(mergeableOutput);
            if (val.missing()) {
                // We return null in this case so returned objects are predictable.
                out.addField(vFieldName[i], Value(BSONNULL));
            }
            else {
                out.addField(vFieldName[i], val);
            }
        }

        return out.freeze();
    }
BSONObj DocumentSourceLookUp::queryForInput(const Document& input,
                                            const FieldPath& localFieldPath,
                                            const std::string& foreignFieldName) {
    Value localFieldVal = input.getNestedField(localFieldPath);

    // Missing values are treated as null.
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }

    BSONObjBuilder query;
    BSONObjBuilder subObj(query.subobjStart(foreignFieldName));

    if (localFieldVal.isArray()) {
        // Assume an array value logically corresponds to many documents, rather than logically
        // corresponding to one document with an array value.
        const vector<Value>& localArray = localFieldVal.getArray();
        const bool containsRegex = std::any_of(
            localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; });

        if (containsRegex) {
            // A regex inside of an $in will not be treated as an equality comparison, so use an
            // $or.
            return buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray());
        }

        // { <foreignFieldName> : { "$in" : <localFieldVal> } }
        subObj << "$in" << localFieldVal;
    } else {
        // { <foreignFieldName> : { "$eq" : <localFieldVal> } }
        subObj << "$eq" << localFieldVal;
    }

    subObj.doneFast();
    return query.obj();
}
void AccumulatorAddToSet::processInternal(const Value& input, bool merging) {
    if (!merging) {
        if (!input.missing()) {
            bool inserted = set.insert(input).second;
            if (inserted) {
                _memUsageBytes += input.getApproximateSize();
            }
        }
    } else {
        // If we're merging, we need to take apart the arrays we
        // receive and put their elements into the array we are collecting.
        // If we didn't, then we'd get an array of arrays, with one array
        // from each merge source.
        verify(input.getType() == Array);

        const vector<Value>& array = input.getArray();
        for (size_t i = 0; i < array.size(); i++) {
            bool inserted = set.insert(array[i]).second;
            if (inserted) {
                _memUsageBytes += array[i].getApproximateSize();
            }
        }
    }
}
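The same flattening applies here, plus set de-duplication (partials hypothetical):

// Hypothetical: partial sets [1, 2] and [2, 3] from two shards collapse to
// {1, 2, 3}. set.insert() reports whether an element was new, so
// _memUsageBytes grows only for first occurrences.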
Document DocumentSourceChangeStreamTransform::applyTransformation(const Document& input) {
    // If we're executing a change stream pipeline that was forwarded from mongos, then we expect
    // it to "need merge"; that is, we expect to be executing the shards part of a split pipeline.
    // It is never correct for mongos to pass through the change stream without splitting it into
    // a merging part executed on mongos and a shards part.
    //
    // This is necessary so that mongos can correctly handle "invalidate" and "retryNeeded" change
    // notifications. See SERVER-31978 for an example of why the pipeline must be split.
    //
    // We have to check this invariant at run-time of the change stream rather than parse time,
    // since a mongos may forward a change stream in an invalid position (e.g. in a nested $lookup
    // or $facet pipeline). In this case, mongod is responsible for parsing the pipeline and
    // throwing an error without ever executing the change stream.
    if (pExpCtx->fromMongos) {
        invariant(pExpCtx->needsMerge);
    }

    MutableDocument doc;

    // Extract the fields we need.
    checkValueType(input[repl::OplogEntry::kOpTypeFieldName],
                   repl::OplogEntry::kOpTypeFieldName,
                   BSONType::String);
    string op = input[repl::OplogEntry::kOpTypeFieldName].getString();
    Value ts = input[repl::OplogEntry::kTimestampFieldName];
    Value ns = input[repl::OplogEntry::kNssFieldName];
    checkValueType(ns, repl::OplogEntry::kNssFieldName, BSONType::String);
    Value uuid = input[repl::OplogEntry::kUuidFieldName];
    std::vector<FieldPath> documentKeyFields;

    // Deal with CRUD operations and commands.
    auto opType = repl::OpType_parse(IDLParserErrorContext("ChangeStreamEntry.op"), op);

    NamespaceString nss(ns.getString());
    // Ignore commands in the oplog when looking up the document key fields since a command implies
    // that the change stream is about to be invalidated (e.g. collection drop).
    if (!uuid.missing() && opType != repl::OpTypeEnum::kCommand) {
        checkValueType(uuid, repl::OplogEntry::kUuidFieldName, BSONType::BinData);
        // We need to retrieve the document key fields if our cache does not have an entry for this
        // UUID or if the cache entry is not definitively final, indicating that the collection was
        // unsharded when the entry was last populated.
        auto it = _documentKeyCache.find(uuid.getUuid());
        if (it == _documentKeyCache.end() || !it->second.isFinal) {
            auto docKeyFields =
                pExpCtx->mongoProcessInterface->collectDocumentKeyFieldsForHostedCollection(
                    pExpCtx->opCtx, nss, uuid.getUuid());
            if (it == _documentKeyCache.end() || docKeyFields.second) {
                _documentKeyCache[uuid.getUuid()] = DocumentKeyCacheEntry(docKeyFields);
            }
        }

        documentKeyFields = _documentKeyCache.find(uuid.getUuid())->second.documentKeyFields;
    }
    Value id = input.getNestedField("o._id");
    // Non-replace updates have the _id in field "o2".
    StringData operationType;
    Value fullDocument;
    Value updateDescription;
    Value documentKey;

    switch (opType) {
        case repl::OpTypeEnum::kInsert: {
            operationType = DocumentSourceChangeStream::kInsertOpType;
            fullDocument = input[repl::OplogEntry::kObjectFieldName];
            documentKey = Value(document_path_support::extractPathsFromDoc(
                fullDocument.getDocument(), documentKeyFields));
            break;
        }
        case repl::OpTypeEnum::kDelete: {
            operationType = DocumentSourceChangeStream::kDeleteOpType;
            documentKey = input[repl::OplogEntry::kObjectFieldName];
            break;
        }
        case repl::OpTypeEnum::kUpdate: {
            if (id.missing()) {
                operationType = DocumentSourceChangeStream::kUpdateOpType;
                checkValueType(input[repl::OplogEntry::kObjectFieldName],
                               repl::OplogEntry::kObjectFieldName,
                               BSONType::Object);
                Document opObject = input[repl::OplogEntry::kObjectFieldName].getDocument();
                Value updatedFields = opObject["$set"];
                Value removedFields = opObject["$unset"];

                // Extract the field names of $unset document.
                vector<Value> removedFieldsVector;
                if (removedFields.getType() == BSONType::Object) {
                    auto iter = removedFields.getDocument().fieldIterator();
                    while (iter.more()) {
                        removedFieldsVector.push_back(Value(iter.next().first));
                    }
                }
                updateDescription = Value(Document{
                    {"updatedFields", updatedFields.missing() ? Value(Document()) : updatedFields},
                    {"removedFields", removedFieldsVector}});
            } else {
                operationType = DocumentSourceChangeStream::kReplaceOpType;
                fullDocument = input[repl::OplogEntry::kObjectFieldName];
            }
            documentKey = input[repl::OplogEntry::kObject2FieldName];
            break;
        }
        case repl::OpTypeEnum::kCommand: {
            if (!input.getNestedField("o.drop").missing()) {
                operationType = DocumentSourceChangeStream::kDropCollectionOpType;

                // The "o.drop" field will contain the actual collection name.
                nss = NamespaceString(nss.db(), input.getNestedField("o.drop").getString());
            } else if (!input.getNestedField("o.renameCollection").missing()) {
                operationType = DocumentSourceChangeStream::kRenameCollectionOpType;

                // The "o.renameCollection" field contains the namespace of the original collection.
                nss = NamespaceString(input.getNestedField("o.renameCollection").getString());

                // The "o.to" field contains the target namespace for the rename.
                const auto renameTargetNss =
                    NamespaceString(input.getNestedField("o.to").getString());
                doc.addField(DocumentSourceChangeStream::kRenameTargetNssField,
                             Value(Document{{"db", renameTargetNss.db()},
                                            {"coll", renameTargetNss.coll()}}));
            } else if (!input.getNestedField("o.dropDatabase").missing()) {
                operationType = DocumentSourceChangeStream::kDropDatabaseOpType;

                // Extract the database name from the namespace field and leave the collection name
                // empty.
                nss = NamespaceString(nss.db());
            } else {
                // All other commands will invalidate the stream.
                operationType = DocumentSourceChangeStream::kInvalidateOpType;
            }

            // Make sure the result doesn't have a document key.
            documentKey = Value();
            break;
        }
        case repl::OpTypeEnum::kNoop: {
            operationType = DocumentSourceChangeStream::kNewShardDetectedOpType;
            // Generate a fake document Id for NewShardDetected operation so that we can resume
            // after this operation.
            documentKey = Value(Document{{DocumentSourceChangeStream::kIdField,
                                          input[repl::OplogEntry::kObject2FieldName]}});
            break;
        }
        default: { MONGO_UNREACHABLE; }
    }

    // UUID should always be present except for invalidate and dropDatabase entries.
    if (operationType != DocumentSourceChangeStream::kInvalidateOpType &&
        operationType != DocumentSourceChangeStream::kDropDatabaseOpType) {
        invariant(!uuid.missing(), "Saw a CRUD op without a UUID");
    }

    // Note that 'documentKey' and/or 'uuid' might be missing, in which case they will not appear
    // in the output.
    auto resumeTokenData = getResumeToken(ts, uuid, documentKey);
    auto resumeToken = ResumeToken(resumeTokenData).toDocument();

    // Add some additional fields only relevant to transactions.
    if (_txnContext) {
        doc.addField(DocumentSourceChangeStream::kTxnNumberField,
                     Value(static_cast<long long>(_txnContext->txnNumber)));
        doc.addField(DocumentSourceChangeStream::kLsidField, Value(_txnContext->lsid));
    }

    doc.addField(DocumentSourceChangeStream::kIdField, Value(resumeToken));
    doc.addField(DocumentSourceChangeStream::kOperationTypeField, Value(operationType));
    doc.addField(DocumentSourceChangeStream::kClusterTimeField, Value(resumeTokenData.clusterTime));

    // We set the resume token as the document's sort key in both the sharded and non-sharded cases,
    // since we will subsequently rely upon it to generate a correct postBatchResumeToken.
    // TODO SERVER-38539: when returning results for merging, we first check whether 'mergeByPBRT'
    // has been set. If not, then the request was sent from an older mongoS which cannot merge by
    // raw resume tokens, and we must use the old sort key format. This check, and the 'mergeByPBRT'
    // flag, are no longer necessary in 4.4; all change streams will be merged by resume token.
    if (pExpCtx->needsMerge && !pExpCtx->mergeByPBRT) {
        doc.setSortKeyMetaField(BSON("" << ts << "" << uuid << "" << documentKey));
    } else {
        doc.setSortKeyMetaField(resumeToken.toBson());
    }

    // "invalidate" and "newShardDetected" entries have fewer fields.
    if (operationType == DocumentSourceChangeStream::kInvalidateOpType ||
        operationType == DocumentSourceChangeStream::kNewShardDetectedOpType) {
        return doc.freeze();
    }

    doc.addField(DocumentSourceChangeStream::kFullDocumentField, fullDocument);
    doc.addField(DocumentSourceChangeStream::kNamespaceField,
                 operationType == DocumentSourceChangeStream::kDropDatabaseOpType
                     ? Value(Document{{"db", nss.db()}})
                     : Value(Document{{"db", nss.db()}, {"coll", nss.coll()}}));
    doc.addField(DocumentSourceChangeStream::kDocumentKeyField, documentKey);

    // Note that 'updateDescription' might be the 'missing' value, in which case it will not be
    // serialized.
    doc.addField("updateDescription", updateDescription);
    return doc.freeze();
}
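The event shape this builds for an insert, using hypothetical values:

// Hypothetical output for an insert oplog entry, with documentKeyFields
// ["_id"]:
//   {_id: <resume token>,
//    operationType: "insert",
//    clusterTime: <ts>,
//    fullDocument: {_id: 7, a: 1},
//    ns: {db: "test", coll: "c"},
//    documentKey: {_id: 7}}
// "invalidate" and "newShardDetected" events return early and omit the
// fullDocument, ns, and documentKey fields.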
Document DocumentSourceChangeStream::Transformation::applyTransformation(const Document& input) {
    MutableDocument doc;

    // Extract the fields we need.
    checkValueType(input[repl::OplogEntry::kOpTypeFieldName],
                   repl::OplogEntry::kOpTypeFieldName,
                   BSONType::String);
    string op = input[repl::OplogEntry::kOpTypeFieldName].getString();
    Value ts = input[repl::OplogEntry::kTimestampFieldName];
    Value ns = input[repl::OplogEntry::kNamespaceFieldName];
    checkValueType(ns, repl::OplogEntry::kNamespaceFieldName, BSONType::String);
    NamespaceString nss(ns.getString());
    Value id = input.getNestedField("o._id");
    // Non-replace updates have the _id in field "o2".
    Value documentId = id.missing() ? input.getNestedField("o2._id") : id;
    StringData operationType;
    Value fullDocument;
    Value updateDescription;

    // Deal with CRUD operations and commands.
    auto opType = repl::OpType_parse(IDLParserErrorContext("ChangeStreamEntry.op"), op);
    switch (opType) {
        case repl::OpTypeEnum::kInsert: {
            operationType = kInsertOpType;
            fullDocument = input[repl::OplogEntry::kObjectFieldName];
            break;
        }
        case repl::OpTypeEnum::kDelete: {
            operationType = kDeleteOpType;
            break;
        }
        case repl::OpTypeEnum::kUpdate: {
            if (id.missing()) {
                operationType = kUpdateOpType;
                checkValueType(input[repl::OplogEntry::kObjectFieldName],
                               repl::OplogEntry::kObjectFieldName,
                               BSONType::Object);
                Document opObject = input[repl::OplogEntry::kObjectFieldName].getDocument();
                Value updatedFields = opObject["$set"];
                Value removedFields = opObject["$unset"];

                // Extract the field names of $unset document.
                vector<Value> removedFieldsVector;
                if (removedFields.getType() == BSONType::Object) {
                    auto iter = removedFields.getDocument().fieldIterator();
                    while (iter.more()) {
                        removedFieldsVector.push_back(Value(iter.next().first));
                    }
                }
                updateDescription = Value(Document{
                    {"updatedFields", updatedFields.missing() ? Value(Document()) : updatedFields},
                    {"removedFields", removedFieldsVector}});
            } else {
                operationType = kReplaceOpType;
                fullDocument = input[repl::OplogEntry::kObjectFieldName];
            }
            break;
        }
        case repl::OpTypeEnum::kCommand: {
            operationType = kInvalidateOpType;
            // Make sure the result doesn't have a document id.
            documentId = Value();
            break;
        }
        default: { MONGO_UNREACHABLE; }
    }

    // Construct the result document.
    Value documentKey;
    if (!documentId.missing()) {
        documentKey = Value(Document{{kIdField, documentId}});
    }
    // Note that 'documentKey' might be missing, in which case it will not appear in the output.
    Document resumeToken{{kClusterTimeField, Document{{kTimestampField, ts}}},
                         {kNamespaceField, ns},
                         {kDocumentKeyField, documentKey}};
    doc.addField(kIdField, Value(resumeToken));
    doc.addField(kOperationTypeField, Value(operationType));
    doc.addField(kFullDocumentField, fullDocument);

    // "invalidate" entry has fewer fields.
    if (opType == repl::OpTypeEnum::kCommand) {
        return doc.freeze();
    }

    doc.addField(kNamespaceField, Value(Document{{"db", nss.db()}, {"coll", nss.coll()}}));
    doc.addField(kDocumentKeyField, documentKey);

    // Note that 'updateDescription' might be the 'missing' value, in which case it will not be
    // serialized.
    doc.addField("updateDescription", updateDescription);
    return doc.freeze();
}
void DocumentSource::serializeToArray(vector<Value>& array, bool explain) const {
    Value entry = serialize(explain);
    if (!entry.missing()) {
        array.push_back(entry);
    }
}
BSONObj DocumentSourceLookUp::makeMatchStageFromInput(const Document& input,
                                                      const FieldPath& localFieldPath,
                                                      const std::string& foreignFieldName,
                                                      const BSONObj& additionalFilter) {
    Value localFieldVal = input.getNestedField(localFieldPath);

    // Missing values are treated as null.
    if (localFieldVal.missing()) {
        localFieldVal = Value(BSONNULL);
    }

    // We construct a query of one of the following forms, depending on the contents of
    // 'localFieldVal'.
    //
    //   {$and: [{<foreignFieldName>: {$eq: <localFieldVal>}}, <additionalFilter>]}
    //     if 'localFieldVal' isn't an array value.
    //
    //   {$and: [{<foreignFieldName>: {$in: [<value>, <value>, ...]}}, <additionalFilter>]}
    //     if 'localFieldVal' is an array value but doesn't contain any elements that are regular
    //     expressions.
    //
    //   {$and: [{$or: [{<foreignFieldName>: {$eq: <value>}},
    //                  {<foreignFieldName>: {$eq: <value>}}, ...]},
    //           <additionalFilter>]}
    //     if 'localFieldVal' is an array value and it contains at least one element that is a
    //     regular expression.

    // We wrap the query in a $match so that it can be parsed into a DocumentSourceMatch when
    // constructing a pipeline to execute.
    BSONObjBuilder match;
    BSONObjBuilder query(match.subobjStart("$match"));

    BSONArrayBuilder andObj(query.subarrayStart("$and"));
    BSONObjBuilder joiningObj(andObj.subobjStart());

    if (localFieldVal.isArray()) {
        // A $lookup on an array value corresponds to finding documents in the foreign collection
        // that have a value of any of the elements in the array value, rather than finding
        // documents that have a value equal to the entire array value. These semantics are
        // automatically provided to us by using the $in query operator.
        const vector<Value>& localArray = localFieldVal.getArray();
        const bool containsRegex = std::any_of(
            localArray.begin(), localArray.end(), [](Value val) { return val.getType() == RegEx; });

        if (containsRegex) {
            // A regular expression inside the $in query operator will perform pattern matching on
            // any string values. Since we want regular expressions to only match other RegEx types,
            // we write the query as a $or of equality comparisons instead.
            BSONObj orQuery = buildEqualityOrQuery(foreignFieldName, localFieldVal.getArray());
            joiningObj.appendElements(orQuery);
        } else {
            // { <foreignFieldName> : { "$in" : <localFieldVal> } }
            BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
            subObj << "$in" << localFieldVal;
            subObj.doneFast();
        }
    } else {
        // { <foreignFieldName> : { "$eq" : <localFieldVal> } }
        BSONObjBuilder subObj(joiningObj.subobjStart(foreignFieldName));
        subObj << "$eq" << localFieldVal;
        subObj.doneFast();
    }

    joiningObj.doneFast();

    BSONObjBuilder additionalFilterObj(andObj.subobjStart());
    additionalFilterObj.appendElements(additionalFilter);
    additionalFilterObj.doneFast();

    andObj.doneFast();

    query.doneFast();
    return match.obj();
}
    void DocumentSourceGroup::populate() {
        const size_t numAccumulators = vpAccumulatorFactory.size();
        dassert(numAccumulators == vpExpression.size());

        // pushed to on spill()
        vector<shared_ptr<Sorter<Value, Value>::Iterator> > sortedFiles;
        int memoryUsageBytes = 0;

        // This loop consumes all input from pSource and buckets it based on pIdExpression.
        while (boost::optional<Document> input = pSource->getNext()) {
            if (memoryUsageBytes > _maxMemoryUsageBytes) {
                uassert(16945, "Exceeded memory limit for $group, but didn't allow external sort."
                               " Pass allowDiskUse:true to opt in.",
                        _extSortAllowed);
                sortedFiles.push_back(spill());
                memoryUsageBytes = 0;
            }

            _variables->setRoot(*input);

            /* get the _id value */
            Value id = computeId(_variables.get());

            /* treat missing values the same as NULL SERVER-4674 */
            if (id.missing())
                id = Value(BSONNULL);

            /*
              Look for the _id value in the map; if it's not there, add a
              new entry with a blank accumulator.
            */
            const size_t oldSize = groups.size();
            vector<intrusive_ptr<Accumulator> >& group = groups[id];
            const bool inserted = groups.size() != oldSize;

            if (inserted) {
                memoryUsageBytes += id.getApproximateSize();

                // Add the accumulators
                group.reserve(numAccumulators);
                for (size_t i = 0; i < numAccumulators; i++) {
                    group.push_back(vpAccumulatorFactory[i]());
                }
            } else {
                for (size_t i = 0; i < numAccumulators; i++) {
                    // subtract old mem usage. New usage added back after processing.
                    memoryUsageBytes -= group[i]->memUsageForSorter();
                }
            }

            /* tickle all the accumulators for the group we found */
            dassert(numAccumulators == group.size());
            for (size_t i = 0; i < numAccumulators; i++) {
                group[i]->process(vpExpression[i]->evaluate(_variables.get()), _doingMerge);
                memoryUsageBytes += group[i]->memUsageForSorter();
            }

            // We are done with the ROOT document so release it.
            _variables->clearRoot();

            DEV {
                // In debug mode, spill every time we have a duplicate id to stress merge logic.
                if (!inserted // is a dup
                        && !pExpCtx->inRouter // can't spill to disk in router
                        && !_extSortAllowed // don't change behavior when testing external sort
                        && sortedFiles.size() < 20 // don't open too many FDs
                        ) {
                    sortedFiles.push_back(spill());
                }
            }
        }

        // These blocks do any final steps necessary to prepare to output results.
        if (!sortedFiles.empty()) {
            _spilled = true;
            if (!groups.empty()) {
                sortedFiles.push_back(spill());
            }

            // We won't be using groups again so free its memory.
            GroupsMap().swap(groups);

            _sorterIterator.reset(
                    Sorter<Value,Value>::Iterator::merge(
                        sortedFiles, SortOptions(), SorterComparator()));

            // prepare current to accumulate data
            _currentAccumulators.reserve(numAccumulators);
            for (size_t i = 0; i < numAccumulators; i++) {
                _currentAccumulators.push_back(vpAccumulatorFactory[i]());
            }

            verify(_sorterIterator->more()); // we put data in, we should get something out.
            _firstPartOfNextGroup = _sorterIterator->next();
        } else {
            // start the group iterator
            groupsIterator = groups.begin();
        }

        populated = true;
    }
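A sketch of the spill policy in the loop above (the limit value is hypothetical):

// Hypothetical: with _maxMemoryUsageBytes at, say, 100MB:
//   allowDiskUse:false -> crossing the limit raises error 16945.
//   allowDiskUse:true  -> the in-memory groups map is written out via
//                         spill(), memoryUsageBytes resets to 0, and the
//                         spilled runs are merged back together later with
//                         Sorter<Value, Value>::Iterator::merge().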