void BtreeKeyGeneratorV1::getKeysImplWithArray(
    std::vector<const char*> fieldNames,
    std::vector<BSONElement> fixed,
    const BSONObj& obj,
    BSONObjSet* keys,
    unsigned numNotFound,
    const std::vector<PositionalPathInfo>& positionalInfo,
    MultikeyPaths* multikeyPaths) const {
    BSONElement arrElt;

    // A set containing the position of any indexed fields in the key pattern that traverse through
    // the 'arrElt' array value.
    std::set<size_t> arrIdxs;

    // A vector with size equal to the number of elements in the index key pattern. Each element in
    // the vector, if initialized, refers to the component within the indexed field that traverses
    // through the 'arrElt' array value. We say that this component within the indexed field
    // corresponds to a path that causes the index to be multikey if the 'arrElt' array value
    // contains multiple elements.
    //
    // For example, consider the index {'a.b': 1, 'a.c'} and the document
    // {a: [{b: 1, c: 'x'}, {b: 2, c: 'y'}]}. The path "a" causes the index to be multikey, so we'd
    // have a std::vector<boost::optional<size_t>>{{0U}, {0U}}.
    //
    // Furthermore, due to how positional key patterns are specified, it's possible for an indexed
    // field to cause the index to be multikey at a different component than another indexed field
    // that also traverses through the 'arrElt' array value. It's then also possible for an indexed
    // field not to cause the index to be multikey, even if it traverses through the 'arrElt' array
    // value, because only a particular element would be indexed.
    //
    // For example, consider the index {'a.b': 1, 'a.b.0'} and the document {a: {b: [1, 2]}}. The
    // path "a.b" causes the index to be multikey, but the key pattern "a.b.0" only indexes the
    // first element of the array, so we'd have a
    // std::vector<boost::optional<size_t>>{{1U}, boost::none}.
    std::vector<boost::optional<size_t>> arrComponents(fieldNames.size());

    bool mayExpandArrayUnembedded = true;
    for (size_t i = 0; i < fieldNames.size(); ++i) {
        if (*fieldNames[i] == '\0') {
            continue;
        }

        bool arrayNestedArray;
        // Extract element matching fieldName[ i ] from object xor array.
        BSONElement e =
            extractNextElement(obj, positionalInfo[i], &fieldNames[i], &arrayNestedArray);

        if (e.eoo()) {
            // if field not present, set to null
            fixed[i] = nullElt;
            // done expanding this field name
            fieldNames[i] = "";
            numNotFound++;
        } else if (e.type() == Array) {
            arrIdxs.insert(i);
            if (arrElt.eoo()) {
                // we only expand arrays on a single path -- track the path here
                arrElt = e;
            } else if (e.rawdata() != arrElt.rawdata()) {
                // enforce single array path here
                assertParallelArrays(e.fieldName(), arrElt.fieldName());
            }
            if (arrayNestedArray) {
                mayExpandArrayUnembedded = false;
            }
        } else {
            // not an array - no need for further expansion
            fixed[i] = e;
        }
    }

    if (arrElt.eoo()) {
        // No array, so generate a single key.
        if (_isSparse && numNotFound == fieldNames.size()) {
            return;
        }
        BSONObjBuilder b(_sizeTracker);
        for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) {
            CollationIndexKey::collationAwareIndexKeyAppend(*i, _collator, &b);
        }
        keys->insert(b.obj());
    } else if (arrElt.embeddedObject().firstElement().eoo()) {
        // We've encountered an empty array.
        if (multikeyPaths && mayExpandArrayUnembedded) {
            // Any indexed path which traverses through the empty array must be recorded as an array
            // component.
            for (auto i : arrIdxs) {
                // We need to determine which component of the indexed field causes the index to be
                // multikey as a result of the empty array. Indexed empty arrays are considered
                // multikey and may occur mid-path. For instance, the indexed path "a.b.c" has
                // multikey components {0, 1} given the document {a: [{b: []}, {b: 1}]}.
                size_t fullPathLength = _pathLengths[i];
                size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts();
                invariant(suffixPathLength < fullPathLength);
                arrComponents[i] = fullPathLength - suffixPathLength - 1;
            }
        }

        // For an empty array, set matching fields to undefined.
        _getKeysArrEltFixed(&fieldNames,
                            &fixed,
                            undefinedElt,
                            keys,
                            numNotFound,
                            arrElt,
                            arrIdxs,
                            true,
                            _emptyPositionalInfo,
                            multikeyPaths);
    } else {
        BSONObj arrObj = arrElt.embeddedObject();

        // For positional key patterns, e.g. {'a.1.b': 1}, we lookup the indexed array element
        // and then traverse the remainder of the field path up front. This prevents us from
        // having to look up the indexed element again on each recursive call (i.e. once per
        // array element).
        std::vector<PositionalPathInfo> subPositionalInfo(fixed.size());
        for (size_t i = 0; i < fieldNames.size(); ++i) {
            const bool fieldIsArray = arrIdxs.find(i) != arrIdxs.end();

            if (*fieldNames[i] == '\0') {
                // We've reached the end of the path.
                if (multikeyPaths && fieldIsArray && mayExpandArrayUnembedded) {
                    // The 'arrElt' array value isn't expanded into multiple elements when the last
                    // component of the indexed field is positional and 'arrElt' contains nested
                    // array values. In all other cases, the 'arrElt' array value may be expanded
                    // into multiple element and can therefore cause the index to be multikey.
                    arrComponents[i] = _pathLengths[i] - 1;
                }
                continue;
            }

            // The earlier call to dps::extractElementAtPathOrArrayAlongPath(..., fieldNames[i])
            // modified fieldNames[i] to refer to the suffix of the path immediately following the
            // 'arrElt' array value. If we haven't reached the end of this indexed field yet, then
            // we must have traversed through 'arrElt'.
            invariant(fieldIsArray);

            StringData part = fieldNames[i];
            part = part.substr(0, part.find('.'));
            subPositionalInfo[i].positionallyIndexedElt = arrObj[part];
            if (subPositionalInfo[i].positionallyIndexedElt.eoo()) {
                // We aren't indexing a particular element of the 'arrElt' array value, so it may be
                // expanded into multiple elements. It can therefore cause the index to be multikey.
                if (multikeyPaths) {
                    // We need to determine which component of the indexed field causes the index to
                    // be multikey as a result of the 'arrElt' array value. Since
                    //
                    //   NumComponents("<pathPrefix>") + NumComponents("<pathSuffix>")
                    //       = NumComponents("<pathPrefix>.<pathSuffix>"),
                    //
                    // we can compute the number of components in a prefix of the indexed field by
                    // subtracting the number of components in the suffix 'fieldNames[i]' from the
                    // number of components in the indexed field '_fieldNames[i]'.
                    //
                    // For example, consider the indexed field "a.b.c" and the suffix "c". The path
                    // "a.b.c" has 3 components and the suffix "c" has 1 component. Subtracting the
                    // latter from the former yields the number of components in the prefix "a.b",
                    // i.e. 2.
                    size_t fullPathLength = _pathLengths[i];
                    size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts();
                    invariant(suffixPathLength < fullPathLength);
                    arrComponents[i] = fullPathLength - suffixPathLength - 1;
                }
                continue;
            }

            // We're indexing an array element by its position. Traverse the remainder of the
            // field path now.
            //
            // Indexing an array element by its position selects a particular element of the
            // 'arrElt' array value when generating keys. It therefore cannot cause the index to be
            // multikey.
            subPositionalInfo[i].arrayObj = arrObj;
            subPositionalInfo[i].remainingPath = fieldNames[i];
            subPositionalInfo[i].dottedElt = dps::extractElementAtPathOrArrayAlongPath(
                arrObj, subPositionalInfo[i].remainingPath);
        }

        // Generate a key for each element of the indexed array.
        for (const auto arrObjElem : arrObj) {
            _getKeysArrEltFixed(&fieldNames,
                                &fixed,
                                arrObjElem,
                                keys,
                                numNotFound,
                                arrElt,
                                arrIdxs,
                                mayExpandArrayUnembedded,
                                subPositionalInfo,
                                multikeyPaths);
        }
    }

    // Record multikey path components.
    if (multikeyPaths) {
        for (size_t i = 0; i < arrComponents.size(); ++i) {
            if (auto arrComponent = arrComponents[i]) {
                (*multikeyPaths)[i].insert(*arrComponent);
            }
        }
    }
}
Beispiel #2
0
 /**
  * @param fieldNames - fields to index, may be postfixes in recursive calls
  * @param fixed - values that have already been identified for their index fields
  * @param obj - object from which keys should be extracted, based on names in fieldNames
  * @param keys - set where index keys are written
  * @param numNotFound - number of index fields that have already been identified as missing
  * @param array - array from which keys should be extracted, based on names in fieldNames
  *        If obj and array are both nonempty, obj will be one of the elements of array.
  */        
 void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const {
     BSONElement arrElt;
     set<unsigned> arrIdxs;
     bool mayExpandArrayUnembedded = true;
     for( unsigned i = 0; i < fieldNames.size(); ++i ) {
         if ( *fieldNames[ i ] == '\0' ) {
             continue;
         }
         
         bool arrayNestedArray;
         // Extract element matching fieldName[ i ] from object xor array.
         BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray );
         
         if ( e.eoo() ) {
             // if field not present, set to null
             fixed[ i ] = _spec._nullElt;
             // done expanding this field name
             fieldNames[ i ] = "";
             numNotFound++;
         }
         else if ( e.type() == Array ) {
             arrIdxs.insert( i );
             if ( arrElt.eoo() ) {
                 // we only expand arrays on a single path -- track the path here
                 arrElt = e;
             }
             else if ( e.rawdata() != arrElt.rawdata() ) {
                 // enforce single array path here
                 assertParallelArrays( e.fieldName(), arrElt.fieldName() );
             }
             if ( arrayNestedArray ) {
                 mayExpandArrayUnembedded = false;   
             }
         }
         else {
             // not an array - no need for further expansion
             fixed[ i ] = e;
         }
     }
     
     if ( arrElt.eoo() ) {
         // No array, so generate a single key.
         if ( _spec._sparse && numNotFound == _spec._nFields ) {
             return;
         }            
         BSONObjBuilder b(_spec._sizeTracker);
         for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) {
             b.appendAs( *i, "" );
         }
         keys.insert( b.obj() );
     }
     else if ( arrElt.embeddedObject().firstElement().eoo() ) {
         // Empty array, so set matching fields to undefined.
         _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true );
     }
     else {
         // Non empty array that can be expanded, so generate a key for each member.
         BSONObj arrObj = arrElt.embeddedObject();
         BSONObjIterator i( arrObj );
         while( i.more() ) {
             _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded );
         }
     }
 }