コード例 #1
0
void BtreeKeyGeneratorV1::getKeysImplWithArray(
    std::vector<const char*> fieldNames,
    std::vector<BSONElement> fixed,
    const BSONObj& obj,
    BSONObjSet* keys,
    unsigned numNotFound,
    const std::vector<PositionalPathInfo>& positionalInfo,
    MultikeyPaths* multikeyPaths) const {
    BSONElement arrElt;

    // A set containing the position of any indexed fields in the key pattern that traverse through
    // the 'arrElt' array value.
    std::set<size_t> arrIdxs;

    // A vector with size equal to the number of elements in the index key pattern. Each element in
    // the vector, if initialized, refers to the component within the indexed field that traverses
    // through the 'arrElt' array value. We say that this component within the indexed field
    // corresponds to a path that causes the index to be multikey if the 'arrElt' array value
    // contains multiple elements.
    //
    // For example, consider the index {'a.b': 1, 'a.c'} and the document
    // {a: [{b: 1, c: 'x'}, {b: 2, c: 'y'}]}. The path "a" causes the index to be multikey, so we'd
    // have a std::vector<boost::optional<size_t>>{{0U}, {0U}}.
    //
    // Furthermore, due to how positional key patterns are specified, it's possible for an indexed
    // field to cause the index to be multikey at a different component than another indexed field
    // that also traverses through the 'arrElt' array value. It's then also possible for an indexed
    // field not to cause the index to be multikey, even if it traverses through the 'arrElt' array
    // value, because only a particular element would be indexed.
    //
    // For example, consider the index {'a.b': 1, 'a.b.0'} and the document {a: {b: [1, 2]}}. The
    // path "a.b" causes the index to be multikey, but the key pattern "a.b.0" only indexes the
    // first element of the array, so we'd have a
    // std::vector<boost::optional<size_t>>{{1U}, boost::none}.
    std::vector<boost::optional<size_t>> arrComponents(fieldNames.size());

    bool mayExpandArrayUnembedded = true;
    for (size_t i = 0; i < fieldNames.size(); ++i) {
        if (*fieldNames[i] == '\0') {
            continue;
        }

        bool arrayNestedArray;
        // Extract element matching fieldName[ i ] from object xor array.
        BSONElement e =
            extractNextElement(obj, positionalInfo[i], &fieldNames[i], &arrayNestedArray);

        if (e.eoo()) {
            // if field not present, set to null
            fixed[i] = nullElt;
            // done expanding this field name
            fieldNames[i] = "";
            numNotFound++;
        } else if (e.type() == Array) {
            arrIdxs.insert(i);
            if (arrElt.eoo()) {
                // we only expand arrays on a single path -- track the path here
                arrElt = e;
            } else if (e.rawdata() != arrElt.rawdata()) {
                // enforce single array path here
                assertParallelArrays(e.fieldName(), arrElt.fieldName());
            }
            if (arrayNestedArray) {
                mayExpandArrayUnembedded = false;
            }
        } else {
            // not an array - no need for further expansion
            fixed[i] = e;
        }
    }

    if (arrElt.eoo()) {
        // No array, so generate a single key.
        if (_isSparse && numNotFound == fieldNames.size()) {
            return;
        }
        BSONObjBuilder b(_sizeTracker);
        for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) {
            CollationIndexKey::collationAwareIndexKeyAppend(*i, _collator, &b);
        }
        keys->insert(b.obj());
    } else if (arrElt.embeddedObject().firstElement().eoo()) {
        // We've encountered an empty array.
        if (multikeyPaths && mayExpandArrayUnembedded) {
            // Any indexed path which traverses through the empty array must be recorded as an array
            // component.
            for (auto i : arrIdxs) {
                // We need to determine which component of the indexed field causes the index to be
                // multikey as a result of the empty array. Indexed empty arrays are considered
                // multikey and may occur mid-path. For instance, the indexed path "a.b.c" has
                // multikey components {0, 1} given the document {a: [{b: []}, {b: 1}]}.
                size_t fullPathLength = _pathLengths[i];
                size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts();
                invariant(suffixPathLength < fullPathLength);
                arrComponents[i] = fullPathLength - suffixPathLength - 1;
            }
        }

        // For an empty array, set matching fields to undefined.
        _getKeysArrEltFixed(&fieldNames,
                            &fixed,
                            undefinedElt,
                            keys,
                            numNotFound,
                            arrElt,
                            arrIdxs,
                            true,
                            _emptyPositionalInfo,
                            multikeyPaths);
    } else {
        BSONObj arrObj = arrElt.embeddedObject();

        // For positional key patterns, e.g. {'a.1.b': 1}, we lookup the indexed array element
        // and then traverse the remainder of the field path up front. This prevents us from
        // having to look up the indexed element again on each recursive call (i.e. once per
        // array element).
        std::vector<PositionalPathInfo> subPositionalInfo(fixed.size());
        for (size_t i = 0; i < fieldNames.size(); ++i) {
            const bool fieldIsArray = arrIdxs.find(i) != arrIdxs.end();

            if (*fieldNames[i] == '\0') {
                // We've reached the end of the path.
                if (multikeyPaths && fieldIsArray && mayExpandArrayUnembedded) {
                    // The 'arrElt' array value isn't expanded into multiple elements when the last
                    // component of the indexed field is positional and 'arrElt' contains nested
                    // array values. In all other cases, the 'arrElt' array value may be expanded
                    // into multiple element and can therefore cause the index to be multikey.
                    arrComponents[i] = _pathLengths[i] - 1;
                }
                continue;
            }

            // The earlier call to dps::extractElementAtPathOrArrayAlongPath(..., fieldNames[i])
            // modified fieldNames[i] to refer to the suffix of the path immediately following the
            // 'arrElt' array value. If we haven't reached the end of this indexed field yet, then
            // we must have traversed through 'arrElt'.
            invariant(fieldIsArray);

            StringData part = fieldNames[i];
            part = part.substr(0, part.find('.'));
            subPositionalInfo[i].positionallyIndexedElt = arrObj[part];
            if (subPositionalInfo[i].positionallyIndexedElt.eoo()) {
                // We aren't indexing a particular element of the 'arrElt' array value, so it may be
                // expanded into multiple elements. It can therefore cause the index to be multikey.
                if (multikeyPaths) {
                    // We need to determine which component of the indexed field causes the index to
                    // be multikey as a result of the 'arrElt' array value. Since
                    //
                    //   NumComponents("<pathPrefix>") + NumComponents("<pathSuffix>")
                    //       = NumComponents("<pathPrefix>.<pathSuffix>"),
                    //
                    // we can compute the number of components in a prefix of the indexed field by
                    // subtracting the number of components in the suffix 'fieldNames[i]' from the
                    // number of components in the indexed field '_fieldNames[i]'.
                    //
                    // For example, consider the indexed field "a.b.c" and the suffix "c". The path
                    // "a.b.c" has 3 components and the suffix "c" has 1 component. Subtracting the
                    // latter from the former yields the number of components in the prefix "a.b",
                    // i.e. 2.
                    size_t fullPathLength = _pathLengths[i];
                    size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts();
                    invariant(suffixPathLength < fullPathLength);
                    arrComponents[i] = fullPathLength - suffixPathLength - 1;
                }
                continue;
            }

            // We're indexing an array element by its position. Traverse the remainder of the
            // field path now.
            //
            // Indexing an array element by its position selects a particular element of the
            // 'arrElt' array value when generating keys. It therefore cannot cause the index to be
            // multikey.
            subPositionalInfo[i].arrayObj = arrObj;
            subPositionalInfo[i].remainingPath = fieldNames[i];
            subPositionalInfo[i].dottedElt = dps::extractElementAtPathOrArrayAlongPath(
                arrObj, subPositionalInfo[i].remainingPath);
        }

        // Generate a key for each element of the indexed array.
        for (const auto arrObjElem : arrObj) {
            _getKeysArrEltFixed(&fieldNames,
                                &fixed,
                                arrObjElem,
                                keys,
                                numNotFound,
                                arrElt,
                                arrIdxs,
                                mayExpandArrayUnembedded,
                                subPositionalInfo,
                                multikeyPaths);
        }
    }

    // Record multikey path components.
    if (multikeyPaths) {
        for (size_t i = 0; i < arrComponents.size(); ++i) {
            if (auto arrComponent = arrComponents[i]) {
                (*multikeyPaths)[i].insert(*arrComponent);
            }
        }
    }
}
コード例 #2
0
ファイル: indexkey.cpp プロジェクト: LeeShuni/mongo
 /**
  * @param fieldNames - fields to index, may be postfixes in recursive calls
  * @param fixed - values that have already been identified for their index fields
  * @param obj - object from which keys should be extracted, based on names in fieldNames
  * @param keys - set where index keys are written
  * @param numNotFound - number of index fields that have already been identified as missing
  * @param array - array from which keys should be extracted, based on names in fieldNames
  *        If obj and array are both nonempty, obj will be one of the elements of array.
  */        
 void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const {
     BSONElement arrElt;
     set<unsigned> arrIdxs;
     bool mayExpandArrayUnembedded = true;
     for( unsigned i = 0; i < fieldNames.size(); ++i ) {
         if ( *fieldNames[ i ] == '\0' ) {
             continue;
         }
         
         bool arrayNestedArray;
         // Extract element matching fieldName[ i ] from object xor array.
         BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray );
         
         if ( e.eoo() ) {
             // if field not present, set to null
             fixed[ i ] = _spec._nullElt;
             // done expanding this field name
             fieldNames[ i ] = "";
             numNotFound++;
         }
         else if ( e.type() == Array ) {
             arrIdxs.insert( i );
             if ( arrElt.eoo() ) {
                 // we only expand arrays on a single path -- track the path here
                 arrElt = e;
             }
             else if ( e.rawdata() != arrElt.rawdata() ) {
                 // enforce single array path here
                 assertParallelArrays( e.fieldName(), arrElt.fieldName() );
             }
             if ( arrayNestedArray ) {
                 mayExpandArrayUnembedded = false;   
             }
         }
         else {
             // not an array - no need for further expansion
             fixed[ i ] = e;
         }
     }
     
     if ( arrElt.eoo() ) {
         // No array, so generate a single key.
         if ( _spec._sparse && numNotFound == _spec._nFields ) {
             return;
         }            
         BSONObjBuilder b(_spec._sizeTracker);
         for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) {
             b.appendAs( *i, "" );
         }
         keys.insert( b.obj() );
     }
     else if ( arrElt.embeddedObject().firstElement().eoo() ) {
         // Empty array, so set matching fields to undefined.
         _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true );
     }
     else {
         // Non empty array that can be expanded, so generate a key for each member.
         BSONObj arrObj = arrElt.embeddedObject();
         BSONObjIterator i( arrObj );
         while( i.more() ) {
             _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded );
         }
     }
 }
コード例 #3
0
void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames,
                                      std::vector<BSONElement> fixed,
                                      const BSONObj& obj,
                                      BSONObjSet* keys,
                                      MultikeyPaths* multikeyPaths) const {
    if (_isIdIndex) {
        // we special case for speed
        BSONElement e = obj["_id"];
        if (e.eoo()) {
            keys->insert(_nullKey);
        } else {
            int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */;
            BSONObjBuilder b(size);
            b.appendAs(e, "");
            keys->insert(b.obj());
            invariant(keys->begin()->objsize() == size);
        }
        return;
    }

    BSONElement arrElt;
    unsigned arrIdx = ~0;
    unsigned numNotFound = 0;

    for (unsigned i = 0; i < fieldNames.size(); ++i) {
        if (*fieldNames[i] == '\0')
            continue;

        BSONElement e = dps::extractElementAtPathOrArrayAlongPath(obj, fieldNames[i]);

        if (e.eoo()) {
            e = nullElt;  // no matching field
            numNotFound++;
        }

        if (e.type() != Array)
            fieldNames[i] = "";  // no matching field or non-array match

        if (*fieldNames[i] == '\0')
            // no need for further object expansion (though array expansion still possible)
            fixed[i] = e;

        if (e.type() == Array && arrElt.eoo()) {
            // we only expand arrays on a single path -- track the path here
            arrIdx = i;
            arrElt = e;
        }

        // enforce single array path here
        if (e.type() == Array && e.rawdata() != arrElt.rawdata()) {
            assertParallelArrays(e.fieldName(), arrElt.fieldName());
        }
    }

    bool allFound = true;  // have we found elements for all field names in the key spec?
    for (std::vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end();
         ++i) {
        if (**i != '\0') {
            allFound = false;
            break;
        }
    }

    if (_isSparse && numNotFound == _fieldNames.size()) {
        // we didn't find any fields
        // so we're not going to index this document
        return;
    }

    bool insertArrayNull = false;

    if (allFound) {
        if (arrElt.eoo()) {
            // no terminal array element to expand
            BSONObjBuilder b(_sizeTracker);
            for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i)
                b.appendAs(*i, "");
            keys->insert(b.obj());
        } else {
            // terminal array element to expand, so generate all keys
            BSONObjIterator i(arrElt.embeddedObject());
            if (i.more()) {
                while (i.more()) {
                    BSONObjBuilder b(_sizeTracker);
                    for (unsigned j = 0; j < fixed.size(); ++j) {
                        if (j == arrIdx)
                            b.appendAs(i.next(), "");
                        else
                            b.appendAs(fixed[j], "");
                    }
                    keys->insert(b.obj());
                }
            } else if (fixed.size() > 1) {
                insertArrayNull = true;
            }
        }
    } else {
        // nonterminal array element to expand, so recurse
        verify(!arrElt.eoo());
        BSONObjIterator i(arrElt.embeddedObject());
        if (i.more()) {
            while (i.more()) {
                BSONElement e = i.next();
                if (e.type() == Object) {
                    getKeysImpl(fieldNames, fixed, e.embeddedObject(), keys, multikeyPaths);
                }
            }
        } else {
            insertArrayNull = true;
        }
    }

    if (insertArrayNull) {
        // x : [] - need to insert undefined
        BSONObjBuilder b(_sizeTracker);
        for (unsigned j = 0; j < fixed.size(); ++j) {
            if (j == arrIdx) {
                b.appendUndefined("");
            } else {
                BSONElement e = fixed[j];
                if (e.eoo())
                    b.appendNull("");
                else
                    b.appendAs(e, "");
            }
        }
        keys->insert(b.obj());
    }
}
コード例 #4
0
ファイル: indexkey.cpp プロジェクト: LeeShuni/mongo
 void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const {
     BSONElement arrElt;
     unsigned arrIdx = ~0;
     int numNotFound = 0;
     
     for( unsigned i = 0; i < fieldNames.size(); ++i ) {
         if ( *fieldNames[ i ] == '\0' )
             continue;
         
         BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] );
         
         if ( e.eoo() ) {
             e = _spec._nullElt; // no matching field
             numNotFound++;
         }
         
         if ( e.type() != Array )
             fieldNames[ i ] = ""; // no matching field or non-array match
         
         if ( *fieldNames[ i ] == '\0' )
             fixed[ i ] = e; // no need for further object expansion (though array expansion still possible)
         
         if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here
             arrIdx = i;
             arrElt = e;
         }
         
         // enforce single array path here
         if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) {
             assertParallelArrays( e.fieldName(), arrElt.fieldName() );
         }
     }
     
     bool allFound = true; // have we found elements for all field names in the key spec?
     for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) {
         if ( **i != '\0' ) {
             allFound = false;
             break;
         }
     }
     
     if ( _spec._sparse && numNotFound == _spec._nFields ) {
         // we didn't find any fields
         // so we're not going to index this document
         return;
     }
     
     bool insertArrayNull = false;
     
     if ( allFound ) {
         if ( arrElt.eoo() ) {
             // no terminal array element to expand
             BSONObjBuilder b(_spec._sizeTracker);
             for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i )
                 b.appendAs( *i, "" );
             keys.insert( b.obj() );
         }
         else {
             // terminal array element to expand, so generate all keys
             BSONObjIterator i( arrElt.embeddedObject() );
             if ( i.more() ) {
                 while( i.more() ) {
                     BSONObjBuilder b(_spec._sizeTracker);
                     for( unsigned j = 0; j < fixed.size(); ++j ) {
                         if ( j == arrIdx )
                             b.appendAs( i.next(), "" );
                         else
                             b.appendAs( fixed[ j ], "" );
                     }
                     keys.insert( b.obj() );
                 }
             }
             else if ( fixed.size() > 1 ) {
                 insertArrayNull = true;
             }
         }
     }
     else {
         // nonterminal array element to expand, so recurse
         verify( !arrElt.eoo() );
         BSONObjIterator i( arrElt.embeddedObject() );
         if ( i.more() ) {
             while( i.more() ) {
                 BSONElement e = i.next();
                 if ( e.type() == Object ) {
                     _getKeys( fieldNames, fixed, e.embeddedObject(), keys );
                 }
             }
         }
         else {
             insertArrayNull = true;
         }
     }
     
     if ( insertArrayNull ) {
         // x : [] - need to insert undefined
         BSONObjBuilder b(_spec._sizeTracker);
         for( unsigned j = 0; j < fixed.size(); ++j ) {
             if ( j == arrIdx ) {
                 b.appendUndefined( "" );
             }
             else {
                 BSONElement e = fixed[j];
                 if ( e.eoo() )
                     b.appendNull( "" );
                 else
                     b.appendAs( e , "" );
             }
         }
         keys.insert( b.obj() );
     }
 }