void BtreeKeyGeneratorV1::getKeysImplWithArray( std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, const BSONObj& obj, BSONObjSet* keys, unsigned numNotFound, const std::vector<PositionalPathInfo>& positionalInfo, MultikeyPaths* multikeyPaths) const { BSONElement arrElt; // A set containing the position of any indexed fields in the key pattern that traverse through // the 'arrElt' array value. std::set<size_t> arrIdxs; // A vector with size equal to the number of elements in the index key pattern. Each element in // the vector, if initialized, refers to the component within the indexed field that traverses // through the 'arrElt' array value. We say that this component within the indexed field // corresponds to a path that causes the index to be multikey if the 'arrElt' array value // contains multiple elements. // // For example, consider the index {'a.b': 1, 'a.c'} and the document // {a: [{b: 1, c: 'x'}, {b: 2, c: 'y'}]}. The path "a" causes the index to be multikey, so we'd // have a std::vector<boost::optional<size_t>>{{0U}, {0U}}. // // Furthermore, due to how positional key patterns are specified, it's possible for an indexed // field to cause the index to be multikey at a different component than another indexed field // that also traverses through the 'arrElt' array value. It's then also possible for an indexed // field not to cause the index to be multikey, even if it traverses through the 'arrElt' array // value, because only a particular element would be indexed. // // For example, consider the index {'a.b': 1, 'a.b.0'} and the document {a: {b: [1, 2]}}. The // path "a.b" causes the index to be multikey, but the key pattern "a.b.0" only indexes the // first element of the array, so we'd have a // std::vector<boost::optional<size_t>>{{1U}, boost::none}. std::vector<boost::optional<size_t>> arrComponents(fieldNames.size()); bool mayExpandArrayUnembedded = true; for (size_t i = 0; i < fieldNames.size(); ++i) { if (*fieldNames[i] == '\0') { continue; } bool arrayNestedArray; // Extract element matching fieldName[ i ] from object xor array. BSONElement e = extractNextElement(obj, positionalInfo[i], &fieldNames[i], &arrayNestedArray); if (e.eoo()) { // if field not present, set to null fixed[i] = nullElt; // done expanding this field name fieldNames[i] = ""; numNotFound++; } else if (e.type() == Array) { arrIdxs.insert(i); if (arrElt.eoo()) { // we only expand arrays on a single path -- track the path here arrElt = e; } else if (e.rawdata() != arrElt.rawdata()) { // enforce single array path here assertParallelArrays(e.fieldName(), arrElt.fieldName()); } if (arrayNestedArray) { mayExpandArrayUnembedded = false; } } else { // not an array - no need for further expansion fixed[i] = e; } } if (arrElt.eoo()) { // No array, so generate a single key. if (_isSparse && numNotFound == fieldNames.size()) { return; } BSONObjBuilder b(_sizeTracker); for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) { CollationIndexKey::collationAwareIndexKeyAppend(*i, _collator, &b); } keys->insert(b.obj()); } else if (arrElt.embeddedObject().firstElement().eoo()) { // We've encountered an empty array. if (multikeyPaths && mayExpandArrayUnembedded) { // Any indexed path which traverses through the empty array must be recorded as an array // component. for (auto i : arrIdxs) { // We need to determine which component of the indexed field causes the index to be // multikey as a result of the empty array. Indexed empty arrays are considered // multikey and may occur mid-path. For instance, the indexed path "a.b.c" has // multikey components {0, 1} given the document {a: [{b: []}, {b: 1}]}. size_t fullPathLength = _pathLengths[i]; size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts(); invariant(suffixPathLength < fullPathLength); arrComponents[i] = fullPathLength - suffixPathLength - 1; } } // For an empty array, set matching fields to undefined. _getKeysArrEltFixed(&fieldNames, &fixed, undefinedElt, keys, numNotFound, arrElt, arrIdxs, true, _emptyPositionalInfo, multikeyPaths); } else { BSONObj arrObj = arrElt.embeddedObject(); // For positional key patterns, e.g. {'a.1.b': 1}, we lookup the indexed array element // and then traverse the remainder of the field path up front. This prevents us from // having to look up the indexed element again on each recursive call (i.e. once per // array element). std::vector<PositionalPathInfo> subPositionalInfo(fixed.size()); for (size_t i = 0; i < fieldNames.size(); ++i) { const bool fieldIsArray = arrIdxs.find(i) != arrIdxs.end(); if (*fieldNames[i] == '\0') { // We've reached the end of the path. if (multikeyPaths && fieldIsArray && mayExpandArrayUnembedded) { // The 'arrElt' array value isn't expanded into multiple elements when the last // component of the indexed field is positional and 'arrElt' contains nested // array values. In all other cases, the 'arrElt' array value may be expanded // into multiple element and can therefore cause the index to be multikey. arrComponents[i] = _pathLengths[i] - 1; } continue; } // The earlier call to dps::extractElementAtPathOrArrayAlongPath(..., fieldNames[i]) // modified fieldNames[i] to refer to the suffix of the path immediately following the // 'arrElt' array value. If we haven't reached the end of this indexed field yet, then // we must have traversed through 'arrElt'. invariant(fieldIsArray); StringData part = fieldNames[i]; part = part.substr(0, part.find('.')); subPositionalInfo[i].positionallyIndexedElt = arrObj[part]; if (subPositionalInfo[i].positionallyIndexedElt.eoo()) { // We aren't indexing a particular element of the 'arrElt' array value, so it may be // expanded into multiple elements. It can therefore cause the index to be multikey. if (multikeyPaths) { // We need to determine which component of the indexed field causes the index to // be multikey as a result of the 'arrElt' array value. Since // // NumComponents("<pathPrefix>") + NumComponents("<pathSuffix>") // = NumComponents("<pathPrefix>.<pathSuffix>"), // // we can compute the number of components in a prefix of the indexed field by // subtracting the number of components in the suffix 'fieldNames[i]' from the // number of components in the indexed field '_fieldNames[i]'. // // For example, consider the indexed field "a.b.c" and the suffix "c". The path // "a.b.c" has 3 components and the suffix "c" has 1 component. Subtracting the // latter from the former yields the number of components in the prefix "a.b", // i.e. 2. size_t fullPathLength = _pathLengths[i]; size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts(); invariant(suffixPathLength < fullPathLength); arrComponents[i] = fullPathLength - suffixPathLength - 1; } continue; } // We're indexing an array element by its position. Traverse the remainder of the // field path now. // // Indexing an array element by its position selects a particular element of the // 'arrElt' array value when generating keys. It therefore cannot cause the index to be // multikey. subPositionalInfo[i].arrayObj = arrObj; subPositionalInfo[i].remainingPath = fieldNames[i]; subPositionalInfo[i].dottedElt = dps::extractElementAtPathOrArrayAlongPath( arrObj, subPositionalInfo[i].remainingPath); } // Generate a key for each element of the indexed array. for (const auto arrObjElem : arrObj) { _getKeysArrEltFixed(&fieldNames, &fixed, arrObjElem, keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded, subPositionalInfo, multikeyPaths); } } // Record multikey path components. if (multikeyPaths) { for (size_t i = 0; i < arrComponents.size(); ++i) { if (auto arrComponent = arrComponents[i]) { (*multikeyPaths)[i].insert(*arrComponent); } } } }
/** * @param fieldNames - fields to index, may be postfixes in recursive calls * @param fixed - values that have already been identified for their index fields * @param obj - object from which keys should be extracted, based on names in fieldNames * @param keys - set where index keys are written * @param numNotFound - number of index fields that have already been identified as missing * @param array - array from which keys should be extracted, based on names in fieldNames * If obj and array are both nonempty, obj will be one of the elements of array. */ void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const { BSONElement arrElt; set<unsigned> arrIdxs; bool mayExpandArrayUnembedded = true; for( unsigned i = 0; i < fieldNames.size(); ++i ) { if ( *fieldNames[ i ] == '\0' ) { continue; } bool arrayNestedArray; // Extract element matching fieldName[ i ] from object xor array. BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray ); if ( e.eoo() ) { // if field not present, set to null fixed[ i ] = _spec._nullElt; // done expanding this field name fieldNames[ i ] = ""; numNotFound++; } else if ( e.type() == Array ) { arrIdxs.insert( i ); if ( arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here arrElt = e; } else if ( e.rawdata() != arrElt.rawdata() ) { // enforce single array path here assertParallelArrays( e.fieldName(), arrElt.fieldName() ); } if ( arrayNestedArray ) { mayExpandArrayUnembedded = false; } } else { // not an array - no need for further expansion fixed[ i ] = e; } } if ( arrElt.eoo() ) { // No array, so generate a single key. if ( _spec._sparse && numNotFound == _spec._nFields ) { return; } BSONObjBuilder b(_spec._sizeTracker); for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) { b.appendAs( *i, "" ); } keys.insert( b.obj() ); } else if ( arrElt.embeddedObject().firstElement().eoo() ) { // Empty array, so set matching fields to undefined. _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true ); } else { // Non empty array that can be expanded, so generate a key for each member. BSONObj arrObj = arrElt.embeddedObject(); BSONObjIterator i( arrObj ); while( i.more() ) { _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded ); } } }