void BtreeKeyGeneratorV1::getKeysImplWithArray( std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, const BSONObj& obj, BSONObjSet* keys, unsigned numNotFound, const std::vector<PositionalPathInfo>& positionalInfo, MultikeyPaths* multikeyPaths) const { BSONElement arrElt; // A set containing the position of any indexed fields in the key pattern that traverse through // the 'arrElt' array value. std::set<size_t> arrIdxs; // A vector with size equal to the number of elements in the index key pattern. Each element in // the vector, if initialized, refers to the component within the indexed field that traverses // through the 'arrElt' array value. We say that this component within the indexed field // corresponds to a path that causes the index to be multikey if the 'arrElt' array value // contains multiple elements. // // For example, consider the index {'a.b': 1, 'a.c'} and the document // {a: [{b: 1, c: 'x'}, {b: 2, c: 'y'}]}. The path "a" causes the index to be multikey, so we'd // have a std::vector<boost::optional<size_t>>{{0U}, {0U}}. // // Furthermore, due to how positional key patterns are specified, it's possible for an indexed // field to cause the index to be multikey at a different component than another indexed field // that also traverses through the 'arrElt' array value. It's then also possible for an indexed // field not to cause the index to be multikey, even if it traverses through the 'arrElt' array // value, because only a particular element would be indexed. // // For example, consider the index {'a.b': 1, 'a.b.0'} and the document {a: {b: [1, 2]}}. The // path "a.b" causes the index to be multikey, but the key pattern "a.b.0" only indexes the // first element of the array, so we'd have a // std::vector<boost::optional<size_t>>{{1U}, boost::none}. std::vector<boost::optional<size_t>> arrComponents(fieldNames.size()); bool mayExpandArrayUnembedded = true; for (size_t i = 0; i < fieldNames.size(); ++i) { if (*fieldNames[i] == '\0') { continue; } bool arrayNestedArray; // Extract element matching fieldName[ i ] from object xor array. BSONElement e = extractNextElement(obj, positionalInfo[i], &fieldNames[i], &arrayNestedArray); if (e.eoo()) { // if field not present, set to null fixed[i] = nullElt; // done expanding this field name fieldNames[i] = ""; numNotFound++; } else if (e.type() == Array) { arrIdxs.insert(i); if (arrElt.eoo()) { // we only expand arrays on a single path -- track the path here arrElt = e; } else if (e.rawdata() != arrElt.rawdata()) { // enforce single array path here assertParallelArrays(e.fieldName(), arrElt.fieldName()); } if (arrayNestedArray) { mayExpandArrayUnembedded = false; } } else { // not an array - no need for further expansion fixed[i] = e; } } if (arrElt.eoo()) { // No array, so generate a single key. if (_isSparse && numNotFound == fieldNames.size()) { return; } BSONObjBuilder b(_sizeTracker); for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) { CollationIndexKey::collationAwareIndexKeyAppend(*i, _collator, &b); } keys->insert(b.obj()); } else if (arrElt.embeddedObject().firstElement().eoo()) { // We've encountered an empty array. if (multikeyPaths && mayExpandArrayUnembedded) { // Any indexed path which traverses through the empty array must be recorded as an array // component. for (auto i : arrIdxs) { // We need to determine which component of the indexed field causes the index to be // multikey as a result of the empty array. Indexed empty arrays are considered // multikey and may occur mid-path. For instance, the indexed path "a.b.c" has // multikey components {0, 1} given the document {a: [{b: []}, {b: 1}]}. size_t fullPathLength = _pathLengths[i]; size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts(); invariant(suffixPathLength < fullPathLength); arrComponents[i] = fullPathLength - suffixPathLength - 1; } } // For an empty array, set matching fields to undefined. _getKeysArrEltFixed(&fieldNames, &fixed, undefinedElt, keys, numNotFound, arrElt, arrIdxs, true, _emptyPositionalInfo, multikeyPaths); } else { BSONObj arrObj = arrElt.embeddedObject(); // For positional key patterns, e.g. {'a.1.b': 1}, we lookup the indexed array element // and then traverse the remainder of the field path up front. This prevents us from // having to look up the indexed element again on each recursive call (i.e. once per // array element). std::vector<PositionalPathInfo> subPositionalInfo(fixed.size()); for (size_t i = 0; i < fieldNames.size(); ++i) { const bool fieldIsArray = arrIdxs.find(i) != arrIdxs.end(); if (*fieldNames[i] == '\0') { // We've reached the end of the path. if (multikeyPaths && fieldIsArray && mayExpandArrayUnembedded) { // The 'arrElt' array value isn't expanded into multiple elements when the last // component of the indexed field is positional and 'arrElt' contains nested // array values. In all other cases, the 'arrElt' array value may be expanded // into multiple element and can therefore cause the index to be multikey. arrComponents[i] = _pathLengths[i] - 1; } continue; } // The earlier call to dps::extractElementAtPathOrArrayAlongPath(..., fieldNames[i]) // modified fieldNames[i] to refer to the suffix of the path immediately following the // 'arrElt' array value. If we haven't reached the end of this indexed field yet, then // we must have traversed through 'arrElt'. invariant(fieldIsArray); StringData part = fieldNames[i]; part = part.substr(0, part.find('.')); subPositionalInfo[i].positionallyIndexedElt = arrObj[part]; if (subPositionalInfo[i].positionallyIndexedElt.eoo()) { // We aren't indexing a particular element of the 'arrElt' array value, so it may be // expanded into multiple elements. It can therefore cause the index to be multikey. if (multikeyPaths) { // We need to determine which component of the indexed field causes the index to // be multikey as a result of the 'arrElt' array value. Since // // NumComponents("<pathPrefix>") + NumComponents("<pathSuffix>") // = NumComponents("<pathPrefix>.<pathSuffix>"), // // we can compute the number of components in a prefix of the indexed field by // subtracting the number of components in the suffix 'fieldNames[i]' from the // number of components in the indexed field '_fieldNames[i]'. // // For example, consider the indexed field "a.b.c" and the suffix "c". The path // "a.b.c" has 3 components and the suffix "c" has 1 component. Subtracting the // latter from the former yields the number of components in the prefix "a.b", // i.e. 2. size_t fullPathLength = _pathLengths[i]; size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts(); invariant(suffixPathLength < fullPathLength); arrComponents[i] = fullPathLength - suffixPathLength - 1; } continue; } // We're indexing an array element by its position. Traverse the remainder of the // field path now. // // Indexing an array element by its position selects a particular element of the // 'arrElt' array value when generating keys. It therefore cannot cause the index to be // multikey. subPositionalInfo[i].arrayObj = arrObj; subPositionalInfo[i].remainingPath = fieldNames[i]; subPositionalInfo[i].dottedElt = dps::extractElementAtPathOrArrayAlongPath( arrObj, subPositionalInfo[i].remainingPath); } // Generate a key for each element of the indexed array. for (const auto arrObjElem : arrObj) { _getKeysArrEltFixed(&fieldNames, &fixed, arrObjElem, keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded, subPositionalInfo, multikeyPaths); } } // Record multikey path components. if (multikeyPaths) { for (size_t i = 0; i < arrComponents.size(); ++i) { if (auto arrComponent = arrComponents[i]) { (*multikeyPaths)[i].insert(*arrComponent); } } } }
/** * @param fieldNames - fields to index, may be postfixes in recursive calls * @param fixed - values that have already been identified for their index fields * @param obj - object from which keys should be extracted, based on names in fieldNames * @param keys - set where index keys are written * @param numNotFound - number of index fields that have already been identified as missing * @param array - array from which keys should be extracted, based on names in fieldNames * If obj and array are both nonempty, obj will be one of the elements of array. */ void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const { BSONElement arrElt; set<unsigned> arrIdxs; bool mayExpandArrayUnembedded = true; for( unsigned i = 0; i < fieldNames.size(); ++i ) { if ( *fieldNames[ i ] == '\0' ) { continue; } bool arrayNestedArray; // Extract element matching fieldName[ i ] from object xor array. BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray ); if ( e.eoo() ) { // if field not present, set to null fixed[ i ] = _spec._nullElt; // done expanding this field name fieldNames[ i ] = ""; numNotFound++; } else if ( e.type() == Array ) { arrIdxs.insert( i ); if ( arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here arrElt = e; } else if ( e.rawdata() != arrElt.rawdata() ) { // enforce single array path here assertParallelArrays( e.fieldName(), arrElt.fieldName() ); } if ( arrayNestedArray ) { mayExpandArrayUnembedded = false; } } else { // not an array - no need for further expansion fixed[ i ] = e; } } if ( arrElt.eoo() ) { // No array, so generate a single key. if ( _spec._sparse && numNotFound == _spec._nFields ) { return; } BSONObjBuilder b(_spec._sizeTracker); for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) { b.appendAs( *i, "" ); } keys.insert( b.obj() ); } else if ( arrElt.embeddedObject().firstElement().eoo() ) { // Empty array, so set matching fields to undefined. _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true ); } else { // Non empty array that can be expanded, so generate a key for each member. BSONObj arrObj = arrElt.embeddedObject(); BSONObjIterator i( arrObj ); while( i.more() ) { _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded ); } } }
void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { if (_isIdIndex) { // we special case for speed BSONElement e = obj["_id"]; if (e.eoo()) { keys->insert(_nullKey); } else { int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */; BSONObjBuilder b(size); b.appendAs(e, ""); keys->insert(b.obj()); invariant(keys->begin()->objsize() == size); } return; } BSONElement arrElt; unsigned arrIdx = ~0; unsigned numNotFound = 0; for (unsigned i = 0; i < fieldNames.size(); ++i) { if (*fieldNames[i] == '\0') continue; BSONElement e = dps::extractElementAtPathOrArrayAlongPath(obj, fieldNames[i]); if (e.eoo()) { e = nullElt; // no matching field numNotFound++; } if (e.type() != Array) fieldNames[i] = ""; // no matching field or non-array match if (*fieldNames[i] == '\0') // no need for further object expansion (though array expansion still possible) fixed[i] = e; if (e.type() == Array && arrElt.eoo()) { // we only expand arrays on a single path -- track the path here arrIdx = i; arrElt = e; } // enforce single array path here if (e.type() == Array && e.rawdata() != arrElt.rawdata()) { assertParallelArrays(e.fieldName(), arrElt.fieldName()); } } bool allFound = true; // have we found elements for all field names in the key spec? for (std::vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i) { if (**i != '\0') { allFound = false; break; } } if (_isSparse && numNotFound == _fieldNames.size()) { // we didn't find any fields // so we're not going to index this document return; } bool insertArrayNull = false; if (allFound) { if (arrElt.eoo()) { // no terminal array element to expand BSONObjBuilder b(_sizeTracker); for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) b.appendAs(*i, ""); keys->insert(b.obj()); } else { // terminal array element to expand, so generate all keys BSONObjIterator i(arrElt.embeddedObject()); if (i.more()) { while (i.more()) { BSONObjBuilder b(_sizeTracker); for (unsigned j = 0; j < fixed.size(); ++j) { if (j == arrIdx) b.appendAs(i.next(), ""); else b.appendAs(fixed[j], ""); } keys->insert(b.obj()); } } else if (fixed.size() > 1) { insertArrayNull = true; } } } else { // nonterminal array element to expand, so recurse verify(!arrElt.eoo()); BSONObjIterator i(arrElt.embeddedObject()); if (i.more()) { while (i.more()) { BSONElement e = i.next(); if (e.type() == Object) { getKeysImpl(fieldNames, fixed, e.embeddedObject(), keys, multikeyPaths); } } } else { insertArrayNull = true; } } if (insertArrayNull) { // x : [] - need to insert undefined BSONObjBuilder b(_sizeTracker); for (unsigned j = 0; j < fixed.size(); ++j) { if (j == arrIdx) { b.appendUndefined(""); } else { BSONElement e = fixed[j]; if (e.eoo()) b.appendNull(""); else b.appendAs(e, ""); } } keys->insert(b.obj()); } }
void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const { BSONElement arrElt; unsigned arrIdx = ~0; int numNotFound = 0; for( unsigned i = 0; i < fieldNames.size(); ++i ) { if ( *fieldNames[ i ] == '\0' ) continue; BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] ); if ( e.eoo() ) { e = _spec._nullElt; // no matching field numNotFound++; } if ( e.type() != Array ) fieldNames[ i ] = ""; // no matching field or non-array match if ( *fieldNames[ i ] == '\0' ) fixed[ i ] = e; // no need for further object expansion (though array expansion still possible) if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here arrIdx = i; arrElt = e; } // enforce single array path here if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) { assertParallelArrays( e.fieldName(), arrElt.fieldName() ); } } bool allFound = true; // have we found elements for all field names in the key spec? for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) { if ( **i != '\0' ) { allFound = false; break; } } if ( _spec._sparse && numNotFound == _spec._nFields ) { // we didn't find any fields // so we're not going to index this document return; } bool insertArrayNull = false; if ( allFound ) { if ( arrElt.eoo() ) { // no terminal array element to expand BSONObjBuilder b(_spec._sizeTracker); for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) b.appendAs( *i, "" ); keys.insert( b.obj() ); } else { // terminal array element to expand, so generate all keys BSONObjIterator i( arrElt.embeddedObject() ); if ( i.more() ) { while( i.more() ) { BSONObjBuilder b(_spec._sizeTracker); for( unsigned j = 0; j < fixed.size(); ++j ) { if ( j == arrIdx ) b.appendAs( i.next(), "" ); else b.appendAs( fixed[ j ], "" ); } keys.insert( b.obj() ); } } else if ( fixed.size() > 1 ) { insertArrayNull = true; } } } else { // nonterminal array element to expand, so recurse verify( !arrElt.eoo() ); BSONObjIterator i( arrElt.embeddedObject() ); if ( i.more() ) { while( i.more() ) { BSONElement e = i.next(); if ( e.type() == Object ) { _getKeys( fieldNames, fixed, e.embeddedObject(), keys ); } } } else { insertArrayNull = true; } } if ( insertArrayNull ) { // x : [] - need to insert undefined BSONObjBuilder b(_spec._sizeTracker); for( unsigned j = 0; j < fixed.size(); ++j ) { if ( j == arrIdx ) { b.appendUndefined( "" ); } else { BSONElement e = fixed[j]; if ( e.eoo() ) b.appendNull( "" ); else b.appendAs( e , "" ); } } keys.insert( b.obj() ); } }