Beispiel #1
0
        void getKeys(const BSONObj& obj, BSONObjSet& keys) const {
            verify(_fields.size() >= 1);

            BSONObjSet keysToAdd;
            // We output keys in the same order as the fields we index.
            for (size_t i = 0; i < _fields.size(); ++i) {
                const IndexedField &field = _fields[i];

                // First, we get the keys that this field adds.  Either they're added literally from
                // the value of the field, or they're transformed if the field is geo.
                BSONElementSet fieldElements;
                // false means Don't expand the last array, duh.
                obj.getFieldsDotted(field.name, fieldElements, false);

                BSONObjSet keysForThisField;
                if (IndexedField::GEO == field.type) {
                    getGeoKeys(fieldElements, &keysForThisField);
                } else if (IndexedField::LITERAL == field.type) {
                    getLiteralKeys(fieldElements, &keysForThisField);
                } else {
                    verify(0);
                }

                // We expect there to be _spec->_missingField() present in the keys if data is
                // missing.  So, this should be non-empty.
                verify(!keysForThisField.empty());

                // We take the Cartesian product of all of the keys.  This requires that we have
                // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
                // initialize it.  
                if (keysToAdd.empty()) {
                    keysToAdd = keysForThisField;
                    continue;
                }

                BSONObjSet updatedKeysToAdd;
                for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end();
                     ++it) {
                    for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                         newIt!= keysForThisField.end(); ++newIt) {
                        BSONObjBuilder b;
                        b.appendElements(*it);
                        b.append(newIt->firstElement());
                        updatedKeysToAdd.insert(b.obj());
                    }
                }
                keysToAdd = updatedKeysToAdd;
            }

            if (keysToAdd.size() > _params.maxKeysPerInsert) {
                warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                          << ") consider creating larger buckets. obj="
                          << obj;
            }

            for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
                keys.insert(*it);
            }
        }
Beispiel #2
0
    void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
        BSONObjSet keysToAdd;
        // We output keys in the same order as the fields we index.
        BSONObjIterator i(_descriptor->keyPattern());
        while (i.more()) {
            BSONElement e = i.next();

            // First, we get the keys that this field adds.  Either they're added literally from
            // the value of the field, or they're transformed if the field is geo.
            BSONElementSet fieldElements;
            // false means Don't expand the last array, duh.
            obj.getFieldsDotted(e.fieldName(), fieldElements, false);

            BSONObjSet keysForThisField;
            if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
                // We can't ever return documents that don't have geometry so don't bother indexing
                // them.
                if (fieldElements.empty()) { return; }
                getGeoKeys(obj, fieldElements, &keysForThisField);
            } else {
                getLiteralKeys(fieldElements, &keysForThisField);
            }

            // We expect there to be the missing field element present in the keys if data is
            // missing.  So, this should be non-empty.
            verify(!keysForThisField.empty());

            // We take the Cartesian product of all of the keys.  This requires that we have
            // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
            // initialize it.  
            if (keysToAdd.empty()) {
                keysToAdd = keysForThisField;
                continue;
            }

            BSONObjSet updatedKeysToAdd;
            for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end();
                    ++it) {
                for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                        newIt!= keysForThisField.end(); ++newIt) {
                    BSONObjBuilder b;
                    b.appendElements(*it);
                    b.append(newIt->firstElement());
                    updatedKeysToAdd.insert(b.obj());
                }
            }
            keysToAdd = updatedKeysToAdd;
        }

        if (keysToAdd.size() > _params.maxKeysPerInsert) {
            warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                << ") consider creating larger buckets. obj="
                << obj;
        }

        *keys = keysToAdd;
    }
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys) {
    BSONObjSet keysToAdd;

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    // We output keys in the same order as the fields we index.
    BSONObjIterator i(keyPattern);
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means Don't expand the last array, duh.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field.  So we look for these cases and
                // throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require that at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end();
                 ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size() << " obj inserted: " << obj;
    }

    *keys = keysToAdd;
}
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys,
                                      MultikeyPaths* multikeyPaths) {
    BSONObjSet keysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    if (multikeyPaths) {
        invariant(multikeyPaths->empty());
        multikeyPaths->resize(keyPattern.nFields());
    }

    size_t posInIdx = 0;

    // We output keys in the same order as the fields we index.
    for (const auto keyElem : keyPattern) {
        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        const bool expandArrayOnTrailingField = false;
        std::set<size_t>* arrayComponents = multikeyPaths ? &(*multikeyPaths)[posInIdx] : nullptr;
        dps::extractAllElementsAlongPath(
            obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents);

        // Trailing array values aren't being expanded, so we still need to determine whether the
        // last component of the indexed path 'keyElem.fieldName()' causes the index to be multikey.
        // We say that it does if
        //   (a) the last component of the indexed path ever refers to an array value (regardless of
        //       the number of array elements)
        //   (b) the last component of the indexed path ever refers to GeoJSON data that requires
        //       multiple cells for its covering.
        bool lastPathComponentCausesIndexToBeMultikey;
        BSONObjSet keysForThisField = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        if (IndexNames::GEO_2DSPHERE == keyElem.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field.  So we look for these cases and
                // throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require that at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            lastPathComponentCausesIndexToBeMultikey =
                getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            lastPathComponentCausesIndexToBeMultikey =
                getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        if (multikeyPaths && lastPathComponentCausesIndexToBeMultikey) {
            const size_t pathLengthOfThisField = FieldRef{keyElem.fieldNameStringData()}.numParts();
            invariant(pathLengthOfThisField > 0);
            (*multikeyPaths)[posInIdx].insert(pathLengthOfThisField - 1);
        }

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            ++posInIdx;
            continue;
        }

        BSONObjSet updatedKeysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end();
                 ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
        ++posInIdx;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size() << " obj inserted: " << redact(obj);
    }

    *keys = keysToAdd;
}