Esempio n. 1
0
        void getKeys(const BSONObj& obj, BSONObjSet& keys) const {
            verify(_fields.size() >= 1);

            BSONObjSet keysToAdd;
            // We output keys in the same order as the fields we index.
            for (size_t i = 0; i < _fields.size(); ++i) {
                const IndexedField &field = _fields[i];

                // First, we get the keys that this field adds.  Either they're added literally from
                // the value of the field, or they're transformed if the field is geo.
                BSONElementSet fieldElements;
                // false means Don't expand the last array, duh.
                obj.getFieldsDotted(field.name, fieldElements, false);

                BSONObjSet keysForThisField;
                if (IndexedField::GEO == field.type) {
                    getGeoKeys(fieldElements, &keysForThisField);
                } else if (IndexedField::LITERAL == field.type) {
                    getLiteralKeys(fieldElements, &keysForThisField);
                } else {
                    verify(0);
                }

                // We expect there to be _spec->_missingField() present in the keys if data is
                // missing.  So, this should be non-empty.
                verify(!keysForThisField.empty());

                // We take the Cartesian product of all of the keys.  This requires that we have
                // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
                // initialize it.  
                if (keysToAdd.empty()) {
                    keysToAdd = keysForThisField;
                    continue;
                }

                BSONObjSet updatedKeysToAdd;
                for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end();
                     ++it) {
                    for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                         newIt!= keysForThisField.end(); ++newIt) {
                        BSONObjBuilder b;
                        b.appendElements(*it);
                        b.append(newIt->firstElement());
                        updatedKeysToAdd.insert(b.obj());
                    }
                }
                keysToAdd = updatedKeysToAdd;
            }

            if (keysToAdd.size() > _params.maxKeysPerInsert) {
                warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                          << ") consider creating larger buckets. obj="
                          << obj;
            }

            for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
                keys.insert(*it);
            }
        }
Esempio n. 2
0
    void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
        BSONObjSet keysToAdd;
        // We output keys in the same order as the fields we index.
        BSONObjIterator i(_descriptor->keyPattern());
        while (i.more()) {
            BSONElement e = i.next();

            // First, we get the keys that this field adds.  Either they're added literally from
            // the value of the field, or they're transformed if the field is geo.
            BSONElementSet fieldElements;
            // false means Don't expand the last array, duh.
            obj.getFieldsDotted(e.fieldName(), fieldElements, false);

            BSONObjSet keysForThisField;
            if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
                // We can't ever return documents that don't have geometry so don't bother indexing
                // them.
                if (fieldElements.empty()) { return; }
                getGeoKeys(obj, fieldElements, &keysForThisField);
            } else {
                getLiteralKeys(fieldElements, &keysForThisField);
            }

            // We expect there to be the missing field element present in the keys if data is
            // missing.  So, this should be non-empty.
            verify(!keysForThisField.empty());

            // We take the Cartesian product of all of the keys.  This requires that we have
            // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
            // initialize it.  
            if (keysToAdd.empty()) {
                keysToAdd = keysForThisField;
                continue;
            }

            BSONObjSet updatedKeysToAdd;
            for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end();
                    ++it) {
                for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                        newIt!= keysForThisField.end(); ++newIt) {
                    BSONObjBuilder b;
                    b.appendElements(*it);
                    b.append(newIt->firstElement());
                    updatedKeysToAdd.insert(b.obj());
                }
            }
            keysToAdd = updatedKeysToAdd;
        }

        if (keysToAdd.size() > _params.maxKeysPerInsert) {
            warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                << ") consider creating larger buckets. obj="
                << obj;
        }

        *keys = keysToAdd;
    }
Esempio n. 3
0
 /* step one of adding keys to index idxNo for a new record
    @return true means done.  false means multikey involved and more work to do
 */
 void fetchIndexInserters(BSONObjSet & /*out*/keys,
                          IndexInterface::IndexInserter &inserter,
                          NamespaceDetails *d,
                          int idxNo,
                          const BSONObj& obj,
                          DiskLoc recordLoc) {
     IndexDetails &idx = d->idx(idxNo);
     idx.getKeysFromObject(obj, keys);
     if( keys.empty() )
         return;
     bool dupsAllowed = !idx.unique();
     Ordering ordering = Ordering::make(idx.keyPattern());
     
     try {
         // we can't do the two step method with multi keys as insertion of one key changes the indexes 
         // structure.  however we can do the first key of the set so we go ahead and do that FWIW
         inserter.addInsertionContinuation(
                 idx.idxInterface().beginInsertIntoIndex(
                         idxNo, idx, recordLoc, *keys.begin(), ordering, dupsAllowed));
     }
     catch (AssertionException& e) {
         if( e.getCode() == 10287 && idxNo == d->nIndexes ) {
             DEV log() << "info: caught key already in index on bg indexing (ok)" << endl;
         }
         else {
             throw;
         }
     }
 }
Esempio n. 4
0
 /* add keys to index idxNo for a new record */
 static void addKeysToIndex(const char *ns, NamespaceDetails *d, int idxNo, BSONObj& obj,
                            DiskLoc recordLoc, bool dupsAllowed) {
     IndexDetails& idx = d->idx(idxNo);
     BSONObjSet keys;
     idx.getKeysFromObject(obj, keys);
     if( keys.empty() ) 
         return;
     BSONObj order = idx.keyPattern();
     IndexInterface& ii = idx.idxInterface();
     Ordering ordering = Ordering::make(order);
     int n = 0;
     for ( BSONObjSet::iterator i=keys.begin(); i != keys.end(); i++ ) {
         if( ++n == 2 ) {
             d->setIndexIsMultikey(ns, idxNo);
         }
         verify( !recordLoc.isNull() );
         try {
             ii.bt_insert(idx.head, recordLoc, *i, ordering, dupsAllowed, idx);
         }
         catch (AssertionException& e) {
             if( e.getCode() == 10287 && idxNo == d->nIndexes ) {
                 DEV log() << "info: caught key already in index on bg indexing (ok)" << endl;
                 continue;
             }
             if( !dupsAllowed ) {
                 // dup key exception, presumably.
                 throw;
             }
             problem() << " caught assertion addKeysToIndex " << idx.indexNamespace() << " " << obj["_id"] << endl;
         }
     }
 }
Esempio n. 5
0
 void getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
     if ( _spec._indexType.get() ) { //plugin (eg geo)
         _spec._indexType->getKeys( obj , keys );
         return;
     }
     vector<const char*> fieldNames( _spec._fieldNames );
     vector<BSONElement> fixed( _spec._fixed );
     _getKeys( fieldNames , fixed , obj, keys );
     if ( keys.empty() && ! _spec._sparse )
         keys.insert( _spec._nullKey );
 }     
void DocumentSourceGraphLookUp::doBreadthFirstSearch() {
    long long depth = 0;
    bool shouldPerformAnotherQuery;
    do {
        shouldPerformAnotherQuery = false;

        // Check whether each key in the frontier exists in the cache or needs to be queried.
        BSONObjSet cached = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        auto matchStage = makeMatchStageFromFrontier(&cached);

        ValueUnorderedSet queried = pExpCtx->getValueComparator().makeUnorderedValueSet();
        _frontier->swap(queried);
        _frontierUsageBytes = 0;

        // Process cached values, populating '_frontier' for the next iteration of search.
        while (!cached.empty()) {
            auto it = cached.begin();
            shouldPerformAnotherQuery =
                addToVisitedAndFrontier(*it, depth) || shouldPerformAnotherQuery;
            cached.erase(it);
            checkMemoryUsage();
        }

        if (matchStage) {
            // Query for all keys that were in the frontier and not in the cache, populating
            // '_frontier' for the next iteration of search.

            // We've already allocated space for the trailing $match stage in '_fromPipeline'.
            _fromPipeline.back() = *matchStage;
            auto pipeline = uassertStatusOK(_mongod->makePipeline(_fromPipeline, _fromExpCtx));
            while (auto next = pipeline->output()->getNext()) {
                uassert(40271,
                        str::stream()
                            << "Documents in the '"
                            << _from.ns()
                            << "' namespace must contain an _id for de-duplication in $graphLookup",
                        !(*next)["_id"].missing());

                BSONObj result = next->toBson();
                shouldPerformAnotherQuery =
                    addToVisitedAndFrontier(result.getOwned(), depth) || shouldPerformAnotherQuery;
                addToCache(result, queried);
            }
            checkMemoryUsage();
        }

        ++depth;
    } while (shouldPerformAnotherQuery && depth < std::numeric_limits<long long>::max() &&
             (!_maxDepth || depth <= *_maxDepth));

    _frontier->clear();
    _frontierUsageBytes = 0;
}
void DocumentSourceGraphLookUp::doBreadthFirstSearch() {
    long long depth = 0;
    bool shouldPerformAnotherQuery;
    do {
        shouldPerformAnotherQuery = false;

        // Check whether each key in the frontier exists in the cache or needs to be queried.
        BSONObjSet cached;
        auto query = constructQuery(&cached);

        std::unordered_set<Value, Value::Hash> queried;
        _frontier.swap(queried);
        _frontierUsageBytes = 0;

        // Process cached values, populating '_frontier' for the next iteration of search.
        while (!cached.empty()) {
            auto it = cached.begin();
            shouldPerformAnotherQuery =
                addToVisitedAndFrontier(*it, depth) || shouldPerformAnotherQuery;
            cached.erase(it);
            checkMemoryUsage();
        }

        if (query) {
            // Query for all keys that were in the frontier and not in the cache, populating
            // '_frontier' for the next iteration of search.
            unique_ptr<DBClientCursor> cursor = _mongod->directClient()->query(_from.ns(), *query);

            // Iterate the cursor.
            while (cursor->more()) {
                BSONObj result = cursor->nextSafe();
                shouldPerformAnotherQuery =
                    addToVisitedAndFrontier(result.getOwned(), depth) || shouldPerformAnotherQuery;
                addToCache(result, queried);
            }
            checkMemoryUsage();
        }

        ++depth;
    } while (shouldPerformAnotherQuery && depth < std::numeric_limits<long long>::max() &&
             (!_maxDepth || depth <= *_maxDepth));

    _frontier.clear();
    _frontierUsageBytes = 0;
}
Esempio n. 8
0
      // PD_TRACE_DECLARE_FUNCTION ( SDB__IXMKEYGEN_GETKEYS, "_ixmKeyGenerator::getKeys" )
      INT32 getKeys ( const BSONObj &obj, BSONObjSet &keys,
                      BSONElement *pArrEle ) const
      {
         INT32 rc = SDB_OK ;
         PD_TRACE_ENTRY ( SDB__IXMKEYGEN_GETKEYS );
         SDB_ASSERT( _keygen, "spec can't be NULL" ) ;
         SDB_ASSERT( !_keygen->_fieldNames.empty(), "can not be empty" ) ;
         vector<const CHAR*> fieldNames ( _keygen->_fieldNames ) ;
         BSONElement arrEle ;
         try
         {
            rc = _getKeys( fieldNames, obj, keys, &arrEle ) ;
         }
         catch ( std::exception &e )
         {
            PD_LOG( PDERROR, "unexpected err:%s", e.what() ) ;
            rc = SDB_INVALIDARG ;
            goto error ;
         }

         if ( SDB_OK != rc )
         {
            PD_LOG ( PDERROR, "Failed to generate key from object: %s",
                     obj.toString().c_str() ) ;
            goto error ;
         }

         if ( keys.empty() )
         {
            keys.insert ( _keygen->_undefinedKey ) ;
         }

         if ( NULL != pArrEle && !arrEle.eoo() )
         {
            *pArrEle = arrEle ;
         }
      done :
         PD_TRACE_EXITRC ( SDB__IXMKEYGEN_GETKEYS, rc );
         return rc ;
      error :
         goto done ;
      }
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys) {
    BSONObjSet keysToAdd;

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    // We output keys in the same order as the fields we index.
    BSONObjIterator i(keyPattern);
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means Don't expand the last array, duh.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field.  So we look for these cases and
                // throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require that at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end();
                 ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size() << " obj inserted: " << obj;
    }

    *keys = keysToAdd;
}
Esempio n. 10
0
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys,
                                      MultikeyPaths* multikeyPaths) {
    BSONObjSet keysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    if (multikeyPaths) {
        invariant(multikeyPaths->empty());
        multikeyPaths->resize(keyPattern.nFields());
    }

    size_t posInIdx = 0;

    // We output keys in the same order as the fields we index.
    for (const auto keyElem : keyPattern) {
        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        const bool expandArrayOnTrailingField = false;
        std::set<size_t>* arrayComponents = multikeyPaths ? &(*multikeyPaths)[posInIdx] : nullptr;
        dps::extractAllElementsAlongPath(
            obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents);

        // Trailing array values aren't being expanded, so we still need to determine whether the
        // last component of the indexed path 'keyElem.fieldName()' causes the index to be multikey.
        // We say that it does if
        //   (a) the last component of the indexed path ever refers to an array value (regardless of
        //       the number of array elements)
        //   (b) the last component of the indexed path ever refers to GeoJSON data that requires
        //       multiple cells for its covering.
        bool lastPathComponentCausesIndexToBeMultikey;
        BSONObjSet keysForThisField = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        if (IndexNames::GEO_2DSPHERE == keyElem.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field.  So we look for these cases and
                // throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require that at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            lastPathComponentCausesIndexToBeMultikey =
                getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            lastPathComponentCausesIndexToBeMultikey =
                getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        if (multikeyPaths && lastPathComponentCausesIndexToBeMultikey) {
            const size_t pathLengthOfThisField = FieldRef{keyElem.fieldNameStringData()}.numParts();
            invariant(pathLengthOfThisField > 0);
            (*multikeyPaths)[posInIdx].insert(pathLengthOfThisField - 1);
        }

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            ++posInIdx;
            continue;
        }

        BSONObjSet updatedKeysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end();
                 ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
        ++posInIdx;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size() << " obj inserted: " << redact(obj);
    }

    *keys = keysToAdd;
}