void getKeys(const BSONObj& obj, BSONObjSet& keys) const {
    verify(_fields.size() >= 1);
    BSONObjSet keysToAdd;
    // We output keys in the same order as the fields we index.
    for (size_t i = 0; i < _fields.size(); ++i) {
        const IndexedField& field = _fields[i];

        // First, we get the keys that this field adds. Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means don't expand the last array.
        obj.getFieldsDotted(field.name, fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexedField::GEO == field.type) {
            getGeoKeys(fieldElements, &keysForThisField);
        } else if (IndexedField::LITERAL == field.type) {
            getLiteralKeys(fieldElements, &keysForThisField);
        } else {
            verify(0);
        }

        // We expect _spec->_missingField() to be present in the keys if data is
        // missing, so this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys. This requires that we have
        // some keys to take the Cartesian product with. If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end(); ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    if (keysToAdd.size() > _params.maxKeysPerInsert) {
        warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                  << ") consider creating larger buckets. obj=" << obj;
    }

    for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
        keys.insert(*it);
    }
}
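The Cartesian-product step above is the heart of compound-key generation: each indexed field contributes a set of candidate elements, and the output keys are every combination of one element per field, built up left to right exactly as the keysToAdd/updatedKeysToAdd loop does. A minimal standalone sketch of that combination step, using std::vector<std::string> as an illustrative stand-in for BSONObj keys (the function name and types here are not from the MongoDB source):

#include <set>
#include <string>
#include <utility>
#include <vector>

// Combine per-field key sets into composite keys, one element per field,
// mirroring the keysToAdd/updatedKeysToAdd loop above. The real code asserts
// that each field's set is non-empty (a missing-field placeholder is inserted),
// so the product never collapses to nothing.
std::set<std::vector<std::string>> cartesianKeys(
    const std::vector<std::set<std::string>>& perFieldKeys) {
    std::set<std::vector<std::string>> keysToAdd;
    for (const auto& fieldKeys : perFieldKeys) {
        if (keysToAdd.empty()) {
            // First field: seed the product with one-element keys.
            for (const auto& k : fieldKeys) {
                keysToAdd.insert({k});
            }
            continue;
        }
        std::set<std::vector<std::string>> updatedKeysToAdd;
        for (const auto& partialKey : keysToAdd) {
            for (const auto& k : fieldKeys) {
                std::vector<std::string> next = partialKey;  // copy the partial key...
                next.push_back(k);                           // ...and append this field's element
                updatedKeysToAdd.insert(std::move(next));
            }
        }
        keysToAdd = std::move(updatedKeysToAdd);
    }
    return keysToAdd;
}

For an index on {a: 1, b: 1} and a document like {a: [1, 2], b: "x"}, the per-field sets {1, 2} and {"x"} combine into the two composite keys [1, "x"] and [2, "x"].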
void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
    BSONObjSet keysToAdd;
    // We output keys in the same order as the fields we index.
    BSONObjIterator i(_descriptor->keyPattern());
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds. Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means don't expand the last array.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            // We can't ever return documents that don't have geometry, so don't bother
            // indexing them.
            if (fieldElements.empty()) {
                return;
            }
            getGeoKeys(obj, fieldElements, &keysForThisField);
        } else {
            getLiteralKeys(fieldElements, &keysForThisField);
        }

        // We expect the missing-field element to be present in the keys if data is
        // missing, so this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys. This requires that we have
        // some keys to take the Cartesian product with. If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end(); ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    if (keysToAdd.size() > _params.maxKeysPerInsert) {
        warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                  << ") consider creating larger buckets. obj=" << obj;
    }

    *keys = keysToAdd;
}
/* Step one of adding keys to index idxNo for a new record.
   Begins insertion of the first key via the two-step insertion interface;
   multikey documents need more work, since inserting one key can change the
   index's structure. */
void fetchIndexInserters(BSONObjSet& /*out*/ keys,
                         IndexInterface::IndexInserter& inserter,
                         NamespaceDetails* d,
                         int idxNo,
                         const BSONObj& obj,
                         DiskLoc recordLoc) {
    IndexDetails& idx = d->idx(idxNo);
    idx.getKeysFromObject(obj, keys);
    if (keys.empty())
        return;
    bool dupsAllowed = !idx.unique();
    Ordering ordering = Ordering::make(idx.keyPattern());
    try {
        // We can't do the two-step method with multikeys, as insertion of one key changes
        // the index's structure. However, we can do the first key of the set, so we go
        // ahead and do that FWIW.
        inserter.addInsertionContinuation(idx.idxInterface().beginInsertIntoIndex(
            idxNo, idx, recordLoc, *keys.begin(), ordering, dupsAllowed));
    } catch (AssertionException& e) {
        if (e.getCode() == 10287 && idxNo == d->nIndexes) {
            DEV log() << "info: caught key already in index on bg indexing (ok)" << endl;
        } else {
            throw;
        }
    }
}
/* Add keys to index idxNo for a new record. */
static void addKeysToIndex(const char* ns,
                           NamespaceDetails* d,
                           int idxNo,
                           BSONObj& obj,
                           DiskLoc recordLoc,
                           bool dupsAllowed) {
    IndexDetails& idx = d->idx(idxNo);
    BSONObjSet keys;
    idx.getKeysFromObject(obj, keys);
    if (keys.empty())
        return;
    BSONObj order = idx.keyPattern();
    IndexInterface& ii = idx.idxInterface();
    Ordering ordering = Ordering::make(order);
    int n = 0;
    for (BSONObjSet::iterator i = keys.begin(); i != keys.end(); i++) {
        if (++n == 2) {
            d->setIndexIsMultikey(ns, idxNo);
        }
        verify(!recordLoc.isNull());
        try {
            ii.bt_insert(idx.head, recordLoc, *i, ordering, dupsAllowed, idx);
        } catch (AssertionException& e) {
            if (e.getCode() == 10287 && idxNo == d->nIndexes) {
                DEV log() << "info: caught key already in index on bg indexing (ok)" << endl;
                continue;
            }
            if (!dupsAllowed) {
                // Dup key exception, presumably.
                throw;
            }
            problem() << " caught assertion addKeysToIndex " << idx.indexNamespace() << " "
                      << obj["_id"] << endl;
        }
    }
}
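The `++n == 2` test above is what flips an index into multikey state: the first time any single document yields a second key, the planner can no longer assume one key per document, so the flag is persisted before the second insert. A small sketch of that bookkeeping, with a hypothetical keysFor() standing in for IndexDetails::getKeysFromObject():

#include <iostream>
#include <set>
#include <string>

// Hypothetical stand-in for getKeysFromObject(): an array-valued field
// produces one key per distinct element.
std::set<std::string> keysFor(const std::set<std::string>& arrayValues) {
    return arrayValues;
}

int main() {
    bool isMultikey = false;
    // A document like {tags: ["a", "b", "c"]} under an index on {tags: 1}.
    std::set<std::string> keys = keysFor({"a", "b", "c"});
    int n = 0;
    for (const auto& k : keys) {
        if (++n == 2) {
            isMultikey = true;  // second key from one doc => index is multikey
        }
        std::cout << "insert key: " << k << "\n";
    }
    std::cout << "multikey: " << std::boolalpha << isMultikey << "\n";
}

Note that the flag is set before the second bt_insert, so even if a later key insertion throws, the index has already been conservatively marked multikey.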
void getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
    if ( _spec._indexType.get() ) {  // plugin (e.g. geo)
        _spec._indexType->getKeys( obj, keys );
        return;
    }
    vector<const char*> fieldNames( _spec._fieldNames );
    vector<BSONElement> fixed( _spec._fixed );
    _getKeys( fieldNames, fixed, obj, keys );
    if ( keys.empty() && !_spec._sparse )
        keys.insert( _spec._nullKey );
}
void DocumentSourceGraphLookUp::doBreadthFirstSearch() {
    long long depth = 0;
    bool shouldPerformAnotherQuery;
    do {
        shouldPerformAnotherQuery = false;

        // Check whether each key in the frontier exists in the cache or needs to be queried.
        BSONObjSet cached = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        auto matchStage = makeMatchStageFromFrontier(&cached);

        ValueUnorderedSet queried = pExpCtx->getValueComparator().makeUnorderedValueSet();
        _frontier->swap(queried);
        _frontierUsageBytes = 0;

        // Process cached values, populating '_frontier' for the next iteration of search.
        while (!cached.empty()) {
            auto it = cached.begin();
            shouldPerformAnotherQuery =
                addToVisitedAndFrontier(*it, depth) || shouldPerformAnotherQuery;
            cached.erase(it);
            checkMemoryUsage();
        }

        if (matchStage) {
            // Query for all keys that were in the frontier and not in the cache, populating
            // '_frontier' for the next iteration of search.

            // We've already allocated space for the trailing $match stage in '_fromPipeline'.
            _fromPipeline.back() = *matchStage;
            auto pipeline = uassertStatusOK(_mongod->makePipeline(_fromPipeline, _fromExpCtx));
            while (auto next = pipeline->output()->getNext()) {
                uassert(40271,
                        str::stream()
                            << "Documents in the '" << _from.ns()
                            << "' namespace must contain an _id for de-duplication in "
                               "$graphLookup",
                        !(*next)["_id"].missing());

                BSONObj result = next->toBson();
                shouldPerformAnotherQuery =
                    addToVisitedAndFrontier(result.getOwned(), depth) ||
                    shouldPerformAnotherQuery;
                addToCache(result, queried);
            }
            checkMemoryUsage();
        }

        ++depth;
    } while (shouldPerformAnotherQuery && depth < std::numeric_limits<long long>::max() &&
             (!_maxDepth || depth <= *_maxDepth));

    _frontier->clear();
    _frontierUsageBytes = 0;
}
void DocumentSourceGraphLookUp::doBreadthFirstSearch() {
    long long depth = 0;
    bool shouldPerformAnotherQuery;
    do {
        shouldPerformAnotherQuery = false;

        // Check whether each key in the frontier exists in the cache or needs to be queried.
        BSONObjSet cached;
        auto query = constructQuery(&cached);

        std::unordered_set<Value, Value::Hash> queried;
        _frontier.swap(queried);
        _frontierUsageBytes = 0;

        // Process cached values, populating '_frontier' for the next iteration of search.
        while (!cached.empty()) {
            auto it = cached.begin();
            shouldPerformAnotherQuery =
                addToVisitedAndFrontier(*it, depth) || shouldPerformAnotherQuery;
            cached.erase(it);
            checkMemoryUsage();
        }

        if (query) {
            // Query for all keys that were in the frontier and not in the cache, populating
            // '_frontier' for the next iteration of search.
            unique_ptr<DBClientCursor> cursor = _mongod->directClient()->query(_from.ns(), *query);

            // Iterate the cursor.
            while (cursor->more()) {
                BSONObj result = cursor->nextSafe();
                shouldPerformAnotherQuery =
                    addToVisitedAndFrontier(result.getOwned(), depth) ||
                    shouldPerformAnotherQuery;
                addToCache(result, queried);
            }
            checkMemoryUsage();
        }

        ++depth;
    } while (shouldPerformAnotherQuery && depth < std::numeric_limits<long long>::max() &&
             (!_maxDepth || depth <= *_maxDepth));

    _frontier.clear();
    _frontierUsageBytes = 0;
}
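Both versions of doBreadthFirstSearch() share the same skeleton: swap the current frontier out into 'queried', answer what you can from the cache, query the rest, and let anything newly discovered repopulate '_frontier' for the next pass. A minimal in-memory sketch of that loop shape, with an adjacency map standing in for the queried collection (the container types and names here are illustrative, not the aggregation code's):

#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Breadth-first expansion with a swap-out frontier, mirroring the structure of
// doBreadthFirstSearch(). 'graph' is a stand-in for the $from collection.
std::unordered_set<std::string> bfs(
    const std::unordered_map<std::string, std::vector<std::string>>& graph,
    const std::string& start,
    long long maxDepth) {
    std::unordered_set<std::string> visited;
    std::unordered_set<std::string> frontier{start};
    long long depth = 0;
    bool shouldPerformAnotherQuery;
    do {
        shouldPerformAnotherQuery = false;
        std::unordered_set<std::string> queried;
        frontier.swap(queried);  // empty the frontier; 'queried' holds this pass's keys
        for (const auto& node : queried) {
            if (!visited.insert(node).second) {
                continue;  // already visited: don't re-expand
            }
            auto it = graph.find(node);
            if (it == graph.end()) {
                continue;
            }
            for (const auto& neighbor : it->second) {
                if (!visited.count(neighbor)) {
                    frontier.insert(neighbor);  // expand on the next pass
                    shouldPerformAnotherQuery = true;
                }
            }
        }
        ++depth;
    } while (shouldPerformAnotherQuery && depth <= maxDepth);
    return visited;
}

The swap is the key move: it empties '_frontier' in O(1) while keeping this pass's keys alive in 'queried' for cache bookkeeping, so one container serves as both "what we just asked" and "what to ask next".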
// PD_TRACE_DECLARE_FUNCTION ( SDB__IXMKEYGEN_GETKEYS, "_ixmKeyGenerator::getKeys" )
INT32 getKeys ( const BSONObj &obj,
                BSONObjSet &keys,
                BSONElement *pArrEle ) const
{
    INT32 rc = SDB_OK ;
    PD_TRACE_ENTRY ( SDB__IXMKEYGEN_GETKEYS ) ;
    SDB_ASSERT ( _keygen, "spec can't be NULL" ) ;
    SDB_ASSERT ( !_keygen->_fieldNames.empty(), "can not be empty" ) ;
    vector<const CHAR*> fieldNames ( _keygen->_fieldNames ) ;
    BSONElement arrEle ;
    try
    {
        rc = _getKeys ( fieldNames, obj, keys, &arrEle ) ;
    }
    catch ( std::exception &e )
    {
        PD_LOG ( PDERROR, "unexpected err:%s", e.what() ) ;
        rc = SDB_INVALIDARG ;
        goto error ;
    }

    if ( SDB_OK != rc )
    {
        PD_LOG ( PDERROR, "Failed to generate key from object: %s",
                 obj.toString().c_str() ) ;
        goto error ;
    }

    if ( keys.empty() )
    {
        keys.insert ( _keygen->_undefinedKey ) ;
    }

    if ( NULL != pArrEle && !arrEle.eoo() )
    {
        *pArrEle = arrEle ;
    }

done :
    PD_TRACE_EXITRC ( SDB__IXMKEYGEN_GETKEYS, rc ) ;
    return rc ;
error :
    goto done ;
}
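The `done`/`error` labels at the end follow the rc-and-goto convention this codebase uses for error handling: every failure path jumps to `error`, which funnels back into `done`, so the trace-exit macro and the single return run on every path. A stripped-down sketch of the idiom (the function and the error-code value are illustrative, not from the SequoiaDB source):

#include <cstdio>

#define SDB_OK 0
#define SDB_INVALIDARG (-6)  // illustrative value, not the real constant

// rc/goto idiom: one exit point ('done') so tracing/cleanup always runs,
// and error-specific handling lives under 'error' before falling through.
int parsePositive(int input, int* out)
{
    int rc = SDB_OK;
    if (input <= 0)
    {
        rc = SDB_INVALIDARG;
        goto error;
    }
    *out = input;
done:
    // single exit: the trace-exit / cleanup step would go here
    return rc;
error:
    std::printf("parsePositive failed: rc=%d\n", rc);
    goto done;
}

The payoff is that adding a new failure case is one `goto error;`, and no path can skip the exit bookkeeping.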
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys) {
    BSONObjSet keysToAdd;

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    // We output keys in the same order as the fields we index.
    BSONObjIterator i(keyPattern);
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds. Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means don't expand the last array.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                //   geo: null,
                //   geo: undefined,
                //   geo: []
                // should all behave like there is no geo field. So we look for these cases
                // and throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }
            getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect the missing-field element to be present in the keys if data is
        // missing, so this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys. This requires that we have
        // some keys to take the Cartesian product with. If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end(); ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size() << " obj inserted: " << obj;
    }

    *keys = keysToAdd;
}
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys,
                                      MultikeyPaths* multikeyPaths) {
    BSONObjSet keysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    if (multikeyPaths) {
        invariant(multikeyPaths->empty());
        multikeyPaths->resize(keyPattern.nFields());
    }
    size_t posInIdx = 0;

    // We output keys in the same order as the fields we index.
    for (const auto keyElem : keyPattern) {
        // First, we get the keys that this field adds. Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        const bool expandArrayOnTrailingField = false;
        std::set<size_t>* arrayComponents = multikeyPaths ? &(*multikeyPaths)[posInIdx] : nullptr;
        dps::extractAllElementsAlongPath(
            obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents);

        // Trailing array values aren't being expanded, so we still need to determine whether
        // the last component of the indexed path 'keyElem.fieldName()' causes the index to be
        // multikey. We say that it does if
        //   (a) the last component of the indexed path ever refers to an array value
        //       (regardless of the number of array elements), or
        //   (b) the last component of the indexed path ever refers to GeoJSON data that
        //       requires multiple cells for its covering.
        bool lastPathComponentCausesIndexToBeMultikey;
        BSONObjSet keysForThisField = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        if (IndexNames::GEO_2DSPHERE == keyElem.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                //   geo: null,
                //   geo: undefined,
                //   geo: []
                // should all behave like there is no geo field. So we look for these cases
                // and throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }
            lastPathComponentCausesIndexToBeMultikey =
                getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            lastPathComponentCausesIndexToBeMultikey =
                getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect the missing-field element to be present in the keys if data is
        // missing, so this should be non-empty.
        verify(!keysForThisField.empty());

        if (multikeyPaths && lastPathComponentCausesIndexToBeMultikey) {
            const size_t pathLengthOfThisField =
                FieldRef{keyElem.fieldNameStringData()}.numParts();
            invariant(pathLengthOfThisField > 0);
            (*multikeyPaths)[posInIdx].insert(pathLengthOfThisField - 1);
        }

        // We take the Cartesian product of all of the keys. This requires that we have
        // some keys to take the Cartesian product with. If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            ++posInIdx;
            continue;
        }

        BSONObjSet updatedKeysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end(); ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
        ++posInIdx;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size() << " obj inserted: " << redact(obj);
    }

    *keys = keysToAdd;
}
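The MultikeyPaths output records, for each field of the key pattern, the zero-based path components that ever refer to arrays. Because trailing arrays are not expanded here, the last component is handled separately via lastPathComponentCausesIndexToBeMultikey and recorded as numParts() - 1. A small sketch of that recording step, with a simple dotted-path splitter standing in for FieldRef (the helper names are illustrative):

#include <set>
#include <string>
#include <vector>

using MultikeyComponents = std::set<size_t>;
using MultikeyPaths = std::vector<MultikeyComponents>;

// Stand-in for FieldRef::numParts(): count dotted components in a path.
size_t numParts(const std::string& path) {
    size_t parts = 1;
    for (char c : path) {
        if (c == '.') {
            ++parts;
        }
    }
    return parts;
}

// Record that the trailing component of 'path' (the field at position
// 'posInIdx' in the key pattern) makes the index multikey, as getS2Keys() does.
void markTrailingComponent(MultikeyPaths& multikeyPaths,
                           size_t posInIdx,
                           const std::string& path) {
    size_t pathLength = numParts(path);              // e.g. "a.b.geo" -> 3
    multikeyPaths[posInIdx].insert(pathLength - 1);  // component index 2 ("geo")
}

Interior components are filled in directly by extractAllElementsAlongPath() through the 'arrayComponents' out-parameter; only the trailing component needs this extra step, since a multi-cell GeoJSON covering (case (b) above) also counts as multikey even though no array is present.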