/**
 * Extracts the index keys for 'obj' through the wrapped access method and hands every key,
 * paired with 'loc', to the sorter. Also records whether any document has ever produced more
 * than one key, and folds this document's multikey path components into the builder's
 * accumulated per-field sets.
 *
 * 'options' is not consulted here. When 'numInserted' is non-null it is incremented by the
 * number of keys generated for 'obj'. Always returns Status::OK().
 */
Status IndexAccessMethod::BulkBuilder::insert(OperationContext* txn,
                                              const BSONObj& obj,
                                              const RecordId& loc,
                                              const InsertDeleteOptions& options,
                                              int64_t* numInserted) {
    BSONObjSet generatedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
    MultikeyPaths pathsForThisDoc;
    _real->getKeys(obj, &generatedKeys, &pathsForThisDoc);

    // Sticky flag: once any document yields several keys it stays set.
    if (generatedKeys.size() > 1) {
        _everGeneratedMultipleKeys = true;
    }

    if (!pathsForThisDoc.empty()) {
        if (_indexMultikeyPaths.empty()) {
            // First document that reported path information: adopt it wholesale.
            _indexMultikeyPaths = pathsForThisDoc;
        } else {
            // Both vectors must have the same number of entries; union them position by position.
            invariant(_indexMultikeyPaths.size() == pathsForThisDoc.size());
            for (size_t pos = 0; pos != pathsForThisDoc.size(); ++pos) {
                const auto& components = pathsForThisDoc[pos];
                _indexMultikeyPaths[pos].insert(components.begin(), components.end());
            }
        }
    }

    for (const auto& key : generatedKeys) {
        _sorter->add(key, loc);
        ++_keysInserted;
    }

    if (numInserted != nullptr) {
        *numInserted += generatedKeys.size();
    }
    return Status::OK();
}
void getKeys(const BSONObj& obj, BSONObjSet& keys) const { verify(_fields.size() >= 1); BSONObjSet keysToAdd; // We output keys in the same order as the fields we index. for (size_t i = 0; i < _fields.size(); ++i) { const IndexedField &field = _fields[i]; // First, we get the keys that this field adds. Either they're added literally from // the value of the field, or they're transformed if the field is geo. BSONElementSet fieldElements; // false means Don't expand the last array, duh. obj.getFieldsDotted(field.name, fieldElements, false); BSONObjSet keysForThisField; if (IndexedField::GEO == field.type) { getGeoKeys(fieldElements, &keysForThisField); } else if (IndexedField::LITERAL == field.type) { getLiteralKeys(fieldElements, &keysForThisField); } else { verify(0); } // We expect there to be _spec->_missingField() present in the keys if data is // missing. So, this should be non-empty. verify(!keysForThisField.empty()); // We take the Cartesian product of all of the keys. This requires that we have // some keys to take the Cartesian product with. If keysToAdd.empty(), we // initialize it. if (keysToAdd.empty()) { keysToAdd = keysForThisField; continue; } BSONObjSet updatedKeysToAdd; for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) { for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); newIt!= keysForThisField.end(); ++newIt) { BSONObjBuilder b; b.appendElements(*it); b.append(newIt->firstElement()); updatedKeysToAdd.insert(b.obj()); } } keysToAdd = updatedKeysToAdd; } if (keysToAdd.size() > _params.maxKeysPerInsert) { warning() << "insert of geo object generated lots of keys (" << keysToAdd.size() << ") consider creating larger buckets. obj=" << obj; } for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) { keys.insert(*it); } }
void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { BSONObjSet keysToAdd; // We output keys in the same order as the fields we index. BSONObjIterator i(_descriptor->keyPattern()); while (i.more()) { BSONElement e = i.next(); // First, we get the keys that this field adds. Either they're added literally from // the value of the field, or they're transformed if the field is geo. BSONElementSet fieldElements; // false means Don't expand the last array, duh. obj.getFieldsDotted(e.fieldName(), fieldElements, false); BSONObjSet keysForThisField; if (IndexNames::GEO_2DSPHERE == e.valuestr()) { // We can't ever return documents that don't have geometry so don't bother indexing // them. if (fieldElements.empty()) { return; } getGeoKeys(obj, fieldElements, &keysForThisField); } else { getLiteralKeys(fieldElements, &keysForThisField); } // We expect there to be the missing field element present in the keys if data is // missing. So, this should be non-empty. verify(!keysForThisField.empty()); // We take the Cartesian product of all of the keys. This requires that we have // some keys to take the Cartesian product with. If keysToAdd.empty(), we // initialize it. if (keysToAdd.empty()) { keysToAdd = keysForThisField; continue; } BSONObjSet updatedKeysToAdd; for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) { for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); newIt!= keysForThisField.end(); ++newIt) { BSONObjBuilder b; b.appendElements(*it); b.append(newIt->firstElement()); updatedKeysToAdd.insert(b.obj()); } } keysToAdd = updatedKeysToAdd; } if (keysToAdd.size() > _params.maxKeysPerInsert) { warning() << "insert of geo object generated lots of keys (" << keysToAdd.size() << ") consider creating larger buckets. obj=" << obj; } *keys = keysToAdd; }
/**
 * Inserts every key of 'keys' and 'multikeyMetadataKeys' into the index.
 *
 * Data keys are stored against 'loc'; multikey metadata keys are stored against the reserved
 * 'kMultikeyMetadataKeyId'. Each key is optionally size-checked first. The initial insert
 * allows duplicates only when the index is not unique; if it fails with DuplicateKey and
 * 'options.dupsAllowed' is set, the insert is retried with duplicates allowed and, on success,
 * the key is appended to 'result->dupsInserted' (when 'result' is non-null).
 *
 * Returns the failing status as soon as isFatalError() says so for a key; otherwise returns
 * Status::OK(). On success 'result->numInserted' (if 'result' is non-null) grows by the total
 * number of keys in both sets, and the index is flagged multikey when
 * shouldMarkIndexAsMultikey() says so.
 */
Status AbstractIndexAccessMethod::insertKeys(OperationContext* opCtx,
                                             const BSONObjSet& keys,
                                             const BSONObjSet& multikeyMetadataKeys,
                                             const MultikeyPaths& multikeyPaths,
                                             const RecordId& loc,
                                             const InsertDeleteOptions& options,
                                             InsertResult* result) {
    const bool enforceKeySizeLimit = shouldCheckIndexKeySize(opCtx);

    // Walk the data keys first, then the multikey metadata keys. The former point at the
    // document's RecordId, the latter at the reserved 'kMultikeyMetadataKeyId'.
    for (const auto currentSet : {&keys, &multikeyMetadataKeys}) {
        const auto& targetId = (currentSet == &keys ? loc : kMultikeyMetadataKeyId);

        for (const auto& key : *currentSet) {
            Status status = Status::OK();
            if (enforceKeySizeLimit) {
                status = checkKeySize(key);
            }

            if (status.isOK()) {
                const bool isUniqueIndex = _descriptor->unique();
                StatusWith<SpecialFormatInserted> insertOutcome = _newInterface->insert(
                    opCtx, key, targetId, !isUniqueIndex /* dupsAllowed */);
                status = insertOutcome.getStatus();

                // A duplicate was hit but the caller tolerates duplicates: retry with
                // dupsAllowed and report the key so callers know which duplicates went in.
                if (options.dupsAllowed && ErrorCodes::DuplicateKey == status.code()) {
                    invariant(isUniqueIndex);
                    insertOutcome =
                        _newInterface->insert(opCtx, key, targetId, true /* dupsAllowed */);
                    status = insertOutcome.getStatus();

                    // This is speculative in that the 'dupsInserted' vector is not used by any
                    // code today. It is currently in place to test detecting duplicate key
                    // errors during hybrid index builds. Duplicate detection in the future will
                    // likely not take place in this insert() method.
                    if (result && status.isOK()) {
                        result->dupsInserted.push_back(key);
                    }
                }

                // getValue() is only consulted once the status is known to be OK.
                if (status.isOK() &&
                    insertOutcome.getValue() == SpecialFormatInserted::LongTypeBitsInserted) {
                    _btreeState->setIndexKeyStringWithLongTypeBitsExistsOnDisk(opCtx);
                }
            }

            // Consulted for every key, whether or not the status is OK.
            if (isFatalError(opCtx, status, key)) {
                return status;
            }
        }
    }

    if (result) {
        result->numInserted += keys.size() + multikeyMetadataKeys.size();
    }

    if (shouldMarkIndexAsMultikey(keys, multikeyMetadataKeys, multikeyPaths)) {
        _btreeState->setMultikey(opCtx, multikeyPaths);
    }

    return Status::OK();
}
void NamespaceDetails::ColdIndexer::build() { Lock::assertWriteLocked(_d->_ns); if (_isSecondaryIndex) { IndexDetails::Builder builder(*_idx); const int indexNum = _d->idxNo(*_idx); for (shared_ptr<Cursor> cursor(BasicCursor::make(_d)); cursor->ok(); cursor->advance()) { BSONObj pk = cursor->currPK(); BSONObj obj = cursor->current(); BSONObjSet keys; _idx->getKeysFromObject(obj, keys); if (keys.size() > 1) { _d->setIndexIsMultikey(indexNum); } for (BSONObjSet::const_iterator ki = keys.begin(); ki != keys.end(); ++ki) { builder.insertPair(*ki, &pk, obj); } killCurrentOp.checkForInterrupt(); // uasserts if we should stop } builder.done(); // If the index is unique, check all adjacent keys for a duplicate. if (_idx->unique()) { _d->checkIndexUniqueness(*_idx); } } }
/**
 * Looks up 'requestedKey' in the index with an exact seek and returns the RecordId it maps
 * to, or a default-constructed RecordId when there is no such entry.
 *
 * When the index has a collator, the key actually searched for is produced by getKeys()
 * (which must yield exactly one key); otherwise 'requestedKey' is used as is.
 */
RecordId IndexAccessMethod::findSingle(OperationContext* opCtx,
                                       const BSONObj& requestedKey) const {
    // Generate the key for this index.
    BSONObj actualKey;
    if (!_btreeState->getCollator()) {
        actualKey = requestedKey;
    } else {
        // For performance, call get keys only if there is a non-simple collation.
        BSONObjSet generatedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        MultikeyPaths* noMultikeyPaths = nullptr;
        getKeys(requestedKey, GetKeysMode::kEnforceConstraints, &generatedKeys, noMultikeyPaths);
        invariant(generatedKeys.size() == 1);
        actualKey = *generatedKeys.begin();
    }

    std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(opCtx));
    // Debug builds also fetch the key so it can be cross-checked below.
    const auto requestedInfo = kDebugBuild ? SortedDataInterface::Cursor::kKeyAndLoc
                                           : SortedDataInterface::Cursor::kWantLoc;

    auto entry = cursor->seekExact(actualKey, requestedInfo);
    if (!entry) {
        return RecordId();
    }

    // StorageEngine should guarantee these.
    dassert(!entry->loc.isNull());
    dassert(entry->key.woCompare(actualKey,
                                 /*order*/ BSONObj(),
                                 /*considerFieldNames*/ false) == 0);
    return entry->loc;
}
/** add index keys for a newly inserted record done in two steps/phases to allow potential deferal of write lock portion in the future */ void indexRecordUsingTwoSteps(const char *ns, NamespaceDetails *d, BSONObj obj, DiskLoc loc, bool shouldBeUnlocked) { vector<int> multi; vector<BSONObjSet> multiKeys; IndexInterface::IndexInserter inserter; // Step 1, read phase. int n = d->nIndexesBeingBuilt(); { BSONObjSet keys; for ( int i = 0; i < n; i++ ) { // this call throws on unique constraint violation. we haven't done any writes yet so that is fine. fetchIndexInserters(/*out*/keys, inserter, d, i, obj, loc); if( keys.size() > 1 ) { multi.push_back(i); multiKeys.push_back(BSONObjSet()); multiKeys[multiKeys.size()-1].swap(keys); } keys.clear(); } } inserter.finishAllInsertions(); // Step 2, write phase. // now finish adding multikeys for( unsigned j = 0; j < multi.size(); j++ ) { unsigned i = multi[j]; BSONObjSet& keys = multiKeys[j]; IndexDetails& idx = d->idx(i); IndexInterface& ii = idx.idxInterface(); Ordering ordering = Ordering::make(idx.keyPattern()); d->setIndexIsMultikey(ns, i); for( BSONObjSet::iterator k = ++keys.begin()/*skip 1*/; k != keys.end(); k++ ) { try { ii.bt_insert(idx.head, loc, *k, ordering, !idx.unique(), idx); } catch (AssertionException& e) { if( e.getCode() == 10287 && (int) i == d->nIndexes ) { DEV log() << "info: caught key already in index on bg indexing (ok)" << endl; } else { /* roll back previously added index entries note must do self index as it is multikey and could require some cleanup itself */ for( int j = 0; j < n; j++ ) { try { _unindexRecord(d->idx(j), obj, loc, false); } catch(...) { log(3) << "unindex fails on rollback after unique key constraint prevented insert\n"; } } throw; } } } } }
/**
 * Generates the index keys for 'obj' via the wrapped access method and adds them, for 'loc',
 * to the phase-1 structure. 'options' is not consulted. When 'numInserted' is non-null it is
 * incremented by the number of generated keys. Always returns Status::OK().
 */
virtual Status insert(const BSONObj& obj,
                      const DiskLoc& loc,
                      const InsertDeleteOptions& options,
                      int64_t* numInserted) {
    BSONObjSet generatedKeys;
    _real->getKeys(obj, &generatedKeys);
    _phase1.addKeys(generatedKeys, loc, false);

    if (!numInserted) {
        return Status::OK();
    }
    *numInserted += generatedKeys.size();
    return Status::OK();
}
/**
 * Generates the index keys for 'obj' via the wrapped access method and adds each one, paired
 * with 'loc', to the sorter. Remembers whether any document has ever produced more than one
 * key. 'options' is not consulted. When 'numInserted' is non-null it is incremented by the
 * number of generated keys. Always returns Status::OK().
 */
Status IndexAccessMethod::BulkBuilder::insert(OperationContext* txn,
                                              const BSONObj& obj,
                                              const RecordId& loc,
                                              const InsertDeleteOptions& options,
                                              int64_t* numInserted) {
    BSONObjSet generatedKeys;
    _real->getKeys(obj, &generatedKeys);

    // Sticky flag: stays set once any document yields several keys.
    if (generatedKeys.size() > 1) {
        _isMultiKey = true;
    }

    BSONObjSet::iterator keyIt = generatedKeys.begin();
    while (keyIt != generatedKeys.end()) {
        _sorter->add(*keyIt, loc);
        ++_keysInserted;
        ++keyIt;
    }

    if (numInserted != NULL) {
        *numInserted += generatedKeys.size();
    }
    return Status::OK();
}
// A text index on "data" should emit one two-field key per term of "cat sat", each key
// starting with a string element.
TEST( FTSIndexFormat, Simple1 ) {
    FTSSpec spec( FTSSpec::fixSpec( BSON( "key" << BSON( "data" << "text" ) ) ) );

    BSONObjSet generated;
    FTSIndexFormat::getKeys( spec, BSON( "data" << "cat sat" ), &generated );

    ASSERT_EQUALS( 2U, generated.size() );

    BSONObjSet::const_iterator it = generated.begin();
    while ( it != generated.end() ) {
        BSONObj key = *it;
        ASSERT_EQUALS( 2, key.nFields() );
        ASSERT_EQUALS( String, key.firstElement().type() );
        ++it;
    }
}
/**
 * Removes every key in 'keys' for the record at 'loc' from the index, one key at a time via
 * removeOneKey(), forwarding 'options.dupsAllowed'.
 *
 * When 'numDeleted' is non-null it is overwritten (not incremented) with keys.size(). That is
 * the number of removals attempted; the outcome of the individual removeOneKey() calls is not
 * inspected here. A null 'numDeleted' is tolerated and simply skipped.
 *
 * Always returns Status::OK().
 */
Status AbstractIndexAccessMethod::removeKeys(OperationContext* opCtx,
                                             const BSONObjSet& keys,
                                             const RecordId& loc,
                                             const InsertDeleteOptions& options,
                                             int64_t* numDeleted) {
    for (const auto& key : keys) {
        removeOneKey(opCtx, key, loc, options.dupsAllowed);
    }

    // Guard the optional out-parameter instead of dereferencing it unconditionally.
    if (numDeleted) {
        *numDeleted = keys.size();
    }
    return Status::OK();
}
// With a leading non-text field "x" in the index spec, the single generated key should be
// { <x value>, <term>, <positive number> }.
TEST( FTSIndexFormat, ExtraFront1 ) {
    FTSSpec spec( FTSSpec::fixSpec( BSON( "key" << BSON( "x" << 1 << "data" << "text" ) ) ) );

    BSONObjSet generated;
    FTSIndexFormat::getKeys( spec, BSON( "data" << "cat" << "x" << 5 ), &generated );
    ASSERT_EQUALS( 1U, generated.size() );

    BSONObj onlyKey = *( generated.begin() );
    ASSERT_EQUALS( 3, onlyKey.nFields() );

    // Walk the key's elements in order: prefix value, term, then the numeric third element.
    BSONObjIterator elements( onlyKey );
    ASSERT_EQUALS( 5, elements.next().numberInt() );
    ASSERT_EQUALS( StringData("cat"), elements.next().valuestr() );
    ASSERT( elements.next().numberDouble() > 0 );
}
/**
 * Generates the index keys for 'obj' via the wrapped access method and adds each one, paired
 * with 'loc', to the sorter. Tracks whether any document has ever produced more than one key
 * and counts keys and documents seen. 'options' is not consulted. When 'numInserted' is
 * non-null it is incremented by the number of generated keys. Always returns Status::OK().
 */
Status BtreeBasedBulkAccessMethod::insert(TransactionExperiment* txn,
                                          const BSONObj& obj,
                                          const DiskLoc& loc,
                                          const InsertDeleteOptions& options,
                                          int64_t* numInserted) {
    BSONObjSet generatedKeys;
    _real->getKeys(obj, &generatedKeys);

    // Sticky flag: stays set once any document yields several keys.
    if (generatedKeys.size() > 1) {
        _isMultiKey = true;
    }

    BSONObjSet::iterator keyIt = generatedKeys.begin();
    while (keyIt != generatedKeys.end()) {
        // False is for mayInterrupt.
        _sorter->add(*keyIt, loc, false);
        ++_keysInserted;
        ++keyIt;
    }
    ++_docsInserted;

    if (numInserted != NULL) {
        *numInserted += generatedKeys.size();
    }
    return Status::OK();
}
/** * Helper function to compare keys returned in getKeys() result * with expected values. */ void assertEqualsIndexKeys(std::set<std::string>& expectedKeys, const BSONObjSet& keys) { ASSERT_EQUALS(expectedKeys.size(), keys.size()); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { BSONObj key = *i; ASSERT_EQUALS(2, key.nFields()); ASSERT_EQUALS(String, key.firstElement().type()); string s = key.firstElement().String(); std::set<string>::const_iterator j = expectedKeys.find(s); if (j == expectedKeys.end()) { mongoutils::str::stream ss; ss << "unexpected key " << s << " in FTSIndexFormat::getKeys result. " << "expected keys:"; for (std::set<string>::const_iterator k = expectedKeys.begin(); k != expectedKeys.end(); ++k) { ss << "\n " << *k; } FAIL(ss); } } }
/**
 * Looks up 'key' in the index with an exact seek and returns the RecordId it maps to, or a
 * default-constructed RecordId when there is no such entry.
 *
 * The key actually searched for is the single key that getKeys() generates from 'key';
 * generating anything other than exactly one key trips an invariant.
 */
RecordId IndexAccessMethod::findSingle(OperationContext* txn, const BSONObj& key) const {
    // Generate the key for this index.
    BSONObjSet generatedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
    MultikeyPaths* noMultikeyPaths = nullptr;
    getKeys(key, &generatedKeys, noMultikeyPaths);
    invariant(generatedKeys.size() == 1);

    std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(txn));
    // Debug builds also fetch the key so it can be cross-checked below.
    const auto requestedInfo = kDebugBuild ? SortedDataInterface::Cursor::kKeyAndLoc
                                           : SortedDataInterface::Cursor::kWantLoc;

    auto entry = cursor->seekExact(*generatedKeys.begin(), requestedInfo);
    if (!entry) {
        return RecordId();
    }

    // StorageEngine should guarantee these.
    dassert(!entry->loc.isNull());
    dassert(entry->key.woCompare(*generatedKeys.begin(),
                                 /*order*/ BSONObj(),
                                 /*considerFieldNames*/ false) == 0);
    return entry->loc;
}
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, const S2IndexingParams& params, BSONObjSet* keys) { BSONObjSet keysToAdd; // Does one of our documents have a geo field? bool haveGeoField = false; // We output keys in the same order as the fields we index. BSONObjIterator i(keyPattern); while (i.more()) { BSONElement e = i.next(); // First, we get the keys that this field adds. Either they're added literally from // the value of the field, or they're transformed if the field is geo. BSONElementSet fieldElements; // false means Don't expand the last array, duh. obj.getFieldsDotted(e.fieldName(), fieldElements, false); BSONObjSet keysForThisField; if (IndexNames::GEO_2DSPHERE == e.valuestr()) { if (params.indexVersion >= S2_INDEX_VERSION_2) { // For >= V2, // geo: null, // geo: undefined // geo: [] // should all behave like there is no geo field. So we look for these cases and // throw out the field elements if we find them. if (1 == fieldElements.size()) { BSONElement elt = *fieldElements.begin(); // Get the :null and :undefined cases. if (elt.isNull() || Undefined == elt.type()) { fieldElements.clear(); } else if (elt.isABSONObj()) { // And this is the :[] case. BSONObj obj = elt.Obj(); if (0 == obj.nFields()) { fieldElements.clear(); } } } // >= V2 2dsphere indices require that at least one geo field to be present in a // document in order to index it. if (fieldElements.size() > 0) { haveGeoField = true; } } getS2GeoKeys(obj, fieldElements, params, &keysForThisField); } else { getS2LiteralKeys(fieldElements, params.collator, &keysForThisField); } // We expect there to be the missing field element present in the keys if data is // missing. So, this should be non-empty. verify(!keysForThisField.empty()); // We take the Cartesian product of all of the keys. This requires that we have // some keys to take the Cartesian product with. If keysToAdd.empty(), we // initialize it. 
if (keysToAdd.empty()) { keysToAdd = keysForThisField; continue; } BSONObjSet updatedKeysToAdd; for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) { for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); newIt != keysForThisField.end(); ++newIt) { BSONObjBuilder b; b.appendElements(*it); b.append(newIt->firstElement()); updatedKeysToAdd.insert(b.obj()); } } keysToAdd = updatedKeysToAdd; } // Make sure that if we're >= V2 there's at least one geo field present in the doc. if (params.indexVersion >= S2_INDEX_VERSION_2) { if (!haveGeoField) { return; } } if (keysToAdd.size() > params.maxKeysPerInsert) { warning() << "Insert of geo object generated a high number of keys." << " num keys: " << keysToAdd.size() << " obj inserted: " << obj; } *keys = keysToAdd; }
/**
 * Returns true when the index should be flagged multikey: either more than one data key was
 * generated, or isMultikeyFromPaths() reports so for 'multikeyPaths'.
 * 'multikeyMetadataKeys' is not consulted by this implementation.
 */
bool AbstractIndexAccessMethod::shouldMarkIndexAsMultikey(
    const BSONObjSet& keys,
    const BSONObjSet& multikeyMetadataKeys,
    const MultikeyPaths& multikeyPaths) const {
    if (keys.size() > 1) {
        return true;
    }
    return isMultikeyFromPaths(multikeyPaths);
}
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, const S2IndexingParams& params, BSONObjSet* keys, MultikeyPaths* multikeyPaths) { BSONObjSet keysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); // Does one of our documents have a geo field? bool haveGeoField = false; if (multikeyPaths) { invariant(multikeyPaths->empty()); multikeyPaths->resize(keyPattern.nFields()); } size_t posInIdx = 0; // We output keys in the same order as the fields we index. for (const auto keyElem : keyPattern) { // First, we get the keys that this field adds. Either they're added literally from // the value of the field, or they're transformed if the field is geo. BSONElementSet fieldElements; const bool expandArrayOnTrailingField = false; std::set<size_t>* arrayComponents = multikeyPaths ? &(*multikeyPaths)[posInIdx] : nullptr; dps::extractAllElementsAlongPath( obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents); // Trailing array values aren't being expanded, so we still need to determine whether the // last component of the indexed path 'keyElem.fieldName()' causes the index to be multikey. // We say that it does if // (a) the last component of the indexed path ever refers to an array value (regardless of // the number of array elements) // (b) the last component of the indexed path ever refers to GeoJSON data that requires // multiple cells for its covering. bool lastPathComponentCausesIndexToBeMultikey; BSONObjSet keysForThisField = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); if (IndexNames::GEO_2DSPHERE == keyElem.valuestr()) { if (params.indexVersion >= S2_INDEX_VERSION_2) { // For >= V2, // geo: null, // geo: undefined // geo: [] // should all behave like there is no geo field. So we look for these cases and // throw out the field elements if we find them. if (1 == fieldElements.size()) { BSONElement elt = *fieldElements.begin(); // Get the :null and :undefined cases. 
if (elt.isNull() || Undefined == elt.type()) { fieldElements.clear(); } else if (elt.isABSONObj()) { // And this is the :[] case. BSONObj obj = elt.Obj(); if (0 == obj.nFields()) { fieldElements.clear(); } } } // >= V2 2dsphere indices require that at least one geo field to be present in a // document in order to index it. if (fieldElements.size() > 0) { haveGeoField = true; } } lastPathComponentCausesIndexToBeMultikey = getS2GeoKeys(obj, fieldElements, params, &keysForThisField); } else { lastPathComponentCausesIndexToBeMultikey = getS2LiteralKeys(fieldElements, params.collator, &keysForThisField); } // We expect there to be the missing field element present in the keys if data is // missing. So, this should be non-empty. verify(!keysForThisField.empty()); if (multikeyPaths && lastPathComponentCausesIndexToBeMultikey) { const size_t pathLengthOfThisField = FieldRef{keyElem.fieldNameStringData()}.numParts(); invariant(pathLengthOfThisField > 0); (*multikeyPaths)[posInIdx].insert(pathLengthOfThisField - 1); } // We take the Cartesian product of all of the keys. This requires that we have // some keys to take the Cartesian product with. If keysToAdd.empty(), we // initialize it. if (keysToAdd.empty()) { keysToAdd = keysForThisField; ++posInIdx; continue; } BSONObjSet updatedKeysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) { for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); newIt != keysForThisField.end(); ++newIt) { BSONObjBuilder b; b.appendElements(*it); b.append(newIt->firstElement()); updatedKeysToAdd.insert(b.obj()); } } keysToAdd = updatedKeysToAdd; ++posInIdx; } // Make sure that if we're >= V2 there's at least one geo field present in the doc. if (params.indexVersion >= S2_INDEX_VERSION_2) { if (!haveGeoField) { return; } } if (keysToAdd.size() > params.maxKeysPerInsert) { warning() << "Insert of geo object generated a high number of keys." 
<< " num keys: " << keysToAdd.size() << " obj inserted: " << redact(obj); } *keys = keysToAdd; }