IndexAccessMethod* KVDatabaseCatalogEntry::getIndex(OperationContext* opCtx,
                                                    const CollectionCatalogEntry* collection,
                                                    IndexCatalogEntry* index) {
    IndexDescriptor* desc = index->descriptor();

    const std::string& type = desc->getAccessMethodName();

    std::string ident =
        _engine->getCatalog()->getIndexIdent(opCtx, collection->ns().ns(), desc->indexName());

    SortedDataInterface* sdi =
        _engine->getEngine()->getGroupedSortedDataInterface(opCtx, ident, desc, index->getPrefix());

    if ("" == type)
        return new BtreeAccessMethod(index, sdi);

    if (IndexNames::HASHED == type)
        return new HashAccessMethod(index, sdi);

    if (IndexNames::GEO_2DSPHERE == type)
        return new S2AccessMethod(index, sdi);

    if (IndexNames::TEXT == type)
        return new FTSAccessMethod(index, sdi);

    if (IndexNames::GEO_HAYSTACK == type)
        return new HaystackAccessMethod(index, sdi);

    if (IndexNames::GEO_2D == type)
        return new TwoDAccessMethod(index, sdi);

    log() << "Can't find index for keyPattern " << desc->keyPattern();
    invariant(false);
}
void CollectionInfoCache::computeIndexKeys() {
    DEV Lock::assertWriteLocked(_collection->ns().ns());

    _indexedPaths.clear();

    IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(true);
    while (i.more()) {
        IndexDescriptor* descriptor = i.next();

        if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
            BSONObj key = descriptor->keyPattern();
            BSONObjIterator j(key);
            while (j.more()) {
                BSONElement e = j.next();
                _indexedPaths.addPath(e.fieldName());
            }
        } else {
            fts::FTSSpec ftsSpec(descriptor->infoObj());

            if (ftsSpec.wildcard()) {
                _indexedPaths.allPathsIndexed();
            } else {
                for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraBefore(i));
                }
                for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                     it != ftsSpec.weights().end();
                     ++it) {
                    _indexedPaths.addPath(it->first);
                }
                for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraAfter(i));
                }
                // Any update to a path containing "language" as a component could change the
                // language of a subdocument.  Add the override field as a path component.
                _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
            }
        }
    }

    _keysComputed = true;
}
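// The '_indexedPaths' structure filled in above answers the question "could a write to this
// dotted path touch an indexed value?". Below is a minimal, self-contained sketch of those
// semantics as I understand them (addPath() matching a path plus its ancestors and descendants
// along the dotted chain, addPathComponent() matching any path that contains the component);
// the class name and matching rules are illustrative assumptions, not mongo::UpdateIndexData.
#include <cassert>
#include <set>
#include <sstream>
#include <string>
#include <vector>

class IndexedPathSet {
public:
    void addPath(const std::string& p) {
        _paths.insert(p);
    }
    void addPathComponent(const std::string& c) {
        _components.insert(c);
    }
    void allPathsIndexed() {
        _all = true;
    }

    // Could a write to 'p' affect an indexed value?
    bool mightBeIndexed(const std::string& p) const {
        if (_all)
            return true;
        for (const auto& stored : _paths)
            if (isDottedPrefix(stored, p) || isDottedPrefix(p, stored))
                return true;
        for (const auto& comp : splitDotted(p))
            if (_components.count(comp))
                return true;
        return false;
    }

private:
    static std::vector<std::string> splitDotted(const std::string& p) {
        std::vector<std::string> out;
        std::stringstream ss(p);
        std::string part;
        while (std::getline(ss, part, '.'))
            out.push_back(part);
        return out;
    }

    // True if 'a' equals 'b' or is a whole-component dotted prefix of 'b'.
    static bool isDottedPrefix(const std::string& a, const std::string& b) {
        return a == b ||
            (b.size() > a.size() && b.compare(0, a.size(), a) == 0 && b[a.size()] == '.');
    }

    bool _all = false;
    std::set<std::string> _paths;
    std::set<std::string> _components;
};

int main() {
    IndexedPathSet s;
    s.addPath("a.b");                // from a key pattern like {"a.b": 1}
    s.addPathComponent("language");  // from a text index's language override field

    assert(s.mightBeIndexed("a"));           // ancestor of an indexed path
    assert(s.mightBeIndexed("a.b.c"));       // descendant of an indexed path
    assert(s.mightBeIndexed("x.language"));  // contains the override component
    assert(!s.mightBeIndexed("z"));          // unrelated path
    return 0;
}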
Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* txn) {
    ScopedTransaction scopedXact(txn, MODE_IS);
    AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

    Collection* const collection = autoColl.getCollection();
    if (!collection) {
        return {ErrorCodes::NamespaceNotFound,
                str::stream() << "Collection " << _args.getNss().ns() << " does not exist."};
    }

    // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
    // multi-key index prefixed by shard key cannot be multikey over the shard key fields.
    IndexDescriptor* idx =
        collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn,
                                                                 _shardKeyPattern.toBSON(),
                                                                 false);  // requireSingleKey
    if (!idx) {
        return {ErrorCodes::IndexNotFound,
                str::stream() << "can't find index with prefix " << _shardKeyPattern.toBSON()
                              << " in storeCurrentLocs for " << _args.getNss().ns()};
    }

    // Install the stage, which will listen for notifications on the collection
    {
        stdx::lock_guard<stdx::mutex> sl(_mutex);
        invariant(!_deleteNotifyExec);

        // Takes ownership of 'ws' and 'dns'.
        auto statusWithPlanExecutor =
            PlanExecutor::make(txn,
                               stdx::make_unique<WorkingSet>(),
                               stdx::make_unique<DeleteNotificationStage>(this, txn),
                               collection,
                               PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());

        _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue());
        _deleteNotifyExec->registerExec(collection);
    }

    // Assume both min and max are non-empty; append MinKey's to make them fit the chosen index.
    const KeyPattern kp(idx->keyPattern());

    BSONObj min = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMinKey(), false));
    BSONObj max = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMaxKey(), false));

    std::unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn,
                                                                  collection,
                                                                  idx,
                                                                  min,
                                                                  max,
                                                                  false,  // endKeyInclusive
                                                                  PlanExecutor::YIELD_MANUAL));

    // We can afford to yield here because any change to the base data that we might miss is
    // already being queued and will migrate in the 'transferMods' stage.
    exec->setYieldPolicy(PlanExecutor::YIELD_AUTO, collection);

    // Use the average object size to estimate how many objects a full chunk would carry; do that
    // while traversing the chunk's range using the sharding index. Below, there is a fair amount
    // of slack before we determine a chunk is too large, because object sizes will vary.
    unsigned long long maxRecsWhenFull;
    long long avgRecSize;

    const long long totalRecs = collection->numRecords(txn);
    if (totalRecs > 0) {
        avgRecSize = collection->dataSize(txn) / totalRecs;
        maxRecsWhenFull = _args.getMaxChunkSizeBytes() / avgRecSize;
        maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1),
                                   130 * maxRecsWhenFull / 100 /* slack */);
    } else {
        avgRecSize = 0;
        maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1;
    }

    // Do a full traversal of the chunk and don't stop even if we think it is a large chunk; in
    // that case we want the correct number of records so we can report it.
    bool isLargeChunk = false;
    unsigned long long recCount = 0;

    BSONObj obj;
    RecordId recordId;
    PlanExecutor::ExecState state;
    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &recordId))) {
        if (!isLargeChunk) {
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            _cloneLocs.insert(recordId);
        }

        if (++recCount > maxRecsWhenFull) {
            isLargeChunk = true;
            // Continue on despite knowing that it will fail, just to get the correct value for
            // recCount.
        }
    }

    if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) {
        return {ErrorCodes::InternalError,
                str::stream() << "Executor error while scanning for documents belonging to chunk: "
                              << WorkingSetCommon::toStatusString(obj)};
    }

    exec.reset();

    if (isLargeChunk) {
        return {ErrorCodes::ChunkTooBig,
                str::stream() << "Cannot move chunk: the maximum number of documents for a chunk is "
                              << maxRecsWhenFull << ", the maximum chunk size is "
                              << _args.getMaxChunkSizeBytes() << ", average document size is "
                              << avgRecSize << ". Found " << recCount << " documents in chunk "
                              << " ns: " << _args.getNss().ns() << " " << _args.getMinKey()
                              << " -> " << _args.getMaxKey()};
    }

    _averageObjectSizeForCloneLocs = static_cast<uint64_t>(collection->averageObjectSize(txn) + 12);

    return Status::OK();
}
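// The maxRecsWhenFull computation above is plain integer arithmetic, but the 30% slack and the
// Chunk::MaxObjectPerChunk + 1 cap interact in a way that is easiest to see with numbers. A
// minimal sketch, assuming a stand-in value for Chunk::MaxObjectPerChunk and adding a
// divide-by-zero guard that the original leaves implicit:
#include <algorithm>
#include <iostream>

constexpr unsigned long long kMaxObjectPerChunk = 250000;  // assumed stand-in value

unsigned long long maxRecsWhenFull(long long totalRecs,
                                   long long dataSizeBytes,
                                   long long maxChunkSizeBytes) {
    if (totalRecs <= 0)
        return kMaxObjectPerChunk + 1;  // empty collection: only the hard cap applies

    const long long avgRecSize = std::max(dataSizeBytes / totalRecs, 1LL);
    const unsigned long long estimate = maxChunkSizeBytes / avgRecSize;

    // Allow 30% slack before declaring the chunk too large (object sizes vary), but never let
    // the threshold exceed one past the per-chunk document cap.
    return std::min(kMaxObjectPerChunk + 1, 130 * estimate / 100);
}

int main() {
    // 64 MB max chunk size and 1 KB average documents: about 67k docs fit, so the scan above
    // would only flag the chunk as too large after roughly 87k documents.
    std::cout << maxRecsWhenFull(1000000, 1000000000LL, 64 * 1024 * 1024) << "\n";  // 87240
    return 0;
}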
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    if (_needCappedLock) {
        // X-lock the metadata resource for this capped collection until the end of the WUOW. This
        // prevents the primary from executing with more concurrency than secondaries.
        // See SERVER-21646. The lock must be named; an anonymous temporary would be destroyed,
        // and its release requested, at the end of the statement.
        Lock::ResourceLock heldUntilEndOfWUOW{
            txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X};
    }

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // The MMAPv1 storage engine implements capped collections in a way that does not allow records
    // to grow beyond their original size. If MMAPv1 is part of a replica set with storage engines
    // that do not have this limitation, replication could result in errors, so it is necessary to
    // set a uniform rule here. Similarly, it is not sufficient to disallow growing records, because
    // growth can also happen when secondaries roll back an update that shrunk a record. Exactly
    // replicating legacy MMAPv1 behavior would require padding shrunk documents on all storage
    // engines. Instead, forbid all size changes.
    const auto oldSize = oldDoc.value().objsize();
    if (_recordStore->isCapped() && oldSize != newDoc.objsize())
        return {ErrorCodes::CannotGrowDocumentInCappedNamespace,
                str::stream() << "Cannot change the size of a document in a capped collection: "
                              << oldSize << " != " << newDoc.objsize()};

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove. If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        std::vector<BsonRecord> bsonRecords;
        BsonRecord bsonRecord = {newLocation.getValue(), &newDoc};
        bsonRecords.push_back(bsonRecord);
        Status s = _indexCatalog.indexRecords(txn, bsonRecords);
        if (!s.isOK())
            return StatusWith<RecordId>(s);

        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move. We update each index with each respective UpdateTicket.
    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove. If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!s.isOK())
            return StatusWith<RecordId>(s);

        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move. We update each index with each respective UpdateTicket.
    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
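// Every updateDocument variant in this section uses the same two-phase index-update protocol:
// validateUpdate() computes the per-index key changes into an UpdateTicket (and may fail, for
// example on a duplicate key) before anything is modified, and update() later applies the
// precomputed ticket once failure is no longer an option. A minimal sketch of that pattern;
// the names and the single-field index model are illustrative, not MongoDB's IndexAccessMethod
// API.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct UpdateTicket {
    std::vector<std::string> removed;  // keys the update will delete
    std::vector<std::string> added;    // keys the update will insert
};

class SingleFieldIndex {
public:
    explicit SingleFieldIndex(std::string field) : _field(std::move(field)) {}

    // Phase 1: diff the old and new documents' projections; no side effects yet.
    bool validateUpdate(const std::map<std::string, std::string>& oldDoc,
                        const std::map<std::string, std::string>& newDoc,
                        UpdateTicket* ticket) const {
        auto oldIt = oldDoc.find(_field);
        auto newIt = newDoc.find(_field);
        const bool hadOld = oldIt != oldDoc.end();
        const bool hasNew = newIt != newDoc.end();
        if (hadOld && hasNew && oldIt->second == newIt->second)
            return true;  // key unchanged; ticket stays empty
        if (hadOld)
            ticket->removed.push_back(oldIt->second);
        if (hasNew)
            ticket->added.push_back(newIt->second);
        return true;  // a real unique index would reject duplicate keys here
    }

    // Phase 2: apply the precomputed ticket; by now this must not fail.
    void update(const UpdateTicket& ticket, int64_t* updatedKeys) {
        for (const auto& k : ticket.removed)
            _keys.erase(k);
        for (const auto& k : ticket.added)
            _keys[k] = true;
        *updatedKeys = static_cast<int64_t>(ticket.removed.size() + ticket.added.size());
    }

private:
    std::string _field;
    std::map<std::string, bool> _keys;  // key -> present
};

int main() {
    SingleFieldIndex idx("a");
    UpdateTicket ticket;
    idx.validateUpdate({{"a", "1"}}, {{"a", "2"}}, &ticket);  // phase 1: plan the change
    int64_t updatedKeys = 0;
    idx.update(ticket, &updatedKeys);                      // phase 2: apply it
    std::cout << "keys touched: " << updatedKeys << "\n";  // prints 2
    return 0;
}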
bool MigrationSourceManager::storeCurrentLocs(OperationContext* txn,
                                              long long maxChunkSize,
                                              string& errmsg,
                                              BSONObjBuilder& result) {
    AutoGetCollection autoColl(txn, _getNS(), MODE_IS);

    Collection* collection = autoColl.getCollection();
    if (!collection) {
        errmsg = "ns not found, should be impossible";
        return false;
    }

    // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
    // multi-key index prefixed by shard key cannot be multikey over the shard key fields.
    IndexDescriptor* idx =
        collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn,
                                                                 _shardKeyPattern,
                                                                 false);  // requireSingleKey

    if (idx == NULL) {
        errmsg = str::stream() << "can't find index with prefix " << _shardKeyPattern
                               << " in storeCurrentLocs for " << _nss.toString();
        return false;
    }

    // Assume both min and max are non-empty; append MinKey's to make them fit the chosen index.
    BSONObj min;
    BSONObj max;
    KeyPattern kp(idx->keyPattern());

    {
        // It's alright not to lock _mutex all the way through based on the assumption that this is
        // only called by the main thread that drives the migration and only it can start and stop
        // the current migration.
        stdx::lock_guard<stdx::mutex> sl(_mutex);

        invariant(_deleteNotifyExec.get() == NULL);
        unique_ptr<WorkingSet> ws = stdx::make_unique<WorkingSet>();
        unique_ptr<DeleteNotificationStage> dns = stdx::make_unique<DeleteNotificationStage>(this);

        // Takes ownership of 'ws' and 'dns'.
        auto statusWithPlanExecutor = PlanExecutor::make(
            txn, std::move(ws), std::move(dns), collection, PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());

        _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue());
        _deleteNotifyExec->registerExec();

        min = Helpers::toKeyFormat(kp.extendRangeBound(_min, false));
        max = Helpers::toKeyFormat(kp.extendRangeBound(_max, false));
    }

    unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn,
                                                             collection,
                                                             idx,
                                                             min,
                                                             max,
                                                             false,  // endKeyInclusive
                                                             PlanExecutor::YIELD_MANUAL));

    // We can afford to yield here because any change to the base data that we might miss is
    // already being queued and will migrate in the 'transferMods' stage.
    exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);

    // Use the average object size to estimate how many objects a full chunk would carry; do that
    // while traversing the chunk's range using the sharding index. Below, there is a fair amount
    // of slack before we determine a chunk is too large, because object sizes will vary.
    unsigned long long maxRecsWhenFull;
    long long avgRecSize;
    const long long totalRecs = collection->numRecords(txn);
    if (totalRecs > 0) {
        avgRecSize = collection->dataSize(txn) / totalRecs;
        maxRecsWhenFull = maxChunkSize / avgRecSize;
        maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1),
                                   130 * maxRecsWhenFull / 100 /* slack */);
    } else {
        avgRecSize = 0;
        maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1;
    }

    // Do a full traversal of the chunk and don't stop even if we think it is a large chunk; in
    // that case we want the correct number of records so we can report it.
    bool isLargeChunk = false;
    unsigned long long recCount = 0;

    RecordId recordId;
    while (PlanExecutor::ADVANCED == exec->getNext(NULL, &recordId)) {
        if (!isLargeChunk) {
            stdx::lock_guard<stdx::mutex> lk(_cloneLocsMutex);
            _cloneLocs.insert(recordId);
        }

        if (++recCount > maxRecsWhenFull) {
            isLargeChunk = true;
            // Continue on despite knowing that it will fail, just to get the correct value for
            // recCount.
        }
    }

    exec.reset();

    if (isLargeChunk) {
        stdx::lock_guard<stdx::mutex> sl(_mutex);
        warning() << "cannot move chunk: the maximum number of documents for a chunk is "
                  << maxRecsWhenFull << " , the maximum chunk size is " << maxChunkSize
                  << " , average document size is " << avgRecSize << ". Found " << recCount
                  << " documents in chunk "
                  << " ns: " << _nss << " " << _min << " -> " << _max << migrateLog;

        result.appendBool("chunkTooBig", true);
        result.appendNumber("estimatedChunkSize", (long long)(recCount * avgRecSize));
        errmsg = "chunk too big to move";
        return false;
    }

    log() << "moveChunk number of documents: " << cloneLocsRemaining() << migrateLog;

    txn->recoveryUnit()->abandonSnapshot();
    return true;
}
StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn,
                                                const DiskLoc& oldLocation,
                                                const BSONObj& objNew,
                                                bool enforceQuota,
                                                OpDebug* debug ) {

    BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson();

    if ( objOld.hasElement( "_id" ) ) {
        BSONElement oldId = objOld["_id"];
        BSONElement newId = objNew["_id"];
        if ( oldId != newId )
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "in Collection::updateDocument _id mismatch",
                                        13596 );
    }

    if ( ns().coll() == "system.users" ) {
        // XXX - andy and spencer think this should go away now
        V2UserDocumentParser parser;
        Status s = parser.checkValidUserDocument(objNew);
        if ( !s.isOK() )
            return StatusWith<DiskLoc>( s );
    }

    /* duplicate key check. we descend the btree twice - once for this check, and once for the
       actual inserts, further below. that is suboptimal, but it's pretty complicated to do it
       the other way without rollbacks...
    */
    OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
    IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true );
    while ( ii.more() ) {
        IndexDescriptor* descriptor = ii.next();
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed =
            !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
            || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
        UpdateTicket* updateTicket = new UpdateTicket();
        updateTickets.mutableMap()[descriptor] = updateTicket;
        Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket );
        if ( !ret.isOK() ) {
            return StatusWith<DiskLoc>( ret );
        }
    }

    // this can callback into Collection::recordStoreGoingToMove
    StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn,
                                                                  oldLocation,
                                                                  objNew.objdata(),
                                                                  objNew.objsize(),
                                                                  _enforceQuota( enforceQuota ),
                                                                  this );

    if ( !newLocation.isOK() ) {
        return newLocation;
    }

    _infoCache.notifyOfWriteOp();

    if ( newLocation.getValue() != oldLocation ) {
        if ( debug ) {
            if (debug->nmoved == -1) // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());

        return newLocation;
    }

    if ( debug )
        debug->keyUpdates = 0;

    ii = _indexCatalog.getIndexIterator( txn, true );
    while ( ii.more() ) {
        IndexDescriptor* descriptor = ii.next();
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        int64_t updatedKeys;
        Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
        if ( !ret.isOK() )
            return StatusWith<DiskLoc>( ret );
        if ( debug )
            debug->keyUpdates += updatedKeys;
    }

    // Broadcast the mutation so that query results stay correct.
    _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

    return newLocation;
}
shared_ptr<Cursor> QueryPlan::newCursor( const DiskLoc& startLoc,
                                         bool requestIntervalCursor ) const {

    if ( _type ) {
        // hopefully safe to use original query in these contexts - don't think we can mix type
        // with $or clause separation yet
        int numWanted = 0;
        if ( _parsedQuery ) {
            // SERVER-5390
            numWanted = _parsedQuery->getSkip() + _parsedQuery->getNumToReturn();
        }

        IndexDescriptor* descriptor = CatalogHack::getDescriptor(_d, _idxNo);
        IndexAccessMethod* iam = CatalogHack::getIndex(descriptor);
        return shared_ptr<Cursor>(EmulatedCursor::make(descriptor, iam, _originalQuery,
                                                       _order, numWanted,
                                                       descriptor->keyPattern()));
    }

    if ( _utility == Impossible ) {
        // Dummy table scan cursor returning no results.  Allowed in --notablescan mode.
        return shared_ptr<Cursor>( new BasicCursor( DiskLoc() ) );
    }

    if ( willScanTable() ) {
        checkTableScanAllowed();
        return findTableScan( _frs.ns(), _order, startLoc );
    }

    massert( 10363,
             "newCursor() with start location not implemented for indexed plans",
             startLoc.isNull() );

    if ( _startOrEndSpec ) {
        // we are sure to spec _endKeyInclusive
        return shared_ptr<Cursor>( BtreeCursor::make( _d, *_index,
                                                      _startKey, _endKey, _endKeyInclusive,
                                                      _direction >= 0 ? 1 : -1 ) );
    }

    if ( _index->getSpec().getType() ) {
        return shared_ptr<Cursor>( BtreeCursor::make( _d, *_index,
                                                      _frv->startKey(), _frv->endKey(), true,
                                                      _direction >= 0 ? 1 : -1 ) );
    }

    // An IntervalBtreeCursor is returned if explicitly requested AND _frv is exactly
    // represented by a single interval within the btree.
    if ( // If an interval cursor is requested and ...
         requestIntervalCursor &&
         // ... equalities come before ranges (a requirement of Optimal) and ...
         _utility == Optimal &&
         // ... the field range vector exactly represents a single interval ...
         _frv->isSingleInterval() ) {
        // ... and an interval cursor can be created ...
        shared_ptr<Cursor> ret( IntervalBtreeCursor::make( _d, *_index,
                                                           _frv->startKey(),
                                                           _frv->startKeyInclusive(),
                                                           _frv->endKey(),
                                                           _frv->endKeyInclusive() ) );
        if ( ret ) {
            // ... then return the interval cursor.
            return ret;
        }
    }

    return shared_ptr<Cursor>( BtreeCursor::make( _d, *_index, _frv,
                                                  independentRangesSingleIntervalLimit(),
                                                  _direction >= 0 ? 1 : -1 ) );
}
StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                const BSONObj& objNew,
                                                bool enforceQuota,
                                                OpDebug* debug ) {

    Record* oldRecord = getExtentManager()->recordFor( oldLocation );
    BSONObj objOld = BSONObj::make( oldRecord );

    if ( objOld.hasElement( "_id" ) ) {
        BSONElement oldId = objOld["_id"];
        BSONElement newId = objNew["_id"];
        if ( oldId != newId )
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "in Collection::updateDocument _id mismatch",
                                        13596 );
    }

    if ( ns().coll() == "system.users" ) {
        // XXX - andy and spencer think this should go away now
        V2UserDocumentParser parser;
        Status s = parser.checkValidUserDocument(objNew);
        if ( !s.isOK() )
            return StatusWith<DiskLoc>( s );
    }

    /* duplicate key check. we descend the btree twice - once for this check, and once for the
       actual inserts, further below. that is suboptimal, but it's pretty complicated to do it
       the other way without rollbacks...
    */
    OwnedPointerVector<UpdateTicket> updateTickets;
    updateTickets.mutableVector().resize(_indexCatalog.numIndexesTotal());
    for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
        IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed =
            !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
            || ignoreUniqueIndex(descriptor);
        updateTickets.mutableVector()[i] = new UpdateTicket();
        Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options,
                                         updateTickets.mutableVector()[i]);
        if ( !ret.isOK() ) {
            return StatusWith<DiskLoc>( ret );
        }
    }

    if ( oldRecord->netLength() < objNew.objsize() ) {
        // doesn't fit, have to move to new location

        if ( _details->isCapped() )
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "failing update: objects in a capped ns cannot grow",
                                        10003 );

        moveCounter.increment();
        _details->paddingTooSmall();

        // unindex old record, don't delete
        // this way, if inserting new doc fails, we can re-index this one
        ClientCursor::aboutToDelete(_ns.ns(), _details, oldLocation);
        _indexCatalog.unindexRecord( objOld, oldLocation, true );

        if ( debug ) {
            if (debug->nmoved == -1) // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        StatusWith<DiskLoc> loc = insertDocument( objNew, enforceQuota );

        if ( loc.isOK() ) {
            // insert successful, now lets deallocate the old location
            // remember its already unindexed
            _recordStore.deallocRecord( oldLocation, oldRecord );
        }
        else {
            // new doc insert failed, so lets re-index the old document and location
            _indexCatalog.indexRecord( objOld, oldLocation );
        }

        return loc;
    }

    _infoCache.notifyOfWriteOp();
    _details->paddingFits();

    if ( debug )
        debug->keyUpdates = 0;

    for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
        IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        int64_t updatedKeys;
        Status ret = iam->update(*updateTickets.vector()[i], &updatedKeys);
        if ( !ret.isOK() )
            return StatusWith<DiskLoc>( ret );
        if ( debug )
            debug->keyUpdates += updatedKeys;
    }

    // update in place
    int sz = objNew.objsize();
    memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz);

    return StatusWith<DiskLoc>( oldLocation );
}
/**
 * For a given query, get a runner.  The runner could be a SingleSolutionRunner, a
 * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc.
 */
Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions) {
    verify(rawCanonicalQuery);
    auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);

    // Try to look up a cached solution for the query.
    // TODO: Can the cache have negative data about a solution?
    PlanCache* localCache = PlanCache::get(canonicalQuery->ns());
    if (NULL != localCache) {
        CachedSolution* cs = localCache->get(*canonicalQuery);
        if (NULL != cs) {
            // We have a cached solution.  Hand the canonical query and cached solution off to
            // the cached plan runner, which takes ownership of both.
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*cs->solution, &root, &ws));
            *out = new CachedPlanRunner(canonicalQuery.release(), cs, root, ws);
            return Status::OK();
        }
    }

    // No entry in cache for the query.  We have to solve the query ourself.

    // Get the indices that we could possibly use.
    Database* db = cc().database();
    verify( db );
    Collection* collection = db->getCollection( canonicalQuery->ns() );

    // This can happen as we're called by internal clients as well.
    if (NULL == collection) {
        const string& ns = canonicalQuery->ns();
        *out = new EOFRunner(canonicalQuery.release(), ns);
        return Status::OK();
    }

    // If we have an _id index we can use the idhack runner.
    if (canUseIDHack(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) {
        *out = new IDHackRunner(collection, canonicalQuery.release());
        return Status::OK();
    }

    // If it's not NULL, we may have indices.  Access the catalog and fill out IndexEntry(s).
    QueryPlannerParams plannerParams;
    for (int i = 0; i < collection->getIndexCatalog()->numIndexesReady(); ++i) {
        IndexDescriptor* desc = collection->getIndexCatalog()->getDescriptor( i );
        plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                   desc->isMultikey(),
                                                   desc->isSparse(),
                                                   desc->indexName()));
    }

    // Tailable: If the query requests tailable the collection must be capped.
    if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) {
        if (!collection->isCapped()) {
            return Status(ErrorCodes::BadValue,
                          "tailable cursor requested on non capped collection");
        }

        // If a sort is specified it must be equal to expectedSort.
        const BSONObj expectedSort = BSON("$natural" << 1);
        const BSONObj& actualSort = canonicalQuery->getParsed().getSort();
        if (!actualSort.isEmpty() && !(actualSort == expectedSort)) {
            return Status(ErrorCodes::BadValue,
                          "invalid sort specified for tailable cursor: "
                          + actualSort.toString());
        }
    }

    // Process the planning options.
    plannerParams.options = plannerOptions;
    if (storageGlobalParams.noTableScan) {
        const string& ns = canonicalQuery->ns();
        // There are certain cases where we ignore this restriction:
        bool ignore = canonicalQuery->getQueryObj().isEmpty()
                      || (string::npos != ns.find(".system."))
                      || (0 == ns.find("local."));
        if (!ignore) {
            plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN;
        }
    }

    if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) {
        plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN;
    }

    // If the caller wants a shard filter, make sure we're actually sharded.
    if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
        CollectionMetadataPtr collMetadata =
            shardingState.getCollectionMetadata(canonicalQuery->ns());
        if (collMetadata) {
            plannerParams.shardKey = collMetadata->getKeyPattern();
        }
        else {
            // If there's no metadata don't bother w/the shard filter since we won't know what
            // the key pattern is anyway...
            plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
        }
    }

    vector<QuerySolution*> solutions;
    QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions);

    /*
    for (size_t i = 0; i < solutions.size(); ++i) {
        QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl;
    }
    */

    // We cannot figure out how to answer the query.  Should this ever happen?
    if (0 == solutions.size()) {
        return Status(ErrorCodes::BadValue, "No query solutions");
    }

    if (1 == solutions.size()) {
        // Only one possible plan.  Run it.  Build the stages from the solution.
        WorkingSet* ws;
        PlanStage* root;
        verify(StageBuilder::build(*solutions[0], &root, &ws));

        // And, run the plan.
        *out = new SingleSolutionRunner(canonicalQuery.release(), solutions[0], root, ws);
        return Status::OK();
    }
    else {
        // Many solutions.  Let the MultiPlanRunner pick the best, update the cache, and so on.
        auto_ptr<MultiPlanRunner> mpr(new MultiPlanRunner(canonicalQuery.release()));
        for (size_t i = 0; i < solutions.size(); ++i) {
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*solutions[i], &root, &ws));
            // Takes ownership of all arguments.
            mpr->addPlan(solutions[i], root, ws);
        }
        *out = mpr.release();
        return Status::OK();
    }
}
void CollectionInfoCache::computeIndexKeys(OperationContext* opCtx) {
    _indexedPaths.clear();

    bool hadTTLIndex = _hasTTLIndex;
    _hasTTLIndex = false;

    IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
    while (i.more()) {
        IndexDescriptor* descriptor = i.next();

        if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
            BSONObj key = descriptor->keyPattern();
            const BSONObj& infoObj = descriptor->infoObj();
            if (infoObj.hasField("expireAfterSeconds")) {
                _hasTTLIndex = true;
            }
            BSONObjIterator j(key);
            while (j.more()) {
                BSONElement e = j.next();
                _indexedPaths.addPath(e.fieldName());
            }
        } else {
            fts::FTSSpec ftsSpec(descriptor->infoObj());

            if (ftsSpec.wildcard()) {
                _indexedPaths.allPathsIndexed();
            } else {
                for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraBefore(i));
                }
                for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                     it != ftsSpec.weights().end();
                     ++it) {
                    _indexedPaths.addPath(it->first);
                }
                for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraAfter(i));
                }
                // Any update to a path containing "language" as a component could change the
                // language of a subdocument.  Add the override field as a path component.
                _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
            }
        }

        // handle partial indexes
        const IndexCatalogEntry* entry = i.catalogEntry(descriptor);
        const MatchExpression* filter = entry->getFilterExpression();
        if (filter) {
            unordered_set<std::string> paths;
            QueryPlannerIXSelect::getFields(filter, "", &paths);
            for (auto it = paths.begin(); it != paths.end(); ++it) {
                _indexedPaths.addPath(*it);
            }
        }
    }

    TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext());

    if (_hasTTLIndex != hadTTLIndex) {
        if (_hasTTLIndex) {
            ttlCollectionCache.registerCollection(_collection->ns());
        } else {
            ttlCollectionCache.unregisterCollection(_collection->ns());
        }
    }

    _keysComputed = true;
}
StatusWith<RecordId> Collection::updateDocument( OperationContext* txn,
                                                 const RecordId& oldLocation,
                                                 const BSONObj& objNew,
                                                 bool enforceQuota,
                                                 OpDebug* debug ) {

    BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).releaseToBson();

    if ( objOld.hasElement( "_id" ) ) {
        BSONElement oldId = objOld["_id"];
        BSONElement newId = objNew["_id"];
        if ( oldId != newId )
            return StatusWith<RecordId>( ErrorCodes::InternalError,
                                         "in Collection::updateDocument _id mismatch",
                                         13596 );
    }

    /* duplicate key check. we descend the btree twice - once for this check, and once for the
       actual inserts, further below. that is suboptimal, but it's pretty complicated to do it
       the other way without rollbacks...
    */

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between objOld and
    // objNew.
    OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
    IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true );
    while ( ii.more() ) {
        IndexDescriptor* descriptor = ii.next();
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed =
            !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
            || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
        UpdateTicket* updateTicket = new UpdateTicket();
        updateTickets.mutableMap()[descriptor] = updateTicket;
        Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket );
        if ( !ret.isOK() ) {
            return StatusWith<RecordId>( ret );
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn,
                                                                   oldLocation,
                                                                   objNew.objdata(),
                                                                   objNew.objsize(),
                                                                   _enforceQuota( enforceQuota ),
                                                                   this );

    if ( !newLocation.isOK() ) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved.

    _infoCache.notifyOfWriteOp();

    // If the object did move, we need to add the new location to all indexes.
    if ( newLocation.getValue() != oldLocation ) {
        if ( debug ) {
            if (debug->nmoved == -1) // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        Status s = _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());
        if (!s.isOK())
            return StatusWith<RecordId>(s);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if ( debug )
        debug->keyUpdates = 0;

    ii = _indexCatalog.getIndexIterator( txn, true );
    while ( ii.more() ) {
        IndexDescriptor* descriptor = ii.next();
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        int64_t updatedKeys;
        Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
        if ( !ret.isOK() )
            return StatusWith<RecordId>( ret );
        if ( debug )
            debug->keyUpdates += updatedKeys;
    }

    // Broadcast the mutation so that query results stay correct.
    _cursorCache.invalidateDocument(txn, oldLocation, INVALIDATION_MUTATION);

    return newLocation;
}