/**
 * Renames the collection 'fromNS' to 'toNS'.
 *
 * The rename is audited first. Cached state is then cleared for every index namespace of the
 * source collection (unfinished indexes included) and, under '_collectionLock', for both the
 * source and the target namespace. Top is told that 'fromNS' was dropped, and the rename itself
 * is delegated to the database catalog entry.
 *
 * Returns NamespaceNotFound when 'fromNS' does not exist; otherwise returns the catalog entry's
 * result. 'stayTemp' is forwarded untouched.
 */
Status Database::renameCollection(OperationContext* txn,
                                  const StringData& fromNS,
                                  const StringData& toNS,
                                  bool stayTemp) {
    audit::logRenameCollection(currentClient.get(), fromNS, toNS);

    Collection* const source = getCollection(txn, fromNS);
    if (!source) {
        return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename");
    }

    // Drop anything cached for the indexes of the collection being renamed.
    for (IndexCatalog::IndexIterator indexes = source->getIndexCatalog()->getIndexIterator(true);
         indexes.more();) {
        IndexDescriptor* const index = indexes.next();
        _clearCollectionCache(index->indexNamespace());
    }

    // Both ends of the rename are evicted while holding the collection lock.
    {
        scoped_lock guard(_collectionLock);
        _clearCollectionCache_inlock(fromNS);
        _clearCollectionCache_inlock(toNS);
    }

    Top::global.collectionDropped(fromNS.toString());

    return _dbEntry->renameCollection(txn, fromNS, toNS, stayTemp);
}
long long Database::getIndexSizeForCollection(OperationContext* opCtx, Collection* coll, BSONObjBuilder* details, int scale ) { if ( !coll ) return 0; IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator( true /*includeUnfinishedIndexes*/ ); long long totalSize = 0; while ( ii.more() ) { IndexDescriptor* d = ii.next(); string indNS = d->indexNamespace(); // XXX creating a Collection for an index which isn't a Collection Collection* indColl = getCollection( opCtx, indNS ); if ( ! indColl ) { log() << "error: have index descriptor [" << indNS << "] but no entry in the index collection." << endl; continue; } totalSize += indColl->dataSize(); if ( details ) { long long const indexSize = indColl->dataSize() / scale; details->appendNumber( d->indexName() , indexSize ); } } return totalSize; }
/**
 * Creates the access method object for 'index'.
 *
 * The storage ident of the index is looked up in the engine catalog from the collection
 * namespace and the index name, a grouped sorted-data interface is obtained for it, and the
 * access method matching the descriptor's access method name is allocated with 'new' and
 * returned to the caller. An empty access method name selects the btree access method.
 *
 * An unrecognised access method name is logged and then trips an invariant.
 */
IndexAccessMethod* KVDatabaseCatalogEntry::getIndex(OperationContext* opCtx,
                                                    const CollectionCatalogEntry* collection,
                                                    IndexCatalogEntry* index) {
    IndexDescriptor* descriptor = index->descriptor();
    const std::string& accessMethod = descriptor->getAccessMethodName();

    std::string ident = _engine->getCatalog()->getIndexIdent(
        opCtx, collection->ns().ns(), descriptor->indexName());

    SortedDataInterface* sortedData = _engine->getEngine()->getGroupedSortedDataInterface(
        opCtx, ident, descriptor, index->getPrefix());

    if (accessMethod.empty()) {
        return new BtreeAccessMethod(index, sortedData);
    }
    if (accessMethod == IndexNames::HASHED) {
        return new HashAccessMethod(index, sortedData);
    }
    if (accessMethod == IndexNames::GEO_2DSPHERE) {
        return new S2AccessMethod(index, sortedData);
    }
    if (accessMethod == IndexNames::TEXT) {
        return new FTSAccessMethod(index, sortedData);
    }
    if (accessMethod == IndexNames::GEO_HAYSTACK) {
        return new HaystackAccessMethod(index, sortedData);
    }
    if (accessMethod == IndexNames::GEO_2D) {
        return new TwoDAccessMethod(index, sortedData);
    }

    // No known access method matches the descriptor.
    log() << "Can't find index for keyPattern " << descriptor->keyPattern();
    invariant(false);
}
/**
 * Renames the collection 'fromNS' to 'toNS'.
 *
 * Preconditions checked here: the database is locked in MODE_X and neither namespace has a
 * background operation in progress. The rename is audited before any of these checks.
 *
 * Cached state is cleared for every index namespace of the source collection (unfinished
 * indexes included) and for both the source and target namespaces, each time passing a
 * human-readable reason. Top is told that 'fromNS' was dropped. An AddCollectionChange for
 * 'toNS' is registered with the recovery unit, the catalog entry performs the rename, and the
 * in-memory collection map entry for 'toNS' is refreshed regardless of the rename's status.
 *
 * Returns NamespaceNotFound when 'fromNS' does not exist; otherwise the catalog entry's status.
 */
Status Database::renameCollection(OperationContext* txn,
                                  StringData fromNS,
                                  StringData toNS,
                                  bool stayTemp) {
    audit::logRenameCollection(&cc(), fromNS, toNS);
    invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));
    BackgroundOperation::assertNoBgOpInProgForNs(fromNS);
    BackgroundOperation::assertNoBgOpInProgForNs(toNS);

    Collection* const source = getCollection(fromNS);
    if (!source) {
        return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename");
    }

    // Every cache eviction below is tagged with the same explanation.
    string reason = str::stream() << "renamed collection '" << fromNS << "' to '" << toNS << "'";

    for (IndexCatalog::IndexIterator indexes =
             source->getIndexCatalog()->getIndexIterator(txn, true);
         indexes.more();) {
        IndexDescriptor* const index = indexes.next();
        _clearCollectionCache(txn, index->indexNamespace(), reason);
    }
    _clearCollectionCache(txn, fromNS, reason);
    _clearCollectionCache(txn, toNS, reason);

    Top::get(txn->getClient()->getServiceContext()).collectionDropped(fromNS.toString());

    txn->recoveryUnit()->registerChange(new AddCollectionChange(txn, this, toNS));

    Status renameStatus = _dbEntry->renameCollection(txn, fromNS, toNS, stayTemp);
    _collections[toNS] = _getOrCreateCollectionInstance(txn, toNS);
    return renameStatus;
}
void run() { OperationContextImpl txn; Client::WriteContext ctx(&txn, _ns); int numFinishedIndexesStart = _catalog->numIndexesReady(&txn); Helpers::ensureIndex(&txn, _coll, BSON("x" << 1), false, "_x_0"); Helpers::ensureIndex(&txn, _coll, BSON("y" << 1), false, "_y_0"); ASSERT_TRUE(_catalog->numIndexesReady(&txn) == numFinishedIndexesStart+2); IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(&txn,false); int indexesIterated = 0; bool foundIndex = false; while (ii.more()) { IndexDescriptor* indexDesc = ii.next(); indexesIterated++; BSONObjIterator boit(indexDesc->infoObj()); while (boit.more() && !foundIndex) { BSONElement e = boit.next(); if (str::equals(e.fieldName(), "name") && str::equals(e.valuestrsafe(), "_y_0")) { foundIndex = true; break; } } } ctx.commit(); ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady(&txn)); ASSERT_TRUE(foundIndex); }
/**
 * Renames the collection 'fromNS' to 'toNS'.
 *
 * The rename is audited, then the database is asserted to be locked in MODE_X. Cached state is
 * cleared for every index namespace of the source collection (unfinished indexes included) and
 * for both the source and target namespaces, and Top is told that 'fromNS' was dropped. An
 * AddCollectionChange for 'toNS' is registered with the recovery unit, the catalog entry
 * performs the rename, and the in-memory collection map entry for 'toNS' is refreshed
 * regardless of the rename's status.
 *
 * Returns NamespaceNotFound when 'fromNS' does not exist; otherwise the catalog entry's status.
 */
Status Database::renameCollection(OperationContext* txn,
                                  StringData fromNS,
                                  StringData toNS,
                                  bool stayTemp) {
    audit::logRenameCollection(currentClient.get(), fromNS, toNS);
    invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));

    Collection* const source = getCollection(fromNS);
    if (!source) {
        return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename");
    }

    // Evict cached state for the source's indexes, then for both ends of the rename.
    for (IndexCatalog::IndexIterator indexes =
             source->getIndexCatalog()->getIndexIterator(txn, true);
         indexes.more();) {
        IndexDescriptor* const index = indexes.next();
        _clearCollectionCache(txn, index->indexNamespace());
    }
    _clearCollectionCache(txn, fromNS);
    _clearCollectionCache(txn, toNS);

    Top::global.collectionDropped(fromNS.toString());

    txn->recoveryUnit()->registerChange(new AddCollectionChange(this, toNS));

    Status renameStatus = _dbEntry->renameCollection(txn, fromNS, toNS, stayTemp);
    _collections[toNS] = _getOrCreateCollectionInstance(txn, toNS);
    return renameStatus;
}
void run() { IndexDescriptor* id = addIndexWithInfo(); // Create a SortPhaseOne. SortPhaseOne phaseOne; phaseOne.sorter.reset( new BSONObjExternalSorter(_aFirstSort)); // Add index keys to the phaseOne. int32_t nKeys = 130; for( int32_t i = 0; i < nKeys; ++i ) { phaseOne.sorter->add( BSON( "a" << i ), /* dummy disk loc */ DiskLoc(), false ); } phaseOne.nkeys = phaseOne.n = nKeys; phaseOne.sorter->sort( false ); // Set up remaining arguments. set<DiskLoc> dups; CurOp* op = cc().curop(); ProgressMeterHolder pm (op->setMessage("BuildBottomUp", "BuildBottomUp Progress", nKeys, nKeys)); pm.finished(); Timer timer; // The index's root has not yet been set. ASSERT( id->getHead().isNull() ); // Finish building the index. buildBottomUpPhases2And3<V1>( true, id, *phaseOne.sorter, false, dups, op, &phaseOne, pm, timer, true ); // The index's root is set after the build is complete. ASSERT( !id->getHead().isNull() ); // Create a cursor over the index. scoped_ptr<BtreeCursor> cursor( BtreeCursor::make( nsdetails( _ns ), id->getOnDisk(), BSON( "" << -1 ), // startKey below minimum key. BSON( "" << nKeys ), // endKey above maximum key. true, // endKeyInclusive true. 1 // direction forward. ) ); // Check that the keys in the index are the expected ones. int32_t expectedKey = 0; for( ; cursor->ok(); cursor->advance(), ++expectedKey ) { ASSERT_EQUALS( expectedKey, cursor->currKey().firstElement().number() ); } ASSERT_EQUALS( nKeys, expectedKey ); }
void CollectionInfoCache::computeIndexKeys() { DEV Lock::assertWriteLocked( _collection->ns().ns() ); _indexedPaths.clear(); IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(true); while (i.more()) { IndexDescriptor* descriptor = i.next(); if (descriptor->getAccessMethodName() != IndexNames::TEXT) { BSONObj key = descriptor->keyPattern(); BSONObjIterator j(key); while (j.more()) { BSONElement e = j.next(); _indexedPaths.addPath(e.fieldName()); } } else { fts::FTSSpec ftsSpec(descriptor->infoObj()); if (ftsSpec.wildcard()) { _indexedPaths.allPathsIndexed(); } else { for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) { _indexedPaths.addPath(ftsSpec.extraBefore(i)); } for (fts::Weights::const_iterator it = ftsSpec.weights().begin(); it != ftsSpec.weights().end(); ++it) { _indexedPaths.addPath(it->first); } for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) { _indexedPaths.addPath(ftsSpec.extraAfter(i)); } // Any update to a path containing "language" as a component could change the // language of a subdocument. Add the override field as a path component. _indexedPaths.addPathComponent(ftsSpec.languageOverrideField()); } } } _keysComputed = true; }
/**
 * Returns the total space used, in bytes, by all indexes of this collection (unfinished ones
 * included), as reported by each index's access method.
 *
 * When 'details' is non-null, one number per index is appended to it under the index name,
 * holding that index's size divided by 'scale'. The returned total is not scaled.
 */
uint64_t Collection::getIndexSize(OperationContext* opCtx, BSONObjBuilder* details, int scale) {
    IndexCatalog* catalog = getIndexCatalog();

    uint64_t totalBytes = 0;

    for (IndexCatalog::IndexIterator indexes = catalog->getIndexIterator(opCtx, true);
         indexes.more();) {
        IndexDescriptor* descriptor = indexes.next();
        IndexAccessMethod* accessMethod = catalog->getIndex(descriptor);

        long long indexBytes = accessMethod->getSpaceUsedBytes(opCtx);
        totalBytes += indexBytes;

        if (!details) {
            continue;
        }
        details->appendNumber(descriptor->indexName(), indexBytes / scale);
    }

    return totalBytes;
}
void run() { Client::WriteContext ctx(_ns); int numFinishedIndexesStart = _catalog->numIndexesReady(); BSONObjBuilder b1; b1.append("key", BSON("x" << 1)); b1.append("ns", _ns); b1.append("name", "_x_0"); _catalog->createIndex(b1.obj(), true); BSONObjBuilder b2; b2.append("key", BSON("y" << 1)); b2.append("ns", _ns); b2.append("name", "_y_0"); _catalog->createIndex(b2.obj(), true); ASSERT_TRUE(_catalog->numIndexesReady() == numFinishedIndexesStart+2); IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(false); int indexesIterated = 0; bool foundIndex = false; while (ii.more()) { IndexDescriptor* indexDesc = ii.next(); indexesIterated++; BSONObjIterator boit(indexDesc->infoObj()); while (boit.more() && !foundIndex) { BSONElement e = boit.next(); if (str::equals(e.fieldName(), "name") && str::equals(e.valuestrsafe(), "_y_0")) { foundIndex = true; break; } } } ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady()); ASSERT_TRUE(foundIndex); }
/**
 * Compacts this collection's record store and rebuilds its indexes.
 *
 * Steps, in order:
 *   1. Refuse (BadValue) if the record store does not support compaction or if any index build
 *      is in progress.
 *   2. Reset the info cache.
 *   3. Collect the adjusted spec of every finished index, refusing (CannotCreateIndex) if any
 *      key pattern fails validation.
 *   4. Drop all indexes, check for interrupt, and initialise a MultiIndexBlock with the
 *      collected specs.
 *   5. Run the record store's compact() through an adaptor bound to the MultiIndexBlock, then
 *      commit the index block.
 *
 * Returns the CompactStats filled in by the record store, or the first failing status.
 */
StatusWith<CompactStats> Collection::compact(OperationContext* txn,
                                             const CompactOptions* compactOptions) {
    if (!_recordStore->compactSupported()) {
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        str::stream()
                                            << "cannot compact collection with record store: "
                                            << _recordStore->name());
    }

    if (_indexCatalog.numIndexesInProgress()) {
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact when indexes in progress");
    }

    // same data, but might perform a little different after compact?
    _infoCache.reset();

    // Remember how to rebuild every finished index before they are dropped.
    vector<BSONObj> indexSpecs;
    for (IndexCatalog::IndexIterator finished(_indexCatalog.getIndexIterator(false));
         finished.more();) {
        IndexDescriptor* descriptor = finished.next();

        const BSONObj spec = _compactAdjustIndexSpec(descriptor->infoObj());
        const BSONObj keyPattern = spec.getObjectField("key");
        const Status keyStatus = validateKeyPattern(keyPattern);
        if (!keyStatus.isOK()) {
            return StatusWith<CompactStats>(
                ErrorCodes::CannotCreateIndex,
                str::stream() << "Cannot compact collection due to invalid index " << spec
                              << ": " << keyStatus.reason() << " For more info see"
                              << " http://dochub.mongodb.org/core/index-validation");
        }
        indexSpecs.push_back(spec);
    }

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    Status status = _indexCatalog.dropAllIndexes(txn, true);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    txn->checkForInterrupt();

    CompactStats stats;

    MultiIndexBlock rebuilder(txn, this);
    status = rebuilder.init(indexSpecs);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    // NOTE(review): whatever compact() returns is not inspected here -- confirm intended.
    MyCompactAdaptor adaptor(this, &rebuilder);
    _recordStore->compact(txn, &adaptor, compactOptions, &stats);

    log() << "starting index commits";
    status = rebuilder.commit();
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    return StatusWith<CompactStats>(stats);
}
/**
 * Records the RecordId of every document currently in the chunk being migrated.
 *
 * Takes the collection in MODE_IS, locates an index prefixed by the shard key, installs the
 * delete-notification executor on the collection, and then scans the chunk's key range through
 * that index, inserting each RecordId into '_cloneLocs' (under '_mutex').
 *
 * Returns:
 *   - NamespaceNotFound if the collection does not exist;
 *   - IndexNotFound if no index is prefixed by the shard key;
 *   - InternalError if the scanning executor ends in DEAD or FAILURE;
 *   - ChunkTooBig if more documents are found than the estimated capacity of a full chunk;
 *   - OK otherwise, after updating '_averageObjectSizeForCloneLocs'.
 */
Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* txn) {
    ScopedTransaction scopedXact(txn, MODE_IS);
    AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

    Collection* const collection = autoColl.getCollection();
    if (!collection) {
        return {ErrorCodes::NamespaceNotFound,
                str::stream() << "Collection " << _args.getNss().ns() << " does not exist."};
    }

    // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
    // multi-key index prefixed by shard key cannot be multikey over the shard key fields.
    IndexDescriptor* idx =
        collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn,
                                                                 _shardKeyPattern.toBSON(),
                                                                 false);  // requireSingleKey
    if (!idx) {
        return {ErrorCodes::IndexNotFound,
                str::stream() << "can't find index with prefix " << _shardKeyPattern.toBSON()
                              << " in storeCurrentLocs for " << _args.getNss().ns()};
    }

    // Install the stage, which will listen for notifications on the collection
    {
        stdx::lock_guard<stdx::mutex> sl(_mutex);

        invariant(!_deleteNotifyExec);

        // The executor takes ownership of the working set and the notification stage.
        auto statusWithPlanExecutor =
            PlanExecutor::make(txn,
                               stdx::make_unique<WorkingSet>(),
                               stdx::make_unique<DeleteNotificationStage>(this, txn),
                               collection,
                               PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());

        _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue());
        _deleteNotifyExec->registerExec(collection);
    }

    // Assume both min and max non-empty, append MinKey's to make them fit chosen index
    const KeyPattern kp(idx->keyPattern());

    BSONObj min = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMinKey(), false));
    BSONObj max = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMaxKey(), false));

    std::unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn,
                                                                  collection,
                                                                  idx,
                                                                  min,
                                                                  max,
                                                                  false,  // endKeyInclusive
                                                                  PlanExecutor::YIELD_MANUAL));

    // We can afford to yield here because any change to the base data that we might miss is already
    // being queued and will migrate in the 'transferMods' stage.
    exec->setYieldPolicy(PlanExecutor::YIELD_AUTO, collection);

    // Use the average object size to estimate how many objects a full chunk would carry. Do that
    // while traversing the chunk's range using the sharding index; below there's a fair amount of
    // slack before we determine a chunk is too large because object sizes will vary.
    unsigned long long maxRecsWhenFull;
    long long avgRecSize;

    const long long totalRecs = collection->numRecords(txn);
    if (totalRecs > 0) {
        avgRecSize = collection->dataSize(txn) / totalRecs;

        // The integer quotient is zero whenever the reported data size is smaller than the
        // record count. Clamp it so the division below cannot divide by zero.
        if (avgRecSize < 1) {
            avgRecSize = 1;
        }

        maxRecsWhenFull = _args.getMaxChunkSizeBytes() / avgRecSize;
        maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1),
                                   130 * maxRecsWhenFull / 100 /* slack */);
    } else {
        avgRecSize = 0;
        maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1;
    }

    // Do a full traversal of the chunk and don't stop even if we think it is a large chunk; we
    // want the number of records to better report, in that case.
    bool isLargeChunk = false;
    unsigned long long recCount = 0;

    BSONObj obj;
    RecordId recordId;
    PlanExecutor::ExecState state;
    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &recordId))) {
        if (!isLargeChunk) {
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            _cloneLocs.insert(recordId);
        }

        if (++recCount > maxRecsWhenFull) {
            isLargeChunk = true;
            // Continue on despite knowing that it will fail, just to get the correct value for
            // recCount
        }
    }

    if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) {
        return {ErrorCodes::InternalError,
                str::stream() << "Executor error while scanning for documents belonging to chunk: "
                              << WorkingSetCommon::toStatusString(obj)};
    }

    exec.reset();

    if (isLargeChunk) {
        return {
            ErrorCodes::ChunkTooBig,
            str::stream() << "Cannot move chunk: the maximum number of documents for a chunk is "
                          << maxRecsWhenFull << ", the maximum chunk size is "
                          << _args.getMaxChunkSizeBytes() << ", average document size is "
                          << avgRecSize << ". Found " << recCount << " documents in chunk "
                          << " ns: " << _args.getNss().ns() << " " << _args.getMinKey() << " -> "
                          << _args.getMaxKey()};
    }

    _averageObjectSizeForCloneLocs = static_cast<uint64_t>(collection->averageObjectSize(txn) + 12);
    return Status::OK();
}
/**
 * Replaces the document at 'oldLocation' with 'newDoc' and keeps the indexes in step.
 *
 * Order of operations:
 *   1. Document validation. A failing 'newDoc' is rejected outright at STRICT_V; otherwise it
 *      is rejected only if the old document passed validation.
 *   2. Lock and snapshot sanity checks; for collections needing the capped lock, an X lock on
 *      the collection's metadata resource is requested.
 *   3. The "_id" of the old and new documents must agree (InternalError otherwise), and in a
 *      capped record store the document size must not change
 *      (CannotGrowDocumentInCappedNamespace otherwise).
 *   4. When 'indexesAffected', one UpdateTicket per index is prepared via validateUpdate().
 *   5. The record store performs the update. If the record moved, the new location is indexed
 *      from scratch; if it stayed, each index applies its prepared ticket.
 *   6. The op observer is notified with 'args' (whose 'ns' is filled in here).
 *
 * 'debug', when non-null, receives the move count or the number of key updates.
 * Returns the (possibly new) location of the document, or the first error encountered.
 */
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto newDocStatus = checkValidation(txn, newDoc);
        if (!newDocStatus.isOK()) {
            // Strict mode rejects immediately. In moderate mode the old document is consulted:
            // good -> bad is rejected, bad -> bad is tolerated.
            if (_validationLevel == STRICT_V || checkValidation(txn, oldDoc.value()).isOK()) {
                return newDocStatus;
            }
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    if (_needCappedLock) {
        // X-lock the metadata resource for this capped collection; the stated intent is that
        // it lasts until the end of the WUOW so that the primary does not execute with more
        // concurrency than secondaries. See SERVER-21646.
        // NOTE(review): the ResourceLock is a temporary, so its destructor runs at the end of
        // this statement -- presumably the lock manager defers the release while a WUOW is
        // active; confirm.
        Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X};
    }

    SnapshotId startingSnapshot = txn->recoveryUnit()->getSnapshotId();

    BSONElement previousId = oldDoc.value()["_id"];
    if (!previousId.eoo() && (previousId != newDoc["_id"])) {
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);
    }

    // The MMAPv1 storage engine implements capped collections in a way that does not allow
    // records to grow beyond their original size. To keep mixed-engine replica sets consistent
    // a uniform rule is applied here, and because shrinking can later require growing (for
    // example when a secondary rolls back an update), all size changes are forbidden.
    const auto previousSize = oldDoc.value().objsize();
    if (_recordStore->isCapped() && previousSize != newDoc.objsize()) {
        return {ErrorCodes::CannotGrowDocumentInCappedNamespace,
                str::stream() << "Cannot change the size of a document in a capped collection: "
                              << previousSize << " != " << newDoc.objsize()};
    }

    // One UpdateTicket per index, describing the index changes implied by oldDoc -> newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        for (IndexCatalog::IndexIterator indexes = _indexCatalog.getIndexIterator(txn, true);
             indexes.more();) {
            IndexDescriptor* descriptor = indexes.next();
            IndexCatalogEntry* entry = indexes.catalogEntry(descriptor);
            IndexAccessMethod* accessMethod = indexes.accessMethod(descriptor);

            // Duplicates are allowed unless the index is the _id index or unique, and even
            // then when the replication coordinator says uniqueness should be ignored.
            const bool enforcesUniqueness =
                KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique();

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed = !enforcesUniqueness ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);

            // The map owns the ticket from this point on.
            UpdateTicket* ticket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = ticket;

            Status validated = accessMethod->validateUpdate(txn,
                                                            oldDoc.value(),
                                                            newDoc,
                                                            oldLocation,
                                                            options,
                                                            ticket,
                                                            entry->getFilterExpression());
            if (!validated.isOK()) {
                return StatusWith<RecordId>(validated);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove. If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);
    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point the old object may or may not still be indexed, depending on whether the
    // record moved.
    if (newLocation.getValue() != oldLocation) {
        // The record moved: index the document at its new location.
        if (debug) {
            // nmoved defaults to -1 rather than 0.
            debug->nmoved = (debug->nmoved == -1) ? 1 : debug->nmoved + 1;
        }

        std::vector<BsonRecord> bsonRecords;
        BsonRecord movedRecord = {newLocation.getValue(), &newDoc};
        bsonRecords.push_back(movedRecord);

        Status indexed = _indexCatalog.indexRecords(txn, bsonRecords);
        if (!indexed.isOK()) {
            return StatusWith<RecordId>(indexed);
        }
    } else {
        // The record stayed put: apply each index's prepared ticket.
        if (debug) {
            debug->keyUpdates = 0;
        }

        if (indexesAffected) {
            for (IndexCatalog::IndexIterator indexes = _indexCatalog.getIndexIterator(txn, true);
                 indexes.more();) {
                IndexDescriptor* descriptor = indexes.next();
                IndexAccessMethod* accessMethod = indexes.accessMethod(descriptor);

                int64_t updatedKeys;
                Status applied = accessMethod->update(
                    txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
                if (!applied.isOK()) {
                    return StatusWith<RecordId>(applied);
                }
                if (debug) {
                    debug->keyUpdates += updatedKeys;
                }
            }
        }
    }

    // The whole update must have happened within a single snapshot.
    invariant(startingSnapshot == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
/**
 * Repairs database 'dbName' by copying every normal collection into a freshly created database
 * in a reserved directory and then swapping the copied files in.
 *
 * Outline:
 *   1. Sanity checks, journal sync, and a free-space check against the repair path
 *      (OutOfDiskSpace if the database is larger than the reported free space).
 *   2. A reserved directory is created ("backup" prefix when files are to be preserved or
 *      backed up, "_tmp" otherwise). Unless 'preserveClonedFilesOnFailure' is set, a
 *      RepairFileDeleter is armed for it.
 *   3. Collection names and options are read from <db>.system.namespaces; each collection is
 *      created in the temporary database, its index specs are handed to a MultiIndexBlock, and
 *      its documents are copied one write unit of work at a time.
 *   4. After a final sync and flush, the original database is closed and its files are either
 *      renamed for backup or deleted, and the recovered files are moved into place.
 *
 * Returns NamespaceNotFound if the database is not open, any error from option parsing, index
 * initialisation or document insertion, and OK on success.
 */
Status MMAPV1Engine::repairDatabase(OperationContext* txn,
                                    const std::string& dbName,
                                    bool preserveClonedFilesOnFailure,
                                    bool backupOriginalFiles) {
    // We must hold some form of lock here
    invariant(txn->lockState()->threadState());
    invariant(dbName.find('.') == string::npos);

    scoped_ptr<RepairFileDeleter> repairFileDeleter;

    log() << "repairDatabase " << dbName << endl;

    BackgroundOperation::assertNoBgOpInProgForDb(dbName);

    // Must be done before and after repair
    txn->recoveryUnit()->syncDataAndTruncateJournal();

    intmax_t requiredBytes = dbSize(dbName);
    intmax_t availableBytes = File::freeSpace(storageGlobalParams.repairpath);

    // A negative free-space reading is not treated as a shortage.
    if (availableBytes > -1 && availableBytes < requiredBytes) {
        return Status(ErrorCodes::OutOfDiskSpace,
                      str::stream() << "Cannot repair database " << dbName
                                    << " having size: " << requiredBytes
                                    << " (bytes) because free disk space is: " << availableBytes
                                    << " (bytes)");
    }

    txn->checkForInterrupt();

    Path reservedPath = uniqueReservedPath(
        (preserveClonedFilesOnFailure || backupOriginalFiles) ? "backup" : "_tmp");
    MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(reservedPath));
    string reservedPathString = reservedPath.string();

    if (!preserveClonedFilesOnFailure) {
        repairFileDeleter.reset(
            new RepairFileDeleter(txn, dbName, reservedPathString, reservedPath));
    }

    // Everything that touches the temporary database lives in this scope so that it is torn
    // down before the original database is closed and files are moved around.
    {
        Database* originalDatabase = dbHolder().get(txn, dbName);
        if (originalDatabase == NULL) {
            return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
        }

        scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
        scoped_ptr<Database> tempDatabase;
        {
            dbEntry.reset(new MMAPV1DatabaseCatalogEntry(
                txn, dbName, reservedPathString, storageGlobalParams.directoryperdb, true));
            invariant(!dbEntry->exists());
            tempDatabase.reset(new Database(txn, dbName, dbEntry.get()));
        }

        // Gather the collections to copy, with their options, from system.namespaces.
        map<string, CollectionOptions> namespacesToCopy;
        {
            string namespacesNs = dbName + ".system.namespaces";

            Client::Context ctx(txn, namespacesNs);
            Collection* namespacesColl = originalDatabase->getCollection(txn, namespacesNs);
            if (namespacesColl) {
                scoped_ptr<RecordIterator> entries(namespacesColl->getIterator(
                    txn, DiskLoc(), false, CollectionScanParams::FORWARD));
                while (!entries->isEOF()) {
                    DiskLoc entryLoc = entries->getNext();
                    BSONObj entry = namespacesColl->docFor(entryLoc);

                    string entryName = entry["name"].String();
                    NamespaceString nss(entryName);

                    // system.indexes and system.namespaces themselves are never copied.
                    if (nss.isSystem() &&
                        (nss.isSystemDotIndexes() || nss.coll() == "system.namespaces")) {
                        continue;
                    }

                    if (!nss.isNormal()) {
                        continue;
                    }

                    CollectionOptions options;
                    if (entry["options"].isABSONObj()) {
                        Status parsed = options.parse(entry["options"].Obj());
                        if (!parsed.isOK()) {
                            return parsed;
                        }
                    }
                    namespacesToCopy[entryName] = options;
                }
            }
        }

        for (map<string, CollectionOptions>::const_iterator toCopy = namespacesToCopy.begin();
             toCopy != namespacesToCopy.end();
             ++toCopy) {
            string ns = toCopy->first;
            CollectionOptions options = toCopy->second;

            // Create the destination collection in its own write unit of work.
            Collection* tempCollection = NULL;
            {
                Client::Context tempContext(txn, ns, tempDatabase );
                WriteUnitOfWork wunit(txn);
                tempCollection = tempDatabase->createCollection(txn, ns, options, true, false);
                wunit.commit();
            }

            Client::Context readContext(txn, ns, originalDatabase);
            Collection* originalCollection = originalDatabase->getCollection(txn, ns);
            invariant(originalCollection);

            // data

            // TODO SERVER-14812 add a mode that drops duplicates rather than failing
            MultiIndexBlock indexer(txn, tempCollection);
            {
                // Carry over the specs of the finished indexes of the original collection.
                vector<BSONObj> indexSpecs;
                IndexCatalog::IndexIterator finished =
                    originalCollection->getIndexCatalog()->getIndexIterator(false);
                while (finished.more()) {
                    IndexDescriptor* descriptor = finished.next();
                    indexSpecs.push_back(descriptor->infoObj());
                }

                Client::Context tempContext(txn, ns, tempDatabase);
                Status initialised = indexer.init(indexSpecs);
                if (!initialised.isOK()) {
                    return initialised;
                }
            }

            // Copy the documents, one write unit of work per document.
            scoped_ptr<RecordIterator> documents(originalCollection->getIterator(
                txn, DiskLoc(), false, CollectionScanParams::FORWARD));
            while (!documents->isEOF()) {
                DiskLoc docLoc = documents->getNext();
                invariant(!docLoc.isNull());

                BSONObj doc = originalCollection->docFor(docLoc);

                Client::Context tempContext(txn, ns, tempDatabase);

                WriteUnitOfWork wunit(txn);
                StatusWith<DiskLoc> inserted =
                    tempCollection->insertDocument(txn, doc, &indexer, false);
                if (!inserted.isOK()) {
                    return inserted.getStatus();
                }

                wunit.commit();
                txn->checkForInterrupt(false);
            }

            Status insertsDone = indexer.doneInserting();
            if (!insertsDone.isOK()) {
                return insertsDone;
            }

            {
                Client::Context tempContext(txn, ns, tempDatabase);
                WriteUnitOfWork wunit(txn);
                indexer.commit();
                wunit.commit();
            }
        }

        // need both in case journaling is disabled
        txn->recoveryUnit()->syncDataAndTruncateJournal();
        globalStorageEngine->flushAllFiles(true);

        txn->checkForInterrupt(false);
    }

    // at this point if we abort, we don't want to delete new files
    // as they might be the only copies
    if (repairFileDeleter.get()) {
        repairFileDeleter->success();
    }

    dbHolder().close(txn, dbName);

    if (backupOriginalFiles) {
        _renameForBackup(dbName, reservedPath);
    } else {
        // first make new directory before deleting data
        Path newDir = Path(storageGlobalParams.dbpath) / dbName;
        MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

        // this deletes old files
        _deleteDataFiles(dbName);

        if (!boost::filesystem::exists(newDir)) {
            // we deleted because of directoryperdb
            // re-create
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
        }
    }

    _replaceWithRecovered(dbName, reservedPathString.c_str());

    if (!backupOriginalFiles) {
        MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
    }

    return Status::OK();
}
void run() { IndexDescriptor* id = addIndexWithInfo(); // Create a SortPhaseOne. SortPhaseOne phaseOne; phaseOne.sorter.reset(new BSONObjExternalSorter(_aFirstSort)); // It's necessary to index sufficient keys that a RARELY condition will be triggered, // but few enough keys that the btree builder will not create an internal node and check // for an interrupt internally (which would cause this test to pass spuriously). int32_t nKeys = 130; // Add index keys to the phaseOne. for( int32_t i = 0; i < nKeys; ++i ) { phaseOne.sorter->add( BSON( "a" << i ), /* dummy disk loc */ DiskLoc(), false ); } phaseOne.nkeys = phaseOne.n = nKeys; phaseOne.sorter->sort( false ); // Set up remaining arguments. set<DiskLoc> dups; CurOp* op = cc().curop(); ProgressMeterHolder pm (op->setMessage("InterruptBuildBottomUp", "InterruptBuildBottomUp Progress", nKeys, nKeys)); pm.finished(); Timer timer; // The index's root has not yet been set. ASSERT( id->getHead().isNull() ); // Register a request to kill the current operation. cc().curop()->kill(); if ( _mayInterrupt ) { // The build is aborted due to the kill request. ASSERT_THROWS ( buildBottomUpPhases2And3<V1>( true, id, *phaseOne.sorter, false, dups, op, &phaseOne, pm, timer, _mayInterrupt ), UserException ); // The root of the index is not set because the build did not complete. ASSERT( id->getHead().isNull() ); } else { // The build is aborted despite the kill request because mayInterrupt == false. buildBottomUpPhases2And3<V1>( true, id, *phaseOne.sorter, false, dups, op, &phaseOne, pm, timer, _mayInterrupt ); // The index's root is set after the build is complete. ASSERT( !id->getHead().isNull() ); } }
/**
 * Rebuilds '_indexedPaths' and '_hasTTLIndex' from the collection's index catalog (unfinished
 * indexes included), keeps the TTL collection cache registration in step, and marks the keys
 * as computed.
 *
 * Per index:
 *   - text index: a wildcard spec marks all paths as indexed; otherwise the spec's "extra
 *     before" fields, weighted fields and "extra after" fields are added, plus the language
 *     override field as a path component;
 *   - any other index: every key pattern field is added, and the presence of an
 *     "expireAfterSeconds" field in the spec flags the collection as having a TTL index;
 *   - in both cases, the fields referenced by a partial-index filter expression are added.
 *
 * The collection is registered with / unregistered from the TTL collection cache only when the
 * TTL flag changed compared to its value on entry.
 */
void CollectionInfoCache::computeIndexKeys(OperationContext* opCtx) {
    _indexedPaths.clear();

    const bool hadTTLIndex = _hasTTLIndex;
    _hasTTLIndex = false;

    IndexCatalog::IndexIterator indexes =
        _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
    while (indexes.more()) {
        IndexDescriptor* descriptor = indexes.next();

        if (descriptor->getAccessMethodName() == IndexNames::TEXT) {
            fts::FTSSpec spec(descriptor->infoObj());

            if (spec.wildcard()) {
                _indexedPaths.allPathsIndexed();
            } else {
                for (size_t pos = 0; pos < spec.numExtraBefore(); ++pos) {
                    _indexedPaths.addPath(spec.extraBefore(pos));
                }
                for (fts::Weights::const_iterator weight = spec.weights().begin();
                     weight != spec.weights().end();
                     ++weight) {
                    _indexedPaths.addPath(weight->first);
                }
                for (size_t pos = 0; pos < spec.numExtraAfter(); ++pos) {
                    _indexedPaths.addPath(spec.extraAfter(pos));
                }

                // Any update to a path containing "language" as a component could change the
                // language of a subdocument, so the override field is tracked as a path
                // component.
                _indexedPaths.addPathComponent(spec.languageOverrideField());
            }
        } else {
            BSONObj keyPattern = descriptor->keyPattern();

            const BSONObj& infoObj = descriptor->infoObj();
            if (infoObj.hasField("expireAfterSeconds")) {
                _hasTTLIndex = true;
            }

            for (BSONObjIterator fields(keyPattern); fields.more();) {
                BSONElement field = fields.next();
                _indexedPaths.addPath(field.fieldName());
            }
        }

        // handle partial indexes
        const IndexCatalogEntry* entry = indexes.catalogEntry(descriptor);
        const MatchExpression* filter = entry->getFilterExpression();
        if (filter) {
            unordered_set<std::string> filterPaths;
            QueryPlannerIXSelect::getFields(filter, "", &filterPaths);
            for (const std::string& path : filterPaths) {
                _indexedPaths.addPath(path);
            }
        }
    }

    TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext());

    // Only touch the TTL cache when the flag actually flipped.
    if (_hasTTLIndex && !hadTTLIndex) {
        ttlCollectionCache.registerCollection(_collection->ns());
    } else if (!_hasTTLIndex && hadTTLIndex) {
        ttlCollectionCache.unregisterCollection(_collection->ns());
    }

    _keysComputed = true;
}
bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { BSONElement e = jsobj.firstElement(); const string toDeleteNs = dbname + '.' + e.valuestr(); if (!serverGlobalParams.quiet) { MONGO_TLOG(0) << "CMD: dropIndexes " << toDeleteNs << endl; } Lock::DBWrite dbXLock(dbname); Client::Context ctx(toDeleteNs); Collection* collection = cc().database()->getCollection( toDeleteNs ); if ( ! collection ) { errmsg = "ns not found"; return false; } stopIndexBuilds(cc().database(), jsobj); IndexCatalog* indexCatalog = collection->getIndexCatalog(); anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal() ); BSONElement f = jsobj.getField("index"); if ( f.type() == String ) { string indexToDelete = f.valuestr(); if ( indexToDelete == "*" ) { Status s = indexCatalog->dropAllIndexes( false ); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } anObjBuilder.append("msg", "non-_id indexes dropped for collection"); return true; } IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( indexToDelete ); if ( desc == NULL ) { errmsg = str::stream() << "index not found with name [" << indexToDelete << "]"; return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex( desc ); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return true; } if ( f.type() == Object ) { IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByKeyPattern( f.embeddedObject() ); if ( desc == NULL ) { errmsg = "can't find index with key:"; errmsg += f.embeddedObject().toString(); return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex( desc ); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return true; } errmsg = "invalid index name spec"; return false; }
bool MigrationSourceManager::storeCurrentLocs(OperationContext* txn, long long maxChunkSize, string& errmsg, BSONObjBuilder& result) { AutoGetCollection autoColl(txn, _getNS(), MODE_IS); Collection* collection = autoColl.getCollection(); if (!collection) { errmsg = "ns not found, should be impossible"; return false; } // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any // multi-key index prefixed by shard key cannot be multikey over the shard key fields. IndexDescriptor* idx = collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn, _shardKeyPattern, false); // requireSingleKey if (idx == NULL) { errmsg = str::stream() << "can't find index with prefix " << _shardKeyPattern << " in storeCurrentLocs for " << _nss.toString(); return false; } // Assume both min and max non-empty, append MinKey's to make them fit chosen index BSONObj min; BSONObj max; KeyPattern kp(idx->keyPattern()); { // It's alright not to lock _mutex all the way through based on the assumption that this is // only called by the main thread that drives the migration and only it can start and stop // the current migration. stdx::lock_guard<stdx::mutex> sl(_mutex); invariant(_deleteNotifyExec.get() == NULL); unique_ptr<WorkingSet> ws = stdx::make_unique<WorkingSet>(); unique_ptr<DeleteNotificationStage> dns = stdx::make_unique<DeleteNotificationStage>(this); // Takes ownership of 'ws' and 'dns'. 
auto statusWithPlanExecutor = PlanExecutor::make( txn, std::move(ws), std::move(dns), collection, PlanExecutor::YIELD_MANUAL); invariant(statusWithPlanExecutor.isOK()); _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue()); _deleteNotifyExec->registerExec(); min = Helpers::toKeyFormat(kp.extendRangeBound(_min, false)); max = Helpers::toKeyFormat(kp.extendRangeBound(_max, false)); } unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn, collection, idx, min, max, false, // endKeyInclusive PlanExecutor::YIELD_MANUAL)); // We can afford to yield here because any change to the base data that we might miss is already // being queued and will migrate in the 'transferMods' stage. exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); // Use the average object size to estimate how many objects a full chunk would carry do that // while traversing the chunk's range using the sharding index, below there's a fair amount of // slack before we determine a chunk is too large because object sizes will vary. 
unsigned long long maxRecsWhenFull; long long avgRecSize; const long long totalRecs = collection->numRecords(txn); if (totalRecs > 0) { avgRecSize = collection->dataSize(txn) / totalRecs; maxRecsWhenFull = maxChunkSize / avgRecSize; maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1), 130 * maxRecsWhenFull / 100 /* slack */); } else { avgRecSize = 0; maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1; } // Do a full traversal of the chunk and don't stop even if we think it is a large chunk we want // the number of records to better report, in that case bool isLargeChunk = false; unsigned long long recCount = 0; RecordId recordId; while (PlanExecutor::ADVANCED == exec->getNext(NULL, &recordId)) { if (!isLargeChunk) { stdx::lock_guard<stdx::mutex> lk(_cloneLocsMutex); _cloneLocs.insert(recordId); } if (++recCount > maxRecsWhenFull) { isLargeChunk = true; // Continue on despite knowing that it will fail, just to get the correct value for // recCount } } exec.reset(); if (isLargeChunk) { stdx::lock_guard<stdx::mutex> sl(_mutex); warning() << "cannot move chunk: the maximum number of documents for a chunk is " << maxRecsWhenFull << " , the maximum chunk size is " << maxChunkSize << " , average document size is " << avgRecSize << ". Found " << recCount << " documents in chunk " << " ns: " << _nss << " " << _min << " -> " << _max << migrateLog; result.appendBool("chunkTooBig", true); result.appendNumber("estimatedChunkSize", (long long)(recCount * avgRecSize)); errmsg = "chunk too big to move"; return false; } log() << "moveChunk number of documents: " << cloneLocsRemaining() << migrateLog; txn->recoveryUnit()->abandonSnapshot(); return true; }
/**
 * Replaces the document stored at 'oldLocation' with 'newDoc'.
 *
 * Steps, in order:
 *   1. Run document validation on 'newDoc'. A failure is returned when the validation level is
 *      STRICT_V, or when the old document passed validation.
 *   2. Reject a change of the _id field.
 *   3. If 'indexesAffected', pre-validate the update against every index and keep one
 *      UpdateTicket per index.
 *   4. Ask the record store to update the record.
 *   5. If the record moved, index the new location; otherwise apply the tickets from step 3.
 *   6. Notify the op observer.
 *
 * @return the (possibly new) location of the document, or the first error encountered.
 */
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto newDocStatus = checkValidation(txn, newDoc);
        if (!newDocStatus.isOK()) {
            if (_validationLevel == STRICT_V) {
                return newDocStatus;
            }
            // Moderate level: only a good -> bad transition is rejected, so consult the old
            // document. bad -> bad falls through and is allowed.
            if (checkValidation(txn, oldDoc.value()).isOK()) {
                return newDocStatus;
            }
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId snapshotAtStart = txn->recoveryUnit()->getSnapshotId();

    BSONElement previousId = oldDoc.value()["_id"];
    if (!previousId.eoo() && (previousId != newDoc["_id"])) {
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);
    }

    // Build one UpdateTicket per index describing the index changes implied by going from
    // oldDoc to newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
        while (indexIt.more()) {
            IndexDescriptor* descriptor = indexIt.next();
            IndexCatalogEntry* entry = indexIt.catalogEntry(descriptor);
            IndexAccessMethod* iam = indexIt.accessMethod(descriptor);

            const bool enforcesUniqueness =
                KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique();

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed = !enforcesUniqueness ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);

            UpdateTicket* ticket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = ticket;

            Status validateStatus = iam->validateUpdate(txn,
                                                        oldDoc.value(),
                                                        newDoc,
                                                        oldLocation,
                                                        options,
                                                        ticket,
                                                        entry->getFilterExpression());
            if (!validateStatus.isOK()) {
                return StatusWith<RecordId>(validateStatus);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove. If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);
    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved.
    const bool documentMoved = (newLocation.getValue() != oldLocation);

    if (documentMoved) {
        // The document moved: add the new location to all indexes.
        if (debug) {
            // nmoved defaults to -1 rather than 0.
            debug->nmoved = (debug->nmoved == -1) ? 1 : debug->nmoved + 1;
        }

        Status indexStatus = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!indexStatus.isOK()) {
            return StatusWith<RecordId>(indexStatus);
        }
    } else {
        // The document stayed in place: apply each index's UpdateTicket.
        if (debug) {
            debug->keyUpdates = 0;
        }

        if (indexesAffected) {
            IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
            while (indexIt.more()) {
                IndexDescriptor* descriptor = indexIt.next();
                IndexAccessMethod* iam = indexIt.accessMethod(descriptor);

                int64_t updatedKeys;
                Status updateStatus =
                    iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
                if (!updateStatus.isOK()) {
                    return StatusWith<RecordId>(updateStatus);
                }
                if (debug) {
                    debug->keyUpdates += updatedKeys;
                }
            }
        }
    }

    // Common exit for both paths.
    invariant(snapshotAtStart == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
/**
 * Replaces the document stored at 'oldLocation' with 'objNew'.
 *
 * The _id field may not change, and documents in "system.users" must pass
 * V2UserDocumentParser::checkValidUserDocument. Every index is pre-validated (producing an
 * UpdateTicket) before the record store is touched. If the record moves, the new location is
 * indexed; otherwise the tickets are applied and cursors are told about the mutation.
 *
 * @return the (possibly new) location of the document, or the first error encountered.
 */
StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn,
                                                const DiskLoc& oldLocation,
                                                const BSONObj& objNew,
                                                bool enforceQuota,
                                                OpDebug* debug ) {

    BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson();

    if ( objOld.hasElement( "_id" ) ) {
        BSONElement previousId = objOld["_id"];
        BSONElement incomingId = objNew["_id"];
        if ( previousId != incomingId ) {
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "in Collection::updateDocument _id mismatch",
                                        13596 );
        }
    }

    if ( ns().coll() == "system.users" ) {
        // XXX - andy and spencer think this should go away now
        V2UserDocumentParser parser;
        Status userDocStatus = parser.checkValidUserDocument(objNew);
        if ( !userDocStatus.isOK() ) {
            return StatusWith<DiskLoc>( userDocStatus );
        }
    }

    /* Duplicate key check. The btree is descended twice - once for this check, and once for
       the actual inserts, further below. That is suboptimal, but it's pretty complicated to do
       it the other way without rollbacks...
    */
    OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
    IndexCatalog::IndexIterator validateIt = _indexCatalog.getIndexIterator( txn, true );
    while ( validateIt.more() ) {
        IndexDescriptor* descriptor = validateIt.next();
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        const bool enforcesUniqueness =
            KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique();

        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed = !enforcesUniqueness ||
            repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);

        UpdateTicket* ticket = new UpdateTicket();
        updateTickets.mutableMap()[descriptor] = ticket;

        Status validateStatus =
            iam->validateUpdate(txn, objOld, objNew, oldLocation, options, ticket );
        if ( !validateStatus.isOK() ) {
            return StatusWith<DiskLoc>( validateStatus );
        }
    }

    // this can callback into Collection::recordStoreGoingToMove
    StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn,
                                                                  oldLocation,
                                                                  objNew.objdata(),
                                                                  objNew.objsize(),
                                                                  _enforceQuota( enforceQuota ),
                                                                  this );
    if ( !newLocation.isOK() ) {
        return newLocation;
    }

    _infoCache.notifyOfWriteOp();

    if ( newLocation.getValue() != oldLocation ) {
        // The record moved: index it at its new location and stop here.
        if ( debug ) {
            // nmoved defaults to -1 rather than 0
            debug->nmoved = ( debug->nmoved == -1 ) ? 1 : debug->nmoved + 1;
        }

        _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());

        return newLocation;
    }

    // The record stayed in place: apply the ticket prepared for each index.
    if ( debug ) {
        debug->keyUpdates = 0;
    }

    IndexCatalog::IndexIterator applyIt = _indexCatalog.getIndexIterator( txn, true );
    while ( applyIt.more() ) {
        IndexDescriptor* descriptor = applyIt.next();
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        int64_t updatedKeys;
        Status updateStatus =
            iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
        if ( !updateStatus.isOK() ) {
            return StatusWith<DiskLoc>( updateStatus );
        }
        if ( debug ) {
            debug->keyUpdates += updatedKeys;
        }
    }

    // Broadcast the mutation so that query results stay correct.
    _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

    return newLocation;
}
/**
 * Compacts this collection extent by extent and rebuilds all of its indexes.
 *
 * Refused for capped collections and while any index build is in progress. The specs of the
 * finished indexes are captured first, all indexes are dropped, one IndexBuildBlock per spec is
 * set up, every extent is compacted (feeding the new indexes), and finally bulk builds are
 * committed and each build block is marked successful.
 *
 * @param compactOptions options forwarded to _compactExtent.
 * @return the accumulated CompactStats, or the first error encountered.
 */
StatusWith<CompactStats> Collection::compact( const CompactOptions* compactOptions ) {

    if ( isCapped() ) {
        return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                         "cannot compact capped collection" );
    }

    if ( _indexCatalog.numIndexesInProgress() ) {
        return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                         "cannot compact when indexes in progress" );
    }

    NamespaceDetails* d = details();

    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitIfNeeded();

    // Snapshot the extent chain up front.
    list<DiskLoc> extents;
    DiskLoc extentLoc = d->firstExtent();
    while ( !extentLoc.isNull() ) {
        extents.push_back( extentLoc );
        extentLoc = extentLoc.ext()->xnext;
    }
    log() << "compact " << extents.size() << " extents" << endl;

    // same data, but might perform a little different after compact?
    _infoCache.reset();

    // Remember the spec of every finished index so it can be rebuilt.
    vector<BSONObj> indexSpecs;
    {
        IndexCatalog::IndexIterator specIt( _indexCatalog.getIndexIterator( false ) );
        while ( specIt.more() ) {
            IndexDescriptor* descriptor = specIt.next();
            indexSpecs.push_back( _compactAdjustIndexSpec( descriptor->infoObj() ) );
        }
    }

    log() << "compact orphan deleted lists" << endl;
    d->orphanDeletedList();

    // Start over from scratch with our extent sizing and growth
    d->setLastExtentSize( 0 );

    // before dropping indexes, at least make sure we can allocate one extent!
    if ( allocateSpaceForANewRecord( _ns.ns().c_str(),
                                     d,
                                     Record::HeaderSize+1,
                                     false).isNull() ) {
        return StatusWith<CompactStats>( ErrorCodes::InternalError,
                                         "compact error no space available to allocate" );
    }

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    Status status = _indexCatalog.dropAllIndexes( true );
    if ( !status.isOK() ) {
        return StatusWith<CompactStats>( status );
    }

    getDur().commitIfNeeded();

    CompactStats stats;

    OwnedPointerVector<IndexCatalog::IndexBuildBlock> indexBuildBlocks;
    vector<IndexAccessMethod*> indexesToInsertTo;
    vector< std::pair<IndexAccessMethod*,IndexAccessMethod*> > bulkToCommit;

    for ( size_t specIdx = 0; specIdx < indexSpecs.size(); specIdx++ ) {
        killCurrentOp.checkForInterrupt(false);

        BSONObj info = indexSpecs[specIdx];
        info = _compactAdjustIndexSpec( info );
        info = _indexCatalog.fixIndexSpec( info );

        auto_ptr<IndexCatalog::IndexBuildBlock> block(
            new IndexCatalog::IndexBuildBlock( this,info ) );

        Status blockStatus = block->init();
        if ( !blockStatus.isOK() ) {
            return StatusWith<CompactStats>(blockStatus);
        }

        IndexAccessMethod* accessMethod = block->getEntry()->accessMethod();
        blockStatus = accessMethod->initializeAsEmpty();
        if ( !blockStatus.isOK() ) {
            return StatusWith<CompactStats>(blockStatus);
        }

        // Insert through the bulk builder when the access method offers one, and remember the
        // pair so the bulk can be committed afterwards.
        IndexAccessMethod* bulk = accessMethod->initiateBulk();
        if ( bulk ) {
            indexesToInsertTo.push_back( bulk );
            bulkToCommit.push_back( std::make_pair( accessMethod, bulk ) );
        }
        else {
            indexesToInsertTo.push_back( accessMethod );
        }

        indexBuildBlocks.mutableVector().push_back( block.release() );
    }

    // reset data size and record counts to 0 for this namespace
    // as we're about to tally them up again for each new extent
    d->setStats( 0, 0 );

    ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                    "Extent Compacting Progress",
                                                    extents.size()));

    int extentNumber = 0;
    for ( list<DiskLoc>::iterator extentIt = extents.begin();
          extentIt != extents.end();
          ++extentIt ) {
        _compactExtent(*extentIt, extentNumber++, indexesToInsertTo, compactOptions, &stats );
        pm.hit();
    }

    verify( d->firstExtent().ext()->xprev.isNull() );

    // indexes will do their own progress meter?
    pm.finished();

    log() << "starting index commits";

    for ( size_t bulkIdx = 0; bulkIdx < bulkToCommit.size(); bulkIdx++ ) {
        bulkToCommit[bulkIdx].first->commitBulk( bulkToCommit[bulkIdx].second, false, NULL );
    }

    for ( size_t blockIdx = 0; blockIdx < indexBuildBlocks.size(); blockIdx++ ) {
        indexBuildBlocks.mutableVector()[blockIdx]->success();
    }

    return StatusWith<CompactStats>( stats );
}
/**
 * Repairs database 'dbName' by copying every normal collection (documents and index specs)
 * into a freshly created database under a reserved directory, then swapping the new files in
 * place of the originals.
 *
 * @param txn                          operation context; checked for interruption throughout.
 * @param dbName                       name of the database to repair.
 * @param preserveClonedFilesOnFailure when false, a RepairFileDeleter is armed for the
 *                                     reserved directory until the copy has succeeded.
 * @param backupOriginalFiles          when true, the original files are renamed for backup
 *                                     instead of being deleted.
 * @return Status::OK() on success; OutOfDiskSpace, NamespaceNotFound, or the first error
 *         produced while copying.
 */
Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                     const std::string& dbName,
                                     bool preserveClonedFilesOnFailure,
                                     bool backupOriginalFiles ) {
    unique_ptr<RepairFileDeleter> repairFileDeleter;

    // Must be done before and after repair
    getDur().syncDataAndTruncateJournal(txn);

    intmax_t totalSize = dbSize( dbName );
    intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

    if ( freeSize > -1 && freeSize < totalSize ) {
        return Status( ErrorCodes::OutOfDiskSpace,
                       str::stream() << "Cannot repair database " << dbName
                                     << " having size: " << totalSize
                                     << " (bytes) because free disk space is: " << freeSize
                                     << " (bytes)" );
    }

    txn->checkForInterrupt();

    const bool keepsFiles = preserveClonedFilesOnFailure || backupOriginalFiles;
    Path reservedPath = uniqueReservedPath( keepsFiles ? "backup" : "_tmp" );

    bool created = false;
    MONGO_ASSERT_ON_EXCEPTION( created = boost::filesystem::create_directory( reservedPath ) );
    invariant( created );
    string reservedPathString = reservedPath.string();

    if ( !preserveClonedFilesOnFailure ) {
        repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                        dbName,
                                                        reservedPathString,
                                                        reservedPath ) );
    }

    {
        Database* originalDatabase = dbHolder().openDb(txn, dbName);
        if (originalDatabase == NULL) {
            return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
        }

        unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
        unique_ptr<Database> tempDatabase;

        // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files
        ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn);

        dbEntry.reset(new MMAPV1DatabaseCatalogEntry(txn,
                                                     dbName,
                                                     reservedPathString,
                                                     storageGlobalParams.directoryperdb,
                                                     true));
        tempDatabase.reset( new Database(txn, dbName, dbEntry.get()));

        // Pass 1: collect the namespaces (and their options) that need to be copied.
        map<string,CollectionOptions> namespacesToCopy;
        {
            string systemNamespaces = dbName + ".system.namespaces";
            OldClientContext ctx(txn, systemNamespaces );
            Collection* nsCollection = originalDatabase->getCollection( systemNamespaces );
            if ( nsCollection ) {
                auto cursor = nsCollection->getCursor(txn);
                while (auto record = cursor->next()) {
                    BSONObj obj = record->data.releaseToBson();

                    string ns = obj["name"].String();
                    NamespaceString nss( ns );

                    const bool skippedSystemNs = nss.isSystem() &&
                        ( nss.isSystemDotIndexes() || nss.coll() == "system.namespaces" );
                    if ( skippedSystemNs || !nss.isNormal() ) {
                        continue;
                    }

                    CollectionOptions options;
                    if ( obj["options"].isABSONObj() ) {
                        Status parseStatus = options.parse( obj["options"].Obj() );
                        if ( !parseStatus.isOK() ) {
                            return parseStatus;
                        }
                    }
                    namespacesToCopy[ns] = options;
                }
            }
        }

        // Pass 2: recreate each collection in the temp database and copy its data and indexes.
        for ( const auto& nsAndOptions : namespacesToCopy ) {
            const string& ns = nsAndOptions.first;
            const CollectionOptions& options = nsAndOptions.second;

            Collection* tempCollection = NULL;
            {
                WriteUnitOfWork wunit(txn);
                tempCollection = tempDatabase->createCollection(txn, ns, options, false);
                wunit.commit();
            }

            OldClientContext readContext(txn, ns, originalDatabase);
            Collection* originalCollection = originalDatabase->getCollection( ns );
            invariant( originalCollection );

            // data

            // TODO SERVER-14812 add a mode that drops duplicates rather than failing
            MultiIndexBlock indexer(txn, tempCollection );
            {
                vector<BSONObj> indexes;
                IndexCatalog::IndexIterator indexIt =
                    originalCollection->getIndexCatalog()->getIndexIterator( txn, false );
                while ( indexIt.more() ) {
                    indexes.push_back( indexIt.next()->infoObj() );
                }

                Status initStatus = indexer.init( indexes );
                if (!initStatus.isOK()) {
                    return initStatus;
                }
            }

            auto cursor = originalCollection->getCursor(txn);
            while (auto record = cursor->next()) {
                BSONObj doc = record->data.releaseToBson();

                WriteUnitOfWork wunit(txn);
                StatusWith<RecordId> inserted =
                    tempCollection->insertDocument(txn, doc, &indexer, false);
                if ( !inserted.isOK() ) {
                    return inserted.getStatus();
                }

                wunit.commit();
                txn->checkForInterrupt();
            }

            Status doneStatus = indexer.doneInserting();
            if (!doneStatus.isOK()) {
                return doneStatus;
            }

            {
                WriteUnitOfWork wunit(txn);
                indexer.commit();
                wunit.commit();
            }
        }

        // need both in case journaling is disabled
        getDur().syncDataAndTruncateJournal(txn);
        MongoFile::flushAll(true);

        txn->checkForInterrupt();
    }

    // at this point if we abort, we don't want to delete new files
    // as they might be the only copies
    if ( repairFileDeleter.get() ) {
        repairFileDeleter->success();
    }

    // Close the database so we can rename/delete the original data files
    dbHolder().close(txn, dbName);

    if ( backupOriginalFiles ) {
        _renameForBackup( dbName, reservedPath );
    }
    else {
        // first make new directory before deleting data
        Path newDir = Path(storageGlobalParams.dbpath) / dbName;
        MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

        // this deletes old files
        _deleteDataFiles( dbName );

        if ( !boost::filesystem::exists(newDir) ) {
            // we deleted because of directoryperdb
            // re-create
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
        }
    }

    _replaceWithRecovered( dbName, reservedPathString.c_str() );

    if (!backupOriginalFiles) {
        MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
    }

    // Reopen the database so it's discoverable
    dbHolder().openDb(txn, dbName);

    return Status::OK();
}
/**
 * Compacts this collection extent by extent and rebuilds all of its indexes through a
 * MultiIndexBlock.
 *
 * Refused for capped collections and while any index build is in progress. Every finished
 * index must have a key pattern accepted by validateKeyPattern, otherwise CannotCreateIndex is
 * returned before anything is modified.
 *
 * @param compactOptions options forwarded to _compactExtent.
 * @return the accumulated CompactStats, or the first error encountered.
 */
StatusWith<CompactStats> Collection::compact( const CompactOptions* compactOptions ) {

    if ( isCapped() ) {
        return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                         "cannot compact capped collection" );
    }

    if ( _indexCatalog.numIndexesInProgress() ) {
        return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                         "cannot compact when indexes in progress" );
    }

    NamespaceDetails* d = details();

    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitIfNeeded();

    // Snapshot the extent chain up front.
    list<DiskLoc> extents;
    DiskLoc extentLoc = d->firstExtent();
    while ( !extentLoc.isNull() ) {
        extents.push_back( extentLoc );
        extentLoc = extentLoc.ext()->xnext;
    }
    log() << "compact " << extents.size() << " extents" << endl;

    // same data, but might perform a little different after compact?
    _infoCache.reset();

    // Collect and validate the spec of every finished index.
    vector<BSONObj> indexSpecs;
    {
        IndexCatalog::IndexIterator specIt( _indexCatalog.getIndexIterator( false ) );
        while ( specIt.more() ) {
            IndexDescriptor* descriptor = specIt.next();

            const BSONObj spec = _compactAdjustIndexSpec(descriptor->infoObj());
            const BSONObj key = spec.getObjectField("key");
            const Status keyStatus = validateKeyPattern(key);
            if (!keyStatus.isOK()) {
                return StatusWith<CompactStats>(
                    ErrorCodes::CannotCreateIndex,
                    str::stream() << "Cannot rebuild index " << spec << ": "
                                  << keyStatus.reason()
                                  << " For more info see"
                                  << " http://dochub.mongodb.org/core/index-validation");
            }
            indexSpecs.push_back(spec);
        }
    }

    log() << "compact orphan deleted lists" << endl;
    d->orphanDeletedList();

    // Start over from scratch with our extent sizing and growth
    d->setLastExtentSize( 0 );

    // before dropping indexes, at least make sure we can allocate one extent!
    // this will allocate an extent and add to free list
    // if it cannot, it will throw an exception
    increaseStorageSize( _details->lastExtentSize(), true );

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    Status status = _indexCatalog.dropAllIndexes( true );
    if ( !status.isOK() ) {
        return StatusWith<CompactStats>( status );
    }

    getDur().commitIfNeeded();
    killCurrentOp.checkForInterrupt();

    CompactStats stats;

    MultiIndexBlock multiIndexBlock( this );
    status = multiIndexBlock.init( indexSpecs );
    if ( !status.isOK() ) {
        return StatusWith<CompactStats>( status );
    }

    // reset data size and record counts to 0 for this namespace
    // as we're about to tally them up again for each new extent
    d->setStats( 0, 0 );

    ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                    "Extent Compacting Progress",
                                                    extents.size()));

    int extentNumber = 0;
    for ( list<DiskLoc>::iterator extentIt = extents.begin();
          extentIt != extents.end();
          ++extentIt ) {
        _compactExtent(*extentIt, extentNumber++, multiIndexBlock, compactOptions, &stats );
        pm.hit();
    }

    verify( d->firstExtent().ext()->xprev.isNull() );

    // indexes will do their own progress meter?
    pm.finished();

    log() << "starting index commits";
    status = multiIndexBlock.commit();
    if ( !status.isOK() ) {
        return StatusWith<CompactStats>( status );
    }

    return StatusWith<CompactStats>( stats );
}
/**
 * Compacts 'collection' and its indexes.
 *
 * If the record store compacts in place, the record store and then each finished index are
 * compacted directly. Otherwise the index specs are captured and validated, all indexes are
 * dropped, the record store is compacted, and the indexes are rebuilt with a
 * MultiIndexBlockImpl.
 *
 * @param opCtx          operation context; the collection is expected to be locked in MODE_X
 *                       (checked with dassert).
 * @param collection     the collection to compact.
 * @param compactOptions not read by this function.
 * @return a CompactStats on success, or the first error encountered.
 */
StatusWith<CompactStats> compactCollection(OperationContext* opCtx,
                                           Collection* collection,
                                           const CompactOptions* compactOptions) {
    dassert(opCtx->lockState()->isCollectionLockedForMode(collection->ns().toString(), MODE_X));

    DisableDocumentValidation validationDisabler(opCtx);

    auto recordStore = collection->getRecordStore();
    auto indexCatalog = collection->getIndexCatalog();

    if (!recordStore->compactSupported()) {
        return StatusWith<CompactStats>(ErrorCodes::CommandNotSupported,
                                        str::stream()
                                            << "cannot compact collection with record store: "
                                            << recordStore->name());
    }

    if (recordStore->compactsInPlace()) {
        CompactStats stats;

        Status storeStatus = recordStore->compact(opCtx);
        if (!storeStatus.isOK()) {
            return StatusWith<CompactStats>(storeStatus);
        }

        // Compact all indexes (not including unfinished indexes)
        std::unique_ptr<IndexCatalog::IndexIterator> indexIt(
            indexCatalog->getIndexIterator(opCtx, false));
        while (indexIt->more()) {
            IndexCatalogEntry* entry = indexIt->next();
            IndexDescriptor* descriptor = entry->descriptor();
            IndexAccessMethod* iam = entry->accessMethod();

            LOG(1) << "compacting index: " << descriptor->toString();
            Status indexStatus = iam->compact(opCtx);
            if (!indexStatus.isOK()) {
                error() << "failed to compact index: " << descriptor->toString();
                return indexStatus;
            }
        }

        return StatusWith<CompactStats>(stats);
    }

    if (indexCatalog->numIndexesInProgress(opCtx)) {
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact when indexes in progress");
    }

    // Capture and validate the spec of every finished index before anything is dropped.
    std::vector<BSONObj> indexSpecs;
    {
        std::unique_ptr<IndexCatalog::IndexIterator> indexIt(
            indexCatalog->getIndexIterator(opCtx, false));
        while (indexIt->more()) {
            IndexDescriptor* descriptor = indexIt->next()->descriptor();

            // Compact always creates the new index in the foreground.
            const BSONObj spec =
                descriptor->infoObj().removeField(IndexDescriptor::kBackgroundFieldName);
            const BSONObj key = spec.getObjectField("key");
            const Status keyStatus =
                index_key_validate::validateKeyPattern(key, descriptor->version());
            if (!keyStatus.isOK()) {
                return StatusWith<CompactStats>(
                    ErrorCodes::CannotCreateIndex,
                    str::stream() << "Cannot compact collection due to invalid index " << spec
                                  << ": "
                                  << keyStatus.reason()
                                  << " For more info see"
                                  << " http://dochub.mongodb.org/core/index-validation");
            }
            indexSpecs.push_back(spec);
        }
    }

    // Give a chance to be interrupted *before* we drop all indexes.
    opCtx->checkForInterrupt();

    {
        // note that the drop indexes call also invalidates all clientcursors for the namespace,
        // which is important and wanted here
        WriteUnitOfWork wunit(opCtx);
        log() << "compact dropping indexes";
        indexCatalog->dropAllIndexes(opCtx, true);
        wunit.commit();
    }

    CompactStats stats;

    MultiIndexBlockImpl indexer(opCtx, collection);
    indexer.allowInterruption();
    indexer.ignoreUniqueConstraint();  // in compact we should be doing no checking

    Status status = indexer.init(indexSpecs).getStatus();
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    status = recordStore->compact(opCtx);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    log() << "starting index commits";
    status = indexer.dumpInsertsFromBulk();
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    {
        WriteUnitOfWork wunit(opCtx);
        status = indexer.commit();
        if (!status.isOK()) {
            return StatusWith<CompactStats>(status);
        }
        wunit.commit();
    }

    return StatusWith<CompactStats>(stats);
}
/**
 * Repairs database 'dbName' by copying every normal collection (documents and index specs)
 * into a database created under a reserved directory, then swapping the new files in place of
 * the originals.
 *
 * @param dbName                       database (or namespace; reduced with nsToDatabase) to
 *                                     repair. Must be the current client's database.
 * @param preserveClonedFilesOnFailure when false, a RepairFileDeleter is armed for the
 *                                     reserved directory until the copy has succeeded.
 * @param backupOriginalFiles          when true, the original files are renamed for backup
 *                                     instead of being deleted.
 * @return Status::OK() on success; OutOfDiskSpace, NamespaceNotFound, or the first error
 *         produced while copying.
 */
Status repairDatabase( string dbName,
                       bool preserveClonedFilesOnFailure,
                       bool backupOriginalFiles ) {
    typedef map<string,CollectionOptions> NamespaceOptionsMap;

    scoped_ptr<RepairFileDeleter> repairFileDeleter;
    doingRepair dr;
    dbName = nsToDatabase( dbName );

    log() << "repairDatabase " << dbName << endl;

    invariant( cc().database()->name() == dbName );
    invariant( cc().database()->path() == storageGlobalParams.dbpath );

    BackgroundOperation::assertNoBgOpInProgForDb(dbName);

    getDur().syncDataAndTruncateJournal(); // Must be done before and after repair

    intmax_t totalSize = dbSize( dbName );
    intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

    if ( freeSize > -1 && freeSize < totalSize ) {
        return Status( ErrorCodes::OutOfDiskSpace,
                       str::stream() << "Cannot repair database " << dbName
                                     << " having size: " << totalSize
                                     << " (bytes) because free disk space is: " << freeSize
                                     << " (bytes)" );
    }

    killCurrentOp.checkForInterrupt();

    const bool keepsFiles = preserveClonedFilesOnFailure || backupOriginalFiles;
    Path reservedPath = uniqueReservedPath( keepsFiles ? "backup" : "_tmp" );
    MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
    string reservedPathString = reservedPath.string();

    if ( !preserveClonedFilesOnFailure ) {
        repairFileDeleter.reset( new RepairFileDeleter( dbName,
                                                        reservedPathString,
                                                        reservedPath ) );
    }

    {
        Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath );
        if ( originalDatabase == NULL ) {
            return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" );
        }

        Database* tempDatabase = NULL;
        {
            bool justCreated = false;
            tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated );
            invariant( justCreated );
        }

        // Pass 1: collect the namespaces (and their options) that need to be copied.
        NamespaceOptionsMap namespacesToCopy;
        {
            string systemNamespaces = dbName + ".system.namespaces";
            Client::Context ctx( systemNamespaces );
            Collection* nsCollection = originalDatabase->getCollection( systemNamespaces );
            if ( nsCollection ) {
                scoped_ptr<CollectionIterator> nsIt(
                    nsCollection->getIterator( DiskLoc(),
                                               false,
                                               CollectionScanParams::FORWARD ) );
                while ( !nsIt->isEOF() ) {
                    DiskLoc loc = nsIt->getNext();
                    BSONObj obj = nsCollection->docFor( loc );

                    string ns = obj["name"].String();
                    NamespaceString nss( ns );

                    const bool skippedSystemNs = nss.isSystem() &&
                        ( nss.isSystemDotIndexes() || nss.coll() == "system.namespaces" );
                    if ( skippedSystemNs || !nss.isNormal() ) {
                        continue;
                    }

                    CollectionOptions options;
                    if ( obj["options"].isABSONObj() ) {
                        Status parseStatus = options.parse( obj["options"].Obj() );
                        if ( !parseStatus.isOK() ) {
                            return parseStatus;
                        }
                    }
                    namespacesToCopy[ns] = options;
                }
            }
        }

        // Pass 2: recreate each collection in the temp database and copy its data and indexes.
        for ( NamespaceOptionsMap::const_iterator nsIt = namespacesToCopy.begin();
              nsIt != namespacesToCopy.end();
              ++nsIt ) {
            string ns = nsIt->first;
            CollectionOptions options = nsIt->second;

            Collection* tempCollection = NULL;
            {
                Client::Context tempContext( ns, tempDatabase );
                tempCollection = tempDatabase->createCollection( ns, options, true, false );
            }

            Client::Context readContext( ns, originalDatabase );
            Collection* originalCollection = originalDatabase->getCollection( ns );
            invariant( originalCollection );

            // data

            MultiIndexBlock indexBlock( tempCollection );
            {
                vector<BSONObj> indexes;
                IndexCatalog::IndexIterator indexIt =
                    originalCollection->getIndexCatalog()->getIndexIterator( false );
                while ( indexIt.more() ) {
                    indexes.push_back( indexIt.next()->infoObj() );
                }

                Client::Context tempContext( ns, tempDatabase );
                Status initStatus = indexBlock.init( indexes );
                if ( !initStatus.isOK() ) {
                    return initStatus;
                }
            }

            scoped_ptr<CollectionIterator> docIt(
                originalCollection->getIterator( DiskLoc(),
                                                 false,
                                                 CollectionScanParams::FORWARD ) );
            while ( !docIt->isEOF() ) {
                DiskLoc loc = docIt->getNext();
                invariant( !loc.isNull() );

                BSONObj doc = originalCollection->docFor( loc );

                Client::Context tempContext( ns, tempDatabase );
                StatusWith<DiskLoc> inserted = tempCollection->insertDocument( doc, indexBlock );
                if ( !inserted.isOK() ) {
                    return inserted.getStatus();
                }

                getDur().commitIfNeeded();
                killCurrentOp.checkForInterrupt(false);
            }

            {
                Client::Context tempContext( ns, tempDatabase );
                Status commitStatus = indexBlock.commit();
                if ( !commitStatus.isOK() ) {
                    return commitStatus;
                }
            }
        }

        getDur().syncDataAndTruncateJournal();
        MongoFile::flushAll(true); // need both in case journaling is disabled

        killCurrentOp.checkForInterrupt(false);

        Client::Context tempContext( dbName, reservedPathString );
        Database::closeDatabase( dbName, reservedPathString );
    }

    // at this point if we abort, we don't want to delete new files
    // as they might be the only copies
    if ( repairFileDeleter.get() ) {
        repairFileDeleter->success();
    }

    Client::Context ctx( dbName );
    Database::closeDatabase(dbName, storageGlobalParams.dbpath);

    if ( backupOriginalFiles ) {
        _renameForBackup( dbName, reservedPath );
    }
    else {
        // first make new directory before deleting data
        Path newDir = Path(storageGlobalParams.dbpath) / dbName;
        MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

        // this deletes old files
        _deleteDataFiles( dbName );

        if ( !boost::filesystem::exists(newDir) ) {
            // we deleted because of directoryperdb
            // re-create
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
        }
    }

    _replaceWithRecovered( dbName, reservedPathString.c_str() );

    if ( !backupOriginalFiles ) {
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );
    }

    return Status::OK();
}
/**
 * Compacts this collection and its indexes.
 *
 * If the record store compacts in place, the record store and then each finished index are
 * compacted directly. Otherwise the index specs are captured and validated, all indexes are
 * dropped, the record store is compacted through a MyCompactAdaptor that feeds a
 * MultiIndexBlock, and the rebuilt indexes are committed.
 *
 * @param txn            operation context; the collection is expected to be locked in MODE_X
 *                       (checked with dassert).
 * @param compactOptions options forwarded to the record store.
 * @return the CompactStats filled in by the record store, or the first error encountered.
 */
StatusWith<CompactStats> Collection::compact(OperationContext* txn,
                                             const CompactOptions* compactOptions) {
    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_X));

    DisableDocumentValidation validationDisabler(txn);

    if (!_recordStore->compactSupported()) {
        return StatusWith<CompactStats>(ErrorCodes::CommandNotSupported,
                                        str::stream()
                                            << "cannot compact collection with record store: "
                                            << _recordStore->name());
    }

    if (_recordStore->compactsInPlace()) {
        CompactStats stats;

        Status storeStatus = _recordStore->compact(txn, NULL, compactOptions, &stats);
        if (!storeStatus.isOK()) {
            return StatusWith<CompactStats>(storeStatus);
        }

        // Compact all indexes (not including unfinished indexes)
        IndexCatalog::IndexIterator indexIt(_indexCatalog.getIndexIterator(txn, false));
        while (indexIt.more()) {
            IndexDescriptor* descriptor = indexIt.next();
            IndexAccessMethod* index = _indexCatalog.getIndex(descriptor);

            LOG(1) << "compacting index: " << descriptor->toString();
            Status indexStatus = index->compact(txn);
            if (!indexStatus.isOK()) {
                error() << "failed to compact index: " << descriptor->toString();
                return indexStatus;
            }
        }

        return StatusWith<CompactStats>(stats);
    }

    if (_indexCatalog.numIndexesInProgress(txn)) {
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact when indexes in progress");
    }

    // Capture and validate the spec of every finished index before anything is dropped.
    vector<BSONObj> indexSpecs;
    {
        IndexCatalog::IndexIterator indexIt(_indexCatalog.getIndexIterator(txn, false));
        while (indexIt.more()) {
            IndexDescriptor* descriptor = indexIt.next();

            const BSONObj spec = _compactAdjustIndexSpec(descriptor->infoObj());
            const BSONObj key = spec.getObjectField("key");
            const Status keyStatus = validateKeyPattern(key);
            if (!keyStatus.isOK()) {
                return StatusWith<CompactStats>(
                    ErrorCodes::CannotCreateIndex,
                    str::stream() << "Cannot compact collection due to invalid index " << spec
                                  << ": "
                                  << keyStatus.reason()
                                  << " For more info see"
                                  << " http://dochub.mongodb.org/core/index-validation");
            }
            indexSpecs.push_back(spec);
        }
    }

    // Give a chance to be interrupted *before* we drop all indexes.
    txn->checkForInterrupt();

    {
        // note that the drop indexes call also invalidates all clientcursors for the namespace,
        // which is important and wanted here
        WriteUnitOfWork wunit(txn);
        log() << "compact dropping indexes";
        Status dropStatus = _indexCatalog.dropAllIndexes(txn, true);
        if (!dropStatus.isOK()) {
            return StatusWith<CompactStats>(dropStatus);
        }
        wunit.commit();
    }

    CompactStats stats;

    MultiIndexBlock indexer(txn, this);
    indexer.allowInterruption();
    indexer.ignoreUniqueConstraint();  // in compact we should be doing no checking

    Status status = indexer.init(indexSpecs);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    MyCompactAdaptor adaptor(this, &indexer);

    status = _recordStore->compact(txn, &adaptor, compactOptions, &stats);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    log() << "starting index commits";
    status = indexer.doneInserting();
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    {
        WriteUnitOfWork wunit(txn);
        indexer.commit();
        wunit.commit();
    }

    return StatusWith<CompactStats>(stats);
}
/**
 * Produces a Runner for the given query and stores it in '*out'.
 *
 * Depending on what is available the runner is one of:
 *  - CachedPlanRunner     when the plan cache holds a solution for the query,
 *  - EOFRunner            when the target collection does not exist,
 *  - IDHackRunner         when the query qualifies for the _id fast path,
 *  - SingleSolutionRunner when the planner yields exactly one solution,
 *  - MultiPlanRunner      when the planner yields several solutions.
 *
 * This function takes ownership of 'rawCanonicalQuery': it is either handed to the created
 * runner or destroyed when an error status is returned.
 *
 * @param rawCanonicalQuery  the parsed query; must not be NULL.
 * @param out                receives the newly allocated runner on success.
 * @param plannerOptions     initial QueryPlannerParams option bits.
 * @return Status::OK() on success, BadValue otherwise.
 */
Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions) {
    verify(rawCanonicalQuery);
    auto_ptr<CanonicalQuery> cq(rawCanonicalQuery);

    // First choice: a solution remembered by the plan cache.
    // TODO: Can the cache have negative data about a solution?
    PlanCache* planCache = PlanCache::get(cq->ns());
    CachedSolution* cached = (NULL != planCache) ? planCache->get(*cq) : NULL;
    if (NULL != cached) {
        // The cached plan runner takes ownership of both the canonical query and the cached
        // solution.
        WorkingSet* cachedWs;
        PlanStage* cachedRoot;
        verify(StageBuilder::build(*cached->solution, &cachedRoot, &cachedWs));
        *out = new CachedPlanRunner(cq.release(), cached, cachedRoot, cachedWs);
        return Status::OK();
    }

    // Nothing cached, so the query has to be planned from scratch.
    Database* db = cc().database();
    verify( db );
    Collection* collection = db->getCollection( cq->ns() );

    // Internal callers may ask about a collection that does not exist.
    if (NULL == collection) {
        const string& ns = cq->ns();
        *out = new EOFRunner(cq.release(), ns);
        return Status::OK();
    }

    // The idhack runner applies when the query qualifies and an _id index exists.
    if (canUseIDHack(*cq) && collection->getIndexCatalog()->findIdIndex()) {
        *out = new IDHackRunner(collection, cq.release());
        return Status::OK();
    }

    // Describe every ready index to the planner.
    QueryPlannerParams plannerParams;
    for (int idx = 0; idx < collection->getIndexCatalog()->numIndexesReady(); ++idx) {
        IndexDescriptor* desc = collection->getIndexCatalog()->getDescriptor( idx );
        plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                   desc->isMultikey(),
                                                   desc->isSparse(),
                                                   desc->indexName()));
    }

    // Tailable cursors need a capped collection and, if a sort is given, {$natural: 1}.
    if (cq->getParsed().hasOption(QueryOption_CursorTailable)) {
        if (!collection->isCapped()) {
            return Status(ErrorCodes::BadValue,
                          "tailable cursor requested on non capped collection");
        }

        const BSONObj expectedSort = BSON("$natural" << 1);
        const BSONObj& actualSort = cq->getParsed().getSort();
        if (!actualSort.isEmpty() && !(actualSort == expectedSort)) {
            return Status(ErrorCodes::BadValue,
                          "invalid sort specified for tailable cursor: "
                          + actualSort.toString());
        }
    }

    // Translate the caller's options and the global settings into planner options.
    plannerParams.options = plannerOptions;

    if (storageGlobalParams.noTableScan) {
        const string& ns = cq->ns();
        // The restriction is waived for empty queries, system namespaces and the local db.
        const bool exempt = cq->getQueryObj().isEmpty()
                            || (string::npos != ns.find(".system."))
                            || (0 == ns.find("local."));
        if (!exempt) {
            plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN;
        }
    }

    if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) {
        plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN;
    }

    // A shard filter is only meaningful when sharding metadata exists for the namespace.
    if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
        CollectionMetadataPtr collMetadata = shardingState.getCollectionMetadata(cq->ns());
        if (collMetadata) {
            plannerParams.shardKey = collMetadata->getKeyPattern();
        }
        else {
            // Without metadata the key pattern is unknown, so the filter is dropped.
            plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
        }
    }

    vector<QuerySolution*> solutions;
    QueryPlanner::plan(*cq, plannerParams, &solutions);

    // The planner could not come up with anything. Should this ever happen?
    if (0 == solutions.size()) {
        return Status(ErrorCodes::BadValue, "No query solutions");
    }

    if (1 == solutions.size()) {
        // A single candidate: build its stage tree and run it directly.
        WorkingSet* soleWs;
        PlanStage* soleRoot;
        verify(StageBuilder::build(*solutions[0], &soleRoot, &soleWs));

        *out = new SingleSolutionRunner(cq.release(), solutions[0], soleRoot, soleWs);
        return Status::OK();
    }

    // Several candidates: the MultiPlanRunner picks the best, updates the cache, and so on.
    auto_ptr<MultiPlanRunner> multiPlan(new MultiPlanRunner(cq.release()));
    for (size_t s = 0; s < solutions.size(); ++s) {
        WorkingSet* candidateWs;
        PlanStage* candidateRoot;
        verify(StageBuilder::build(*solutions[s], &candidateRoot, &candidateWs));
        // addPlan takes ownership of all three arguments.
        multiPlan->addPlan(solutions[s], candidateRoot, candidateWs);
    }
    *out = multiPlan.release();
    return Status::OK();
}
/**
 * Replaces the document stored at 'oldLocation' with 'objNew'.
 *
 * The update is first validated against every index. If the new document no longer fits in
 * the existing record it is moved: the old record is unindexed, the new document is inserted
 * elsewhere and the old record is deallocated (or re-indexed if the insert fails). Otherwise
 * the indexes are updated and the document bytes are overwritten in place.
 *
 * @param oldLocation   location of the document being replaced.
 * @param objNew        the replacement document; its _id must equal the old one when the
 *                      old document has an _id.
 * @param enforceQuota  forwarded to insertDocument() when the document has to move.
 * @param debug         optional; receives nmoved / keyUpdates counters when non-NULL.
 * @return the location of the document after the update, or an error status.
 */
StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                const BSONObj& objNew,
                                                bool enforceQuota,
                                                OpDebug* debug ) {

    Record* oldRecord = getExtentManager()->recordFor( oldLocation );
    BSONObj objOld = BSONObj::make( oldRecord );

    // The _id of an existing document must never change.
    if ( objOld.hasElement( "_id" ) && objOld["_id"] != objNew["_id"] ) {
        return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                    "in Collection::updateDocument _id mismatch",
                                    13596 );
    }

    if ( ns().coll() == "system.users" ) {
        // XXX - andy and spencer think this should go away now
        V2UserDocumentParser userDocParser;
        Status userDocStatus = userDocParser.checkValidUserDocument(objNew);
        if ( !userDocStatus.isOK() ) {
            return StatusWith<DiskLoc>( userDocStatus );
        }
    }

    // Duplicate key check. The btree is descended twice: once here for validation and once
    // more for the actual index writes further down. That is suboptimal, but avoiding it
    // without rollbacks is complicated.
    OwnedPointerVector<UpdateTicket> updateTickets;
    updateTickets.mutableVector().resize(_indexCatalog.numIndexesTotal());
    for (int idx = 0; idx < _indexCatalog.numIndexesTotal(); ++idx) {
        IndexDescriptor* desc = _indexCatalog.getDescriptor( idx );
        IndexAccessMethod* accessMethod = _indexCatalog.getIndex( desc );

        InsertDeleteOptions options;
        options.logIfError = false;
        const bool enforcesUniqueness =
            KeyPattern::isIdKeyPattern(desc->keyPattern()) || desc->unique();
        options.dupsAllowed = !enforcesUniqueness || ignoreUniqueIndex(desc);

        // Store the ticket in the owning vector before use so it is released on any return.
        updateTickets.mutableVector()[idx] = new UpdateTicket();
        Status validateStatus = accessMethod->validateUpdate(objOld,
                                                             objNew,
                                                             oldLocation,
                                                             options,
                                                             updateTickets.mutableVector()[idx]);
        if ( !validateStatus.isOK() ) {
            return StatusWith<DiskLoc>( validateStatus );
        }
    }

    const bool needsMove = oldRecord->netLength() < objNew.objsize();
    if ( needsMove ) {
        // The new document does not fit, so it has to live at a new location.

        if ( _details->isCapped() ) {
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "failing update: objects in a capped ns cannot grow",
                                        10003 );
        }

        moveCounter.increment();
        _details->paddingTooSmall();

        // Unindex the old record without deleting it: should the insert of the new document
        // fail, the old record can simply be indexed again.
        ClientCursor::aboutToDelete(_ns.ns(), _details, oldLocation);
        _indexCatalog.unindexRecord( objOld, oldLocation, true );

        if ( debug ) {
            // nmoved defaults to -1 rather than 0.
            debug->nmoved = ( debug->nmoved == -1 ) ? 1 : debug->nmoved + 1;
        }

        StatusWith<DiskLoc> movedTo = insertDocument( objNew, enforceQuota );

        if ( movedTo.isOK() ) {
            // Insert succeeded; the old record is already unindexed, so only free it.
            _recordStore.deallocRecord( oldLocation, oldRecord );
        }
        else {
            // Insert failed; restore the index entries of the old document.
            _indexCatalog.indexRecord( objOld, oldLocation );
        }

        return movedTo;
    }

    _infoCache.notifyOfWriteOp();
    _details->paddingFits();

    if ( debug ) {
        debug->keyUpdates = 0;
    }

    // Apply the index changes prepared by the validation pass above.
    for (int idx = 0; idx < _indexCatalog.numIndexesTotal(); ++idx) {
        IndexDescriptor* desc = _indexCatalog.getDescriptor( idx );
        IndexAccessMethod* accessMethod = _indexCatalog.getIndex( desc );

        int64_t updatedKeys;
        Status updateStatus = accessMethod->update(*updateTickets.vector()[idx], &updatedKeys);
        if ( !updateStatus.isOK() ) {
            return StatusWith<DiskLoc>( updateStatus );
        }
        if ( debug ) {
            debug->keyUpdates += updatedKeys;
        }
    }

    // Overwrite the document bytes in place.
    int newSize = objNew.objsize();
    memcpy(getDur().writingPtr(oldRecord->data(), newSize), objNew.objdata(), newSize);
    return StatusWith<DiskLoc>( oldLocation );
}
bool wrappedRun(OperationContext* txn, const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder) { const std::string coll = jsobj.firstElement().valuestrsafe(); if (coll.empty()) { errmsg = "no collection name specified"; return false; } const std::string toDeleteNs = dbname + '.' + coll; if (!serverGlobalParams.quiet) { LOG(0) << "CMD: dropIndexes " << toDeleteNs << endl; } Client::Context ctx(txn, toDeleteNs); Database* db = ctx.db(); Collection* collection = db->getCollection( txn, toDeleteNs ); if ( ! collection ) { errmsg = "ns not found"; return false; } stopIndexBuilds(txn, db, jsobj); IndexCatalog* indexCatalog = collection->getIndexCatalog(); anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal(txn) ); BSONElement f = jsobj.getField("index"); if ( f.type() == String ) { string indexToDelete = f.valuestr(); if ( indexToDelete == "*" ) { Status s = indexCatalog->dropAllIndexes(txn, false); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } anObjBuilder.append("msg", "non-_id indexes dropped for collection"); return true; } IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( txn, indexToDelete ); if ( desc == NULL ) { errmsg = str::stream() << "index not found with name [" << indexToDelete << "]"; return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex(txn, desc); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return true; } if ( f.type() == Object ) { IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByKeyPattern( txn, f.embeddedObject() ); if ( desc == NULL ) { errmsg = "can't find index with key:"; errmsg += f.embeddedObject().toString(); return false; } if ( desc->isIdIndex() ) { errmsg = "cannot drop _id index"; return false; } Status s = indexCatalog->dropIndex(txn, desc); if ( !s.isOK() ) { appendCommandStatus( anObjBuilder, s ); return false; } return 
true; } errmsg = "invalid index name spec"; return false; }
/**
 * Creates the cursor that executes this plan.
 *
 * Selection order:
 *  1. special index type (_type set)       -> EmulatedCursor
 *  2. plan utility is Impossible           -> empty BasicCursor
 *  3. plan scans the table                 -> table scan cursor (after permission check)
 *  4. explicit start/end keys              -> BtreeCursor over [_startKey, _endKey]
 *  5. index spec has a type                -> BtreeCursor over the field range bounds
 *  6. interval cursor requested + possible -> IntervalBtreeCursor
 *  7. otherwise                            -> BtreeCursor driven by the field range vector
 *
 * @param startLoc               start location; only supported for table scans (asserted
 *                               for indexed plans).
 * @param requestIntervalCursor  whether an IntervalBtreeCursor may be returned.
 * @return the newly created cursor.
 */
shared_ptr<Cursor> QueryPlan::newCursor( const DiskLoc& startLoc,
                                         bool requestIntervalCursor ) const {

    if ( _type ) {
        // Hopefully safe to use the original query in these contexts - type is not believed
        // to be mixed with $or clause separation yet.
        // SERVER-5390
        const int numWanted =
            _parsedQuery ? _parsedQuery->getSkip() + _parsedQuery->getNumToReturn() : 0;

        IndexDescriptor* descriptor = CatalogHack::getDescriptor(_d, _idxNo);
        IndexAccessMethod* accessMethod = CatalogHack::getIndex(descriptor);
        return shared_ptr<Cursor>(EmulatedCursor::make(descriptor,
                                                       accessMethod,
                                                       _originalQuery,
                                                       _order,
                                                       numWanted,
                                                       descriptor->keyPattern()));
    }

    if ( _utility == Impossible ) {
        // Dummy table scan cursor that yields nothing. Allowed in --notablescan mode.
        return shared_ptr<Cursor>( new BasicCursor( DiskLoc() ) );
    }

    if ( willScanTable() ) {
        checkTableScanAllowed();
        return findTableScan( _frs.ns(), _order, startLoc );
    }

    massert( 10363,
             "newCursor() with start location not implemented for indexed plans",
             startLoc.isNull() );

    // Every btree cursor below takes the scan direction normalised to +1 / -1.
    const int scanDirection = ( _direction >= 0 ) ? 1 : -1;

    if ( _startOrEndSpec ) {
        // _endKeyInclusive is sure to have been specified here.
        return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                      *_index,
                                                      _startKey,
                                                      _endKey,
                                                      _endKeyInclusive,
                                                      scanDirection ) );
    }

    if ( _index->getSpec().getType() ) {
        return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                      *_index,
                                                      _frv->startKey(),
                                                      _frv->endKey(),
                                                      true,
                                                      scanDirection ) );
    }

    // An IntervalBtreeCursor is used only when it was requested, equalities precede ranges
    // (a requirement of Optimal), and the field range vector is exactly one btree interval.
    if ( requestIntervalCursor && _utility == Optimal && _frv->isSingleInterval() ) {
        shared_ptr<Cursor> intervalCursor( IntervalBtreeCursor::make( _d,
                                                                      *_index,
                                                                      _frv->startKey(),
                                                                      _frv->startKeyInclusive(),
                                                                      _frv->endKey(),
                                                                      _frv->endKeyInclusive() ) );
        // make() may decline to build the cursor; fall through to the general case then.
        if ( intervalCursor ) {
            return intervalCursor;
        }
    }

    return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                  *_index,
                                                  _frv,
                                                  independentRangesSingleIntervalLimit(),
                                                  scanDirection ) );
}