// Returns the Database object for 'ns', creating and registering it on first use.
// Caller must hold the database X-lock. Sets *justCreated (if non-null) to true
// only when the storage engine's catalog entry did not previously exist.
Database* DatabaseHolderImpl::openDb(OperationContext* opCtx, StringData ns, bool* justCreated) {
    const StringData dbname = _todb(ns);
    invariant(opCtx->lockState()->isDbLockedForMode(dbname, MODE_X));

    if (justCreated)
        *justCreated = false;  // Until proven otherwise.

    stdx::unique_lock<SimpleMutex> lk(_m);

    // The following will insert a nullptr for dbname, which will treated the same as a non-
    // existant database by the get method, yet still counts in getNamesWithConflictingCasing.
    if (auto db = _dbs[dbname])
        return db;

    // We've inserted a nullptr entry for dbname: make sure to remove it on unsuccessful exit.
    // The guard may run with or without _m held, hence the owns_lock() check before lock().
    auto removeDbGuard = makeGuard([this, &lk, dbname] {
        if (!lk.owns_lock())
            lk.lock();
        _dbs.erase(dbname);
    });

    // Check casing in lock to avoid transient duplicates.
    auto duplicates = _getNamesWithConflictingCasing_inlock(dbname);
    uassert(ErrorCodes::DatabaseDifferCase,
            str::stream() << "db already exists with different case already have: ["
                          << *duplicates.cbegin() << "] trying to create [" << dbname.toString()
                          << "]",
            duplicates.empty());

    // Do the catalog lookup and database creation outside of the scoped lock, because these may
    // block. Only one thread can be inside this method for the same DB name, because of the
    // requirement for X-lock on the database when we enter. So there is no way we can insert two
    // different databases for the same name.
    lk.unlock();
    StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
    DatabaseCatalogEntry* entry = storageEngine->getDatabaseCatalogEntry(opCtx, dbname);

    if (!entry->exists()) {
        // Audit the creation before init; entry->exists() was false so this is a new database.
        audit::logCreateDatabase(opCtx->getClient(), dbname);
        if (justCreated)
            *justCreated = true;
    }

    auto newDb = stdx::make_unique<DatabaseImpl>(dbname, entry, ++_epoch);
    newDb->init(opCtx);

    // Finally replace our nullptr entry with the new Database pointer.
    removeDbGuard.dismiss();
    lk.lock();
    auto it = _dbs.find(dbname);
    invariant(it != _dbs.end() && it->second == nullptr);
    it->second = newDb.release();
    invariant(_getNamesWithConflictingCasing_inlock(dbname.toString()).empty());

    return it->second;
}
/**
 * Creates a collection in the catalog and in the KVEngine, returning the storage
 * engine's `ident` for the new collection, or the error from catalog creation.
 */
StatusWith<std::string> createCollection(OperationContext* opCtx, NamespaceString ns) {
    // Take the database X-lock for the duration of the catalog mutation.
    AutoGetDb autoDb(opCtx, ns.db(), LockMode::MODE_X);

    DatabaseCatalogEntry* catalogEntry = _storageEngine->getDatabaseCatalogEntry(opCtx, ns.db());
    auto createStatus = catalogEntry->createCollection(opCtx, ns.ns(), CollectionOptions(), false);
    if (!createStatus.isOK())
        return createStatus;

    return _storageEngine->getCatalog()->getCollectionIdent(ns.ns());
}
// Returns the Database for 'ns', creating and registering it if it is not yet
// open. Requires the database X-lock; *justCreated (if non-null) reports whether
// the catalog entry had to be created.
Database* DatabaseHolder::openDb(OperationContext* txn, StringData ns, bool* justCreated) {
    const StringData dbname = _todb(ns);
    invariant(txn->lockState()->isDbLockedForMode(dbname, MODE_X));

    // Fast path: the database is already registered in the holder.
    if (Database* existing = get(txn, ns)) {
        if (justCreated) {
            *justCreated = false;
        }
        return existing;
    }

    // Reject a name that differs only in case from an already-existing database.
    const string clashing = Database::duplicateUncasedName(dbname.toString());
    if (!clashing.empty()) {
        stringstream message;
        message << "db already exists with different case already have: [" << clashing
                << "] trying to create [" << dbname.toString() << "]";
        uasserted(DatabaseDifferCaseCode, message.str());
    }

    StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
    invariant(storageEngine);

    DatabaseCatalogEntry* entry = storageEngine->getDatabaseCatalogEntry(txn, dbname);
    invariant(entry);

    const bool wasExisting = entry->exists();
    if (!wasExisting) {
        audit::logCreateDatabase(currentClient.get(), dbname);
    }
    if (justCreated) {
        *justCreated = !wasExisting;
    }

    // Do this outside of the scoped lock, because database creation does transactional
    // operations which may block. Only one thread can be inside this method for the same DB
    // name, because of the requirement for X-lock on the database when we enter. So there is
    // no way we can insert two different databases for the same name.
    Database* freshDb = new Database(txn, dbname, entry);

    SimpleMutex::scoped_lock lk(_m);
    _dbs[dbname] = freshDb;
    return freshDb;
}
// Returns the Database for 'ns', opening it on first access and setting
// 'justCreated' to whether the catalog entry had to be created. Requires at
// least a read lock; actually opening a database requires the write lock
// (enforced by the massert below).
Database* DatabaseHolder::getOrCreate(OperationContext* txn, const string& ns, bool& justCreated) {
    const string dbname = _todb( ns );
    invariant(txn->lockState()->isAtLeastReadLocked(dbname));

    // Refuse to proceed under a write lock if file allocation has already failed
    // (out of disk space), to avoid making things worse.
    if (txn->lockState()->isWriteLocked() && FileAllocator::get()->hasFailed()) {
        uassert(17507, "Can't take a write lock while out of disk space", false);
    }

    {
        SimpleMutex::scoped_lock lk(_m);
        {
            // Fast path: already open.
            DBs::iterator i = _dbs.find(dbname);
            if( i != _dbs.end() ) {
                justCreated = false;
                return i->second;
            }
        }

        // todo: protect against getting sprayed with requests for different db names that DNE -
        //       that would make the DBs map very large.  not clear what to do to handle though,
        //       perhaps just log it, which is what we do here with the "> 40" :
        bool cant = !txn->lockState()->isWriteLocked(ns);
        if( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ||
            _dbs.size() > 40 || cant || DEBUG_BUILD ) {
            log() << "opening db: " << dbname;
        }

        massert(15927, "can't open database in a read lock. if db was just closed, consider retrying the query. might otherwise indicate an internal error", !cant);
    }

    // we mark our thread as having done writes now as we do not want any exceptions
    // once we start creating a new database
    cc().writeHappened();

    // this locks _m for defensive checks, so we don't want to be locked right here :
    StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
    invariant(storageEngine);

    DatabaseCatalogEntry* entry = storageEngine->getDatabaseCatalogEntry( txn, dbname );
    invariant( entry );

    justCreated = !entry->exists();

    Database *db = new Database(txn, dbname, entry );

    // Re-acquire _m only for the map insertion; construction above may block.
    {
        SimpleMutex::scoped_lock lk(_m);
        _dbs[dbname] = db;
    }

    return db;
}
// Exercises the drop-and-rebuild path for index idents: after the `foo` index
// table is dropped out from under the catalog, rebuildIndexesOnCollection must
// recreate the ident in the KVEngine and mark the index ready again.
TEST_F(KVStorageEngineTest, RecreateIndexes) {
    // NOTE(review): raw `new` hands the mock to the global setter — presumably the
    // registry takes ownership; confirm against setGlobalReplicationCoordinator.
    repl::setGlobalReplicationCoordinator(
        new repl::ReplicationCoordinatorMock(getGlobalServiceContext(), repl::ReplSettings()));

    auto opCtx = cc().makeOperationContext();

    // Create two indexes for `db.coll1` in the catalog named `foo` and `bar`. Verify the indexes
    // appear as idents in the KVEngine.
    ASSERT_OK(createCollection(opCtx.get(), NamespaceString("db.coll1")).getStatus());
    ASSERT_OK(createIndex(opCtx.get(), NamespaceString("db.coll1"), "foo"));
    ASSERT_OK(createIndex(opCtx.get(), NamespaceString("db.coll1"), "bar"));
    auto kvIdents = getAllKVEngineIdents(opCtx.get());
    // Index idents are identified by their "index-" prefix.
    ASSERT_EQUALS(2, std::count_if(kvIdents.begin(), kvIdents.end(), [](const std::string& str) {
                      return str.find("index-") == 0;
                  }));

    // Use the `getIndexNameObjs` to find the `foo` index in the IndexCatalog.
    DatabaseCatalogEntry* dbce = _storageEngine->getDatabaseCatalogEntry(opCtx.get(), "db");
    CollectionCatalogEntry* cce = dbce->getCollectionCatalogEntry("db.coll1");
    auto swIndexNameObjs = getIndexNameObjs(
        opCtx.get(), dbce, cce, [](const std::string& indexName) { return indexName == "foo"; });
    ASSERT_OK(swIndexNameObjs.getStatus());
    auto& indexNameObjs = swIndexNameObjs.getValue();

    // There's one index that matched the name `foo`.
    ASSERT_EQUALS(static_cast<const unsigned long>(1), indexNameObjs.first.size());
    // Assert the parallel vectors have matching sizes.
    ASSERT_EQUALS(static_cast<const unsigned long>(1), indexNameObjs.second.size());
    // The index that matched should be named `foo`, with the spec written by createIndex.
    ASSERT_EQUALS("foo", indexNameObjs.first[0]);
    ASSERT_EQUALS("db.coll1"_sd, indexNameObjs.second[0].getStringField("ns"));
    ASSERT_EQUALS("foo"_sd, indexNameObjs.second[0].getStringField("name"));
    ASSERT_EQUALS(2, indexNameObjs.second[0].getIntField("v"));
    ASSERT_EQUALS(1, indexNameObjs.second[0].getObjectField("key").getIntField("foo"));

    // Drop the `foo` index table. Count one remaining index ident according to the KVEngine.
    ASSERT_OK(dropIndexTable(opCtx.get(), NamespaceString("db.coll1"), "foo"));
    kvIdents = getAllKVEngineIdents(opCtx.get());
    ASSERT_EQUALS(1, std::count_if(kvIdents.begin(), kvIdents.end(), [](const std::string& str) {
                      return str.find("index-") == 0;
                  }));

    AutoGetCollection coll(opCtx.get(), NamespaceString("db.coll1"), LockMode::MODE_X);
    // Find the `foo` index in the catalog. Rebuild it. Count two indexes in the KVEngine.
    ASSERT_OK(rebuildIndexesOnCollection(opCtx.get(), dbce, cce, indexNameObjs));
    ASSERT_TRUE(cce->isIndexReady(opCtx.get(), "foo"));
    kvIdents = getAllKVEngineIdents(opCtx.get());
    ASSERT_EQUALS(2, std::count_if(kvIdents.begin(), kvIdents.end(), [](const std::string& str) {
                      return str.find("index-") == 0;
                  }));
}
// Returns the Database for 'ns', opening it if not already registered, and sets
// 'justCreated' to whether the catalog entry had to be created. Requires at
// least a read lock; actually opening requires the write lock (massert below).
Database* DatabaseHolder::getOrCreate(OperationContext* txn,
                                      const StringData& ns,
                                      bool& justCreated) {
    const StringData dbname = _todb(ns);
    invariant(txn->lockState()->isAtLeastReadLocked(dbname));

    // Fast path: the database is already open.
    if (Database* existing = get(txn, ns)) {
        justCreated = false;
        return existing;
    }

    // todo: protect against getting sprayed with requests for different db names that DNE -
    //       that would make the DBs map very large.  not clear what to do to handle though,
    //       perhaps just log it, which is what we do here with the "> 40" :
    const bool lacksWriteLock = !txn->lockState()->isWriteLocked(ns);
    const bool shouldLogOpen =
        logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) || _dbs.size() > 40 ||
        lacksWriteLock || DEBUG_BUILD;
    if (shouldLogOpen) {
        log() << "opening db: " << dbname;
    }

    massert(15927, "can't open database in a read lock. if db was just closed, consider retrying the query. might otherwise indicate an internal error", !lacksWriteLock);

    // Reject a name that differs only in case from an already-existing database.
    const string clashing = Database::duplicateUncasedName(dbname.toString());
    if (!clashing.empty()) {
        stringstream message;
        message << "db already exists with different case already have: [" << clashing
                << "] trying to create [" << dbname.toString() << "]";
        uasserted(DatabaseDifferCaseCode, message.str());
    }

    StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
    invariant(storageEngine);

    DatabaseCatalogEntry* entry = storageEngine->getDatabaseCatalogEntry(txn, dbname);
    invariant(entry);

    justCreated = !entry->exists();

    Database* freshDb = new Database(dbname, entry);

    // Hold _m only for the map insertion itself.
    {
        SimpleMutex::scoped_lock lk(_m);
        _dbs[dbname] = freshDb;
    }
    return freshDb;
}
/**
 * Creates an index whose spec is `{key: {<key>: 1}, name: <key>, ns: <collNs>, v: 2}`
 * directly in the collection catalog entry, then immediately marks the build
 * successful. Returns the status from preparing the index build.
 */
Status createIndex(OperationContext* opCtx, NamespaceString collNs, std::string key) {
    // No live Collection is needed for this catalog-only path.
    Collection* coll = nullptr;

    // Build the index spec: {key: {<key>: 1}, name: <key>, ns: ..., v: 2}.
    BSONObjBuilder specBuilder;
    {
        BSONObjBuilder keyPattern;
        specBuilder.append("key", keyPattern.append(key, 1).done());
    }
    BSONObj spec = specBuilder.append("name", key).append("ns", collNs.ns()).append("v", 2).done();

    auto descriptor =
        stdx::make_unique<IndexDescriptor>(coll, IndexNames::findPluginName(spec), spec);

    DatabaseCatalogEntry* dbEntry = _storageEngine->getDatabaseCatalogEntry(opCtx, collNs.db());
    CollectionCatalogEntry* collEntry = dbEntry->getCollectionCatalogEntry(collNs.ns());

    auto prepareStatus = collEntry->prepareForIndexBuild(opCtx, descriptor.get());
    if (!prepareStatus.isOK())
        return prepareStatus;

    collEntry->indexBuildSuccess(opCtx, key);
    return Status::OK();
}
// Repairs every collection in 'dbName' via the storage engine's record-store
// repair, then rebuilds its indexes. The database is closed for the duration and
// reopened on all exit paths via ON_BLOCK_EXIT; a failure while reopening is
// fatal (std::terminate). 'preserveClonedFilesOnFailure' and
// 'backupOriginalFiles' are only honored by MMAPv1 and rejected otherwise.
Status repairDatabase(OperationContext* txn,
                      StorageEngine* engine,
                      const std::string& dbName,
                      bool preserveClonedFilesOnFailure,
                      bool backupOriginalFiles) {
    DisableDocumentValidation validationDisabler(txn);

    // We must hold some form of lock here
    invariant(txn->lockState()->isLocked());
    invariant(dbName.find('.') == string::npos);

    log() << "repairDatabase " << dbName << endl;

    BackgroundOperation::assertNoBgOpInProgForDb(dbName);

    txn->checkForInterrupt();

    if (engine->isMmapV1()) {
        // MMAPv1 is a layering violation so it implements its own repairDatabase.
        return static_cast<MMAPV1Engine*>(engine)->repairDatabase(
            txn, dbName, preserveClonedFilesOnFailure, backupOriginalFiles);
    }

    // These are MMAPv1 specific
    if (preserveClonedFilesOnFailure) {
        return Status(ErrorCodes::BadValue, "preserveClonedFilesOnFailure not supported");
    }
    if (backupOriginalFiles) {
        return Status(ErrorCodes::BadValue, "backupOriginalFiles not supported");
    }

    // Close the db to invalidate all current users and caches.
    dbHolder().close(txn, dbName);
    ON_BLOCK_EXIT([&dbName, &txn] {
        try {
            // Open the db after everything finishes.
            auto db = dbHolder().openDb(txn, dbName);

            // Set the minimum snapshot for all Collections in this db. This ensures that readers
            // using majority readConcern level can only use the collections after their repaired
            // versions are in the committed view.
            auto replCoord = repl::ReplicationCoordinator::get(txn);
            auto snapshotName = replCoord->reserveSnapshotName(txn);
            replCoord->forceSnapshotCreation();  // Ensure a newer snapshot is created even if idle.
            for (auto&& collection : *db) {
                collection->setMinimumVisibleSnapshot(snapshotName);
            }
        } catch (...) {
            // Failing to reopen leaves the server in an unusable state; terminate
            // rather than continue with an inconsistent holder.
            severe() << "Unexpected exception encountered while reopening database after repair.";
            std::terminate();  // Logs additional info about the specific error.
        }
    });

    DatabaseCatalogEntry* dbce = engine->getDatabaseCatalogEntry(txn, dbName);
    std::list<std::string> colls;
    dbce->getCollectionNamespaces(&colls);
    for (std::list<std::string>::const_iterator it = colls.begin(); it != colls.end(); ++it) {
        // Don't check for interrupt after starting to repair a collection otherwise we can
        // leave data in an inconsistent state. Interrupting between collections is ok, however.
        txn->checkForInterrupt();

        log() << "Repairing collection " << *it;

        Status status = engine->repairRecordStore(txn, *it);
        if (!status.isOK())
            return status;

        status = rebuildIndexesOnCollection(txn, dbce, *it);
        if (!status.isOK())
            return status;

        // TODO: uncomment once SERVER-16869
        // engine->flushAllFiles(true);
    }
    return Status::OK();
}
// Repairs every collection in 'dbName' via the storage engine's record-store
// repair, then rebuilds its indexes. The database is closed for the duration
// and reopened on all exit paths by the RAII helper below.
// 'preserveClonedFilesOnFailure' and 'backupOriginalFiles' are only honored by
// MMAPv1 and rejected for other engines.
Status repairDatabase(OperationContext* txn,
                      StorageEngine* engine,
                      const std::string& dbName,
                      bool preserveClonedFilesOnFailure,
                      bool backupOriginalFiles) {
    DisableDocumentValidation validationDisabler(txn);

    // We must hold some form of lock here
    invariant(txn->lockState()->isLocked());
    invariant(dbName.find('.') == string::npos);

    log() << "repairDatabase " << dbName << endl;

    BackgroundOperation::assertNoBgOpInProgForDb(dbName);

    txn->checkForInterrupt();

    if (engine->isMmapV1()) {
        // MMAPv1 is a layering violation so it implements its own repairDatabase.
        return static_cast<MMAPV1Engine*>(engine)->repairDatabase(
            txn, dbName, preserveClonedFilesOnFailure, backupOriginalFiles);
    }

    // These are MMAPv1 specific
    if (preserveClonedFilesOnFailure) {
        return Status(ErrorCodes::BadValue, "preserveClonedFilesOnFailure not supported");
    }
    if (backupOriginalFiles) {
        return Status(ErrorCodes::BadValue, "backupOriginalFiles not supported");
    }

    // Close the db to invalidate all current users and caches.
    dbHolder().close(txn, dbName);

    // Open the db after everything finishes, on every exit path.
    class OpenDbInDestructor {
    public:
        OpenDbInDestructor(OperationContext* txn, const std::string& db) : _dbName(db), _txn(txn) {}
        ~OpenDbInDestructor() {
            // NOTE(review): openDb can uassert (throw); a throw here during stack
            // unwinding calls std::terminate — confirm this is the intended policy.
            dbHolder().openDb(_txn, _dbName);
        }

    private:
        // Stored by value: a reference member here is a dangling-reference hazard
        // (it would silently bind to any temporary passed as 'db').
        const std::string _dbName;
        OperationContext* _txn;
    } dbOpener(txn, dbName);

    DatabaseCatalogEntry* dbce = engine->getDatabaseCatalogEntry(txn, dbName);
    std::list<std::string> colls;
    dbce->getCollectionNamespaces(&colls);
    for (std::list<std::string>::const_iterator it = colls.begin(); it != colls.end(); ++it) {
        // Don't check for interrupt after starting to repair a collection otherwise we can
        // leave data in an inconsistent state. Interrupting between collections is ok, however.
        txn->checkForInterrupt();

        log() << "Repairing collection " << *it;

        Status status = engine->repairRecordStore(txn, *it);
        if (!status.isOK())
            return status;

        status = rebuildIndexesOnCollection(txn, dbce, *it);
        if (!status.isOK())
            return status;

        // TODO: uncomment once SERVER-16869
        // engine->flushAllFiles(true);
    }

    return Status::OK();
}