void _applyOpToDataFiles( const string& database, FileOp &fo, bool afterAllocator, const string& path ) { if ( afterAllocator ) FileAllocator::get()->waitUntilFinished(); string c = database; c += '.'; boost::filesystem::path p(path); if (storageGlobalParams.directoryperdb) p /= database; boost::filesystem::path q; q = p / (c+"ns"); bool ok = false; MONGO_ASSERT_ON_EXCEPTION( ok = fo.apply( q ) ); if ( ok ) { LOG(2) << fo.op() << " file " << q.string() << endl; } int i = 0; int extra = 10; // should not be necessary, this is defensive in case there are missing files while ( 1 ) { verify( i <= DiskLoc::MaxFiles ); stringstream ss; ss << c << i; q = p / ss.str(); MONGO_ASSERT_ON_EXCEPTION( ok = fo.apply(q) ); if ( ok ) { if ( extra != 10 ) { LOG(1) << fo.op() << " file " << q.string() << endl; log() << " _applyOpToDataFiles() warning: extra == " << extra << endl; } } else if ( --extra <= 0 ) break; i++; } }
// generate a directory name for storing temp data files Path uniqueReservedPath( const char *prefix ) { Path repairPath = Path(storageGlobalParams.repairpath); Path reservedPath; int i = 0; bool exists = false; do { stringstream ss; ss << prefix << "_repairDatabase_" << i++; reservedPath = repairPath / ss.str(); MONGO_ASSERT_ON_EXCEPTION( exists = boost::filesystem::exists( reservedPath ) ); } while ( exists ); return reservedPath; }
~RepairFileDeleter() { if ( _success ) return; log() << "cleaning up failed repair " << "db: " << _dbName << " path: " << _pathString; try { _txn->recoveryUnit()->syncDataAndTruncateJournal(); globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( _path ) ); } catch ( DBException& e ) { error() << "RepairFileDeleter failed to cleanup: " << e; error() << "aborting"; fassertFailed( 17402 ); } }
~RepairFileDeleter() { if ( _success ) return; log() << "cleaning up failed repair " << "db: " << _dbName << " path: " << _pathString; try { getDur().syncDataAndTruncateJournal(); MongoFile::flushAll(true); // need both in case journaling is disabled { Client::Context tempContext( _dbName, _pathString ); Database::closeDatabase( _dbName, _pathString ); } MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( _path ) ); } catch ( DBException& e ) { error() << "RepairFileDeleter failed to cleanup: " << e; error() << "aborting"; fassertFailed( 17402 ); } }
void FileAllocator::run( FileAllocator * fa ) { setThreadName( "FileAllocator" ); while( 1 ) { { scoped_lock lk( fa->_pendingMutex ); if ( fa->_pending.size() == 0 ) fa->_pendingUpdated.wait( lk.boost() ); } while( 1 ) { string name; long size; { scoped_lock lk( fa->_pendingMutex ); if ( fa->_pending.size() == 0 ) break; name = fa->_pending.front(); size = fa->_pendingSize[ name ]; } string tmp; long fd = 0; try { log() << "allocating new datafile " << name << ", filling with zeroes..." << endl; boost::filesystem::path parent = ensureParentDirCreated(name); tmp = makeTempFileName( parent ); ensureParentDirCreated(tmp); fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR); if ( fd <= 0 ) { log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") " << errnoWithDescription() << endl; uasserted(10439, ""); } #if defined(POSIX_FADV_DONTNEED) if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) { log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") " << errnoWithDescription() << endl; } #endif Timer t; /* make sure the file is the full desired length */ ensureLength( fd , size ); close( fd ); fd = 0; if( rename(tmp.c_str(), name.c_str()) ) { log() << "error: couldn't rename " << tmp << " to " << name << ' ' << errnoWithDescription() << endl; uasserted(13653, ""); } flushMyDirectory(name); log() << "done allocating datafile " << name << ", " << "size: " << size/1024/1024 << "MB, " << " took " << ((double)t.millis())/1000.0 << " secs" << endl; // no longer in a failed state. allow new writers. fa->_failed = false; } catch ( ... ) { if ( fd > 0 ) close( fd ); log() << "error failed to allocate new file: " << name << " size: " << size << ' ' << errnoWithDescription() << warnings; log() << " will try again in 10 seconds" << endl; // not going to warning logs try { if ( tmp.size() ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove( tmp ) ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove( name ) ); } catch ( ... ) { } scoped_lock lk( fa->_pendingMutex ); fa->_failed = true; // not erasing from pending fa->_pendingUpdated.notify_all(); sleepsecs(10); continue; } { scoped_lock lk( fa->_pendingMutex ); fa->_pendingSize.erase( name ); fa->_pending.pop_front(); fa->_pendingUpdated.notify_all(); } } } }
Status MMAPV1Engine::repairDatabase( OperationContext* txn, const std::string& dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { // We must hold some form of lock here invariant(txn->lockState()->threadState()); invariant( dbName.find( '.' ) == string::npos ); scoped_ptr<RepairFileDeleter> repairFileDeleter; log() << "repairDatabase " << dbName << endl; BackgroundOperation::assertNoBgOpInProgForDb(dbName); txn->recoveryUnit()->syncDataAndTruncateJournal(); // Must be done before and after repair intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } txn->checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( txn, dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().get(txn, dbName); if (originalDatabase == NULL) { return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair"); } scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry; scoped_ptr<Database> tempDatabase; { dbEntry.reset( new MMAPV1DatabaseCatalogEntry( txn, dbName, reservedPathString, storageGlobalParams.directoryperdb, true ) ); invariant( !dbEntry->exists() ); tempDatabase.reset( new Database( txn, dbName, dbEntry.get() ) ); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; Client::Context ctx(txn, ns ); Collection* coll = originalDatabase->getCollection( txn, ns ); if ( coll ) { scoped_ptr<RecordIterator> it( coll->getIterator( txn, DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !it->isEOF() ) { DiskLoc loc = it->getNext(); BSONObj obj = coll->docFor( loc ); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { Client::Context tempContext(txn, ns, tempDatabase ); WriteUnitOfWork wunit(txn); tempCollection = tempDatabase->createCollection(txn, ns, options, true, false); wunit.commit(); } Client::Context readContext(txn, ns, originalDatabase); Collection* originalCollection = originalDatabase->getCollection( txn, ns ); invariant( originalCollection ); // data // TODO SERVER-14812 add a mode that drops duplicates rather than failing MultiIndexBlock indexer(txn, tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Client::Context tempContext(txn, ns, tempDatabase); Status status = indexer.init( indexes ); if ( !status.isOK() ) return status; } scoped_ptr<RecordIterator> iterator( originalCollection->getIterator( txn, DiskLoc(), false, CollectionScanParams::FORWARD )); while ( !iterator->isEOF() ) { DiskLoc loc = iterator->getNext(); invariant( !loc.isNull() ); BSONObj doc = originalCollection->docFor( loc ); Client::Context tempContext(txn, ns, tempDatabase); WriteUnitOfWork wunit(txn); StatusWith<DiskLoc> result = tempCollection->insertDocument(txn, doc, &indexer, false); if ( !result.isOK() ) return result.getStatus(); wunit.commit(); txn->checkForInterrupt(false); } Status status = indexer.doneInserting(); if (!status.isOK()) return status; { Client::Context tempContext(txn, ns, tempDatabase); WriteUnitOfWork wunit(txn); indexer.commit(); wunit.commit(); } } txn->recoveryUnit()->syncDataAndTruncateJournal(); globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled txn->checkForInterrupt(false); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); dbHolder().close( txn, dbName ); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if ( !backupOriginalFiles ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); return Status::OK(); }
Status repairDatabase( string dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { scoped_ptr<RepairFileDeleter> repairFileDeleter; doingRepair dr; dbName = nsToDatabase( dbName ); log() << "repairDatabase " << dbName << endl; invariant( cc().database()->name() == dbName ); invariant( cc().database()->path() == storageGlobalParams.dbpath ); BackgroundOperation::assertNoBgOpInProgForDb(dbName); getDur().syncDataAndTruncateJournal(); // Must be done before and after repair intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } killCurrentOp.checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath ); if ( originalDatabase == NULL ) return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" ); Database* tempDatabase = NULL; { bool justCreated = false; tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated ); invariant( justCreated ); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; Client::Context ctx( ns ); Collection* coll = originalDatabase->getCollection( ns ); if ( coll ) { scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !it->isEOF() ) { DiskLoc loc = it->getNext(); BSONObj obj = coll->docFor( loc ); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { Client::Context tempContext( ns, tempDatabase ); tempCollection = tempDatabase->createCollection( ns, options, true, false ); } Client::Context readContext( ns, originalDatabase ); Collection* originalCollection = originalDatabase->getCollection( ns ); invariant( originalCollection ); // data MultiIndexBlock indexBlock( tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.init( indexes ); if ( !status.isOK() ) return status; } scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !iterator->isEOF() ) { DiskLoc loc = iterator->getNext(); invariant( !loc.isNull() ); BSONObj doc = originalCollection->docFor( loc ); Client::Context tempContext( ns, tempDatabase ); StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock ); if ( !result.isOK() ) return result.getStatus(); getDur().commitIfNeeded(); killCurrentOp.checkForInterrupt(false); } { Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.commit(); if ( !status.isOK() ) return status; } } getDur().syncDataAndTruncateJournal(); MongoFile::flushAll(true); // need both in case journaling is disabled killCurrentOp.checkForInterrupt(false); Client::Context tempContext( dbName, reservedPathString ); Database::closeDatabase( dbName, reservedPathString ); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); Client::Context ctx( dbName ); Database::closeDatabase(dbName, storageGlobalParams.dbpath); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if ( !backupOriginalFiles ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); return Status::OK(); }