void _applyOpToDataFiles( const string& database, FileOp &fo, bool afterAllocator, const string& path ) {
     if ( afterAllocator )
         FileAllocator::get()->waitUntilFinished();
     string c = database;
     c += '.';
     boost::filesystem::path p(path);
     if (storageGlobalParams.directoryperdb)
         p /= database;
     boost::filesystem::path q;
     q = p / (c+"ns");
     bool ok = false;
     MONGO_ASSERT_ON_EXCEPTION( ok = fo.apply( q ) );
     if ( ok ) {
         LOG(2) << fo.op() << " file " << q.string() << endl;
     }
     int i = 0;
     int extra = 10; // should not be necessary, this is defensive in case there are missing files
     while ( 1 ) {
         verify( i <= DiskLoc::MaxFiles );
         stringstream ss;
         ss << c << i;
         q = p / ss.str();
         MONGO_ASSERT_ON_EXCEPTION( ok = fo.apply(q) );
         if ( ok ) {
             if ( extra != 10 ) {
                 LOG(1) << fo.op() << " file " << q.string() << endl;
                 log() << "  _applyOpToDataFiles() warning: extra == " << extra << endl;
             }
         }
         else if ( --extra <= 0 )
             break;
         i++;
     }
 }
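
_applyOpToDataFiles() runs a caller-supplied FileOp over the namespace file <db>.ns and then over the numbered datafiles <db>.0, <db>.1, ... The extra counter lets the scan survive gaps in the numbering: it stops only after ten missing files in total, and reaching DiskLoc::MaxFiles trips an assertion. As a minimal sketch, assuming only the FileOp interface implied by the code above (apply() returns true when the file existed and was handled; op() names the operation for log output), a deletion op could look like:

// Sketch only: FileOp's exact declaration is not part of this listing.
struct DeleteDataFilesOp : public FileOp {
    virtual bool apply( const boost::filesystem::path& p ) {
        if ( !boost::filesystem::exists( p ) )
            return false;
        boost::filesystem::remove( p );
        return true;
    }
    virtual const char* op() const { return "remove"; }
};
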
 // generate a directory name for storing temp data files
 Path uniqueReservedPath( const char *prefix ) {
     Path repairPath = Path(storageGlobalParams.repairpath);
     Path reservedPath;
     int i = 0;
     bool exists = false;
     do {
         stringstream ss;
         ss << prefix << "_repairDatabase_" << i++;
         reservedPath = repairPath / ss.str();
         MONGO_ASSERT_ON_EXCEPTION( exists = boost::filesystem::exists( reservedPath ) );
     }
     while ( exists );
     return reservedPath;
 }
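
uniqueReservedPath() probes <repairpath>/<prefix>_repairDatabase_0, then _1, _2, ... and returns the first candidate that does not already exist. repairDatabase() below uses it to reserve either a "backup" directory (when original or cloned files are to be kept) or a throwaway "_tmp" one.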
        ~RepairFileDeleter() {
            if ( _success )
                 return;

            log() << "cleaning up failed repair "
                  << "db: " << _dbName << " path: " << _pathString;

            try {
                _txn->recoveryUnit()->syncDataAndTruncateJournal();

                globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled

                MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( _path ) );
            }
            catch ( DBException& e ) {
                error() << "RepairFileDeleter failed to cleanup: " << e;
                error() << "aborting";
                fassertFailed( 17402 );
            }
        }
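
This destructor is RepairFileDeleter's failure path: unless success() was called, it syncs data and truncates the journal through the operation's recovery unit, flushes all files (which matters when journaling is disabled), and removes the reserved repair directory. Any exception during that cleanup is treated as fatal (fassert 17402).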
Example #4
        ~RepairFileDeleter() {
            if ( _success )
                 return;

            log() << "cleaning up failed repair "
                  << "db: " << _dbName << " path: " << _pathString;

            try {
                getDur().syncDataAndTruncateJournal();
                MongoFile::flushAll(true); // need both in case journaling is disabled
                {
                    Client::Context tempContext( _dbName, _pathString );
                    Database::closeDatabase( _dbName, _pathString );
                }
                MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( _path ) );
            }
            catch ( DBException& e ) {
                error() << "RepairFileDeleter failed to cleanup: " << e;
                error() << "aborting";
                fassertFailed( 17402 );
            }
        }
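
Example #4 is the older, pre-OperationContext version of the same destructor: durability goes through getDur() and MongoFile::flushAll(), and the temporary database has to be closed explicitly before its files can be removed.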
Example #5
    void FileAllocator::run( FileAllocator * fa ) {
        setThreadName( "FileAllocator" );
        while( 1 ) {
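            // If nothing is queued, sleep until a producer signals new work.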
            {
                scoped_lock lk( fa->_pendingMutex );
                if ( fa->_pending.size() == 0 )
                    fa->_pendingUpdated.wait( lk.boost() );
            }
            while( 1 ) {
                string name;
                long size;
                {
                    scoped_lock lk( fa->_pendingMutex );
                    if ( fa->_pending.size() == 0 )
                        break;
                    name = fa->_pending.front();
                    size = fa->_pendingSize[ name ];
                }

                string tmp;
                long fd = 0;
                try {
                    log() << "allocating new datafile " << name << ", filling with zeroes..." << endl;
                    
                    boost::filesystem::path parent = ensureParentDirCreated(name);
                    tmp = makeTempFileName( parent );
                    ensureParentDirCreated(tmp);

                    fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR);
                    if ( fd <= 0 ) {
                        log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
                        uasserted(10439, "");
                    }

#if defined(POSIX_FADV_DONTNEED)
                    if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) {
                        log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
                    }
#endif

                    Timer t;

                    /* make sure the file is the full desired length */
                    ensureLength( fd , size );

                    close( fd );
                    fd = 0;

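                    // The file now has its full length; move it into place
                    // (rename() replaces the target atomically on POSIX).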
                    if( rename(tmp.c_str(), name.c_str()) ) { 
                        log() << "error: couldn't rename " << tmp << " to " << name << ' ' << errnoWithDescription() << endl;
                        uasserted(13653, "");
                    }
                    flushMyDirectory(name);

                    log() << "done allocating datafile " << name << ", "
                          << "size: " << size/1024/1024 << "MB, "
                          << " took " << ((double)t.millis())/1000.0 << " secs"
                          << endl;

                    // no longer in a failed state. allow new writers.
                    fa->_failed = false;
                }
                catch ( ... ) {
                    if ( fd > 0 )
                        close( fd );
                    log() << "error failed to allocate new file: " << name
                          << " size: " << size << ' ' << errnoWithDescription() << warnings;
                    log() << "    will try again in 10 seconds" << endl; // not going to warning logs
                    try {
                        if ( tmp.size() )
                            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove( tmp ) );
                        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove( name ) );
                    }
                    catch ( ... ) {
                    }
                    scoped_lock lk( fa->_pendingMutex );
                    fa->_failed = true;
                    // not erasing from pending
                    fa->_pendingUpdated.notify_all();

                    sleepsecs(10);
                    continue;
                }

                {
                    scoped_lock lk( fa->_pendingMutex );
                    fa->_pendingSize.erase( name );
                    fa->_pending.pop_front();
                    fa->_pendingUpdated.notify_all();
                }
            }
        }
    }
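
FileAllocator::run() is the allocator thread's main loop: it sleeps on _pendingUpdated until a request arrives, then for each queued (name, size) pair it creates the file under a temporary name, zero-fills it to the requested length, and only then renames it to its final name, so a crash mid-allocation can never leave a short datafile under the real name. On failure it marks the allocator failed, leaves the request queued so a later pass can retry it, and sleeps ten seconds. The core idiom reduced to plain POSIX calls, with illustrative names and ftruncate() as a simplified stand-in for ensureLength() (which really writes zeroes):

#include <cstdio>      // rename
#include <fcntl.h>     // open
#include <sys/stat.h>  // S_IRUSR, S_IWUSR
#include <unistd.h>    // ftruncate, fsync, close, unlink

// Illustrative helper, not the FileAllocator API: build the file under a
// temporary name, make it durable, then atomically rename it into place.
static bool allocateViaRename( const char* finalName, const char* tmpName, off_t size ) {
    int fd = open( tmpName, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR );
    if ( fd < 0 )
        return false;
    bool ok = ftruncate( fd, size ) == 0   // stand-in for ensureLength()
           && fsync( fd ) == 0;            // data durable before the rename
    close( fd );
    if ( !ok ) {
        unlink( tmpName );                 // leave no partial temp file behind
        return false;
    }
    return rename( tmpName, finalName ) == 0;  // atomic on POSIX
}
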
    Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                         const std::string& dbName,
                                         bool preserveClonedFilesOnFailure,
                                         bool backupOriginalFiles ) {
        // We must hold some form of lock here
        invariant(txn->lockState()->threadState());
        invariant( dbName.find( '.' ) == string::npos );

        scoped_ptr<RepairFileDeleter> repairFileDeleter;

        log() << "repairDatabase " << dbName << endl;

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        txn->recoveryUnit()->syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        txn->checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                            dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase =
                            dbHolder().get(txn, dbName);
            if (originalDatabase == NULL) {
                return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
            }

            scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
            scoped_ptr<Database> tempDatabase;
            {
                dbEntry.reset( new MMAPV1DatabaseCatalogEntry( txn,
                                                               dbName,
                                                               reservedPathString,
                                                               storageGlobalParams.directoryperdb,
                                                               true ) );
                invariant( !dbEntry->exists() );
                tempDatabase.reset( new Database( txn,
                                                  dbName,
                                                  dbEntry.get() ) );

            }

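            // First pass: scan system.namespaces to record which collections
            // to clone, along with their creation options.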
            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx(txn,  ns );
                Collection* coll = originalDatabase->getCollection( txn, ns );
                if ( coll ) {
                    scoped_ptr<RecordIterator> it( coll->getIterator( txn,
                                                                      DiskLoc(),
                                                                      false,
                                                                      CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext(txn, ns, tempDatabase );
                    WriteUnitOfWork wunit(txn);
                    tempCollection = tempDatabase->createCollection(txn, ns, options, true, false);
                    wunit.commit();
                }

                Client::Context readContext(txn, ns, originalDatabase);
                Collection* originalCollection = originalDatabase->getCollection( txn, ns );
                invariant( originalCollection );

                // data

                // TODO SERVER-14812 add a mode that drops duplicates rather than failing
                MultiIndexBlock indexer(txn, tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext(txn, ns, tempDatabase);
                    Status status = indexer.init( indexes );
                    if ( !status.isOK() )
                        return status;
                }

                scoped_ptr<RecordIterator> iterator(
                    originalCollection->getIterator( txn, DiskLoc(), false,
                                                     CollectionScanParams::FORWARD ));
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext(txn, ns, tempDatabase);
                    
                    WriteUnitOfWork wunit(txn);
                    StatusWith<DiskLoc> result = tempCollection->insertDocument(txn,
                                                                                doc,
                                                                                &indexer,
                                                                                false);
                    if ( !result.isOK() )
                        return result.getStatus();

                    wunit.commit();
                    txn->checkForInterrupt(false);
                }
                
                Status status = indexer.doneInserting();
                if (!status.isOK())
                    return status;

                {
                    Client::Context tempContext(txn, ns, tempDatabase);
                    WriteUnitOfWork wunit(txn);
                    indexer.commit();
                    wunit.commit();
                }

            }

            txn->recoveryUnit()->syncDataAndTruncateJournal();
            globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled

            txn->checkForInterrupt(false);
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        dbHolder().close( txn, dbName );

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
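
MMAPV1Engine::repairDatabase() repairs by cloning rather than patching in place: after checking free space under repairpath (File::freeSpace() returns -1 when the amount is unknown, in which case the check is skipped), it re-creates every normal collection listed in system.namespaces inside a temporary database rooted at the reserved path, re-inserts each document, and rebuilds each index through MultiIndexBlock. Once everything is synced, the recovered files replace the originals, which are kept only when backupOriginalFiles is set.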
Example #7
    Status repairDatabase( string dbName,
                           bool preserveClonedFilesOnFailure,
                           bool backupOriginalFiles ) {
        scoped_ptr<RepairFileDeleter> repairFileDeleter;
        doingRepair dr;
        dbName = nsToDatabase( dbName );

        log() << "repairDatabase " << dbName << endl;

        invariant( cc().database()->name() == dbName );
        invariant( cc().database()->path() == storageGlobalParams.dbpath );

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        getDur().syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        killCurrentOp.checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath );
            if ( originalDatabase == NULL )
                return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" );

            Database* tempDatabase = NULL;
            {
                bool justCreated = false;
                tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated );
                invariant( justCreated );
            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx( ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(),
                                                                          false,
                                                                          CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext( ns, tempDatabase );
                    tempCollection = tempDatabase->createCollection( ns, options, true, false );
                }

                Client::Context readContext( ns, originalDatabase );
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // data

                MultiIndexBlock indexBlock( tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.init( indexes );
                    if ( !status.isOK() )
                        return status;

                }

                scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(),
                                                                                          false,
                                                                                          CollectionScanParams::FORWARD ) );
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext( ns, tempDatabase );
                    StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock );
                    if ( !result.isOK() )
                        return result.getStatus();

                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }

                {
                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.commit();
                    if ( !status.isOK() )
                        return status;
                }

            }

            getDur().syncDataAndTruncateJournal();
            MongoFile::flushAll(true); // need both in case journaling is disabled

            killCurrentOp.checkForInterrupt(false);

            Client::Context tempContext( dbName, reservedPathString );
            Database::closeDatabase( dbName, reservedPathString );
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        Client::Context ctx( dbName );
        Database::closeDatabase(dbName, storageGlobalParams.dbpath);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
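
Example #7 is the earlier form of the same repair routine, from before OperationContext and WriteUnitOfWork existed: writes become durable through getDur().commitIfNeeded(), interruption is checked via killCurrentOp, and databases are addressed by (name, path) pairs, but the overall clone-then-swap structure is unchanged.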