Example #1
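The MMAPv1-era `run` handler for MongoDB's `fsync` command. With `lock: true` it spawns an `FSyncLockThread` and blocks on `_threadSync` until the database is locked against writes; otherwise it commits the journal (when `sync` is true) and flushes all data files under a global IS lock.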
        virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {

            if (txn->lockState()->isLocked()) {
                errmsg = "fsync: Cannot execute fsync command from contexts that hold a data lock";
                return false;
            }

            bool sync = !cmdObj["async"].trueValue(); // async means do an fsync, but return immediately
            bool lock = cmdObj["lock"].trueValue();
            log() << "CMD fsync: sync:" << sync << " lock:" << lock << endl;
            if( lock ) {
                if ( ! sync ) {
                    errmsg = "fsync: sync option must be true when using lock";
                    return false;
                }

                SimpleMutex::scoped_lock lk(m);
                err = "";
                
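                // Spawn the background thread that acquires the fsync lock;
                // it signals _threadSync once locked (or sets err on failure).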
                (new FSyncLockThread())->go();
                while ( ! locked && err.size() == 0 ) {
                    _threadSync.wait( m );
                }
                
                if ( err.size() ){
                    errmsg = err;
                    return false;
                }
                
                log() << "db is now locked, no writes allowed. db.fsyncUnlock() to unlock" << endl;
                log() << "    For more info see " << FSyncCommand::url() << endl;
                result.append("info", "now locked against writes, use db.fsyncUnlock() to unlock");
                result.append("seeAlso", FSyncCommand::url());

            }
            else {
                // the simple fsync command case
                if (sync) {
                    // can this be GlobalRead? and if it can, it should be nongreedy.
                    ScopedTransaction transaction(txn, MODE_X);
                    Lock::GlobalWrite w(txn->lockState());
                    getDur().commitNow(txn);

                    //  No WriteUnitOfWork needed, as this does no writes of its own.
                }

                // Take a global IS lock to ensure the storage engine is not shutdown
                Lock::GlobalLock global(txn->lockState(), MODE_IS, UINT_MAX);
                StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
                result.append( "numFiles" , storageEngine->flushAllFiles( sync ) );
            }
            return true;
        }
Example #2
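`FSyncLockThread::doRealWork()`, the background thread behind `fsync { lock: true }`: it takes the global write lock, syncs and truncates the journal, downgrades the lock so reads can continue, flushes all files, signals the waiting command thread, and then parks until `db.fsyncUnlock()` requests release.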
    void FSyncLockThread::doRealWork() {
        SimpleMutex::scoped_lock lkf(filesLockedFsync);

        OperationContextImpl txn;
        ScopedTransaction transaction(&txn, MODE_X);
        Lock::GlobalWrite global(txn.lockState()); // No WriteUnitOfWork needed

        SimpleMutex::scoped_lock lk(fsyncCmd.m);

        invariant(!fsyncCmd.locked);    // impossible to get here if locked is true
        try {
            getDur().syncDataAndTruncateJournal(&txn);
        }
        catch (const std::exception& e) {
            error() << "error doing syncDataAndTruncateJournal: " << e.what() << endl;
            fsyncCmd.err = e.what();
            fsyncCmd._threadSync.notify_one();
            fsyncCmd.locked = false;
            return;
        }

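        // Downgrade the global lock from X to S so reads can continue
        // while the files are flushed and the fsync lock is held.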
        txn.lockState()->downgradeGlobalXtoSForMMAPV1();

        try {
            StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
            storageEngine->flushAllFiles(true);
        }
        catch (const std::exception& e) {
            error() << "error doing flushAll: " << e.what() << endl;
            fsyncCmd.err = e.what();
            fsyncCmd._threadSync.notify_one();
            fsyncCmd.locked = false;
            return;
        }

        invariant(!fsyncCmd.locked);
        fsyncCmd.locked = true;

        fsyncCmd._threadSync.notify_one();

        while ( ! fsyncCmd.pendingUnlock ) {
            fsyncCmd._unlockSync.wait(fsyncCmd.m);
        }
        fsyncCmd.pendingUnlock = false;

        fsyncCmd.locked = false;
        fsyncCmd.err = "unlocked";

        fsyncCmd._unlockSync.notify_one();
    }
Example #3
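`DataFileSync::run()`, the background flusher thread: it wakes roughly every `--syncdelay` seconds (default 60), calls `flushAllFiles(true)`, and logs the flush when it is slow (10 s or more) or when debug logging is enabled.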
void DataFileSync::run() {
    Client::initThread(name().c_str());

    if (storageGlobalParams.syncdelay == 0) {
        log() << "warning: --syncdelay 0 is not recommended and can have strange performance"
              << endl;
    } else if (storageGlobalParams.syncdelay == 1) {
        log() << "--syncdelay 1" << endl;
    } else if (storageGlobalParams.syncdelay != 60) {
        LOG(1) << "--syncdelay " << storageGlobalParams.syncdelay.load() << endl;
    }
    int time_flushing = 0;
    while (!globalInShutdownDeprecated()) {
        _diaglog.flush();
        if (storageGlobalParams.syncdelay == 0) {
            // in case at some point we add an option to change at runtime
            sleepsecs(5);
            continue;
        }

        sleepmillis(
            (long long)std::max(0.0, (storageGlobalParams.syncdelay * 1000) - time_flushing));

        if (globalInShutdownDeprecated()) {
            // occasional issue trying to flush during shutdown when sleep interrupted
            break;
        }

        Date_t start = jsTime();
        StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();

        dur::notifyPreDataFileFlush();
        int numFiles = storageEngine->flushAllFiles(true);
        dur::notifyPostDataFileFlush();

        time_flushing = durationCount<Milliseconds>(jsTime() - start);

        _flushed(time_flushing);

        if (shouldLog(logger::LogSeverity::Debug(1)) || time_flushing >= 10000) {
            log() << "flushing mmaps took " << time_flushing << "ms "
                  << " for " << numFiles << " files" << endl;
        }
    }
}
Example #4
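An older revision of `DataFileSync::run()` that reads `mmapv1GlobalOptions` and `getGlobalEnvironment()` instead of the later `storageGlobalParams`/`getGlobalServiceContext()` APIs.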
    void DataFileSync::run() {
        Client::initThread( name().c_str() );

        if (mmapv1GlobalOptions.syncdelay == 0) {
            log() << "warning: --syncdelay 0 is not recommended and can have strange performance" << endl;
        }
        else if (mmapv1GlobalOptions.syncdelay == 1) {
            log() << "--syncdelay 1" << endl;
        }
        else if (mmapv1GlobalOptions.syncdelay != 60) {
            LOG(1) << "--syncdelay " << mmapv1GlobalOptions.syncdelay << endl;
        }
        int time_flushing = 0;
        while ( ! inShutdown() ) {
            _diaglog.flush();
            if (mmapv1GlobalOptions.syncdelay == 0) {
                // in case at some point we add an option to change at runtime
                sleepsecs(5);
                continue;
            }

            sleepmillis((long long) std::max(0.0, (mmapv1GlobalOptions.syncdelay * 1000) - time_flushing));

            if ( inShutdown() ) {
                // occasional issue trying to flush during shutdown when sleep interrupted
                break;
            }

            Date_t start = jsTime();
            StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
            int numFiles = storageEngine->flushAllFiles( true );
            time_flushing = (int) (jsTime() - start);

            _flushed(time_flushing);

            if( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) || time_flushing >= 10000 ) {
                log() << "flushing mmaps took " << time_flushing << "ms " << " for " << numFiles << " files" << endl;
            }
        }
    }
Example #5
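`createOplog()`: on startup, verifies that an existing oplog matches the configured `--oplogSize`, or creates the capped oplog collection, sizing it from the command line or a platform-dependent default (on 64-bit non-Apple builds, 5% of free disk space, clamped between 990MB and 50GB), then flushes all files so the new allocation is durable.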
    void createOplog(OperationContext* txn) {
        Lock::GlobalWrite lk(txn->lockState());

        const char * ns = "local.oplog.$main";

        const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
        bool rs = !replSettings.replSet.empty();
        if( rs )
            ns = rsoplog;

        Client::Context ctx(txn, ns);
        Collection* collection = ctx.db()->getCollection(txn, ns );

        if ( collection ) {

            if (replSettings.oplogSize != 0) {
                int o = (int)(collection->getRecordStore()->storageSize(txn) / ( 1024 * 1024 ) );
                int n = (int)(replSettings.oplogSize / (1024 * 1024));
                if ( n != o ) {
                    stringstream ss;
                    ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog";
                    log() << ss.str() << endl;
                    throw UserException( 13257 , ss.str() );
                }
            }

            if( rs ) return;

            initOpTimeFromOplog(txn, ns);
            return;
        }

        /* create an oplog collection, if it doesn't yet exist. */
        long long sz = 0;
        if ( replSettings.oplogSize != 0 ) {
            sz = replSettings.oplogSize;
        }
        else {
            /* not specified. pick a default size */
            sz = 50LL * 1024LL * 1024LL;
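            // 64-bit builds get a larger default than the 50MB baseline.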
            if ( sizeof(int *) >= 8 ) {
#if defined(__APPLE__)
                // typically these are desktops (dev machines), so keep it smallish
                sz = (256-64) * 1024 * 1024;
#else
                sz = 990LL * 1024 * 1024;
                double free =
                    File::freeSpace(storageGlobalParams.dbpath); //-1 if call not supported.
                long long fivePct = static_cast<long long>( free * 0.05 );
                if ( fivePct > sz )
                    sz = fivePct;
                // we use 5% of free space up to 50GB (1TB free)
                static long long upperBound = 50LL * 1024 * 1024 * 1024;
                if (fivePct > upperBound)
                    sz = upperBound;
#endif
            }
        }

        log() << "******" << endl;
        log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB..." << endl;

        CollectionOptions options;
        options.capped = true;
        options.cappedSize = sz;
        options.autoIndexId = CollectionOptions::NO;

        WriteUnitOfWork wunit(txn);
        invariant(ctx.db()->createCollection(txn, ns, options));
        if( !rs )
            logOp(txn, "n", "", BSONObj() );
        wunit.commit();

        /* sync here so we don't get any surprising lag later when we try to sync */
        StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
        storageEngine->flushAllFiles(true);
        log() << "******" << endl;
    }
Example #6
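`waitForWriteConcern()`: blocks until a write satisfies the requested write concern. The `FSYNC` sync mode falls back to `flushAllFiles(true)` on non-durable storage engines; `JOURNAL` waits for journal durability (and advances the node's durable optime when replication is enabled) before waiting on replication itself.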
Status waitForWriteConcern(OperationContext* txn,
                           const OpTime& replOpTime,
                           const WriteConcernOptions& writeConcern,
                           WriteConcernResult* result) {
    LOG(2) << "Waiting for write concern. OpTime: " << replOpTime
           << ", write concern: " << writeConcern.toBSON();
    auto replCoord = repl::ReplicationCoordinator::get(txn);

    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeWaitingForWriteConcern);

    // Next handle blocking on disk
    Timer syncTimer;
    WriteConcernOptions writeConcernWithPopulatedSyncMode =
        replCoord->populateUnsetWriteConcernOptionsSyncMode(writeConcern);

    switch (writeConcernWithPopulatedSyncMode.syncMode) {
        case WriteConcernOptions::SyncMode::UNSET:
            severe() << "Attempting to wait on a WriteConcern with an unset sync option";
            fassertFailed(34410);
        case WriteConcernOptions::SyncMode::NONE:
            break;
        case WriteConcernOptions::SyncMode::FSYNC: {
            StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
            if (!storageEngine->isDurable()) {
                result->fsyncFiles = storageEngine->flushAllFiles(true);
            } else {
                // We only need to commit the journal if we're durable
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
        }
        case WriteConcernOptions::SyncMode::JOURNAL:
            if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::Mode::modeNone) {
                // Wait for ops to become durable then update replication system's
                // knowledge of this.
                OpTime appliedOpTime = replCoord->getMyLastAppliedOpTime();
                txn->recoveryUnit()->waitUntilDurable();
                replCoord->setMyLastDurableOpTimeForward(appliedOpTime);
            } else {
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
    }

    result->syncMillis = syncTimer.millis();

    // Now wait for replication

    if (replOpTime.isNull()) {
        // no write happened for this client yet
        return Status::OK();
    }

    // needed to avoid incrementing gleWtimeStats SERVER-9005
    if (writeConcernWithPopulatedSyncMode.wNumNodes <= 1 &&
        writeConcernWithPopulatedSyncMode.wMode.empty()) {
        // no desired replication check
        return Status::OK();
    }

    // Replica set stepdowns and gle mode changes are thrown as errors
    repl::ReplicationCoordinator::StatusAndDuration replStatus =
        replCoord->awaitReplication(txn, replOpTime, writeConcernWithPopulatedSyncMode);
    if (replStatus.status == ErrorCodes::WriteConcernFailed) {
        gleWtimeouts.increment();
        result->err = "timeout";
        result->wTimedOut = true;
    }

    // Add stats
    result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(
        replOpTime,
        writeConcernWithPopulatedSyncMode.syncMode == WriteConcernOptions::SyncMode::JOURNAL);
    gleWtimeStats.recordMillis(durationCount<Milliseconds>(replStatus.duration));
    result->wTime = durationCount<Milliseconds>(replStatus.duration);

    return replStatus.status;
}
Example #7
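An earlier revision of `waitForWriteConcern()` that additionally asserts no locks are held while waiting, since blocking here with locks held can stall replication heartbeats.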
Status waitForWriteConcern(OperationContext* txn,
                           const OpTime& replOpTime,
                           const WriteConcernOptions& writeConcern,
                           WriteConcernResult* result) {
    // We assume all options have been validated earlier, if not, programming error.
    // Passing localDB name is a hack to avoid more rigorous check that performed for non local DB.
    dassert(validateWriteConcern(txn, writeConcern, kLocalDB).isOK());

    // We should never be waiting for write concern while holding any sort of lock, because this may
    // lead to situations where the replication heartbeats are stalled.
    //
    // This check does not hold for writes done through dbeval because it runs with a global X lock.
    dassert(!txn->lockState()->isLocked() || txn->getClient()->isInDirectClient());

    // Next handle blocking on disk

    Timer syncTimer;
    auto replCoord = repl::getGlobalReplicationCoordinator();
    WriteConcernOptions writeConcernWithPopulatedSyncMode =
        replCoord->populateUnsetWriteConcernOptionsSyncMode(writeConcern);


    switch (writeConcernWithPopulatedSyncMode.syncMode) {
        case WriteConcernOptions::SyncMode::UNSET:
            severe() << "Attempting to wait on a WriteConcern with an unset sync option";
            fassertFailed(34410);
        case WriteConcernOptions::SyncMode::NONE:
            break;
        case WriteConcernOptions::SyncMode::FSYNC: {
            StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
            if (!storageEngine->isDurable()) {
                result->fsyncFiles = storageEngine->flushAllFiles(true);
            } else {
                // We only need to commit the journal if we're durable
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
        }
        case WriteConcernOptions::SyncMode::JOURNAL:
            if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::Mode::modeNone) {
                // Wait for ops to become durable then update replication system's
                // knowledge of this.
                OpTime appliedOpTime = replCoord->getMyLastAppliedOpTime();
                txn->recoveryUnit()->waitUntilDurable();
                replCoord->setMyLastDurableOpTimeForward(appliedOpTime);
            } else {
                txn->recoveryUnit()->waitUntilDurable();
            }
            break;
    }

    result->syncMillis = syncTimer.millis();

    // Now wait for replication

    if (replOpTime.isNull()) {
        // no write happened for this client yet
        return Status::OK();
    }

    // needed to avoid incrementing gleWtimeStats SERVER-9005
    if (writeConcernWithPopulatedSyncMode.wNumNodes <= 1 &&
        writeConcernWithPopulatedSyncMode.wMode.empty()) {
        // no desired replication check
        return Status::OK();
    }

    // Replica set stepdowns and gle mode changes are thrown as errors
    repl::ReplicationCoordinator::StatusAndDuration replStatus =
        repl::getGlobalReplicationCoordinator()->awaitReplication(
            txn, replOpTime, writeConcernWithPopulatedSyncMode);
    if (replStatus.status == ErrorCodes::WriteConcernFailed) {
        gleWtimeouts.increment();
        result->err = "timeout";
        result->wTimedOut = true;
    }
    // Add stats
    result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(
        replOpTime,
        writeConcernWithPopulatedSyncMode.syncMode == WriteConcernOptions::SyncMode::JOURNAL);
    gleWtimeStats.recordMillis(durationCount<Milliseconds>(replStatus.duration));
    result->wTime = durationCount<Milliseconds>(replStatus.duration);

    return replStatus.status;
}
Example #8
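A newer `FSyncLockThread::run()`: under a global read lock it flushes all files, then calls `StorageEngine::beginBackup()` (or the `BackupCursorHooks` equivalent when enabled) and holds the lock until the command side drops the lock count to zero, finally ending the backup.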
void FSyncLockThread::run() {
    ThreadClient tc("fsyncLockWorker", getGlobalServiceContext());
    stdx::lock_guard<SimpleMutex> lkf(filesLockedFsync);
    stdx::unique_lock<stdx::mutex> lk(fsyncCmd.lockStateMutex);

    invariant(fsyncCmd.getLockCount_inLock() == 1);

    try {
        const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
        OperationContext& opCtx = *opCtxPtr;
        Lock::GlobalRead global(&opCtx);  // Block any writes in order to flush the files.

        StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();

        try {
            storageEngine->flushAllFiles(&opCtx, true);
        } catch (const std::exception& e) {
            error() << "error doing flushAll: " << e.what();
            fsyncCmd.threadStatus = Status(ErrorCodes::CommandFailed, e.what());
            fsyncCmd.acquireFsyncLockSyncCV.notify_one();
            return;
        }

        bool successfulFsyncLock = false;
        auto backupCursorHooks = BackupCursorHooks::get(opCtx.getServiceContext());
        try {
            writeConflictRetry(&opCtx,
                               "beginBackup",
                               "global",
                               [&opCtx, backupCursorHooks, &successfulFsyncLock, storageEngine] {
                                   if (backupCursorHooks->enabled()) {
                                       backupCursorHooks->fsyncLock(&opCtx);
                                       successfulFsyncLock = true;
                                   } else {
                                       // Have the uassert be caught by the DBException
                                       // block. Maintain "allowFsyncFailure" compatibility in
                                       // community.
                                       uassertStatusOK(storageEngine->beginBackup(&opCtx));
                                       successfulFsyncLock = true;
                                   }
                               });
        } catch (const DBException& e) {
            if (_allowFsyncFailure) {
                warning() << "Locking despite storage engine being unable to begin backup : "
                          << e.toString();
                opCtx.recoveryUnit()->waitUntilDurable();
            } else {
                error() << "storage engine unable to begin backup : " << e.toString();
                fsyncCmd.threadStatus = e.toStatus();
                fsyncCmd.acquireFsyncLockSyncCV.notify_one();
                return;
            }
        }

        fsyncCmd.threadStarted = true;
        fsyncCmd.acquireFsyncLockSyncCV.notify_one();

        while (fsyncCmd.getLockCount_inLock() > 0) {
            fsyncCmd.releaseFsyncLockSyncCV.wait(lk);
        }

        if (successfulFsyncLock) {
            if (backupCursorHooks->enabled()) {
                backupCursorHooks->fsyncUnlock(&opCtx);
            } else {
                storageEngine->endBackup(&opCtx);
            }
        }

    } catch (const std::exception& e) {
        severe() << "FSyncLockThread exception: " << e.what();
        fassertFailed(40350);
    }
}
Example #9
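The modern `errmsgRun` handler for the `fsync` command: the unlocked path flushes files under a global IS lock, while the `lock: true` path reference-counts lock requests and starts a single `FSyncLockThread` only for the first one.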
    virtual bool errmsgRun(OperationContext* opCtx,
                           const string& dbname,
                           const BSONObj& cmdObj,
                           string& errmsg,
                           BSONObjBuilder& result) {
        if (opCtx->lockState()->isLocked()) {
            errmsg = "fsync: Cannot execute fsync command from contexts that hold a data lock";
            return false;
        }

        const bool sync =
            !cmdObj["async"].trueValue();  // async means do an fsync, but return immediately
        const bool lock = cmdObj["lock"].trueValue();
        log() << "CMD fsync: sync:" << sync << " lock:" << lock;

        // fsync + lock is sometimes used to block writes out of the system and does not care if
        // the `BackupCursorService::fsyncLock` call succeeds.
        const bool allowFsyncFailure =
            getTestCommandsEnabled() && cmdObj["allowFsyncFailure"].trueValue();

        if (!lock) {
            // Take a global IS lock to ensure the storage engine is not shutdown
            Lock::GlobalLock global(opCtx, MODE_IS);
            StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine();
            result.append("numFiles", storageEngine->flushAllFiles(opCtx, sync));
            return true;
        }

        Lock::ExclusiveLock lk(opCtx->lockState(), commandMutex);
        if (!sync) {
            errmsg = "fsync: sync option must be true when using lock";
            return false;
        }

        const auto lockCountAtStart = getLockCount();
        invariant(lockCountAtStart > 0 || !_lockThread);

        acquireLock();

        if (lockCountAtStart == 0) {

            Status status = Status::OK();
            {
                stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
                threadStatus = Status::OK();
                threadStarted = false;
                _lockThread = stdx::make_unique<FSyncLockThread>(allowFsyncFailure);
                _lockThread->go();

                while (!threadStarted && threadStatus.isOK()) {
                    acquireFsyncLockSyncCV.wait(lk);
                }

                // 'threadStatus' must be copied while 'lockStateMutex' is held.
                status = threadStatus;
            }

            if (!status.isOK()) {
                releaseLock();
                warning() << "fsyncLock failed. Lock count reset to 0. Status: " << status;
                uassertStatusOK(status);
            }
        }

        log() << "mongod is locked and no writes are allowed. db.fsyncUnlock() to unlock";
        log() << "Lock count is " << getLockCount();
        log() << "    For more info see " << FSyncCommand::url();
        result.append("info", "now locked against writes, use db.fsyncUnlock() to unlock");
        result.append("lockCount", getLockCount());
        result.append("seeAlso", FSyncCommand::url());

        return true;
    }