Example #1
 void Stats::rotate() {
     unsigned long long now = curTimeMicros64();
     unsigned long long dt = now - _lastRotate;
     if( dt >= _intervalMicros && _intervalMicros ) {
         // rotate
         curr->_dtMillis = (unsigned) (dt/1000);
         _lastRotate = now;
         curr = other();
         curr->reset();
     }
 }
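The pattern here is double-buffered stats: writers fill `curr` while the previously finished bucket stays readable, and `rotate()` swaps the two once the interval elapses. A minimal self-contained sketch of the same idea, with `nowMicros()` standing in for `curTimeMicros64()` and all other names hypothetical:

#include <chrono>

// Stand-in for curTimeMicros64(): microseconds from a monotonic clock.
static unsigned long long nowMicros() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

struct Bucket {
    unsigned dtMillis = 0;          // length of the finished window
    unsigned long long events = 0;  // whatever counters the window tracks
    void reset() { dtMillis = 0; events = 0; }
};

class TwoBucketStats {
public:
    explicit TwoBucketStats(unsigned long long intervalMicros)
        : _intervalMicros(intervalMicros), _lastRotate(nowMicros()), _curr(&_a) {}

    Bucket* curr() { return _curr; }

    // Same shape as Stats::rotate(): close out the current bucket once the
    // interval has elapsed and start filling the other one, so readers can
    // still inspect the bucket that just finished.
    void rotate() {
        unsigned long long now = nowMicros();
        unsigned long long dt = now - _lastRotate;
        if (_intervalMicros && dt >= _intervalMicros) {
            _curr->dtMillis = (unsigned)(dt / 1000);
            _lastRotate = now;
            _curr = other();
            _curr->reset();
        }
    }

private:
    Bucket* other() { return _curr == &_a ? &_b : &_a; }

    unsigned long long _intervalMicros;
    unsigned long long _lastRotate;
    Bucket _a, _b;
    Bucket* _curr;
};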
Example #2
        /** We need to remap the private views periodically; otherwise they would become very large.
            Call within write lock.
        */
        void REMAPPRIVATEVIEW() { 
            static unsigned startAt;
            static unsigned long long lastRemap;

            dbMutex.assertWriteLocked();
            dbMutex._remapPrivateViewRequested = false;
            assert( !commitJob.hasWritten() );

            if( 0 ) { 
                log() << "TEMP remapprivateview disabled for testing - will eventually run oom in this mode if db bigger than ram" << endl;
                return;
            }

            // we want to remap all private views about every 2 seconds.  there could be ~1000 views so 
            // we do a little each pass; beyond the remap time, more significantly, there will be copy on write 
            // faults after remapping, so doing a little bit at a time will avoid big load spikes on 
            // remapping.
            unsigned long long now = curTimeMicros64();
            double fraction = (now-lastRemap)/20000000.0;
            lastRemap = now; // without this, the fraction grows without bound and every pass remaps the whole set

            set<MongoFile*>& files = MongoFile::getAllFiles();
            unsigned sz = files.size();
            if( sz == 0 ) 
                return;

            unsigned ntodo = (unsigned) (sz * fraction);
            if( ntodo < 1 ) ntodo = 1;
            if( ntodo > sz ) ntodo = sz;

            const set<MongoFile*>::iterator b = files.begin();
            const set<MongoFile*>::iterator e = files.end();
            set<MongoFile*>::iterator i = b;
            // skip to our starting position
            for( unsigned x = 0; x < startAt; x++ ) {
                i++;
                if( i == e ) i = b;
            }
            startAt = (startAt + ntodo) % sz; // mark where to start next time

            for( unsigned x = 0; x < ntodo; x++ ) {
                dassert( i != e );
                if( (*i)->isMongoMMF() ) {
                    MongoMMF *mmf = (MongoMMF*) *i;
                    assert(mmf);
                    if( mmf->willNeedRemap() ) {
                        mmf->willNeedRemap() = false;
                        mmf->remapThePrivateView();
                    }
                }
                // advance unconditionally; with the increment inside the
                // isMongoMMF() branch, a non-MMF entry would stall the pass
                i++;
                if( i == e ) i = b;
            }
        }
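The interesting part is the pacing arithmetic: each pass remaps only a time-proportional slice of the view set, walking it round-robin via `startAt` (note the comment promises a full sweep about every 2 seconds, while the 20,000,000.0 divisor as written gives roughly 20). For example, with 1000 files and a pass 200 ms after the previous one, the fraction is 0.01 and max(1, 1000 × 0.01) = 10 files get remapped. A hedged sketch of just that budget calculation, detached from the MongoDB types:

#include <algorithm>

// How many of `total` items to process on this pass, given the microseconds
// elapsed since the previous pass and the target period over which the whole
// set should be covered once. Hypothetical helper, not MongoDB API.
unsigned itemsThisPass(unsigned long long elapsedMicros,
                       unsigned total,
                       double fullSweepMicros = 20000000.0) {
    if (total == 0)
        return 0;
    double fraction = elapsedMicros / fullSweepMicros;
    unsigned n = (unsigned)(total * fraction);
    return std::min(std::max(n, 1u), total);  // always progress, never overshoot
}

// e.g. itemsThisPass(200000 /* 200 ms */, 1000) == 10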
Example #3
    void CurOp::ensureStarted() {
        if ( _start == 0 ) {
            _start = curTimeMicros64();

            // If ensureStarted() is invoked after setMaxTimeMicros(), then time limit tracking will
            // start here.  This is because time limit tracking can only commence after the
            // operation is assigned a start time.
            if (_maxTimeMicros > 0) {
                _maxTimeTracker.setTimeLimit(_start, _maxTimeMicros);
            }
        }
    }
Example #4
    bool Socket::connect(SockAddr& remote) {
        _remote = remote;

        _fd = socket(remote.getType(), SOCK_STREAM, 0);
        if ( _fd == INVALID_SOCKET ) {
            LOG(_logLevel) << "ERROR: connect invalid socket " << errnoWithDescription() << endl;
            return false;
        }

        if ( _timeout > 0 ) {
            setTimeout( _timeout );
        }

        static const unsigned int connectTimeoutMillis = 5000;
        ConnectBG bg(_fd, remote);
        bg.go();
        if ( bg.wait(connectTimeoutMillis) ) {
            if ( bg.inError() ) {
                warning() << "Failed to connect to "
                          << _remote.getAddr() << ":" << _remote.getPort()
                          << ", reason: " << bg.getErrnoWithDescription() << endl;
                close();
                return false;
            }
        }
        else {
            // time out the connect
            close();
            bg.wait(); // so bg stays in scope until bg thread terminates
            warning() << "Failed to connect to "
                      << _remote.getAddr() << ":" << _remote.getPort()
                      << " after " << connectTimeoutMillis << " milliseconds, giving up." << endl;
            return false;
        }

        if (remote.getType() != AF_UNIX)
            disableNagle(_fd);

#ifdef SO_NOSIGPIPE
        // ignore SIGPIPE signals on osx, to avoid process exit
        const int one = 1;
        setsockopt( _fd , SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int));
#endif

        _local = getLocalAddrForBoundSocketFd(_fd);

        _fdCreationMicroSec = curTimeMicros64();

        _awaitingHandshake = false;

        return true;
    }
Example #5
    uint64_t CurOp::MaxTimeTracker::getRemainingMicros() const {
        if (!_enabled) {
            // 0 is "allow to run indefinitely".
            return 0;
        }

        // Does our accurate time source think time is up?  If so, claim there is 1 microsecond
        // left for this operation.
        uint64_t now = curTimeMicros64();
        if (_targetEpochMicros <= now) {
            return 1;
        }

        // Otherwise, calculate remaining time.
        return _targetEpochMicros - now;
    }
Example #6
bool Socket::connect(SockAddr& remote) {
    _remote = remote;

    _fd = socket(remote.getType(), SOCK_STREAM, 0);
    if ( _fd == INVALID_SOCKET ) {
        LOG(_logLevel) << "ERROR: connect invalid socket " << errnoWithDescription() << endl;
        return false;
    }

    if ( _timeout > 0 ) {
        setTimeout( _timeout );
    }

    ConnectBG bg(_fd, remote);
    bg.go();
    if ( bg.wait(5000) ) {
        if ( bg.inError() ) {
            close();
            return false;
        }
    }
    else {
        // time out the connect
        close();
        bg.wait(); // so bg stays in scope until bg thread terminates
        return false;
    }

    if (remote.getType() != AF_UNIX)
        disableNagle(_fd);

#ifdef SO_NOSIGPIPE
    // ignore SIGPIPE signals on osx, to avoid process exit
    const int one = 1;
    setsockopt( _fd , SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int));
#endif

    _local = getLocalAddrForBoundSocketFd(_fd);

    _fdCreationMicroSec = curTimeMicros64();
    return true;
}
Example #7
    void CurOp::MaxTimeTracker::setTimeLimit(uint64_t startEpochMicros, uint64_t durationMicros) {
        dassert(durationMicros != 0);

        _enabled = true;

        _targetEpochMicros = startEpochMicros + durationMicros;

        uint64_t now = curTimeMicros64();
        // If our accurate time source thinks time is not up yet, calculate the next target for
        // our approximate time source.
        if (_targetEpochMicros > now) {
            _approxTargetServerMillis = Listener::getElapsedTimeMillis() +
                                        static_cast<int64_t>((_targetEpochMicros - now) / 1000);
        }
        // Otherwise, set our approximate time source target such that it thinks time is already
        // up.
        else {
            _approxTargetServerMillis = Listener::getElapsedTimeMillis();
        }
    }
Example #8
    bool CurOp::MaxTimeTracker::checkTimeLimit() {
        if (!_enabled) {
            return false;
        }

        // Does our approximate time source think time is not up yet?  If so, return early.
        if (_approxTargetServerMillis > Listener::getElapsedTimeMillis()) {
            return false;
        }

        uint64_t now = curTimeMicros64();
        // Does our accurate time source think time is not up yet?  If so, readjust the target for
        // our approximate time source and return early.
        if (_targetEpochMicros > now) {
            _approxTargetServerMillis = Listener::getElapsedTimeMillis() +
                                        static_cast<int64_t>((_targetEpochMicros - now) / 1000);
            return false;
        }

        // Otherwise, time is up.
        return true;
    }
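Examples 7 and 8 together form a two-tier deadline check: a cheap coarse clock (`Listener::getElapsedTimeMillis()`) gates calls to the accurate but costlier `curTimeMicros64()`, and whenever the coarse gate trips before the real deadline, the gate is re-aimed from the precise clock. A minimal sketch of the pattern, with `coarseMillis()` and `preciseMicros()` as hypothetical stand-ins for those two sources:

#include <chrono>
#include <cstdint>

// Cheap, low-resolution clock (stand-in for Listener::getElapsedTimeMillis()).
static int64_t coarseMillis() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

// Accurate, more expensive clock (stand-in for curTimeMicros64()).
static uint64_t preciseMicros() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

struct Deadline {
    bool enabled = false;
    uint64_t targetMicros = 0;       // precise deadline
    int64_t approxTargetMillis = 0;  // coarse-clock gate

    void set(uint64_t startMicros, uint64_t durationMicros) {
        enabled = true;
        targetMicros = startMicros + durationMicros;
        uint64_t now = preciseMicros();
        approxTargetMillis = coarseMillis() +
            (targetMicros > now ? (int64_t)((targetMicros - now) / 1000) : 0);
    }

    bool expired() {
        if (!enabled)
            return false;
        if (approxTargetMillis > coarseMillis())
            return false;  // cheap early-out: coarse clock says time remains
        uint64_t now = preciseMicros();
        if (targetMicros > now) {
            // Coarse gate fired early; re-aim it from the precise clock.
            approxTargetMillis = coarseMillis() + (int64_t)((targetMicros - now) / 1000);
            return false;
        }
        return true;  // both clocks agree: time is up
    }
};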
Example #9
 void statsThread() {
     /*cout << "TEMP disabled statsthread" << endl;
     if( 1 ) 
         return;*/
     Client::initThread("stats");
     unsigned long long timeLastPass = 0;
     while ( 1 ) {
         {
             /* todo: do we even need readlock here?  if so for what? */
             readlock lk("");
             Top::completeSnapshot();
             q = (q+1)%NStats;
             Timing timing;
             dbMutex.info().getTimingInfo(timing.start, timing.timeLocked);
             unsigned long long now = curTimeMicros64();
             if ( timeLastPass ) {
                 unsigned long long dt = now - timeLastPass;
                 unsigned long long dlocked = timing.timeLocked - tlast.timeLocked;
                 {
                     stringstream ss;
                     ss << dt / 1000 << '\t';
                     ss << dlocked / 1000 << '\t';
                     if ( dt )
                         ss << (dlocked*100)/dt << '%';
                     string s = ss.str();
                     if ( cmdLine.cpu )
                         log() << "cpu: " << s << endl;
                     lockStats[q] = s;
                     ClientCursor::idleTimeReport( (unsigned) ((dt - dlocked)/1000) );
                 }
             }
             timeLastPass = now;
             tlast = timing;
         }
         sleepsecs(4);
     }
 }
Example #10
ExitCode _initAndListen(int listenPort) {
    Client::initThread("initandlisten");

    initWireSpec();
    auto serviceContext = getGlobalServiceContext();

    serviceContext->setFastClockSource(FastClockSourceFactory::create(Milliseconds(10)));
    auto opObserverRegistry = stdx::make_unique<OpObserverRegistry>();
    opObserverRegistry->addObserver(stdx::make_unique<OpObserverShardingImpl>());
    opObserverRegistry->addObserver(stdx::make_unique<UUIDCatalogObserver>());

    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
        opObserverRegistry->addObserver(stdx::make_unique<ShardServerOpObserver>());
    } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        opObserverRegistry->addObserver(stdx::make_unique<ConfigServerOpObserver>());
    }
    setupFreeMonitoringOpObserver(opObserverRegistry.get());

    serviceContext->setOpObserver(std::move(opObserverRegistry));

    DBDirectClientFactory::get(serviceContext).registerImplementation([](OperationContext* opCtx) {
        return std::unique_ptr<DBClientBase>(new DBDirectClient(opCtx));
    });

    const repl::ReplSettings& replSettings =
        repl::ReplicationCoordinator::get(serviceContext)->getSettings();

    {
        ProcessId pid = ProcessId::getCurrent();
        LogstreamBuilder l = log(LogComponent::kControl);
        l << "MongoDB starting : pid=" << pid << " port=" << serverGlobalParams.port
          << " dbpath=" << storageGlobalParams.dbpath;

        const bool is32bit = sizeof(int*) == 4;
        l << (is32bit ? " 32" : " 64") << "-bit host=" << getHostNameCached() << endl;
    }

    DEV log(LogComponent::kControl) << "DEBUG build (which is slower)" << endl;

#if defined(_WIN32)
    VersionInfoInterface::instance().logTargetMinOS();
#endif

    logProcessDetails();

    serviceContext->setServiceEntryPoint(
        stdx::make_unique<ServiceEntryPointMongod>(serviceContext));

    if (!storageGlobalParams.repair) {
        auto tl =
            transport::TransportLayerManager::createWithConfig(&serverGlobalParams, serviceContext);
        auto res = tl->setup();
        if (!res.isOK()) {
            error() << "Failed to set up listener: " << res;
            return EXIT_NET_ERROR;
        }
        serviceContext->setTransportLayer(std::move(tl));
    }

    // Set up the periodic runner for background job execution. This is required to be running
    // before the storage engine is initialized.
    auto runner = makePeriodicRunner(serviceContext);
    runner->startup();
    serviceContext->setPeriodicRunner(std::move(runner));

    initializeStorageEngine(serviceContext, StorageEngineInitFlags::kNone);

#ifdef MONGO_CONFIG_WIREDTIGER_ENABLED
    if (EncryptionHooks::get(serviceContext)->restartRequired()) {
        exitCleanly(EXIT_CLEAN);
    }
#endif

    // Warn if we detect configurations for multiple registered storage engines in the same
    // configuration file/environment.
    if (serverGlobalParams.parsedOpts.hasField("storage")) {
        BSONElement storageElement = serverGlobalParams.parsedOpts.getField("storage");
        invariant(storageElement.isABSONObj());
        for (auto&& e : storageElement.Obj()) {
            // Ignore if field name under "storage" matches current storage engine.
            if (storageGlobalParams.engine == e.fieldName()) {
                continue;
            }

            // Warn if field name matches non-active registered storage engine.
            if (isRegisteredStorageEngine(serviceContext, e.fieldName())) {
                warning() << "Detected configuration for non-active storage engine "
                          << e.fieldName() << " when current storage engine is "
                          << storageGlobalParams.engine;
            }
        }
    }

    // Disallow running a storage engine that doesn't support capped collections with --profile
    if (!serviceContext->getStorageEngine()->supportsCappedCollections() &&
        serverGlobalParams.defaultProfile != 0) {
        log() << "Running " << storageGlobalParams.engine << " with profiling is not supported. "
              << "Make sure you are not using --profile.";
        exitCleanly(EXIT_BADOPTIONS);
    }

    // Disallow running WiredTiger with --nojournal in a replica set
    if (storageGlobalParams.engine == "wiredTiger" && !storageGlobalParams.dur &&
        replSettings.usingReplSets()) {
        log() << "Running wiredTiger without journaling in a replica set is not "
              << "supported. Make sure you are not using --nojournal and that "
              << "storage.journal.enabled is not set to 'false'.";
        exitCleanly(EXIT_BADOPTIONS);
    }

    logMongodStartupWarnings(storageGlobalParams, serverGlobalParams, serviceContext);

#ifdef MONGO_CONFIG_SSL
    if (sslGlobalParams.sslAllowInvalidCertificates &&
        ((serverGlobalParams.clusterAuthMode.load() == ServerGlobalParams::ClusterAuthMode_x509) ||
         sequenceContains(saslGlobalParams.authenticationMechanisms, "MONGODB-X509"))) {
        log() << "** WARNING: While invalid X509 certificates may be used to" << startupWarningsLog;
        log() << "**          connect to this server, they will not be considered"
              << startupWarningsLog;
        log() << "**          permissible for authentication." << startupWarningsLog;
        log() << startupWarningsLog;
    }
#endif

    {
        std::stringstream ss;
        ss << endl;
        ss << "*********************************************************************" << endl;
        ss << " ERROR: dbpath (" << storageGlobalParams.dbpath << ") does not exist." << endl;
        ss << " Create this directory or give existing directory in --dbpath." << endl;
        ss << " See http://dochub.mongodb.org/core/startingandstoppingmongo" << endl;
        ss << "*********************************************************************" << endl;
        uassert(10296, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.dbpath));
    }

    initializeSNMP();

    if (!storageGlobalParams.readOnly) {
        boost::filesystem::remove_all(storageGlobalParams.dbpath + "/_tmp/");
    }

    if (mongodGlobalParams.scriptingEnabled) {
        ScriptEngine::setup();
    }

    auto startupOpCtx = serviceContext->makeOperationContext(&cc());

    bool canCallFCVSetIfCleanStartup =
        !storageGlobalParams.readOnly && (storageGlobalParams.engine != "devnull");
    if (canCallFCVSetIfCleanStartup && !replSettings.usingReplSets()) {
        Lock::GlobalWrite lk(startupOpCtx.get());
        FeatureCompatibilityVersion::setIfCleanStartup(startupOpCtx.get(),
                                                       repl::StorageInterface::get(serviceContext));
    }

    auto swNonLocalDatabases = repairDatabasesAndCheckVersion(startupOpCtx.get());
    if (!swNonLocalDatabases.isOK()) {
        // SERVER-31611 introduced a return value to `repairDatabasesAndCheckVersion`. Previously,
        // a failing condition would fassert. SERVER-31611 covers a case where the binary (3.6) is
        // refusing to start up because it refuses acknowledgement of FCV 3.2 and requires the
        // user to start up with an older binary. Thus shutting down the server must leave the
        // datafiles in a state that the older binary can start up. This requires going through a
        // clean shutdown.
        //
        // The invariant is *not* a statement that `repairDatabasesAndCheckVersion` must return
        // `MustDowngrade`. Instead, it is meant as a guardrail to protect future developers from
        // accidentally buying into this behavior. New errors that are returned from the method
        // may or may not want to go through a clean shutdown, and they likely won't want the
        // program to return an exit code of `EXIT_NEED_DOWNGRADE`.
        severe(LogComponent::kControl) << "** IMPORTANT: "
                                       << swNonLocalDatabases.getStatus().reason();
        invariant(swNonLocalDatabases == ErrorCodes::MustDowngrade);
        exitCleanly(EXIT_NEED_DOWNGRADE);
    }

    // Assert that the in-memory featureCompatibilityVersion parameter has been explicitly set. If
    // we are part of a replica set and are started up with no data files, we do not set the
    // featureCompatibilityVersion until a primary is chosen. For this case, we expect the in-memory
    // featureCompatibilityVersion parameter to still be uninitialized until after startup.
    if (canCallFCVSetIfCleanStartup &&
        (!replSettings.usingReplSets() || swNonLocalDatabases.getValue())) {
        invariant(serverGlobalParams.featureCompatibility.isVersionInitialized());
    }

    if (storageGlobalParams.upgrade) {
        log() << "finished checking dbs";
        exitCleanly(EXIT_CLEAN);
    }

    // Start up health log writer thread.
    HealthLog::get(startupOpCtx.get()).startup();

    auto const globalAuthzManager = AuthorizationManager::get(serviceContext);
    uassertStatusOK(globalAuthzManager->initialize(startupOpCtx.get()));

    // This is for security on certain platforms (nonce generation)
    srand((unsigned)(curTimeMicros64()) ^ (unsigned(uintptr_t(&startupOpCtx))));

    if (globalAuthzManager->shouldValidateAuthSchemaOnStartup()) {
        Status status = verifySystemIndexes(startupOpCtx.get());
        if (!status.isOK()) {
            log() << redact(status);
            if (status == ErrorCodes::AuthSchemaIncompatible) {
                exitCleanly(EXIT_NEED_UPGRADE);
            } else if (status == ErrorCodes::NotMaster) {
                // Try creating the indexes if we become master.  If we do not become master,
                // the master will create the indexes and we will replicate them.
            } else {
                quickExit(EXIT_FAILURE);
            }
        }

        // SERVER-14090: Verify that auth schema version is schemaVersion26Final.
        int foundSchemaVersion;
        status =
            globalAuthzManager->getAuthorizationVersion(startupOpCtx.get(), &foundSchemaVersion);
        if (!status.isOK()) {
            log() << "Auth schema version is incompatible: "
                  << "User and role management commands require auth data to have "
                  << "at least schema version " << AuthorizationManager::schemaVersion26Final
                  << " but startup could not verify schema version: " << status;
            log() << "To manually repair the 'authSchema' document in the admin.system.version "
                     "collection, start up with --setParameter "
                     "startupAuthSchemaValidation=false to disable validation.";
            exitCleanly(EXIT_NEED_UPGRADE);
        }

        if (foundSchemaVersion <= AuthorizationManager::schemaVersion26Final) {
            log() << "This server is using MONGODB-CR, an authentication mechanism which "
                  << "has been removed from MongoDB 4.0. In order to upgrade the auth schema, "
                  << "first downgrade MongoDB binaries to version 3.6 and then run the "
                  << "authSchemaUpgrade command. "
                  << "See http://dochub.mongodb.org/core/3.0-upgrade-to-scram-sha-1";
            exitCleanly(EXIT_NEED_UPGRADE);
        }
    } else if (globalAuthzManager->isAuthEnabled()) {
        error() << "Auth must be disabled when starting without auth schema validation";
        exitCleanly(EXIT_BADOPTIONS);
    } else {
        // If authSchemaValidation is disabled and server is running without auth,
        // warn the user and continue startup without authSchema metadata checks.
        log() << startupWarningsLog;
        log() << "** WARNING: Startup auth schema validation checks are disabled for the "
                 "database."
              << startupWarningsLog;
        log() << "**          This mode should only be used to manually repair corrupted auth "
                 "data."
              << startupWarningsLog;
    }

    // This function may take the global lock.
    auto shardingInitialized = ShardingInitializationMongoD::get(startupOpCtx.get())
                                   ->initializeShardingAwarenessIfNeeded(startupOpCtx.get());
    if (shardingInitialized) {
        waitForShardRegistryReload(startupOpCtx.get()).transitional_ignore();
    }

    auto storageEngine = serviceContext->getStorageEngine();
    invariant(storageEngine);
    BackupCursorHooks::initialize(serviceContext, storageEngine);

    if (!storageGlobalParams.readOnly) {

        if (storageEngine->supportsCappedCollections()) {
            logStartup(startupOpCtx.get());
        }

        startMongoDFTDC();

        startFreeMonitoring(serviceContext);

        restartInProgressIndexesFromLastShutdown(startupOpCtx.get());

        if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
            // Note: For replica sets, ShardingStateRecovery happens on transition to primary.
            if (!repl::ReplicationCoordinator::get(startupOpCtx.get())->isReplEnabled()) {
                if (ShardingState::get(startupOpCtx.get())->enabled()) {
                    uassertStatusOK(ShardingStateRecovery::recover(startupOpCtx.get()));
                }
            }
        } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
            initializeGlobalShardingStateForMongoD(startupOpCtx.get(),
                                                   ConnectionString::forLocal(),
                                                   kDistLockProcessIdForConfigServer);

            Balancer::create(startupOpCtx->getServiceContext());

            ShardingCatalogManager::create(
                startupOpCtx->getServiceContext(),
                makeShardingTaskExecutor(executor::makeNetworkInterface("AddShard-TaskExecutor")));

            Grid::get(startupOpCtx.get())->setShardingInitialized();
        } else if (replSettings.usingReplSets()) {  // standalone replica set
            auto keysCollectionClient = stdx::make_unique<KeysCollectionClientDirect>();
            auto keyManager = std::make_shared<KeysCollectionManager>(
                KeysCollectionManager::kKeyManagerPurposeString,
                std::move(keysCollectionClient),
                Seconds(KeysRotationIntervalSec));
            keyManager->startMonitoring(startupOpCtx->getServiceContext());

            LogicalTimeValidator::set(startupOpCtx->getServiceContext(),
                                      stdx::make_unique<LogicalTimeValidator>(keyManager));
        }

        repl::ReplicationCoordinator::get(startupOpCtx.get())->startup(startupOpCtx.get());
        const unsigned long long missingRepl =
            checkIfReplMissingFromCommandLine(startupOpCtx.get());
        if (missingRepl) {
            log() << startupWarningsLog;
            log() << "** WARNING: mongod started without --replSet yet " << missingRepl
                  << " documents are present in local.system.replset." << startupWarningsLog;
            log() << "**          Database contents may appear inconsistent with the oplog and may "
                     "appear to not contain"
                  << startupWarningsLog;
            log() << "**          writes that were visible when this node was running as part of a "
                     "replica set."
                  << startupWarningsLog;
            log() << "**          Restart with --replSet unless you are doing maintenance and no "
                     "other clients are connected."
                  << startupWarningsLog;
            log() << "**          The TTL collection monitor will not start because of this."
                  << startupWarningsLog;
            log() << "**         ";
            log() << " For more info see http://dochub.mongodb.org/core/ttlcollections";
            log() << startupWarningsLog;
        } else {
            startTTLBackgroundJob();
        }

        if (replSettings.usingReplSets() || !internalValidateFeaturesAsMaster) {
            serverGlobalParams.validateFeaturesAsMaster.store(false);
        }
    }

    startClientCursorMonitor();

    PeriodicTask::startRunningPeriodicTasks();

    SessionKiller::set(serviceContext,
                       std::make_shared<SessionKiller>(serviceContext, killSessionsLocal));

    // Start up a background task to periodically check for and kill expired transactions; and a
    // background task to periodically check for and decrease cache pressure by decreasing the
    // target size setting for the storage engine's window of available snapshots.
    //
    // Only do this on storage engines supporting snapshot reads, which hold resources we wish to
    // release periodically in order to avoid storage cache pressure build up.
    if (storageEngine->supportsReadConcernSnapshot()) {
        startPeriodicThreadToAbortExpiredTransactions(serviceContext);
        startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(serviceContext);
    }

    // Set up the logical session cache
    LogicalSessionCacheServer kind = LogicalSessionCacheServer::kStandalone;
    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
        kind = LogicalSessionCacheServer::kSharded;
    } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        kind = LogicalSessionCacheServer::kConfigServer;
    } else if (replSettings.usingReplSets()) {
        kind = LogicalSessionCacheServer::kReplicaSet;
    }

    auto sessionCache = makeLogicalSessionCacheD(kind);
    LogicalSessionCache::set(serviceContext, std::move(sessionCache));

    // MessageServer::run will return when exit code closes its socket and we don't need the
    // operation context anymore
    startupOpCtx.reset();

    auto start = serviceContext->getServiceExecutor()->start();
    if (!start.isOK()) {
        error() << "Failed to start the service executor: " << start;
        return EXIT_NET_ERROR;
    }

    start = serviceContext->getServiceEntryPoint()->start();
    if (!start.isOK()) {
        error() << "Failed to start the service entry point: " << start;
        return EXIT_NET_ERROR;
    }

    if (!storageGlobalParams.repair) {
        start = serviceContext->getTransportLayer()->start();
        if (!start.isOK()) {
            error() << "Failed to start the listener: " << start.toString();
            return EXIT_NET_ERROR;
        }
    }

    serviceContext->notifyStartupComplete();

#ifndef _WIN32
    mongo::signalForkSuccess();
#else
    if (ntservice::shouldStartService()) {
        ntservice::reportStatus(SERVICE_RUNNING);
        log() << "Service running";
    }
#endif

    if (MONGO_FAIL_POINT(shutdownAtStartup)) {
        log() << "starting clean exit via failpoint";
        exitCleanly(EXIT_CLEAN);
    }

    MONGO_IDLE_THREAD_BLOCK;
    return waitForShutdown();
}
Example #11
bool Socket::connect(SockAddr& remote) {
    _remote = remote;

    _fd = ::socket(remote.getType(), SOCK_STREAM, 0);
    if (_fd == INVALID_SOCKET) {
        networkWarnWithDescription(*this, "socket");
        return false;
    }

    if (!setBlock(_fd, false)) {
        networkWarnWithDescription(*this, "set socket to non-blocking mode");
        return false;
    }

    const Milliseconds connectTimeoutMillis(static_cast<int64_t>(
        _timeout > 0 ? std::min(kMaxConnectTimeoutMS, (_timeout * 1000)) : kMaxConnectTimeoutMS));
    const Date_t expiration = Date_t::now() + connectTimeoutMillis;

    bool connectSucceeded = ::connect(_fd, _remote.raw(), _remote.addressSize) == 0;

    if (!connectSucceeded) {
#ifdef _WIN32
        if (WSAGetLastError() != WSAEWOULDBLOCK) {
            networkWarnWithDescription(*this, "connect");
            return false;
        }
#else
        if (errno != EINTR && errno != EINPROGRESS) {
            networkWarnWithDescription(*this, "connect");
            return false;
        }
#endif

        pollfd pfd;
        pfd.fd = _fd;
        pfd.events = POLLOUT;

        while (true) {
            const auto timeout = std::max(Milliseconds(0), expiration - Date_t::now());

            int pollReturn = socketPoll(&pfd, 1, timeout.count());
#ifdef _WIN32
            if (pollReturn == SOCKET_ERROR) {
                networkWarnWithDescription(*this, "poll");
                return false;
            }
#else
            if (pollReturn == -1) {
                if (errno != EINTR) {
                    networkWarnWithDescription(*this, "poll");
                    return false;
                }

                // EINTR in poll, try again
                continue;
            }
#endif
            // No activity for the full duration of the timeout.
            if (pollReturn == 0) {
                warning() << "Failed to connect to " << _remote.getAddr() << ":"
                          << _remote.getPort() << " after " << connectTimeoutMillis
                          << " milliseconds, giving up.";
                return false;
            }

            // We had a result, see if there's an error on the socket.
            int optVal;
            socklen_t optLen = sizeof(optVal);
            if (::getsockopt(
                    _fd, SOL_SOCKET, SO_ERROR, reinterpret_cast<char*>(&optVal), &optLen) == -1) {
                networkWarnWithDescription(*this, "getsockopt");
                return false;
            }
            if (optVal != 0) {
                networkWarnWithDescription(*this, "checking socket for error after poll", optVal);
                return false;
            }

            // We had activity and we don't have errors on the socket, we're connected.
            break;
        }
    }

    if (!setBlock(_fd, true)) {
        networkWarnWithDescription(*this, "could not set socket to blocking mode");
        return false;
    }

    if (_timeout > 0) {
        setTimeout(_timeout);
    }

    if (remote.getType() != AF_UNIX)
        disableNagle(_fd);

#ifdef SO_NOSIGPIPE
    // ignore SIGPIPE signals on osx, to avoid process exit
    const int one = 1;
    setsockopt(_fd, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int));
#endif

    _local = getLocalAddrForBoundSocketFd(_fd);

    _fdCreationMicroSec = curTimeMicros64();

    _awaitingHandshake = false;

    return true;
}
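The core of this revision is the standard non-blocking connect handshake: put the socket in non-blocking mode, issue `::connect`, poll for writability until the deadline, then read `SO_ERROR` to learn whether the asynchronous connect actually succeeded. A condensed POSIX-only sketch of that handshake (no Windows path; unlike the original, the poll timeout here is not recomputed against a fixed expiration after EINTR):

#include <fcntl.h>
#include <poll.h>
#include <sys/socket.h>
#include <cerrno>

// Hypothetical helper, not MongoDB API. Returns true once the socket is
// connected; the caller owns fd and would restore blocking mode afterwards,
// as the original does with setBlock(_fd, true).
bool connectWithTimeout(int fd, const sockaddr* addr, socklen_t len, int timeoutMs) {
    fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);

    if (::connect(fd, addr, len) == 0)
        return true;   // connected immediately (e.g. loopback)
    if (errno != EINPROGRESS && errno != EINTR)
        return false;  // hard failure

    pollfd pfd;
    pfd.fd = fd;
    pfd.events = POLLOUT;  // writable == connect finished (success or failure)

    int rc;
    do {
        rc = ::poll(&pfd, 1, timeoutMs);
    } while (rc == -1 && errno == EINTR);  // retry interrupted polls
    if (rc <= 0)
        return false;  // 0 == timed out, -1 == poll error

    int err = 0;
    socklen_t errLen = sizeof(err);
    if (::getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errLen) == -1 || err != 0)
        return false;  // the async connect failed; err holds the errno

    return true;
}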
Example #12
 void CurOp::ensureStarted() {
     if ( _start == 0 )
         _start = curTimeMicros64();
 }
Example #13
ExitCode _initAndListen(int listenPort) {
    Client::initThread("initandlisten");

    _initWireSpec();
    auto globalServiceContext = getGlobalServiceContext();

    globalServiceContext->setFastClockSource(FastClockSourceFactory::create(Milliseconds(10)));
    globalServiceContext->setOpObserver(stdx::make_unique<OpObserver>());

    DBDirectClientFactory::get(globalServiceContext)
        .registerImplementation([](OperationContext* txn) {
            return std::unique_ptr<DBClientBase>(new DBDirectClient(txn));
        });

    const repl::ReplSettings& replSettings = repl::getGlobalReplicationCoordinator()->getSettings();

    {
        ProcessId pid = ProcessId::getCurrent();
        LogstreamBuilder l = log(LogComponent::kControl);
        l << "MongoDB starting : pid=" << pid << " port=" << serverGlobalParams.port
          << " dbpath=" << storageGlobalParams.dbpath;
        if (replSettings.isMaster())
            l << " master=" << replSettings.isMaster();
        if (replSettings.isSlave())
            l << " slave=" << (int)replSettings.isSlave();

        const bool is32bit = sizeof(int*) == 4;
        l << (is32bit ? " 32" : " 64") << "-bit host=" << getHostNameCached() << endl;
    }

    DEV log(LogComponent::kControl) << "DEBUG build (which is slower)" << endl;

#if defined(_WIN32)
    VersionInfoInterface::instance().logTargetMinOS();
#endif

    logProcessDetails();

    checked_cast<ServiceContextMongoD*>(getGlobalServiceContext())->createLockFile();

    transport::TransportLayerLegacy::Options options;
    options.port = listenPort;
    options.ipList = serverGlobalParams.bind_ip;

    auto sep =
        stdx::make_unique<ServiceEntryPointMongod>(getGlobalServiceContext()->getTransportLayer());
    auto sepPtr = sep.get();

    getGlobalServiceContext()->setServiceEntryPoint(std::move(sep));

    // Create, start, and attach the TL
    auto transportLayer = stdx::make_unique<transport::TransportLayerLegacy>(options, sepPtr);
    auto res = transportLayer->setup();
    if (!res.isOK()) {
        error() << "Failed to set up listener: " << res;
        return EXIT_NET_ERROR;
    }

    std::shared_ptr<DbWebServer> dbWebServer;
    if (serverGlobalParams.isHttpInterfaceEnabled) {
        dbWebServer.reset(new DbWebServer(serverGlobalParams.bind_ip,
                                          serverGlobalParams.port + 1000,
                                          getGlobalServiceContext(),
                                          new RestAdminAccess()));
        if (!dbWebServer->setupSockets()) {
            error() << "Failed to set up sockets for HTTP interface during startup.";
            return EXIT_NET_ERROR;
        }
    }

    getGlobalServiceContext()->initializeGlobalStorageEngine();

#ifdef MONGO_CONFIG_WIREDTIGER_ENABLED
    if (WiredTigerCustomizationHooks::get(getGlobalServiceContext())->restartRequired()) {
        exitCleanly(EXIT_CLEAN);
    }
#endif

    // Warn if we detect configurations for multiple registered storage engines in
    // the same configuration file/environment.
    if (serverGlobalParams.parsedOpts.hasField("storage")) {
        BSONElement storageElement = serverGlobalParams.parsedOpts.getField("storage");
        invariant(storageElement.isABSONObj());
        BSONObj storageParamsObj = storageElement.Obj();
        BSONObjIterator i = storageParamsObj.begin();
        while (i.more()) {
            BSONElement e = i.next();
            // Ignore if field name under "storage" matches current storage engine.
            if (storageGlobalParams.engine == e.fieldName()) {
                continue;
            }

            // Warn if field name matches non-active registered storage engine.
            if (getGlobalServiceContext()->isRegisteredStorageEngine(e.fieldName())) {
                warning() << "Detected configuration for non-active storage engine "
                          << e.fieldName() << " when current storage engine is "
                          << storageGlobalParams.engine;
            }
        }
    }

    if (!getGlobalServiceContext()->getGlobalStorageEngine()->getSnapshotManager()) {
        if (moe::startupOptionsParsed.count("replication.enableMajorityReadConcern") &&
            moe::startupOptionsParsed["replication.enableMajorityReadConcern"].as<bool>()) {
            // Note: we are intentionally only erroring if the user explicitly requested that we
            // enable majority read concern. We do not error if they are implicitly enabled for
            // CSRS because a required step in the upgrade procedure can involve an mmapv1 node in
            // the CSRS in the REMOVED state. This is handled by the TopologyCoordinator.
            invariant(replSettings.isMajorityReadConcernEnabled());
            severe() << "Majority read concern requires a storage engine that supports"
                     << " snapshots, such as wiredTiger. " << storageGlobalParams.engine
                     << " does not support snapshots.";
            exitCleanly(EXIT_BADOPTIONS);
        }
    }

    logMongodStartupWarnings(storageGlobalParams, serverGlobalParams);

    {
        stringstream ss;
        ss << endl;
        ss << "*********************************************************************" << endl;
        ss << " ERROR: dbpath (" << storageGlobalParams.dbpath << ") does not exist." << endl;
        ss << " Create this directory or give existing directory in --dbpath." << endl;
        ss << " See http://dochub.mongodb.org/core/startingandstoppingmongo" << endl;
        ss << "*********************************************************************" << endl;
        uassert(10296, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.dbpath));
    }

    {
        stringstream ss;
        ss << "repairpath (" << storageGlobalParams.repairpath << ") does not exist";
        uassert(12590, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.repairpath));
    }

    // TODO:  This should go into a MONGO_INITIALIZER once we have figured out the correct
    // dependencies.
    if (snmpInit) {
        snmpInit();
    }

    if (!storageGlobalParams.readOnly) {
        boost::filesystem::remove_all(storageGlobalParams.dbpath + "/_tmp/");
    }

    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalRecoverOnly)
        return EXIT_NET_ERROR;

    if (mongodGlobalParams.scriptingEnabled) {
        ScriptEngine::setup();
    }

    auto startupOpCtx = getGlobalServiceContext()->makeOperationContext(&cc());

    repairDatabasesAndCheckVersion(startupOpCtx.get());

    if (storageGlobalParams.upgrade) {
        log() << "finished checking dbs";
        exitCleanly(EXIT_CLEAN);
    }

    uassertStatusOK(getGlobalAuthorizationManager()->initialize(startupOpCtx.get()));

    /* this is for security on certain platforms (nonce generation) */
    srand((unsigned)(curTimeMicros64() ^ startupSrandTimer.micros()));

    // The snapshot thread provides historical collection level and lock statistics for use
    // by the web interface. Only needed when HTTP is enabled.
    if (serverGlobalParams.isHttpInterfaceEnabled) {
        statsSnapshotThread.go();

        invariant(dbWebServer);
        stdx::thread web(stdx::bind(&webServerListenThread, dbWebServer));
        web.detach();
    }

#ifndef _WIN32
    mongo::signalForkSuccess();
#endif
    AuthorizationManager* globalAuthzManager = getGlobalAuthorizationManager();
    if (globalAuthzManager->shouldValidateAuthSchemaOnStartup()) {
        Status status = authindex::verifySystemIndexes(startupOpCtx.get());
        if (!status.isOK()) {
            log() << redact(status);
            exitCleanly(EXIT_NEED_UPGRADE);
        }

        // SERVER-14090: Verify that auth schema version is schemaVersion26Final.
        int foundSchemaVersion;
        status =
            globalAuthzManager->getAuthorizationVersion(startupOpCtx.get(), &foundSchemaVersion);
        if (!status.isOK()) {
            log() << "Auth schema version is incompatible: "
                  << "User and role management commands require auth data to have "
                  << "at least schema version " << AuthorizationManager::schemaVersion26Final
                  << " but startup could not verify schema version: " << status;
            exitCleanly(EXIT_NEED_UPGRADE);
        }
        if (foundSchemaVersion < AuthorizationManager::schemaVersion26Final) {
            log() << "Auth schema version is incompatible: "
                  << "User and role management commands require auth data to have "
                  << "at least schema version " << AuthorizationManager::schemaVersion26Final
                  << " but found " << foundSchemaVersion << ". In order to upgrade "
                  << "the auth schema, first downgrade MongoDB binaries to version "
                  << "2.6 and then run the authSchemaUpgrade command.";
            exitCleanly(EXIT_NEED_UPGRADE);
        }
    } else if (globalAuthzManager->isAuthEnabled()) {
        error() << "Auth must be disabled when starting without auth schema validation";
        exitCleanly(EXIT_BADOPTIONS);
    } else {
        // If authSchemaValidation is disabled and server is running without auth,
        // warn the user and continue startup without authSchema metadata checks.
        log() << startupWarningsLog;
        log() << "** WARNING: Startup auth schema validation checks are disabled for the "
                 "database."
              << startupWarningsLog;
        log() << "**          This mode should only be used to manually repair corrupted auth "
                 "data."
              << startupWarningsLog;
    }

    auto shardingInitialized =
        uassertStatusOK(ShardingState::get(startupOpCtx.get())
                            ->initializeShardingAwarenessIfNeeded(startupOpCtx.get()));
    if (shardingInitialized) {
        reloadShardRegistryUntilSuccess(startupOpCtx.get());
    }

    if (!storageGlobalParams.readOnly) {
        logStartup(startupOpCtx.get());

        startFTDC();

        getDeleter()->startWorkers();

        restartInProgressIndexesFromLastShutdown(startupOpCtx.get());

        if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
            // Note: For replica sets, ShardingStateRecovery happens on transition to primary.
            if (!repl::getGlobalReplicationCoordinator()->isReplEnabled()) {
                uassertStatusOK(ShardingStateRecovery::recover(startupOpCtx.get()));
            }
        } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
            uassertStatusOK(
                initializeGlobalShardingStateForMongod(startupOpCtx.get(),
                                                       ConnectionString::forLocal(),
                                                       kDistLockProcessIdForConfigServer));
            Balancer::create(startupOpCtx->getServiceContext());
        }

        repl::getGlobalReplicationCoordinator()->startup(startupOpCtx.get());

        const unsigned long long missingRepl =
            checkIfReplMissingFromCommandLine(startupOpCtx.get());
        if (missingRepl) {
            log() << startupWarningsLog;
            log() << "** WARNING: mongod started without --replSet yet " << missingRepl
                  << " documents are present in local.system.replset" << startupWarningsLog;
            log() << "**          Restart with --replSet unless you are doing maintenance and "
                  << " no other clients are connected." << startupWarningsLog;
            log() << "**          The TTL collection monitor will not start because of this."
                  << startupWarningsLog;
            log() << "**         ";
            log() << " For more info see http://dochub.mongodb.org/core/ttlcollections";
            log() << startupWarningsLog;
        } else {
            startTTLBackgroundJob();
        }

        if (!replSettings.usingReplSets() && !replSettings.isSlave() &&
            storageGlobalParams.engine != "devnull") {
            ScopedTransaction transaction(startupOpCtx.get(), MODE_X);
            Lock::GlobalWrite lk(startupOpCtx.get()->lockState());
            FeatureCompatibilityVersion::setIfCleanStartup(
                startupOpCtx.get(), repl::StorageInterface::get(getGlobalServiceContext()));
        }
    }

    startClientCursorMonitor();

    PeriodicTask::startRunningPeriodicTasks();

    // MessageServer::run will return when exit code closes its socket and we don't need the
    // operation context anymore
    startupOpCtx.reset();

    auto start = getGlobalServiceContext()->addAndStartTransportLayer(std::move(transportLayer));
    if (!start.isOK()) {
        error() << "Failed to start the listener: " << start.toString();
        return EXIT_NET_ERROR;
    }

    return waitForShutdown();
}
Example #14
 void run() {
     int iterations = 1000*1000;
     while(iterations--){
         curTimeMicros64();
     }
 }
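As written, this loop exercises `curTimeMicros64()` a million times but never reports anything. A self-contained variant that times the loop and prints the per-call cost, with std::chrono standing in for the timer under test:

#include <chrono>
#include <cstdint>
#include <cstdio>

static uint64_t nowMicros() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    const int iterations = 1000 * 1000;
    volatile uint64_t sink = 0;  // keeps the calls from being optimized away

    uint64_t start = nowMicros();
    for (int i = 0; i < iterations; ++i)
        sink = nowMicros();
    uint64_t elapsed = nowMicros() - start;

    std::printf("%d calls in %llu us (%.1f ns/call)\n",
                iterations,
                (unsigned long long)elapsed,
                1000.0 * elapsed / iterations);
    (void)sink;
    return 0;
}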
Example #15
File: mms.cpp Project: fizx/mongo
        void run(){
        if ( _token.size() == 0  && _name.size() == 0 ){
            log(1) << "mms not configured" << endl;
            return;
        }

        if ( _token.size() == 0 ){
            log() << "no token for mms - not running" << endl;
            return;
        }
        
        if ( _name.size() == 0 ){
            log() << "no name for mms - not running" << endl;
            return;
        }

        log() << "mms monitor staring...  token:" << _token << " name:" << _name << " interval: " << _secsToSleep << endl;

        unsigned long long lastTime = 0;
        unsigned long long lastLockTime = 0;
        
        while ( ! inShutdown() ){
            sleepsecs( _secsToSleep );
            
            stringstream url;
            url << _baseurl << _token << "?";
            url << "monitor_name=" << _name << "&";
            url << "version=" << versionString << "&";
            url << "git_hash=" << gitVersion() << "&";

            { //percent_locked
                unsigned long long time = curTimeMicros64();
                unsigned long long start , lock;
                dbMutexInfo.timingInfo( start , lock );
                if ( lastTime ){
                    double timeDiff = (double) (time - lastTime);
                    double lockDiff = (double) (lock - lastLockTime);
                    url << "percent_locked=" << (int)ceil( 100 * ( lockDiff / timeDiff ) ) << "&";
                }
                lastTime = time;
                lastLockTime = lock;
            }
            
            vector< string > dbNames;
            getDatabaseNames( dbNames );
            boost::intmax_t totalSize = 0;
            for ( vector< string >::iterator i = dbNames.begin(); i != dbNames.end(); ++i ) {
                boost::intmax_t size = dbSize( i->c_str() );
                totalSize += size;
            }
            url << "data_size=" << totalSize / ( 1024 * 1024 ) << "&";

            
            
            /* TODO: 
              message_operations
              update_operations
              insert_operations
              get_more_operations
              delete_operations
              kill_cursors_operations 
            */

            log(1) << "mms url: " << url.str() << endl;
            
            try {
                HttpClient c;
                map<string,string> headers;
                stringstream ss;
                int rc = c.get( url.str() , headers , ss );
                log(1) << "\t response code: " << rc << endl;
                if ( rc != 200 ){
                    log() << "mms error response code:" << rc << endl;
                    log(1) << "mms error body:" << ss.str() << endl;
                }
            }
            catch ( std::exception& e ){
                log() << "mms get exception: " << e.what() << endl;
            }
        }
        }
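The `percent_locked` block is a delta computation over two cumulative microsecond counters: if 60 s of wall time pass between samples and the lock counter advances by 12 s, it reports ceil(100 × 12/60) = 20%. A hedged sketch of that calculation with hypothetical names:

#include <cmath>

// Percentage of the sample interval spent holding the lock, computed from
// two samples of (cumulative wall clock, cumulative locked time) in
// microseconds. Returns -1 when there is no usable previous sample.
int percentLocked(unsigned long long prevTime, unsigned long long prevLock,
                  unsigned long long curTime, unsigned long long curLock) {
    if (prevTime == 0 || curTime <= prevTime)
        return -1;  // first pass, or the clock did not advance
    double timeDiff = (double)(curTime - prevTime);
    double lockDiff = (double)(curLock - prevLock);
    return (int)std::ceil(100.0 * (lockDiff / timeDiff));
}

// e.g. percentLocked(1, 0, 60000001, 12000000) == 20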
Example #16
/**
 * The main durability thread loop. There is a single instance of this function running.
 */
static void durThread(ClockSource* cs, int64_t serverStartMs) {
    Client::initThread("durability");

    log() << "Durability thread started";

    bool samePartition = true;
    try {
        const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
    } catch (...) {
    }

    // Spawn the journal writer thread
    JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
    journalWriter.start();

    // Used as an estimate of how much / how fast to remap
    uint64_t commitCounter(0);
    uint64_t estimatedPrivateMapSize(0);
    uint64_t remapLastTimestamp(0);

    while (shutdownRequested.loadRelaxed() == 0) {
        unsigned ms = storageGlobalParams.journalCommitIntervalMs;
        if (ms == 0) {
            ms = samePartition ? 100 : 30;
        }

        // +1 so it never goes down to zero
        const int64_t oneThird = (ms / 3) + 1;

        // Reset the stats based on the reset interval
        if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
            stats.reset();
        }

        try {
            stdx::unique_lock<stdx::mutex> lock(flushMutex);

            for (unsigned i = 0; i <= 2; i++) {
                if (stdx::cv_status::no_timeout ==
                    flushRequested.wait_for(lock, Milliseconds(oneThird).toSystemDuration())) {
                    // Someone forced a flush
                    break;
                }

                if (commitNotify.nWaiting()) {
                    // One or more getLastError j:true is pending
                    break;
                }

                if (commitJob.bytes() > UncommittedBytesLimit / 2) {
                    // The number of written bytes is growing
                    break;
                }
            }

            // The commit logic itself
            LOG(4) << "groupCommit begin";

            Timer t;

            const ServiceContext::UniqueOperationContext txnPtr = cc().makeOperationContext();
            OperationContext& txn = *txnPtr;
            AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(txn.lockState());

            // We need to snapshot the commitNumber after the flush lock has been obtained,
            // because at this point we know that we have a stable snapshot of the data.
            const CommitNotifier::When commitNumber(commitNotify.now());

            LOG(4) << "Processing commit number " << commitNumber;

            if (!commitJob.hasWritten()) {
                // We do not need the journal lock anymore. Free it here, for the really
                // unlikely possibility that the writeBuffer command below blocks.
                autoFlushLock.release();

                // getlasterror request could have came after the data was already committed.
                // No need to call committingReset though, because we have not done any
                // writes (hasWritten == false).
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                buffer->setNoop();
                buffer->journalListenerToken = getJournalListener()->getToken();

                journalWriter.writeBuffer(buffer, commitNumber);
            } else {
                // This copies all the in-memory changes into the journal writer's buffer.
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder(), cs, serverStartMs);

                estimatedPrivateMapSize += commitJob.bytes();
                commitCounter++;

                // Now that the write intents have been copied to the buffer, the commit job is
                // free to be reused. We need to reset the commit job's contents while under
                // the S flush lock, because otherwise someone might have done a write and this
                // would wipe out their changes without ever being committed.
                commitJob.committingReset();

                double systemMemoryPressurePercentage =
                    ProcessInfo::getSystemMemoryPressurePercentage();

                // Now that the in-memory modifications have been collected, we can potentially
                // release the flush lock if remap is not necessary.
                // When we remap due to memory pressure, we look at two criteria
                // 1. If the amount of 4k pages touched exceeds 512 MB,
                //    a reasonable estimate of memory pressure on Linux.
                // 2. Check if the amount of free memory on the machine is running low,
                //    since #1 underestimates the memory pressure on Windows, which
                //    commits in 64MB chunks.
                const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
                    (systemMemoryPressurePercentage > 0.0) ||
                    (commitCounter % NumCommitsBeforeRemap == 0) ||
                    (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);

                double remapFraction = 0.0;

                if (shouldRemap) {
                    // We want to remap all private views about every 2 seconds. There could be
                    // ~1000 views so we do a little each pass. There will be copy on write
                    // faults after remapping, so doing a little bit at a time will avoid big
                    // load spikes when the pages are touched.
                    //
                    // TODO: Instead of the time-based logic above, consider using ProcessInfo
                    //       and watching for getResidentSize to drop, which is more precise.
                    remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;

                    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
                        remapFraction = 1;
                    } else {
                        // We don't want to get close to the UncommittedBytesLimit
                        const double remapMemFraction =
                            estimatedPrivateMapSize / ((double)UncommittedBytesLimit);

                        remapFraction = std::max(remapMemFraction, remapFraction);

                        remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
                    }
                } else {
                    LOG(4) << "Early release flush lock";

                    // We will not be doing a remap so drop the flush lock. That way we will be
                    // doing the journal I/O outside of lock, so other threads can proceed.
                    invariant(!shouldRemap);
                    autoFlushLock.release();
                }

                buffer->journalListenerToken = getJournalListener()->getToken();
                // Request async I/O to the journal. This may block.
                journalWriter.writeBuffer(buffer, commitNumber);

                // Data has now been written to the shared view. If remap was requested, we
                // would still be holding the S flush lock here, so just upgrade it and
                // perform the remap.
                if (shouldRemap) {
                    // Need to wait for the previously scheduled journal writes to complete
                    // before any remap is attempted.
                    journalWriter.flush();
                    journalWriter.assertIdle();

                    // Upgrading the journal lock to flush stops all activity on the system,
                    // because we will be remapping memory and we don't want readers to be
                    // accessing it. Technically this step could be avoided on systems, which
                    // support atomic remap.
                    autoFlushLock.upgradeFlushLockToExclusive();
                    remapPrivateView(remapFraction);

                    autoFlushLock.release();

                    // Reset the private map estimate outside of the lock
                    estimatedPrivateMapSize = 0;
                    remapLastTimestamp = curTimeMicros64();

                    stats.curr()->_commitsInWriteLock++;
                    stats.curr()->_commitsInWriteLockMicros += t.micros();
                }
            }

            stats.curr()->_commits++;
            stats.curr()->_commitsMicros += t.micros();

            LOG(4) << "groupCommit end";
        } catch (DBException& e) {
            severe() << "dbexception in durThread causing immediate shutdown: " << e.toString();
            invariant(false);
        } catch (std::ios_base::failure& e) {
            severe() << "ios_base exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::bad_alloc& e) {
            severe() << "bad_alloc exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::exception& e) {
            severe() << "exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (...) {
            severe() << "unhandled exception in durThread causing immediate shutdown";
            invariant(false);
        }
    }

    // Stops the journal thread and ensures everything was written
    invariant(!commitJob.hasWritten());

    journalWriter.flush();
    journalWriter.shutdown();

    log() << "Durability thread stopped";
}
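
The remap fraction used in the group commit above blends three signals, taking the maximum of them: time elapsed since the last remap, the estimated private map size relative to UncommittedBytesLimit, and system memory pressure. A minimal standalone sketch of that policy follows; the function and constant names are assumptions for illustration, not MongoDB identifiers.

    #include <algorithm>
    #include <cstdint>

    // Hypothetical constant mirroring the policy above: remap everything
    // roughly every 2 seconds.
    static const double kRemapIntervalMicros = 2000000.0;

    // Returns the fraction of private views to remap this pass, in [0, 1].
    double computeRemapFraction(uint64_t nowMicros,
                                uint64_t lastRemapMicros,
                                uint64_t estimatedPrivateMapBytes,
                                uint64_t uncommittedBytesLimit,
                                double memoryPressureFraction,
                                bool alwaysRemap) {
        if (alwaysRemap)
            return 1.0;  // debug option: remap all views on every pass

        // Time share: one second into a two-second interval means half the views.
        double fraction = (nowMicros - lastRemapMicros) / kRemapIntervalMicros;

        // Never drift close to the uncommitted bytes limit.
        fraction = std::max(fraction,
                            estimatedPrivateMapBytes / double(uncommittedBytesLimit));

        // React to external memory pressure as well.
        fraction = std::max(fraction, memoryPressureFraction);

        return std::min(fraction, 1.0);
    }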
Example #17
void Stats::S::reset() {
    memset(this, 0, sizeof(*this));
    _startTimeMicros = curTimeMicros64();
}
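
The memset-based reset above is only safe because the stats struct is a flat bag of counters with no virtual functions or non-trivial members. A hedged sketch of a compile-time guard one could add under C++11; the struct shown is a simplified stand-in, not the real Stats::S.

    #include <cstring>
    #include <type_traits>

    struct S {
        unsigned long long _startTimeMicros;
        unsigned _commits;
        unsigned _commitsInWriteLock;
        unsigned _dtMillis;

        void reset() {
            // Valid only because S is a plain struct of counters; the assert
            // turns a future violation (e.g. adding a std::string member)
            // into a compile error instead of undefined behavior.
            static_assert(std::is_trivially_copyable<S>::value,
                          "memset reset requires a trivially copyable type");
            std::memset(this, 0, sizeof(*this));
        }
    };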
Example #18
    void CurOp::leave( Client::Context * context ) {
        unsigned long long now = curTimeMicros64();
        Top::global.record( _ns , _op , _lockType , now - _checkpoint , _command );
        _checkpoint = now;
    }
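
leave() above illustrates a checkpointing pattern: record the interval since the last checkpoint, then advance the checkpoint, so consecutive calls attribute non-overlapping slices of time. A hedged generic sketch of the same pattern; CheckpointTimer is a hypothetical name, and curTimeMicros64() is assumed available as in the examples above.

    #include <cstdint>

    extern uint64_t curTimeMicros64();  // as used throughout these examples

    // Each call reports the interval since the previous call and then moves
    // the checkpoint forward, so reported intervals never overlap or leave gaps.
    class CheckpointTimer {
    public:
        explicit CheckpointTimer(uint64_t nowMicros) : _checkpoint(nowMicros) {}

        uint64_t sinceLast() {
            uint64_t now = curTimeMicros64();
            uint64_t elapsed = now - _checkpoint;
            _checkpoint = now;
            return elapsed;
        }

    private:
        uint64_t _checkpoint;
    };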
Example #19
    void FileAllocator::run( FileAllocator * fa ) {
        setThreadName( "FileAllocator" );
        {
            // initialize unique temporary file name counter
            // TODO: SERVER-6055 -- Unify temporary file name selection
            SimpleMutex::scoped_lock lk(_uniqueNumberMutex);
            _uniqueNumber = curTimeMicros64();
        }
        while( 1 ) {
            {
                scoped_lock lk( fa->_pendingMutex );
                if ( fa->_pending.size() == 0 )
                    fa->_pendingUpdated.wait( lk.boost() );
            }
            while( 1 ) {
                string name;
                long size = 0;
                {
                    scoped_lock lk( fa->_pendingMutex );
                    if ( fa->_pending.size() == 0 )
                        break;
                    name = fa->_pending.front();
                    size = fa->_pendingSize[ name ];
                }

                string tmp;
                long fd = 0;
                try {
                    log() << "allocating new datafile " << name << ", filling with zeroes..." << endl;
                    
                    boost::filesystem::path parent = ensureParentDirCreated(name);
                    tmp = fa->makeTempFileName( parent );
                    ensureParentDirCreated(tmp);

#if defined(_WIN32)
                    fd = _open( tmp.c_str(), _O_RDWR | _O_CREAT | O_NOATIME, _S_IREAD | _S_IWRITE );
#else
                    fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR);
#endif
                    if ( fd < 0 ) {
                        log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
                        uasserted(10439, "");
                    }

#if defined(POSIX_FADV_DONTNEED)
                    if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) {
                        log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") " << errnoWithDescription() << endl;
                    }
#endif

                    Timer t;

                    /* make sure the file is the full desired length */
                    ensureLength( fd , size );

                    close( fd );
                    fd = 0;

                    if( rename(tmp.c_str(), name.c_str()) ) {
                        const string& errStr = errnoWithDescription();
                        const string& errMessage = str::stream()
                                << "error: couldn't rename " << tmp
                                << " to " << name << ' ' << errStr;
                        msgasserted(13653, errMessage);
                    }
                    flushMyDirectory(name);

                    log() << "done allocating datafile " << name << ", "
                          << "size: " << size/1024/1024 << "MB, "
                          << " took " << ((double)t.millis())/1000.0 << " secs"
                          << endl;

                    // no longer in a failed state. allow new writers.
                    fa->_failed = false;
                }
                catch ( const std::exception& e ) {
                    log() << "error: failed to allocate new file: " << name
                          << " size: " << size << ' ' << e.what()
                          << ".  will try again in 10 seconds" << endl;
                    if ( fd > 0 )
                        close( fd );
                    try {
                        if ( ! tmp.empty() )
                            boost::filesystem::remove( tmp );
                        boost::filesystem::remove( name );
                    } catch ( const std::exception& e ) {
                        log() << "error removing files: " << e.what() << endl;
                    }
                    scoped_lock lk( fa->_pendingMutex );
                    fa->_failed = true;
                    // not erasing from pending
                    fa->_pendingUpdated.notify_all();

                    sleepsecs(10);
                    continue;
                }

                {
                    scoped_lock lk( fa->_pendingMutex );
                    fa->_pendingSize.erase( name );
                    fa->_pending.pop_front();
                    fa->_pendingUpdated.notify_all();
                }
            }
        }
    }
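
FileAllocator::run above relies on a create-fill-rename protocol: the datafile is grown to its full size under a temporary name and only then renamed into place, so a crash mid-allocation leaves at most a stray temp file rather than a short datafile. A minimal POSIX sketch of that protocol; zeroFill() is a hypothetical stand-in for ensureLength() and merely extends the file with ftruncate, whereas the real code writes actual zeroes.

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <cstdio>
    #include <stdexcept>
    #include <string>

    // Hypothetical stand-in for ensureLength(): sparse-extend to full size.
    static void zeroFill(int fd, long size) {
        if (ftruncate(fd, size) != 0)
            throw std::runtime_error("ftruncate failed");
    }

    void allocateDatafile(const std::string& tmp, const std::string& name, long size) {
        int fd = open(tmp.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
        if (fd < 0)
            throw std::runtime_error("open failed");

        zeroFill(fd, size);  // the file reaches its full desired length first
        close(fd);

        // Only a fully sized file is renamed into place, so readers never
        // observe a partially allocated datafile.
        if (std::rename(tmp.c_str(), name.c_str()) != 0)
            throw std::runtime_error("rename failed");
    }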
Example #20
        static void _REMAPPRIVATEVIEW() {
            // TODO: Instead of the time-based logic below, consider using ProcessInfo
            //       and watching for getResidentSize to drop, which is more precise.

            static unsigned startAt;
            static unsigned long long lastRemap;

            LOG(4) << "journal REMAPPRIVATEVIEW" << endl;

            invariant(!commitJob.hasWritten());

            // We want to remap all private views about every 2 seconds. There could be
            // ~1000 views so we do a little each pass. More significantly, there will
            // be copy on write faults after remapping, so doing a little bit at a time
            // will avoid big load spikes when the pages are touched.
            unsigned long long now = curTimeMicros64();
            double fraction = (now-lastRemap)/2000000.0;
            if (storageGlobalParams.durOptions & StorageGlobalParams::DurAlwaysRemap)
                fraction = 1;
            lastRemap = now;

#if defined(_WIN32) || defined(__sunos__)
            // Note that this negatively affects performance.
            // We must grab the exclusive lock here because remapPrivateView() on Windows and
            // Solaris need to grab it as well, due to the lack of an atomic way to remap a
            // memory mapped file.
            // See SERVER-5723 for performance improvement.
            // See SERVER-5680 to see why this code is necessary on Windows.
            // See SERVER-8795 to see why this code is necessary on Solaris.
            LockMongoFilesExclusive lk;
#else
            LockMongoFilesShared lk;
#endif
            set<MongoFile*>& files = MongoFile::getAllFiles();
            unsigned sz = files.size();
            if( sz == 0 )
                return;

            {
                // be careful not to use too much memory if the write rate is 
                // extremely high
                double f = privateMapBytes / ((double)UncommittedBytesLimit);
                if( f > fraction ) { 
                    fraction = f;
                }
                privateMapBytes = 0;
            }

            unsigned ntodo = (unsigned) (sz * fraction);
            if( ntodo < 1 ) ntodo = 1;
            if( ntodo > sz ) ntodo = sz;

            const set<MongoFile*>::iterator b = files.begin();
            const set<MongoFile*>::iterator e = files.end();
            set<MongoFile*>::iterator i = b;
            // skip to our starting position
            for( unsigned x = 0; x < startAt; x++ ) {
                i++;
                if( i == e ) i = b;
            }
            unsigned startedAt = startAt;
            startAt = (startAt + ntodo) % sz; // mark where to start next time

            Timer t;
            for( unsigned x = 0; x < ntodo; x++ ) {
                dassert( i != e );
                if( (*i)->isDurableMappedFile() ) {
                    DurableMappedFile *mmf = (DurableMappedFile*) *i;
                    verify(mmf);
                    if( mmf->willNeedRemap() ) {
                        mmf->remapThePrivateView();
                    }
                    i++;
                    if( i == e ) i = b;
                }
            }
            LOG(2) << "journal REMAPPRIVATEVIEW done startedAt: " << startedAt << " n:" << ntodo << ' ' << t.millis() << "ms" << endl;
        }
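
The iterator bookkeeping in _REMAPPRIVATEVIEW (the static startAt, ntodo, and the wrap-around loops) is a round-robin window over a set: each pass visits the next slice and remembers where to resume, so every view is eventually remapped even though each pass touches only a fraction. A generic hedged sketch of just that windowing; visitWindow is a hypothetical helper, not a MongoDB function.

    #include <set>

    // Visits the next `ntodo` elements of `items`, wrapping at the end.
    // `startAt` persists across calls so successive windows tile the set.
    template <typename T, typename Visit>
    void visitWindow(std::set<T>& items, unsigned& startAt, unsigned ntodo, Visit visit) {
        unsigned sz = items.size();
        if (sz == 0)
            return;
        if (ntodo > sz)
            ntodo = sz;

        typename std::set<T>::iterator b = items.begin(), e = items.end(), i = b;
        for (unsigned x = 0; x < startAt % sz; x++) {  // skip to starting position
            if (++i == e)
                i = b;
        }
        startAt = (startAt + ntodo) % sz;              // where the next call resumes

        for (unsigned x = 0; x < ntodo; x++) {
            visit(*i);
            if (++i == e)
                i = b;
        }
    }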
Example #21
    void SnapshotData::takeSnapshot() {
        _created = curTimeMicros64();
        _globalUsage = Top::global.getGlobalData();
//        _totalWriteLockedTime = d.dbMutex.info().getTimeLocked();
        Top::global.cloneMap(_usage);
    }
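
takeSnapshot() above pairs a capture timestamp with cloned usage counters; two snapshots taken at different times can then be diffed into rates. A hedged illustration using a simplified stand-in struct; SimpleSnapshot and its fields are hypothetical, not MongoDB's SnapshotData.

    // Simplified stand-in: one aggregate counter instead of the cloned usage map.
    struct SimpleSnapshot {
        unsigned long long _created;   // curTimeMicros64() at capture time
        unsigned long long _totalOps;  // total operations observed so far
    };

    // Operations per second over the interval between two captures.
    double opsPerSecond(const SimpleSnapshot& older, const SimpleSnapshot& newer) {
        unsigned long long dtMicros = newer._created - older._created;
        if (dtMicros == 0)
            return 0.0;
        return (newer._totalOps - older._totalOps) * 1000000.0 / dtMicros;
    }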