void Stats::rotate() {
    unsigned long long now = curTimeMicros64();
    unsigned long long dt = now - _lastRotate;
    if( dt >= _intervalMicros && _intervalMicros ) {
        // rotate
        curr->_dtMillis = (unsigned) (dt/1000);
        _lastRotate = now;
        curr = other();
        curr->reset();
    }
}
/** We need to remap the private views periodically. Otherwise they would become very large.
    Call within write lock. */
void REMAPPRIVATEVIEW() {
    static unsigned startAt;
    static unsigned long long lastRemap;
    dbMutex.assertWriteLocked();
    dbMutex._remapPrivateViewRequested = false;
    assert( !commitJob.hasWritten() );
    if( 0 ) {
        log() << "TEMP remapprivateview disabled for testing - will eventually run oom in this mode if db bigger than ram" << endl;
        return;
    }
    // we want to remap all private views about every 2 seconds.  there could be ~1000 views so
    // we do a little each pass; beyond the remap time, more significantly, there will be copy on
    // write faults after remapping, so doing a little bit at a time will avoid big load spikes
    // on remapping.
    unsigned long long now = curTimeMicros64();
    double fraction = (now-lastRemap)/2000000.0; // 2,000,000 micros == the ~2 second target above
    lastRemap = now; // remember when this pass ran, so the next fraction is computed from here
    set<MongoFile*>& files = MongoFile::getAllFiles();
    unsigned sz = files.size();
    if( sz == 0 )
        return;
    unsigned ntodo = (unsigned) (sz * fraction);
    if( ntodo < 1 ) ntodo = 1;
    if( ntodo > sz ) ntodo = sz;
    const set<MongoFile*>::iterator b = files.begin();
    const set<MongoFile*>::iterator e = files.end();
    set<MongoFile*>::iterator i = b;
    // skip to our starting position
    for( unsigned x = 0; x < startAt; x++ ) {
        i++;
        if( i == e ) i = b;
    }
    startAt = (startAt + ntodo) % sz; // mark where to start next time
    for( unsigned x = 0; x < ntodo; x++ ) {
        dassert( i != e );
        if( (*i)->isMongoMMF() ) {
            MongoMMF *mmf = (MongoMMF*) *i;
            assert(mmf);
            if( mmf->willNeedRemap() ) {
                mmf->willNeedRemap() = false;
                mmf->remapThePrivateView();
            }
            i++;
            if( i == e ) i = b;
        }
    }
}
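// The pattern above -- touching only a time-proportional fraction of N items per pass, and
// resuming each pass where the previous one stopped -- is worth seeing in isolation. Below is a
// minimal sketch of that scheduling idea; incrementalPass(), the 2-second target, and the
// doWork() callback are hypothetical stand-ins, not names from the original source.
#include <algorithm>
#include <cstdint>
#include <vector>

// Process roughly (elapsed / targetPeriodMicros) of the items on each call, round-robin,
// so a full sweep over all items completes about once per target period.
template <typename T, typename Fn>
void incrementalPass(std::vector<T>& items, uint64_t nowMicros, Fn doWork) {
    static size_t startAt = 0;              // where the previous pass stopped
    static uint64_t lastPass = 0;           // time of the previous pass
    const double targetPeriodMicros = 2e6;  // assumed 2-second full-sweep target

    if (items.empty())
        return;
    double fraction = (nowMicros - lastPass) / targetPeriodMicros;
    lastPass = nowMicros;

    // Clamp to [1, size]: always make some progress, never exceed one full sweep per call.
    size_t ntodo =
        std::clamp<size_t>(static_cast<size_t>(items.size() * fraction), 1, items.size());
    for (size_t x = 0; x < ntodo; x++) {
        doWork(items[startAt]);
        startAt = (startAt + 1) % items.size();  // wrap around, remember position
    }
}
// Spreading the work this way trades latency for smoothness: the copy-on-write faults that
// follow each remap are amortized over many passes instead of arriving as one load spike.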
void CurOp::ensureStarted() {
    if ( _start == 0 ) {
        _start = curTimeMicros64();

        // If ensureStarted() is invoked after setMaxTimeMicros(), then time limit tracking will
        // start here.  This is because time limit tracking can only commence after the
        // operation is assigned a start time.
        if (_maxTimeMicros > 0) {
            _maxTimeTracker.setTimeLimit(_start, _maxTimeMicros);
        }
    }
}
bool Socket::connect(SockAddr& remote) {
    _remote = remote;

    _fd = socket(remote.getType(), SOCK_STREAM, 0);
    if ( _fd == INVALID_SOCKET ) {
        LOG(_logLevel) << "ERROR: connect invalid socket " << errnoWithDescription() << endl;
        return false;
    }

    if ( _timeout > 0 ) {
        setTimeout( _timeout );
    }

    static const unsigned int connectTimeoutMillis = 5000;
    ConnectBG bg(_fd, remote);
    bg.go();
    if ( bg.wait(connectTimeoutMillis) ) {
        if ( bg.inError() ) {
            warning() << "Failed to connect to "
                      << _remote.getAddr() << ":" << _remote.getPort()
                      << ", reason: " << bg.getErrnoWithDescription() << endl;
            close();
            return false;
        }
    }
    else {
        // time out the connect
        close();
        bg.wait(); // so bg stays in scope until bg thread terminates
        warning() << "Failed to connect to "
                  << _remote.getAddr() << ":" << _remote.getPort()
                  << " after " << connectTimeoutMillis << " milliseconds, giving up." << endl;
        return false;
    }

    if (remote.getType() != AF_UNIX)
        disableNagle(_fd);

#ifdef SO_NOSIGPIPE
    // ignore SIGPIPE signals on osx, to avoid process exit
    const int one = 1;
    setsockopt( _fd , SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int));
#endif

    _local = getLocalAddrForBoundSocketFd(_fd);

    _fdCreationMicroSec = curTimeMicros64();

    _awaitingHandshake = false;

    return true;
}
uint64_t CurOp::MaxTimeTracker::getRemainingMicros() const {
    if (!_enabled) {
        // 0 is "allow to run indefinitely".
        return 0;
    }

    // Does our accurate time source think time is up?  If so, claim there is 1 microsecond
    // left for this operation.
    uint64_t now = curTimeMicros64();
    if (_targetEpochMicros <= now) {
        return 1;
    }

    // Otherwise, calculate remaining time.
    return _targetEpochMicros - now;
}
bool Socket::connect(SockAddr& remote) {
    _remote = remote;

    _fd = socket(remote.getType(), SOCK_STREAM, 0);
    if ( _fd == INVALID_SOCKET ) {
        LOG(_logLevel) << "ERROR: connect invalid socket " << errnoWithDescription() << endl;
        return false;
    }

    if ( _timeout > 0 ) {
        setTimeout( _timeout );
    }

    ConnectBG bg(_fd, remote);
    bg.go();
    if ( bg.wait(5000) ) {
        if ( bg.inError() ) {
            close();
            return false;
        }
    }
    else {
        // time out the connect
        close();
        bg.wait(); // so bg stays in scope until bg thread terminates
        return false;
    }

    if (remote.getType() != AF_UNIX)
        disableNagle(_fd);

#ifdef SO_NOSIGPIPE
    // ignore SIGPIPE signals on osx, to avoid process exit
    const int one = 1;
    setsockopt( _fd , SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int));
#endif

    _local = getLocalAddrForBoundSocketFd(_fd);

    _fdCreationMicroSec = curTimeMicros64();

    return true;
}
void CurOp::MaxTimeTracker::setTimeLimit(uint64_t startEpochMicros, uint64_t durationMicros) {
    dassert(durationMicros != 0);

    _enabled = true;

    _targetEpochMicros = startEpochMicros + durationMicros;

    uint64_t now = curTimeMicros64();
    // If our accurate time source thinks time is not up yet, calculate the next target for
    // our approximate time source.
    if (_targetEpochMicros > now) {
        _approxTargetServerMillis = Listener::getElapsedTimeMillis() +
                                    static_cast<int64_t>((_targetEpochMicros - now) / 1000);
    }
    // Otherwise, set our approximate time source target such that it thinks time is already
    // up.
    else {
        _approxTargetServerMillis = Listener::getElapsedTimeMillis();
    }
}
bool CurOp::MaxTimeTracker::checkTimeLimit() {
    if (!_enabled) {
        return false;
    }

    // Does our approximate time source think time is not up yet?  If so, return early.
    if (_approxTargetServerMillis > Listener::getElapsedTimeMillis()) {
        return false;
    }

    uint64_t now = curTimeMicros64();
    // Does our accurate time source think time is not up yet?  If so, readjust the target for
    // our approximate time source and return early.
    if (_targetEpochMicros > now) {
        _approxTargetServerMillis = Listener::getElapsedTimeMillis() +
                                    static_cast<int64_t>((_targetEpochMicros - now) / 1000);
        return false;
    }

    // Otherwise, time is up.
    return true;
}
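// checkTimeLimit() above avoids calling the accurate-but-expensive clock on every invocation:
// a cheap, coarse clock filters out the common "plenty of time left" case, and the precise
// clock is consulted only once the coarse deadline appears to have passed. A minimal sketch of
// that two-tier pattern follows; cheapMillis() and preciseMicros() are hypothetical clock
// sources standing in for Listener::getElapsedTimeMillis() and curTimeMicros64().
#include <cstdint>

struct TwoTierDeadline {
    uint64_t targetMicros = 0;       // precise deadline, in the precise clock's epoch
    int64_t approxTargetMillis = 0;  // conservative deadline, in the cheap clock's epoch

    bool expired(int64_t (*cheapMillis)(), uint64_t (*preciseMicros)()) {
        if (cheapMillis() < approxTargetMillis)
            return false;  // fast path: coarse clock says time is clearly not up

        uint64_t now = preciseMicros();
        if (now < targetMicros) {
            // Coarse clock was pessimistic; push its target forward and keep going, so the
            // next calls take the fast path again until we are genuinely near the deadline.
            approxTargetMillis =
                cheapMillis() + static_cast<int64_t>((targetMicros - now) / 1000);
            return false;
        }
        return true;  // both clocks agree: the deadline has passed
    }
};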
void statsThread() {
    /*cout << "TEMP disabled statsthread" << endl;
    if( 1 )
        return;*/
    Client::initThread("stats");
    unsigned long long timeLastPass = 0;
    while ( 1 ) {
        {
            /* todo: do we even need readlock here? if so for what? */
            readlock lk("");
            Top::completeSnapshot();
            q = (q+1)%NStats;
            Timing timing;
            dbMutex.info().getTimingInfo(timing.start, timing.timeLocked);
            unsigned long long now = curTimeMicros64();
            if ( timeLastPass ) {
                unsigned long long dt = now - timeLastPass;
                unsigned long long dlocked = timing.timeLocked - tlast.timeLocked;
                {
                    stringstream ss;
                    ss << dt / 1000 << '\t';
                    ss << dlocked / 1000 << '\t';
                    if ( dt )
                        ss << (dlocked*100)/dt << '%';
                    string s = ss.str();
                    if ( cmdLine.cpu )
                        log() << "cpu: " << s << endl;
                    lockStats[q] = s;
                    ClientCursor::idleTimeReport( (unsigned) ((dt - dlocked)/1000) );
                }
            }
            timeLastPass = now;
            tlast = timing;
        }
        sleepsecs(4);
    }
}
ExitCode _initAndListen(int listenPort) {
    Client::initThread("initandlisten");

    initWireSpec();
    auto serviceContext = getGlobalServiceContext();

    serviceContext->setFastClockSource(FastClockSourceFactory::create(Milliseconds(10)));

    auto opObserverRegistry = stdx::make_unique<OpObserverRegistry>();
    opObserverRegistry->addObserver(stdx::make_unique<OpObserverShardingImpl>());
    opObserverRegistry->addObserver(stdx::make_unique<UUIDCatalogObserver>());

    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
        opObserverRegistry->addObserver(stdx::make_unique<ShardServerOpObserver>());
    } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        opObserverRegistry->addObserver(stdx::make_unique<ConfigServerOpObserver>());
    }

    setupFreeMonitoringOpObserver(opObserverRegistry.get());

    serviceContext->setOpObserver(std::move(opObserverRegistry));

    DBDirectClientFactory::get(serviceContext).registerImplementation([](OperationContext* opCtx) {
        return std::unique_ptr<DBClientBase>(new DBDirectClient(opCtx));
    });

    const repl::ReplSettings& replSettings =
        repl::ReplicationCoordinator::get(serviceContext)->getSettings();

    {
        ProcessId pid = ProcessId::getCurrent();
        LogstreamBuilder l = log(LogComponent::kControl);
        l << "MongoDB starting : pid=" << pid << " port=" << serverGlobalParams.port
          << " dbpath=" << storageGlobalParams.dbpath;

        const bool is32bit = sizeof(int*) == 4;
        l << (is32bit ? " 32" : " 64") << "-bit host=" << getHostNameCached() << endl;
    }

    DEV log(LogComponent::kControl) << "DEBUG build (which is slower)" << endl;

#if defined(_WIN32)
    VersionInfoInterface::instance().logTargetMinOS();
#endif

    logProcessDetails();

    serviceContext->setServiceEntryPoint(
        stdx::make_unique<ServiceEntryPointMongod>(serviceContext));

    if (!storageGlobalParams.repair) {
        auto tl =
            transport::TransportLayerManager::createWithConfig(&serverGlobalParams, serviceContext);
        auto res = tl->setup();
        if (!res.isOK()) {
            error() << "Failed to set up listener: " << res;
            return EXIT_NET_ERROR;
        }
        serviceContext->setTransportLayer(std::move(tl));
    }

    // Set up the periodic runner for background job execution. This is required to be running
    // before the storage engine is initialized.
    auto runner = makePeriodicRunner(serviceContext);
    runner->startup();
    serviceContext->setPeriodicRunner(std::move(runner));

    initializeStorageEngine(serviceContext, StorageEngineInitFlags::kNone);

#ifdef MONGO_CONFIG_WIREDTIGER_ENABLED
    if (EncryptionHooks::get(serviceContext)->restartRequired()) {
        exitCleanly(EXIT_CLEAN);
    }
#endif

    // Warn if we detect configurations for multiple registered storage engines in the same
    // configuration file/environment.
    if (serverGlobalParams.parsedOpts.hasField("storage")) {
        BSONElement storageElement = serverGlobalParams.parsedOpts.getField("storage");
        invariant(storageElement.isABSONObj());
        for (auto&& e : storageElement.Obj()) {
            // Ignore if field name under "storage" matches current storage engine.
            if (storageGlobalParams.engine == e.fieldName()) {
                continue;
            }

            // Warn if field name matches non-active registered storage engine.
            if (isRegisteredStorageEngine(serviceContext, e.fieldName())) {
                warning() << "Detected configuration for non-active storage engine "
                          << e.fieldName() << " when current storage engine is "
                          << storageGlobalParams.engine;
            }
        }
    }

    // Disallow running a storage engine that doesn't support capped collections with --profile
    if (!serviceContext->getStorageEngine()->supportsCappedCollections() &&
        serverGlobalParams.defaultProfile != 0) {
        log() << "Running " << storageGlobalParams.engine << " with profiling is not supported. "
              << "Make sure you are not using --profile.";
        exitCleanly(EXIT_BADOPTIONS);
    }

    // Disallow running WiredTiger with --nojournal in a replica set
    if (storageGlobalParams.engine == "wiredTiger" && !storageGlobalParams.dur &&
        replSettings.usingReplSets()) {
        log() << "Running wiredTiger without journaling in a replica set is not "
              << "supported. Make sure you are not using --nojournal and that "
              << "storage.journal.enabled is not set to 'false'.";
        exitCleanly(EXIT_BADOPTIONS);
    }

    logMongodStartupWarnings(storageGlobalParams, serverGlobalParams, serviceContext);

#ifdef MONGO_CONFIG_SSL
    if (sslGlobalParams.sslAllowInvalidCertificates &&
        ((serverGlobalParams.clusterAuthMode.load() == ServerGlobalParams::ClusterAuthMode_x509) ||
         sequenceContains(saslGlobalParams.authenticationMechanisms, "MONGODB-X509"))) {
        log() << "** WARNING: While invalid X509 certificates may be used to" << startupWarningsLog;
        log() << "**          connect to this server, they will not be considered"
              << startupWarningsLog;
        log() << "**          permissible for authentication." << startupWarningsLog;
        log() << startupWarningsLog;
    }
#endif

    {
        std::stringstream ss;
        ss << endl;
        ss << "*********************************************************************" << endl;
        ss << " ERROR: dbpath (" << storageGlobalParams.dbpath << ") does not exist." << endl;
        ss << " Create this directory or give existing directory in --dbpath." << endl;
        ss << " See http://dochub.mongodb.org/core/startingandstoppingmongo" << endl;
        ss << "*********************************************************************" << endl;
        uassert(10296, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.dbpath));
    }

    initializeSNMP();

    if (!storageGlobalParams.readOnly) {
        boost::filesystem::remove_all(storageGlobalParams.dbpath + "/_tmp/");
    }

    if (mongodGlobalParams.scriptingEnabled) {
        ScriptEngine::setup();
    }

    auto startupOpCtx = serviceContext->makeOperationContext(&cc());

    bool canCallFCVSetIfCleanStartup =
        !storageGlobalParams.readOnly && (storageGlobalParams.engine != "devnull");
    if (canCallFCVSetIfCleanStartup && !replSettings.usingReplSets()) {
        Lock::GlobalWrite lk(startupOpCtx.get());
        FeatureCompatibilityVersion::setIfCleanStartup(startupOpCtx.get(),
                                                       repl::StorageInterface::get(serviceContext));
    }

    auto swNonLocalDatabases = repairDatabasesAndCheckVersion(startupOpCtx.get());
    if (!swNonLocalDatabases.isOK()) {
        // SERVER-31611 introduced a return value to `repairDatabasesAndCheckVersion`. Previously,
        // a failing condition would fassert. SERVER-31611 covers a case where the binary (3.6) is
        // refusing to start up because it refuses acknowledgement of FCV 3.2 and requires the
        // user to start up with an older binary. Thus shutting down the server must leave the
        // datafiles in a state that the older binary can start up. This requires going through a
        // clean shutdown.
        //
        // The invariant is *not* a statement that `repairDatabasesAndCheckVersion` must return
        // `MustDowngrade`. Instead, it is meant as a guardrail to protect future developers from
        // accidentally buying into this behavior. New errors that are returned from the method
        // may or may not want to go through a clean shutdown, and they likely won't want the
        // program to return an exit code of `EXIT_NEED_DOWNGRADE`.
        severe(LogComponent::kControl) << "** IMPORTANT: "
                                       << swNonLocalDatabases.getStatus().reason();
        invariant(swNonLocalDatabases == ErrorCodes::MustDowngrade);
        exitCleanly(EXIT_NEED_DOWNGRADE);
    }

    // Assert that the in-memory featureCompatibilityVersion parameter has been explicitly set. If
    // we are part of a replica set and are started up with no data files, we do not set the
    // featureCompatibilityVersion until a primary is chosen. For this case, we expect the
    // in-memory featureCompatibilityVersion parameter to still be uninitialized until after
    // startup.
    if (canCallFCVSetIfCleanStartup &&
        (!replSettings.usingReplSets() || swNonLocalDatabases.getValue())) {
        invariant(serverGlobalParams.featureCompatibility.isVersionInitialized());
    }

    if (storageGlobalParams.upgrade) {
        log() << "finished checking dbs";
        exitCleanly(EXIT_CLEAN);
    }

    // Start up health log writer thread.
    HealthLog::get(startupOpCtx.get()).startup();

    auto const globalAuthzManager = AuthorizationManager::get(serviceContext);
    uassertStatusOK(globalAuthzManager->initialize(startupOpCtx.get()));

    // This is for security on certain platforms (nonce generation)
    srand((unsigned)(curTimeMicros64()) ^ (unsigned(uintptr_t(&startupOpCtx))));

    if (globalAuthzManager->shouldValidateAuthSchemaOnStartup()) {
        Status status = verifySystemIndexes(startupOpCtx.get());
        if (!status.isOK()) {
            log() << redact(status);
            if (status == ErrorCodes::AuthSchemaIncompatible) {
                exitCleanly(EXIT_NEED_UPGRADE);
            } else if (status == ErrorCodes::NotMaster) {
                // Try creating the indexes if we become master.  If we do not become master,
                // the master will create the indexes and we will replicate them.
            } else {
                quickExit(EXIT_FAILURE);
            }
        }

        // SERVER-14090: Verify that auth schema version is schemaVersion26Final.
        int foundSchemaVersion;
        status =
            globalAuthzManager->getAuthorizationVersion(startupOpCtx.get(), &foundSchemaVersion);
        if (!status.isOK()) {
            log() << "Auth schema version is incompatible: "
                  << "User and role management commands require auth data to have "
                  << "at least schema version " << AuthorizationManager::schemaVersion26Final
                  << " but startup could not verify schema version: " << status;
            log() << "To manually repair the 'authSchema' document in the admin.system.version "
                     "collection, start up with --setParameter "
                     "startupAuthSchemaValidation=false to disable validation.";
            exitCleanly(EXIT_NEED_UPGRADE);
        }

        if (foundSchemaVersion <= AuthorizationManager::schemaVersion26Final) {
            log() << "This server is using MONGODB-CR, an authentication mechanism which "
                  << "has been removed from MongoDB 4.0. In order to upgrade the auth schema, "
                  << "first downgrade MongoDB binaries to version 3.6 and then run the "
                  << "authSchemaUpgrade command. "
                  << "See http://dochub.mongodb.org/core/3.0-upgrade-to-scram-sha-1";
            exitCleanly(EXIT_NEED_UPGRADE);
        }
    } else if (globalAuthzManager->isAuthEnabled()) {
        error() << "Auth must be disabled when starting without auth schema validation";
        exitCleanly(EXIT_BADOPTIONS);
    } else {
        // If authSchemaValidation is disabled and server is running without auth,
        // warn the user and continue startup without authSchema metadata checks.
        log() << startupWarningsLog;
        log() << "** WARNING: Startup auth schema validation checks are disabled for the "
                 "database." << startupWarningsLog;
        log() << "**          This mode should only be used to manually repair corrupted auth "
                 "data." << startupWarningsLog;
    }

    // This function may take the global lock.
    auto shardingInitialized = ShardingInitializationMongoD::get(startupOpCtx.get())
                                   ->initializeShardingAwarenessIfNeeded(startupOpCtx.get());
    if (shardingInitialized) {
        waitForShardRegistryReload(startupOpCtx.get()).transitional_ignore();
    }

    auto storageEngine = serviceContext->getStorageEngine();
    invariant(storageEngine);
    BackupCursorHooks::initialize(serviceContext, storageEngine);

    if (!storageGlobalParams.readOnly) {

        if (storageEngine->supportsCappedCollections()) {
            logStartup(startupOpCtx.get());
        }

        startMongoDFTDC();

        startFreeMonitoring(serviceContext);

        restartInProgressIndexesFromLastShutdown(startupOpCtx.get());

        if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
            // Note: For replica sets, ShardingStateRecovery happens on transition to primary.
            if (!repl::ReplicationCoordinator::get(startupOpCtx.get())->isReplEnabled()) {
                if (ShardingState::get(startupOpCtx.get())->enabled()) {
                    uassertStatusOK(ShardingStateRecovery::recover(startupOpCtx.get()));
                }
            }
        } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
            initializeGlobalShardingStateForMongoD(startupOpCtx.get(),
                                                   ConnectionString::forLocal(),
                                                   kDistLockProcessIdForConfigServer);

            Balancer::create(startupOpCtx->getServiceContext());

            ShardingCatalogManager::create(
                startupOpCtx->getServiceContext(),
                makeShardingTaskExecutor(executor::makeNetworkInterface("AddShard-TaskExecutor")));

            Grid::get(startupOpCtx.get())->setShardingInitialized();
        } else if (replSettings.usingReplSets()) {  // standalone replica set
            auto keysCollectionClient = stdx::make_unique<KeysCollectionClientDirect>();
            auto keyManager = std::make_shared<KeysCollectionManager>(
                KeysCollectionManager::kKeyManagerPurposeString,
                std::move(keysCollectionClient),
                Seconds(KeysRotationIntervalSec));
            keyManager->startMonitoring(startupOpCtx->getServiceContext());

            LogicalTimeValidator::set(startupOpCtx->getServiceContext(),
                                      stdx::make_unique<LogicalTimeValidator>(keyManager));
        }

        repl::ReplicationCoordinator::get(startupOpCtx.get())->startup(startupOpCtx.get());

        const unsigned long long missingRepl =
            checkIfReplMissingFromCommandLine(startupOpCtx.get());
        if (missingRepl) {
            log() << startupWarningsLog;
            log() << "** WARNING: mongod started without --replSet yet " << missingRepl
                  << " documents are present in local.system.replset." << startupWarningsLog;
            log() << "**          Database contents may appear inconsistent with the oplog and "
                     "may appear to not contain"
                  << startupWarningsLog;
            log() << "**          writes that were visible when this node was running as part "
                     "of a replica set."
                  << startupWarningsLog;
            log() << "**          Restart with --replSet unless you are doing maintenance and "
                     "no other clients are connected."
                  << startupWarningsLog;
            log() << "**          The TTL collection monitor will not start because of this."
                  << startupWarningsLog;
            log() << "**         ";
            log() << "**          For more info see http://dochub.mongodb.org/core/ttlcollections";
            log() << startupWarningsLog;
        } else {
            startTTLBackgroundJob();
        }

        if (replSettings.usingReplSets() || !internalValidateFeaturesAsMaster) {
            serverGlobalParams.validateFeaturesAsMaster.store(false);
        }
    }

    startClientCursorMonitor();

    PeriodicTask::startRunningPeriodicTasks();

    SessionKiller::set(serviceContext,
                       std::make_shared<SessionKiller>(serviceContext, killSessionsLocal));

    // Start up a background task to periodically check for and kill expired transactions; and a
    // background task to periodically check for and decrease cache pressure by decreasing the
    // target size setting for the storage engine's window of available snapshots.
    //
    // Only do this on storage engines supporting snapshot reads, which hold resources we wish to
    // release periodically in order to avoid storage cache pressure build up.
    if (storageEngine->supportsReadConcernSnapshot()) {
        startPeriodicThreadToAbortExpiredTransactions(serviceContext);
        startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(serviceContext);
    }

    // Set up the logical session cache
    LogicalSessionCacheServer kind = LogicalSessionCacheServer::kStandalone;
    if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
        kind = LogicalSessionCacheServer::kSharded;
    } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        kind = LogicalSessionCacheServer::kConfigServer;
    } else if (replSettings.usingReplSets()) {
        kind = LogicalSessionCacheServer::kReplicaSet;
    }

    auto sessionCache = makeLogicalSessionCacheD(kind);
    LogicalSessionCache::set(serviceContext, std::move(sessionCache));

    // MessageServer::run will return when exit code closes its socket and we don't need the
    // operation context anymore
    startupOpCtx.reset();

    auto start = serviceContext->getServiceExecutor()->start();
    if (!start.isOK()) {
        error() << "Failed to start the service executor: " << start;
        return EXIT_NET_ERROR;
    }

    start = serviceContext->getServiceEntryPoint()->start();
    if (!start.isOK()) {
        error() << "Failed to start the service entry point: " << start;
        return EXIT_NET_ERROR;
    }

    if (!storageGlobalParams.repair) {
        start = serviceContext->getTransportLayer()->start();
        if (!start.isOK()) {
            error() << "Failed to start the listener: " << start.toString();
            return EXIT_NET_ERROR;
        }
    }

    serviceContext->notifyStartupComplete();

#ifndef _WIN32
    mongo::signalForkSuccess();
#else
    if (ntservice::shouldStartService()) {
        ntservice::reportStatus(SERVICE_RUNNING);
        log() << "Service running";
    }
#endif

    if (MONGO_FAIL_POINT(shutdownAtStartup)) {
        log() << "starting clean exit via failpoint";
        exitCleanly(EXIT_CLEAN);
    }

    MONGO_IDLE_THREAD_BLOCK;
    return waitForShutdown();
}
bool Socket::connect(SockAddr& remote) {
    _remote = remote;

    _fd = ::socket(remote.getType(), SOCK_STREAM, 0);
    if (_fd == INVALID_SOCKET) {
        networkWarnWithDescription(*this, "socket");
        return false;
    }

    if (!setBlock(_fd, false)) {
        networkWarnWithDescription(*this, "set socket to non-blocking mode");
        return false;
    }

    const Milliseconds connectTimeoutMillis(static_cast<int64_t>(
        _timeout > 0 ? std::min(kMaxConnectTimeoutMS, (_timeout * 1000)) : kMaxConnectTimeoutMS));
    const Date_t expiration = Date_t::now() + connectTimeoutMillis;

    bool connectSucceeded = ::connect(_fd, _remote.raw(), _remote.addressSize) == 0;

    if (!connectSucceeded) {
#ifdef _WIN32
        if (WSAGetLastError() != WSAEWOULDBLOCK) {
            networkWarnWithDescription(*this, "connect");
            return false;
        }
#else
        if (errno != EINTR && errno != EINPROGRESS) {
            networkWarnWithDescription(*this, "connect");
            return false;
        }
#endif

        pollfd pfd;
        pfd.fd = _fd;
        pfd.events = POLLOUT;

        while (true) {
            const auto timeout = std::max(Milliseconds(0), expiration - Date_t::now());

            int pollReturn = socketPoll(&pfd, 1, timeout.count());
#ifdef _WIN32
            if (pollReturn == SOCKET_ERROR) {
                networkWarnWithDescription(*this, "poll");
                return false;
            }
#else
            if (pollReturn == -1) {
                if (errno != EINTR) {
                    networkWarnWithDescription(*this, "poll");
                    return false;
                }

                // EINTR in poll, try again
                continue;
            }
#endif
            // No activity for the full duration of the timeout.
            if (pollReturn == 0) {
                warning() << "Failed to connect to " << _remote.getAddr() << ":"
                          << _remote.getPort() << " after " << connectTimeoutMillis
                          << " milliseconds, giving up.";
                return false;
            }

            // We had a result, see if there's an error on the socket.
            int optVal;
            socklen_t optLen = sizeof(optVal);
            if (::getsockopt(
                    _fd, SOL_SOCKET, SO_ERROR, reinterpret_cast<char*>(&optVal), &optLen) == -1) {
                networkWarnWithDescription(*this, "getsockopt");
                return false;
            }

            if (optVal != 0) {
                networkWarnWithDescription(*this, "checking socket for error after poll", optVal);
                return false;
            }

            // We had activity and we don't have errors on the socket, we're connected.
            break;
        }
    }

    if (!setBlock(_fd, true)) {
        networkWarnWithDescription(*this, "could not set socket to blocking mode");
        return false;
    }

    if (_timeout > 0) {
        setTimeout(_timeout);
    }

    if (remote.getType() != AF_UNIX)
        disableNagle(_fd);

#ifdef SO_NOSIGPIPE
    // ignore SIGPIPE signals on osx, to avoid process exit
    const int one = 1;
    setsockopt(_fd, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int));
#endif

    _local = getLocalAddrForBoundSocketFd(_fd);

    _fdCreationMicroSec = curTimeMicros64();

    _awaitingHandshake = false;

    return true;
}
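// The version above interleaves several concerns. The core technique -- non-blocking connect(),
// poll() for writability with a deadline, then SO_ERROR to learn the outcome -- is easier to see
// stripped down. Below is a minimal, POSIX-only sketch of the same pattern (no Windows branch,
// no EINTR retry loop); connectWithTimeout() is a hypothetical helper, not part of the source,
// and it assumes the caller has already put fd into non-blocking mode.
#include <cerrno>
#include <poll.h>
#include <sys/socket.h>

// Returns 0 on success, -1 on failure or timeout. timeoutMillis bounds the total wait.
int connectWithTimeout(int fd, const sockaddr* addr, socklen_t addrLen, int timeoutMillis) {
    if (::connect(fd, addr, addrLen) == 0)
        return 0;                              // connected immediately (e.g. loopback)
    if (errno != EINPROGRESS && errno != EINTR)
        return -1;                             // immediate, unrecoverable failure

    pollfd pfd;
    pfd.fd = fd;
    pfd.events = POLLOUT;                      // writable == connect finished (or failed)
    int rc = ::poll(&pfd, 1, timeoutMillis);
    if (rc <= 0)
        return -1;                             // poll error, or timed out with no activity

    // poll() reported activity; SO_ERROR tells us whether the connect actually succeeded.
    int err = 0;
    socklen_t len = sizeof(err);
    if (::getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == -1 || err != 0)
        return -1;                             // connect completed with an error
    return 0;                                  // connected
}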
void CurOp::ensureStarted() {
    if ( _start == 0 )
        _start = curTimeMicros64();
}
ExitCode _initAndListen(int listenPort) {
    Client::initThread("initandlisten");

    _initWireSpec();
    auto globalServiceContext = getGlobalServiceContext();

    globalServiceContext->setFastClockSource(FastClockSourceFactory::create(Milliseconds(10)));
    globalServiceContext->setOpObserver(stdx::make_unique<OpObserver>());

    DBDirectClientFactory::get(globalServiceContext)
        .registerImplementation([](OperationContext* txn) {
            return std::unique_ptr<DBClientBase>(new DBDirectClient(txn));
        });

    const repl::ReplSettings& replSettings = repl::getGlobalReplicationCoordinator()->getSettings();

    {
        ProcessId pid = ProcessId::getCurrent();
        LogstreamBuilder l = log(LogComponent::kControl);
        l << "MongoDB starting : pid=" << pid << " port=" << serverGlobalParams.port
          << " dbpath=" << storageGlobalParams.dbpath;
        if (replSettings.isMaster())
            l << " master=" << replSettings.isMaster();
        if (replSettings.isSlave())
            l << " slave=" << (int)replSettings.isSlave();

        const bool is32bit = sizeof(int*) == 4;
        l << (is32bit ? " 32" : " 64") << "-bit host=" << getHostNameCached() << endl;
    }

    DEV log(LogComponent::kControl) << "DEBUG build (which is slower)" << endl;

#if defined(_WIN32)
    VersionInfoInterface::instance().logTargetMinOS();
#endif

    logProcessDetails();

    checked_cast<ServiceContextMongoD*>(getGlobalServiceContext())->createLockFile();

    transport::TransportLayerLegacy::Options options;
    options.port = listenPort;
    options.ipList = serverGlobalParams.bind_ip;

    auto sep =
        stdx::make_unique<ServiceEntryPointMongod>(getGlobalServiceContext()->getTransportLayer());
    auto sepPtr = sep.get();

    getGlobalServiceContext()->setServiceEntryPoint(std::move(sep));

    // Create, start, and attach the TL
    auto transportLayer = stdx::make_unique<transport::TransportLayerLegacy>(options, sepPtr);
    auto res = transportLayer->setup();
    if (!res.isOK()) {
        error() << "Failed to set up listener: " << res;
        return EXIT_NET_ERROR;
    }

    std::shared_ptr<DbWebServer> dbWebServer;
    if (serverGlobalParams.isHttpInterfaceEnabled) {
        dbWebServer.reset(new DbWebServer(serverGlobalParams.bind_ip,
                                          serverGlobalParams.port + 1000,
                                          getGlobalServiceContext(),
                                          new RestAdminAccess()));
        if (!dbWebServer->setupSockets()) {
            error() << "Failed to set up sockets for HTTP interface during startup.";
            return EXIT_NET_ERROR;
        }
    }

    getGlobalServiceContext()->initializeGlobalStorageEngine();

#ifdef MONGO_CONFIG_WIREDTIGER_ENABLED
    if (WiredTigerCustomizationHooks::get(getGlobalServiceContext())->restartRequired()) {
        exitCleanly(EXIT_CLEAN);
    }
#endif

    // Warn if we detect configurations for multiple registered storage engines in
    // the same configuration file/environment.
    if (serverGlobalParams.parsedOpts.hasField("storage")) {
        BSONElement storageElement = serverGlobalParams.parsedOpts.getField("storage");
        invariant(storageElement.isABSONObj());
        BSONObj storageParamsObj = storageElement.Obj();
        BSONObjIterator i = storageParamsObj.begin();
        while (i.more()) {
            BSONElement e = i.next();
            // Ignore if field name under "storage" matches current storage engine.
            if (storageGlobalParams.engine == e.fieldName()) {
                continue;
            }

            // Warn if field name matches non-active registered storage engine.
            if (getGlobalServiceContext()->isRegisteredStorageEngine(e.fieldName())) {
                warning() << "Detected configuration for non-active storage engine "
                          << e.fieldName() << " when current storage engine is "
                          << storageGlobalParams.engine;
            }
        }
    }

    if (!getGlobalServiceContext()->getGlobalStorageEngine()->getSnapshotManager()) {
        if (moe::startupOptionsParsed.count("replication.enableMajorityReadConcern") &&
            moe::startupOptionsParsed["replication.enableMajorityReadConcern"].as<bool>()) {
            // Note: we are intentionally only erroring if the user explicitly requested that we
            // enable majority read concern. We do not error if it is implicitly enabled for
            // CSRS because a required step in the upgrade procedure can involve an mmapv1 node in
            // the CSRS in the REMOVED state. This is handled by the TopologyCoordinator.
            invariant(replSettings.isMajorityReadConcernEnabled());
            severe() << "Majority read concern requires a storage engine that supports"
                     << " snapshots, such as wiredTiger. " << storageGlobalParams.engine
                     << " does not support snapshots.";
            exitCleanly(EXIT_BADOPTIONS);
        }
    }

    logMongodStartupWarnings(storageGlobalParams, serverGlobalParams);

    {
        stringstream ss;
        ss << endl;
        ss << "*********************************************************************" << endl;
        ss << " ERROR: dbpath (" << storageGlobalParams.dbpath << ") does not exist." << endl;
        ss << " Create this directory or give existing directory in --dbpath." << endl;
        ss << " See http://dochub.mongodb.org/core/startingandstoppingmongo" << endl;
        ss << "*********************************************************************" << endl;
        uassert(10296, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.dbpath));
    }

    {
        stringstream ss;
        ss << "repairpath (" << storageGlobalParams.repairpath << ") does not exist";
        uassert(12590, ss.str().c_str(), boost::filesystem::exists(storageGlobalParams.repairpath));
    }

    // TODO: This should go into a MONGO_INITIALIZER once we have figured out the correct
    // dependencies.
    if (snmpInit) {
        snmpInit();
    }

    if (!storageGlobalParams.readOnly) {
        boost::filesystem::remove_all(storageGlobalParams.dbpath + "/_tmp/");
    }

    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalRecoverOnly)
        return EXIT_NET_ERROR;

    if (mongodGlobalParams.scriptingEnabled) {
        ScriptEngine::setup();
    }

    auto startupOpCtx = getGlobalServiceContext()->makeOperationContext(&cc());

    repairDatabasesAndCheckVersion(startupOpCtx.get());

    if (storageGlobalParams.upgrade) {
        log() << "finished checking dbs";
        exitCleanly(EXIT_CLEAN);
    }

    uassertStatusOK(getGlobalAuthorizationManager()->initialize(startupOpCtx.get()));

    /* this is for security on certain platforms (nonce generation) */
    srand((unsigned)(curTimeMicros64() ^ startupSrandTimer.micros()));

    // The snapshot thread provides historical collection level and lock statistics for use
    // by the web interface. Only needed when HTTP is enabled.
    if (serverGlobalParams.isHttpInterfaceEnabled) {
        statsSnapshotThread.go();

        invariant(dbWebServer);
        stdx::thread web(stdx::bind(&webServerListenThread, dbWebServer));
        web.detach();
    }

#ifndef _WIN32
    mongo::signalForkSuccess();
#endif

    AuthorizationManager* globalAuthzManager = getGlobalAuthorizationManager();
    if (globalAuthzManager->shouldValidateAuthSchemaOnStartup()) {
        Status status = authindex::verifySystemIndexes(startupOpCtx.get());
        if (!status.isOK()) {
            log() << redact(status);
            exitCleanly(EXIT_NEED_UPGRADE);
        }

        // SERVER-14090: Verify that auth schema version is schemaVersion26Final.
        int foundSchemaVersion;
        status =
            globalAuthzManager->getAuthorizationVersion(startupOpCtx.get(), &foundSchemaVersion);
        if (!status.isOK()) {
            log() << "Auth schema version is incompatible: "
                  << "User and role management commands require auth data to have "
                  << "at least schema version " << AuthorizationManager::schemaVersion26Final
                  << " but startup could not verify schema version: " << status;
            exitCleanly(EXIT_NEED_UPGRADE);
        }

        if (foundSchemaVersion < AuthorizationManager::schemaVersion26Final) {
            log() << "Auth schema version is incompatible: "
                  << "User and role management commands require auth data to have "
                  << "at least schema version " << AuthorizationManager::schemaVersion26Final
                  << " but found " << foundSchemaVersion << ". In order to upgrade "
                  << "the auth schema, first downgrade MongoDB binaries to version "
                  << "2.6 and then run the authSchemaUpgrade command.";
            exitCleanly(EXIT_NEED_UPGRADE);
        }
    } else if (globalAuthzManager->isAuthEnabled()) {
        error() << "Auth must be disabled when starting without auth schema validation";
        exitCleanly(EXIT_BADOPTIONS);
    } else {
        // If authSchemaValidation is disabled and server is running without auth,
        // warn the user and continue startup without authSchema metadata checks.
        log() << startupWarningsLog;
        log() << "** WARNING: Startup auth schema validation checks are disabled for the "
                 "database." << startupWarningsLog;
        log() << "**          This mode should only be used to manually repair corrupted auth "
                 "data." << startupWarningsLog;
    }

    auto shardingInitialized =
        uassertStatusOK(ShardingState::get(startupOpCtx.get())
                            ->initializeShardingAwarenessIfNeeded(startupOpCtx.get()));
    if (shardingInitialized) {
        reloadShardRegistryUntilSuccess(startupOpCtx.get());
    }

    if (!storageGlobalParams.readOnly) {
        logStartup(startupOpCtx.get());

        startFTDC();

        getDeleter()->startWorkers();

        restartInProgressIndexesFromLastShutdown(startupOpCtx.get());

        if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
            // Note: For replica sets, ShardingStateRecovery happens on transition to primary.
            if (!repl::getGlobalReplicationCoordinator()->isReplEnabled()) {
                uassertStatusOK(ShardingStateRecovery::recover(startupOpCtx.get()));
            }
        } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
            uassertStatusOK(
                initializeGlobalShardingStateForMongod(startupOpCtx.get(),
                                                       ConnectionString::forLocal(),
                                                       kDistLockProcessIdForConfigServer));
            Balancer::create(startupOpCtx->getServiceContext());
        }

        repl::getGlobalReplicationCoordinator()->startup(startupOpCtx.get());

        const unsigned long long missingRepl =
            checkIfReplMissingFromCommandLine(startupOpCtx.get());
        if (missingRepl) {
            log() << startupWarningsLog;
            log() << "** WARNING: mongod started without --replSet yet " << missingRepl
                  << " documents are present in local.system.replset" << startupWarningsLog;
            log() << "**          Restart with --replSet unless you are doing maintenance and "
                  << " no other clients are connected." << startupWarningsLog;
            log() << "**          The TTL collection monitor will not start because of this."
                  << startupWarningsLog;
            log() << "**         ";
            log() << "**          For more info see http://dochub.mongodb.org/core/ttlcollections";
            log() << startupWarningsLog;
        } else {
            startTTLBackgroundJob();
        }

        if (!replSettings.usingReplSets() && !replSettings.isSlave() &&
            storageGlobalParams.engine != "devnull") {
            ScopedTransaction transaction(startupOpCtx.get(), MODE_X);
            Lock::GlobalWrite lk(startupOpCtx.get()->lockState());
            FeatureCompatibilityVersion::setIfCleanStartup(
                startupOpCtx.get(), repl::StorageInterface::get(getGlobalServiceContext()));
        }
    }

    startClientCursorMonitor();

    PeriodicTask::startRunningPeriodicTasks();

    // MessageServer::run will return when exit code closes its socket and we don't need the
    // operation context anymore
    startupOpCtx.reset();

    auto start = getGlobalServiceContext()->addAndStartTransportLayer(std::move(transportLayer));
    if (!start.isOK()) {
        error() << "Failed to start the listener: " << start.toString();
        return EXIT_NET_ERROR;
    }

    return waitForShutdown();
}
void run() {
    int iterations = 1000*1000;
    while(iterations--) {
        curTimeMicros64();
    }
}
void run() {
    if ( _token.size() == 0 && _name.size() == 0 ) {
        log(1) << "mms not configured" << endl;
        return;
    }

    if ( _token.size() == 0 ) {
        log() << "no token for mms - not running" << endl;
        return;
    }

    if ( _name.size() == 0 ) {
        log() << "no name for mms - not running" << endl;
        return;
    }

    log() << "mms monitor starting...  token:" << _token << " name:" << _name
          << " interval: " << _secsToSleep << endl;

    unsigned long long lastTime = 0;
    unsigned long long lastLockTime = 0;

    while ( ! inShutdown() ) {
        sleepsecs( _secsToSleep );

        stringstream url;
        url << _baseurl << _token << "?";
        url << "monitor_name=" << _name << "&";
        url << "version=" << versionString << "&";
        url << "git_hash=" << gitVersion() << "&";

        { // percent_locked
            unsigned long long time = curTimeMicros64();
            unsigned long long start , lock;
            dbMutexInfo.timingInfo( start , lock );
            if ( lastTime ) {
                double timeDiff = (double) (time - lastTime);
                double lockDiff = (double) (lock - lastLockTime);
                url << "percent_locked=" << (int)ceil( 100 * ( lockDiff / timeDiff ) ) << "&";
            }
            lastTime = time;
            lastLockTime = lock;
        }

        vector< string > dbNames;
        getDatabaseNames( dbNames );
        boost::intmax_t totalSize = 0;
        for ( vector< string >::iterator i = dbNames.begin(); i != dbNames.end(); ++i ) {
            boost::intmax_t size = dbSize( i->c_str() );
            totalSize += size;
        }
        url << "data_size=" << totalSize / ( 1024 * 1024 ) << "&";

        /* TODO:
           message_operations
           update_operations
           insert_operations
           get_more_operations
           delete_operations
           kill_cursors_operations
        */

        log(1) << "mms url: " << url.str() << endl;

        try {
            HttpClient c;
            map<string,string> headers;
            stringstream ss;
            int rc = c.get( url.str() , headers , ss );
            log(1) << "\t response code: " << rc << endl;
            if ( rc != 200 ) {
                log() << "mms error response code:" << rc << endl;
                log(1) << "mms error body:" << ss.str() << endl;
            }
        }
        catch ( std::exception& e ) {
            log() << "mms get exception: " << e.what() << endl;
        }
    }
}
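// The percent_locked computation above is a simple ratio of deltas between polls. Worked
// example: if 40 seconds of wall time elapsed since the last sample (timeDiff = 40,000,000
// micros) and the lock was held for 10,000,000 of those micros (lockDiff), then
// percent_locked = ceil(100 * 10,000,000 / 40,000,000) = 25. Both counters are monotonic
// running totals, so subtracting the previous sample's values isolates just the last interval.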
/**
 * The main durability thread loop. There is a single instance of this function running.
 */
static void durThread(ClockSource* cs, int64_t serverStartMs) {
    Client::initThread("durability");

    log() << "Durability thread started";

    bool samePartition = true;
    try {
        const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
    } catch (...) {
    }

    // Spawn the journal writer thread
    JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
    journalWriter.start();

    // Used as an estimate of how much / how fast to remap
    uint64_t commitCounter(0);
    uint64_t estimatedPrivateMapSize(0);
    uint64_t remapLastTimestamp(0);

    while (shutdownRequested.loadRelaxed() == 0) {
        unsigned ms = storageGlobalParams.journalCommitIntervalMs;
        if (ms == 0) {
            ms = samePartition ? 100 : 30;
        }

        // +1 so it never goes down to zero
        const int64_t oneThird = (ms / 3) + 1;

        // Reset the stats based on the reset interval
        if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
            stats.reset();
        }

        try {
            stdx::unique_lock<stdx::mutex> lock(flushMutex);

            for (unsigned i = 0; i <= 2; i++) {
                if (stdx::cv_status::no_timeout ==
                    flushRequested.wait_for(lock, Milliseconds(oneThird).toSystemDuration())) {
                    // Someone forced a flush
                    break;
                }

                if (commitNotify.nWaiting()) {
                    // One or more getLastError j:true is pending
                    break;
                }

                if (commitJob.bytes() > UncommittedBytesLimit / 2) {
                    // The number of written bytes is growing
                    break;
                }
            }

            // The commit logic itself
            LOG(4) << "groupCommit begin";

            Timer t;

            const ServiceContext::UniqueOperationContext txnPtr = cc().makeOperationContext();
            OperationContext& txn = *txnPtr;
            AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(txn.lockState());

            // We need to snapshot the commitNumber after the flush lock has been obtained,
            // because at this point we know that we have a stable snapshot of the data.
            const CommitNotifier::When commitNumber(commitNotify.now());

            LOG(4) << "Processing commit number " << commitNumber;

            if (!commitJob.hasWritten()) {
                // We do not need the journal lock anymore. Free it here, for the really
                // unlikely possibility that the writeBuffer command below blocks.
                autoFlushLock.release();

                // getlasterror request could have come after the data was already committed.
                // No need to call committingReset though, because we have not done any
                // writes (hasWritten == false).
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                buffer->setNoop();
                buffer->journalListenerToken = getJournalListener()->getToken();

                journalWriter.writeBuffer(buffer, commitNumber);
            } else {
                // This copies all the in-memory changes into the journal writer's buffer.
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder(), cs, serverStartMs);

                estimatedPrivateMapSize += commitJob.bytes();
                commitCounter++;

                // Now that the write intents have been copied to the buffer, the commit job is
                // free to be reused. We need to reset the commit job's contents while under
                // the S flush lock, because otherwise someone might have done a write and this
                // would wipe out their changes without ever being committed.
                commitJob.committingReset();

                double systemMemoryPressurePercentage =
                    ProcessInfo::getSystemMemoryPressurePercentage();

                // Now that the in-memory modifications have been collected, we can potentially
                // release the flush lock if remap is not necessary.
                // When we remap due to memory pressure, we look at two criteria:
                // 1. Whether the amount of 4k pages touched exceeds 512 MB, a reasonable
                //    estimate of memory pressure on Linux.
                // 2. Whether the amount of free memory on the machine is running low, since #1
                //    underestimates the memory pressure on Windows, where commits happen in
                //    64MB chunks.
                const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
                    (systemMemoryPressurePercentage > 0.0) ||
                    (commitCounter % NumCommitsBeforeRemap == 0) ||
                    (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);

                double remapFraction = 0.0;

                if (shouldRemap) {
                    // We want to remap all private views about every 2 seconds. There could be
                    // ~1000 views so we do a little each pass. There will be copy on write
                    // faults after remapping, so doing a little bit at a time will avoid big
                    // load spikes when the pages are touched.
                    //
                    // TODO: Instead of the time-based logic above, consider using ProcessInfo
                    //       and watching for getResidentSize to drop, which is more precise.
                    remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;

                    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
                        remapFraction = 1;
                    } else {
                        // We don't want to get close to the UncommittedBytesLimit
                        const double remapMemFraction =
                            estimatedPrivateMapSize / ((double)UncommittedBytesLimit);

                        remapFraction = std::max(remapMemFraction, remapFraction);
                        remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
                    }
                } else {
                    LOG(4) << "Early release flush lock";

                    // We will not be doing a remap so drop the flush lock. That way we will be
                    // doing the journal I/O outside of lock, so other threads can proceed.
                    invariant(!shouldRemap);
                    autoFlushLock.release();
                }

                buffer->journalListenerToken = getJournalListener()->getToken();
                // Request async I/O to the journal. This may block.
                journalWriter.writeBuffer(buffer, commitNumber);

                // Data has now been written to the shared view. If remap was requested, we
                // would still be holding the S flush lock here, so just upgrade it and
                // perform the remap.
                if (shouldRemap) {
                    // Need to wait for the previously scheduled journal writes to complete
                    // before any remap is attempted.
                    journalWriter.flush();
                    journalWriter.assertIdle();

                    // Upgrading the journal lock to flush stops all activity on the system,
                    // because we will be remapping memory and we don't want readers to be
                    // accessing it. Technically this step could be avoided on systems which
                    // support atomic remap.
                    autoFlushLock.upgradeFlushLockToExclusive();
                    remapPrivateView(remapFraction);

                    autoFlushLock.release();

                    // Reset the private map estimate outside of the lock
                    estimatedPrivateMapSize = 0;
                    remapLastTimestamp = curTimeMicros64();

                    stats.curr()->_commitsInWriteLock++;
                    stats.curr()->_commitsInWriteLockMicros += t.micros();
                }
            }

            stats.curr()->_commits++;
            stats.curr()->_commitsMicros += t.micros();

            LOG(4) << "groupCommit end";
        } catch (DBException& e) {
            severe() << "dbexception in durThread causing immediate shutdown: " << e.toString();
            invariant(false);
        } catch (std::ios_base::failure& e) {
            severe() << "ios_base exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::bad_alloc& e) {
            severe() << "bad_alloc exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::exception& e) {
            severe() << "exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (...) {
            severe() << "unhandled exception in durThread causing immediate shutdown";
            invariant(false);
        }
    }

    // Stops the journal thread and ensures everything was written
    invariant(!commitJob.hasWritten());

    journalWriter.flush();
    journalWriter.shutdown();

    log() << "Durability thread stopped";
}
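// The wait loop at the top of durThread is a "bounded wait with early wake" pattern: sleep in
// thirds of the commit interval, but cut the sleep short if a flush is forced, a j:true writer
// is waiting, or the uncommitted-bytes budget is half spent. A minimal sketch of that shape
// follows; boundedWaitForCommit() and the demandPending() predicate are hypothetical names
// standing in for flushRequested/commitNotify/commitJob above.
#include <chrono>
#include <condition_variable>
#include <mutex>

void boundedWaitForCommit(std::mutex& m,
                          std::condition_variable& cv,
                          std::chrono::milliseconds interval,
                          bool (*demandPending)()) {
    std::unique_lock<std::mutex> lock(m);
    // +1 so the slice never rounds down to zero for tiny intervals.
    const auto oneThird = interval / 3 + std::chrono::milliseconds(1);

    for (int i = 0; i <= 2; i++) {
        // wait_for returning no_timeout means we were notified: someone forced a flush.
        if (cv.wait_for(lock, oneThird) == std::cv_status::no_timeout)
            break;
        if (demandPending())
            break;  // e.g. j:true waiters queued, or the write buffer is growing fast
    }
    // Reached after at most ~interval, or sooner on demand; the group commit runs next.
}
// Checking demand at interval/3 granularity bounds the extra latency a waiting j:true writer
// can see to a third of the commit interval, without spinning on the predicates.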
void Stats::S::reset() {
    memset(this, 0, sizeof(*this));
    _startTimeMicros = curTimeMicros64();
}
void CurOp::leave( Client::Context * context ) {
    unsigned long long now = curTimeMicros64();
    Top::global.record( _ns , _op , _lockType , now - _checkpoint , _command );
    _checkpoint = now;
}
void FileAllocator::run( FileAllocator * fa ) {
    setThreadName( "FileAllocator" );
    {
        // initialize unique temporary file name counter
        // TODO: SERVER-6055 -- Unify temporary file name selection
        SimpleMutex::scoped_lock lk(_uniqueNumberMutex);
        _uniqueNumber = curTimeMicros64();
    }
    while( 1 ) {
        {
            scoped_lock lk( fa->_pendingMutex );
            if ( fa->_pending.size() == 0 )
                fa->_pendingUpdated.wait( lk.boost() );
        }
        while( 1 ) {
            string name;
            long size = 0;
            {
                scoped_lock lk( fa->_pendingMutex );
                if ( fa->_pending.size() == 0 )
                    break;
                name = fa->_pending.front();
                size = fa->_pendingSize[ name ];
            }

            string tmp;
            long fd = 0;
            try {
                log() << "allocating new datafile " << name << ", filling with zeroes..." << endl;

                boost::filesystem::path parent = ensureParentDirCreated(name);
                tmp = fa->makeTempFileName( parent );
                ensureParentDirCreated(tmp);

#if defined(_WIN32)
                fd = _open( tmp.c_str(), _O_RDWR | _O_CREAT | O_NOATIME, _S_IREAD | _S_IWRITE );
#else
                fd = open(tmp.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR);
#endif
                if ( fd < 0 ) {
                    log() << "FileAllocator: couldn't create " << name << " (" << tmp << ") "
                          << errnoWithDescription() << endl;
                    uasserted(10439, "");
                }

#if defined(POSIX_FADV_DONTNEED)
                if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) {
                    log() << "warning: posix_fadvise fails " << name << " (" << tmp << ") "
                          << errnoWithDescription() << endl;
                }
#endif

                Timer t;

                /* make sure the file is the full desired length */
                ensureLength( fd , size );

                close( fd );
                fd = 0;

                if( rename(tmp.c_str(), name.c_str()) ) {
                    const string& errStr = errnoWithDescription();
                    const string& errMessage = str::stream()
                        << "error: couldn't rename " << tmp << " to " << name << ' ' << errStr;
                    msgasserted(13653, errMessage);
                }
                flushMyDirectory(name);

                log() << "done allocating datafile " << name << ", "
                      << "size: " << size/1024/1024 << "MB, "
                      << " took " << ((double)t.millis())/1000.0 << " secs" << endl;

                // no longer in a failed state. allow new writers.
                fa->_failed = false;
            }
            catch ( const std::exception& e ) {
                log() << "error: failed to allocate new file: " << name << " size: " << size
                      << ' ' << e.what() << ".  will try again in 10 seconds" << endl;
                if ( fd > 0 )
                    close( fd );
                try {
                    if ( ! tmp.empty() )
                        boost::filesystem::remove( tmp );
                    boost::filesystem::remove( name );
                }
                catch ( const std::exception& e ) {
                    log() << "error removing files: " << e.what() << endl;
                }
                scoped_lock lk( fa->_pendingMutex );
                fa->_failed = true;
                // not erasing from pending
                fa->_pendingUpdated.notify_all();

                sleepsecs(10);
                continue;
            }

            {
                scoped_lock lk( fa->_pendingMutex );
                fa->_pendingSize.erase( name );
                fa->_pending.pop_front();
                fa->_pendingUpdated.notify_all();
            }
        }
    }
}
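// FileAllocator::run above uses the classic crash-safe preallocation idiom: build the file
// under a temporary name, grow it to its full length, then rename() it into place so a crash
// can never leave a half-allocated file under the real name. A minimal POSIX sketch of that
// idiom follows; preallocateFile() is a hypothetical helper, and reducing ensureLength()'s
// zero-filling to ftruncate() is an assumption made for brevity (the real code writes zeroes).
#include <cstdio>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

// Returns true on success. tmpPath and finalPath must live on the same filesystem,
// otherwise rename() cannot be atomic.
bool preallocateFile(const char* tmpPath, const char* finalPath, off_t size) {
    int fd = ::open(tmpPath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
    if (fd < 0)
        return false;

    // Grow to full length and flush the data before publishing the file.
    if (::ftruncate(fd, size) != 0 || ::fsync(fd) != 0) {
        ::close(fd);
        ::unlink(tmpPath);  // never leave a partial file behind
        return false;
    }
    ::close(fd);

    // Atomic publish: readers either see no file or the complete, full-length file.
    if (::rename(tmpPath, finalPath) != 0) {
        ::unlink(tmpPath);
        return false;
    }
    return true;  // a directory fsync (flushMyDirectory above) is omitted here for brevity
}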
static void _REMAPPRIVATEVIEW() {
    // todo: Consider using ProcessInfo herein and watching for getResidentSize to drop. That
    //       could be a way to assure very good behavior here.
    static unsigned startAt;
    static unsigned long long lastRemap;
    LOG(4) << "journal REMAPPRIVATEVIEW" << endl;
    invariant(!commitJob.hasWritten());

    // we want to remap all private views about every 2 seconds. there could be ~1000 views so
    // we do a little each pass; beyond the remap time, more significantly, there will be copy on
    // write faults after remapping, so doing a little bit at a time will avoid big load spikes
    // on remapping.
    unsigned long long now = curTimeMicros64();
    double fraction = (now-lastRemap)/2000000.0;
    if (storageGlobalParams.durOptions & StorageGlobalParams::DurAlwaysRemap)
        fraction = 1;
    lastRemap = now;

#if defined(_WIN32) || defined(__sunos__)
    // Note that this negatively affects performance.
    // We must grab the exclusive lock here because remapPrivateView() on Windows and
    // Solaris need to grab it as well, due to the lack of an atomic way to remap a
    // memory mapped file.
    // See SERVER-5723 for performance improvement.
    // See SERVER-5680 to see why this code is necessary on Windows.
    // See SERVER-8795 to see why this code is necessary on Solaris.
    LockMongoFilesExclusive lk;
#else
    LockMongoFilesShared lk;
#endif
    set<MongoFile*>& files = MongoFile::getAllFiles();
    unsigned sz = files.size();
    if( sz == 0 )
        return;

    {
        // be careful not to use too much memory if the write rate is
        // extremely high
        double f = privateMapBytes / ((double)UncommittedBytesLimit);
        if( f > fraction ) {
            fraction = f;
        }
        privateMapBytes = 0;
    }

    unsigned ntodo = (unsigned) (sz * fraction);
    if( ntodo < 1 ) ntodo = 1;
    if( ntodo > sz ) ntodo = sz;

    const set<MongoFile*>::iterator b = files.begin();
    const set<MongoFile*>::iterator e = files.end();
    set<MongoFile*>::iterator i = b;
    // skip to our starting position
    for( unsigned x = 0; x < startAt; x++ ) {
        i++;
        if( i == e ) i = b;
    }
    unsigned startedAt = startAt;
    startAt = (startAt + ntodo) % sz; // mark where to start next time
    Timer t;
    for( unsigned x = 0; x < ntodo; x++ ) {
        dassert( i != e );
        if( (*i)->isDurableMappedFile() ) {
            DurableMappedFile *mmf = (DurableMappedFile*) *i;
            verify(mmf);
            if( mmf->willNeedRemap() ) {
                mmf->remapThePrivateView();
            }
            i++;
            if( i == e ) i = b;
        }
    }
    LOG(2) << "journal REMAPPRIVATEVIEW done startedAt: " << startedAt << " n:" << ntodo
           << ' ' << t.millis() << "ms" << endl;
}
void SnapshotData::takeSnapshot() {
    _created = curTimeMicros64();
    _globalUsage = Top::global.getGlobalData();
//  _totalWriteLockedTime = d.dbMutex.info().getTimeLocked();
    Top::global.cloneMap(_usage);
}