Example #1
0
    RocksEngine::RocksEngine(const std::string& path, bool durable)
        : _path(path), _durable(durable) {
        {  // create block cache
            uint64_t cacheSizeGB = 0;
            ProcessInfo pi;
            unsigned long long memSizeMB = pi.getMemSizeMB();
            if (memSizeMB > 0) {
                double cacheMB = memSizeMB / 2;
                cacheSizeGB = static_cast<uint64_t>(cacheMB / 1024);
            }
            if (cacheSizeGB < 1) {
                cacheSizeGB = 1;
            }
            _block_cache = rocksdb::NewLRUCache(cacheSizeGB * 1024 * 1024 * 1024LL);
        }
        // open DB
        rocksdb::DB* db;
        auto s = rocksdb::DB::Open(_options(), path, &db);
        ROCKS_STATUS_OK(s);
        _db.reset(db);

        // open iterator
        boost::scoped_ptr<rocksdb::Iterator> _iter(_db->NewIterator(rocksdb::ReadOptions()));

        // find maxPrefix
        _maxPrefix = 0;
        _iter->SeekToLast();
        if (_iter->Valid()) {
            // otherwise the DB is empty, so we just keep it at 0
            bool ok = extractPrefix(_iter->key(), &_maxPrefix);
            // this is DB corruption here
            invariant(ok);
        }

        // load ident to prefix map
        {
            boost::mutex::scoped_lock lk(_identPrefixMapMutex);
            for (_iter->Seek(kMetadataPrefix);
                 _iter->Valid() && _iter->key().starts_with(kMetadataPrefix); _iter->Next()) {
                rocksdb::Slice ident(_iter->key());
                ident.remove_prefix(kMetadataPrefix.size());
                // this could throw DBException, which then means DB corruption. We just let it fly
                // to the caller
                BSONObj identConfig(_iter->value().data());
                BSONElement element = identConfig.getField("prefix");
                // TODO: SERVER-16979 Correctly handle errors returned by RocksDB
                // This is DB corruption
                invariant(!element.eoo() || !element.isNumber());
                uint32_t identPrefix = static_cast<uint32_t>(element.numberInt());
                _identPrefixMap[StringData(ident.data(), ident.size())] = identPrefix;
            }
        }
    }
Example #2
0
size_t WiredTigerUtil::getCacheSizeMB(double requestedCacheSizeGB) {
    double cacheSizeMB;
    const double kMaxSizeCacheMB = 10 * 1000 * 1000;
    if (requestedCacheSizeGB == 0) {
        // Choose a reasonable amount of cache when not explicitly specified by user.
        // Set a minimum of 256MB, otherwise use 50% of available memory over 1GB.
        ProcessInfo pi;
        double memSizeMB = pi.getMemSizeMB();
        cacheSizeMB = std::max((memSizeMB - 1024) * 0.5, 256.0);
    } else {
        cacheSizeMB = 1024 * requestedCacheSizeGB;
    }
    if (cacheSizeMB > kMaxSizeCacheMB) {
        log() << "Requested cache size: " << cacheSizeMB << "MB exceeds max; setting to "
              << kMaxSizeCacheMB << "MB";
        cacheSizeMB = kMaxSizeCacheMB;
    }
    return static_cast<size_t>(cacheSizeMB);
}
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            ProcessInfo p;
            BSONObjBuilder bSys, bOs;

            bSys.appendDate( "currentTime" , jsTime() );
            bSys.append( "hostname" , prettyHostName() );
            bSys.append( "cpuAddrSize", p.getAddrSize() );
            bSys.append( "memSizeMB", static_cast <unsigned>( p.getMemSizeMB() ) );
            bSys.append( "numCores", p.getNumCores() );
            bSys.append( "cpuArch", p.getArch() );
            bSys.append( "numaEnabled", p.hasNumaEnabled() );
            bOs.append( "type", p.getOsType() );
            bOs.append( "name", p.getOsName() );
            bOs.append( "version", p.getOsVersion() );

            result.append( StringData( "system" ), bSys.obj() );
            result.append( StringData( "os" ), bOs.obj() );
            p.appendSystemDetails( result );

            return true;
        }
WiredTigerKVEngine::WiredTigerKVEngine(const std::string& path,
                                       const std::string& extraOpenOptions,
                                       bool durable,
                                       bool repair)
    : _eventHandler(WiredTigerUtil::defaultEventHandlers()),
      _path(path),
      _durable(durable),
      _sizeStorerSyncTracker(100000, 60 * 1000) {
    size_t cacheSizeGB = wiredTigerGlobalOptions.cacheSizeGB;
    if (cacheSizeGB == 0) {
        // Since the user didn't provide a cache size, choose a reasonable default value.
        // We want to reserve 1GB for the system and binaries, but it's not bad to
        // leave a fair amount left over for pagecache since that's compressed storage.
        ProcessInfo pi;
        double memSizeMB = pi.getMemSizeMB();
        if (memSizeMB > 0) {
            double cacheMB = (memSizeMB - 1024) * 0.6;
            cacheSizeGB = static_cast<size_t>(cacheMB / 1024);
            if (cacheSizeGB < 1)
                cacheSizeGB = 1;
        }
    }

    boost::filesystem::path journalPath = path;
    journalPath /= "journal";
    if (_durable) {
        if (!boost::filesystem::exists(journalPath)) {
            try {
                boost::filesystem::create_directory(journalPath);
            } catch (std::exception& e) {
                log() << "error creating journal dir " << journalPath.string() << ' ' << e.what();
                throw;
            }
        }
    }

    _previousCheckedDropsQueued = Date_t::now();

    std::stringstream ss;
    ss << "create,";
    ss << "cache_size=" << cacheSizeGB << "G,";
    ss << "session_max=20000,";
    ss << "eviction=(threads_max=4),";
    ss << "config_base=false,";
    ss << "statistics=(fast),";
    // The setting may have a later setting override it if not using the journal.  We make it
    // unconditional here because even nojournal may need this setting if it is a transition
    // from using the journal.
    ss << "log=(enabled=true,archive=true,path=journal,compressor=";
    ss << wiredTigerGlobalOptions.journalCompressor << "),";
    ss << "file_manager=(close_idle_time=100000),";  //~28 hours, will put better fix in 3.1.x
    ss << "checkpoint=(wait=" << wiredTigerGlobalOptions.checkpointDelaySecs;
    ss << ",log_size=2GB),";
    ss << "statistics_log=(wait=" << wiredTigerGlobalOptions.statisticsLogDelaySecs << "),";
    ss << WiredTigerCustomizationHooks::get(getGlobalServiceContext())->getOpenConfig("system");
    ss << extraOpenOptions;
    if (!_durable) {
        // If we started without the journal, but previously used the journal then open with the
        // WT log enabled to perform any unclean shutdown recovery and then close and reopen in
        // the normal path without the journal.
        if (boost::filesystem::exists(journalPath)) {
            string config = ss.str();
            log() << "Detected WT journal files.  Running recovery from last checkpoint.";
            log() << "journal to nojournal transition config: " << config;
            int ret = wiredtiger_open(path.c_str(), &_eventHandler, config.c_str(), &_conn);
            if (ret == EINVAL) {
                fassertFailedNoTrace(28717);
            } else if (ret != 0) {
                Status s(wtRCToStatus(ret));
                msgassertedNoTrace(28718, s.reason());
            }
            invariantWTOK(_conn->close(_conn, NULL));
        }
        // This setting overrides the earlier setting because it is later in the config string.
        ss << ",log=(enabled=false),";
    }
    string config = ss.str();
    log() << "wiredtiger_open config: " << config;
    int ret = wiredtiger_open(path.c_str(), &_eventHandler, config.c_str(), &_conn);
    // Invalid argument (EINVAL) is usually caused by invalid configuration string.
    // We still fassert() but without a stack trace.
    if (ret == EINVAL) {
        fassertFailedNoTrace(28561);
    } else if (ret != 0) {
        Status s(wtRCToStatus(ret));
        msgassertedNoTrace(28595, s.reason());
    }

    _sessionCache.reset(new WiredTigerSessionCache(this));

    if (_durable) {
        _journalFlusher = stdx::make_unique<WiredTigerJournalFlusher>(_sessionCache.get());
        _journalFlusher->go();
    }

    _sizeStorerUri = "table:sizeStorer";
    {
        WiredTigerSession session(_conn);
        if (repair && _hasUri(session.getSession(), _sizeStorerUri)) {
            log() << "Repairing size cache";
            fassertNoTrace(28577, _salvageIfNeeded(_sizeStorerUri.c_str()));
        }
        _sizeStorer.reset(new WiredTigerSizeStorer(_conn, _sizeStorerUri));
        _sizeStorer->fillCache();
    }
}
Example #5
0
    RocksEngine::RocksEngine(const std::string& path, bool durable)
        : _path(path), _durable(durable), _maxPrefix(0) {
        {  // create block cache
            uint64_t cacheSizeGB = rocksGlobalOptions.cacheSizeGB;
            if (cacheSizeGB == 0) {
                ProcessInfo pi;
                unsigned long long memSizeMB = pi.getMemSizeMB();
                if (memSizeMB > 0) {
                    double cacheMB = memSizeMB / 2;
                    cacheSizeGB = static_cast<uint64_t>(cacheMB / 1024);
                }
                if (cacheSizeGB < 1) {
                    cacheSizeGB = 1;
                }
            }
            _block_cache = rocksdb::NewLRUCache(cacheSizeGB * 1024 * 1024 * 1024LL, 6);
        }
        _maxWriteMBPerSec = rocksGlobalOptions.maxWriteMBPerSec;
        _rateLimiter.reset(
            rocksdb::NewGenericRateLimiter(static_cast<int64_t>(_maxWriteMBPerSec) * 1024 * 1024));
        // open DB
        rocksdb::DB* db;
        auto s = rocksdb::DB::Open(_options(), path, &db);
        invariantRocksOK(s);
        _db.reset(db);

        _counterManager.reset(
            new RocksCounterManager(_db.get(), rocksGlobalOptions.crashSafeCounters));
        _compactionScheduler.reset(new RocksCompactionScheduler(_db.get()));

        // open iterator
        boost::scoped_ptr<rocksdb::Iterator> iter(_db->NewIterator(rocksdb::ReadOptions()));

        // find maxPrefix
        iter->SeekToLast();
        if (iter->Valid()) {
            // otherwise the DB is empty, so we just keep it at 0
            bool ok = extractPrefix(iter->key(), &_maxPrefix);
            // this is DB corruption here
            invariant(ok);
        }

        // load ident to prefix map. also update _maxPrefix if there's any prefix bigger than
        // current _maxPrefix
        {
            boost::lock_guard<boost::mutex> lk(_identPrefixMapMutex);
            for (iter->Seek(kMetadataPrefix);
                 iter->Valid() && iter->key().starts_with(kMetadataPrefix); iter->Next()) {
                invariantRocksOK(iter->status());
                rocksdb::Slice ident(iter->key());
                ident.remove_prefix(kMetadataPrefix.size());
                // this could throw DBException, which then means DB corruption. We just let it fly
                // to the caller
                BSONObj identConfig(iter->value().data());
                BSONElement element = identConfig.getField("prefix");

                if (element.eoo() || !element.isNumber()) {
                    log() << "Mongo metadata in RocksDB database is corrupted.";
                    invariant(false);
                }

                uint32_t identPrefix = static_cast<uint32_t>(element.numberInt());
                _identPrefixMap[StringData(ident.data(), ident.size())] = identPrefix;

                _maxPrefix = std::max(_maxPrefix, identPrefix);
            }
        }

        // just to be extra sure. we need this if last collection is oplog -- in that case we
        // reserve prefix+1 for oplog key tracker
        ++_maxPrefix;

        // load dropped prefixes
        {
            rocksdb::WriteBatch wb;
            // we will use this iter to check if prefixes are still alive
            boost::scoped_ptr<rocksdb::Iterator> prefixIter(
                _db->NewIterator(rocksdb::ReadOptions()));
            for (iter->Seek(kDroppedPrefix);
                 iter->Valid() && iter->key().starts_with(kDroppedPrefix); iter->Next()) {
                invariantRocksOK(iter->status());
                rocksdb::Slice prefix(iter->key());
                prefix.remove_prefix(kDroppedPrefix.size());
                prefixIter->Seek(prefix);
                invariantRocksOK(iter->status());
                if (prefixIter->Valid() && prefixIter->key().starts_with(prefix)) {
                    // prefix is still alive, let's instruct the compaction filter to clear it up
                    uint32_t int_prefix;
                    bool ok = extractPrefix(prefix, &int_prefix);
                    invariant(ok);
                    {
                        boost::lock_guard<boost::mutex> lk(_droppedPrefixesMutex);
                        _droppedPrefixes.insert(int_prefix);
                    }
                } else {
                    // prefix is no longer alive. let's remove the prefix from our dropped prefixes
                    // list
                    wb.Delete(iter->key());
                }
            }
            if (wb.Count() > 0) {
                auto s = _db->Write(rocksdb::WriteOptions(), &wb);
                invariantRocksOK(s);
            }
        }
    }
    WiredTigerKVEngine::WiredTigerKVEngine( const std::string& path,
                                            const std::string& extraOpenOptions,
                                            bool durable )
        : _durable( durable ),
          _epoch( 0 ),
          _sizeStorerSyncTracker( 100000, 60 * 1000 ) {

        _eventHandler.handle_error = mdb_handle_error;
        _eventHandler.handle_message = mdb_handle_message;
        _eventHandler.handle_progress = mdb_handle_progress;
        _eventHandler.handle_close = mdb_handle_close;

        int cacheSizeGB = 1;

        {
            ProcessInfo pi;
            unsigned long long memSizeMB  = pi.getMemSizeMB();
            if ( memSizeMB  > 0 ) {
                double cacheMB = memSizeMB / 50;
                cacheSizeGB = static_cast<int>( cacheMB / 1024 );
                if ( cacheSizeGB < 1 )
                    cacheSizeGB = 1;
            }
        }

        if ( _durable ) {
            boost::filesystem::path journalPath = path;
            journalPath /= "journal";
            if ( !boost::filesystem::exists( journalPath ) ) {
                try {
                    boost::filesystem::create_directory( journalPath );
                }
                catch( std::exception& e) {
                    log() << "error creating journal dir " << journalPath.string() << ' ' << e.what();
                    throw;
                }
            }
        }

        std::stringstream ss;
        ss << "create,";
        ss << "cache_size=" << cacheSizeGB << "G,";
        ss << "session_max=20000,";
        ss << "extensions=[local=(entry=index_collator_extension)],";
        ss << "statistics=(all),";
        if ( _durable ) {
            ss << "log=(enabled=true,archive=true,path=journal),";
        }
        ss << "checkpoint=(wait=60,log_size=2GB),";
        ss << extraOpenOptions;
        string config = ss.str();
        log() << "wiredtiger_open config: " << config;
        invariantWTOK(wiredtiger_open(path.c_str(), &_eventHandler, config.c_str(), &_conn));
        _sessionCache.reset( new WiredTigerSessionCache( this ) );

        _sizeStorerUri = "table:sizeStorer";
        {
            WiredTigerSession session( _conn, -1 );
            WiredTigerSizeStorer* ss = new WiredTigerSizeStorer();
            ss->loadFrom( &session, _sizeStorerUri );
            _sizeStorer.reset( ss );
        }
    }