// Opens the RocksDB database at `path` and rebuilds the engine's in-memory bookkeeping:
//   1. creates the LRU block cache (half of the memory reported by ProcessInfo, truncated to
//      whole GB, with a floor of 1GB);
//   2. opens the database with _options() and takes ownership of the handle in _db;
//   3. seeds _maxPrefix from the last key in the database (left at 0 when the DB is empty);
//   4. loads every entry under kMetadataPrefix into _identPrefixMap.
//
// path    - location handed to rocksdb::DB::Open
// durable - stored in _durable; not otherwise used by this constructor
//
// A failed open, an unparsable last key, or a metadata entry without a numeric "prefix" field
// is treated as fatal (ROCKS_STATUS_OK / invariant). Constructing the BSONObj over a stored
// value may throw DBException, which is deliberately left to propagate to the caller.
RocksEngine::RocksEngine(const std::string& path, bool durable)
    : _path(path), _durable(durable) {
    {  // create block cache
        uint64_t cacheSizeGB = 0;
        ProcessInfo pi;
        unsigned long long memSizeMB = pi.getMemSizeMB();
        if (memSizeMB > 0) {
            // use half of the reported memory, expressed in whole gigabytes
            double cacheMB = memSizeMB / 2;
            cacheSizeGB = static_cast<uint64_t>(cacheMB / 1024);
        }
        if (cacheSizeGB < 1) {
            cacheSizeGB = 1;
        }
        _block_cache = rocksdb::NewLRUCache(cacheSizeGB * 1024 * 1024 * 1024LL);
    }

    // open DB
    rocksdb::DB* db;
    auto s = rocksdb::DB::Open(_options(), path, &db);
    ROCKS_STATUS_OK(s);
    _db.reset(db);

    // open iterator (local to the constructor; it is not a member despite what an
    // underscore-prefixed name would suggest)
    boost::scoped_ptr<rocksdb::Iterator> iter(_db->NewIterator(rocksdb::ReadOptions()));

    // find maxPrefix
    _maxPrefix = 0;
    iter->SeekToLast();
    if (iter->Valid()) {
        // otherwise the DB is empty, so we just keep it at 0
        bool ok = extractPrefix(iter->key(), &_maxPrefix);
        // this is DB corruption here
        invariant(ok);
    }

    // load ident to prefix map
    {
        boost::mutex::scoped_lock lk(_identPrefixMapMutex);
        for (iter->Seek(kMetadataPrefix);
             iter->Valid() && iter->key().starts_with(kMetadataPrefix);
             iter->Next()) {
            // the ident is whatever follows the metadata prefix in the key
            rocksdb::Slice ident(iter->key());
            ident.remove_prefix(kMetadataPrefix.size());

            // this could throw DBException, which then means DB corruption. We just let it fly
            // to the caller
            BSONObj identConfig(iter->value().data());
            BSONElement element = identConfig.getField("prefix");

            // TODO: SERVER-16979 Correctly handle errors returned by RocksDB
            // This is DB corruption: the "prefix" field must be present AND numeric. (The
            // check used to be `!eoo() || !isNumber()`, which holds for every possible
            // element and therefore never fired.)
            invariant(!element.eoo() && element.isNumber());

            uint32_t identPrefix = static_cast<uint32_t>(element.numberInt());
            _identPrefixMap[StringData(ident.data(), ident.size())] = identPrefix;
        }
    }
}
// Opens the RocksDB database at `path` and rebuilds the engine's in-memory bookkeeping:
//   1. creates the LRU block cache, sized from rocksGlobalOptions.cacheSizeGB or, when that is
//      0, half of the memory reported by ProcessInfo (truncated to whole GB, floor of 1GB);
//   2. creates the write rate limiter from rocksGlobalOptions.maxWriteMBPerSec;
//   3. opens the database with _options() and creates the counter manager and compaction
//      scheduler on top of it;
//   4. seeds _maxPrefix from the last key, then raises it to the largest prefix recorded under
//      kMetadataPrefix while loading _identPrefixMap, and finally bumps it by one;
//   5. walks the kDroppedPrefix entries: prefixes that still have keys are added to
//      _droppedPrefixes, the markers of fully removed prefixes are deleted.
//
// path    - location handed to rocksdb::DB::Open
// durable - stored in _durable; not otherwise used by this constructor
//
// Any non-OK RocksDB status and any malformed metadata is treated as fatal (invariant).
// Constructing the BSONObj over a stored value may throw DBException, which is deliberately
// left to propagate to the caller.
RocksEngine::RocksEngine(const std::string& path, bool durable)
    : _path(path), _durable(durable), _maxPrefix(0) {
    {  // create block cache
        uint64_t cacheSizeGB = rocksGlobalOptions.cacheSizeGB;
        if (cacheSizeGB == 0) {
            // no explicit size configured: derive one from the machine's memory
            ProcessInfo pi;
            unsigned long long memSizeMB = pi.getMemSizeMB();
            if (memSizeMB > 0) {
                double cacheMB = memSizeMB / 2;
                cacheSizeGB = static_cast<uint64_t>(cacheMB / 1024);
            }
            if (cacheSizeGB < 1) {
                cacheSizeGB = 1;
            }
        }
        // second argument is the cache's num_shard_bits
        _block_cache = rocksdb::NewLRUCache(cacheSizeGB * 1024 * 1024 * 1024LL, 6);
    }

    _maxWriteMBPerSec = rocksGlobalOptions.maxWriteMBPerSec;
    // the rate limiter takes bytes per second
    _rateLimiter.reset(
        rocksdb::NewGenericRateLimiter(static_cast<int64_t>(_maxWriteMBPerSec) * 1024 * 1024));

    // open DB
    rocksdb::DB* db;
    auto s = rocksdb::DB::Open(_options(), path, &db);
    invariantRocksOK(s);
    _db.reset(db);

    _counterManager.reset(
        new RocksCounterManager(_db.get(), rocksGlobalOptions.crashSafeCounters));
    _compactionScheduler.reset(new RocksCompactionScheduler(_db.get()));

    // open iterator
    boost::scoped_ptr<rocksdb::Iterator> iter(_db->NewIterator(rocksdb::ReadOptions()));

    // find maxPrefix
    iter->SeekToLast();
    if (iter->Valid()) {
        // otherwise the DB is empty, so we just keep it at 0
        bool ok = extractPrefix(iter->key(), &_maxPrefix);
        // this is DB corruption here
        invariant(ok);
    }

    // load ident to prefix map. also update _maxPrefix if there's any prefix bigger than
    // current _maxPrefix
    {
        boost::lock_guard<boost::mutex> lk(_identPrefixMapMutex);
        for (iter->Seek(kMetadataPrefix);
             iter->Valid() && iter->key().starts_with(kMetadataPrefix);
             iter->Next()) {
            invariantRocksOK(iter->status());
            // the ident is whatever follows the metadata prefix in the key
            rocksdb::Slice ident(iter->key());
            ident.remove_prefix(kMetadataPrefix.size());

            // this could throw DBException, which then means DB corruption. We just let it fly
            // to the caller
            BSONObj identConfig(iter->value().data());
            BSONElement element = identConfig.getField("prefix");

            if (element.eoo() || !element.isNumber()) {
                log() << "Mongo metadata in RocksDB database is corrupted.";
                invariant(false);
            }

            uint32_t identPrefix = static_cast<uint32_t>(element.numberInt());
            _identPrefixMap[StringData(ident.data(), ident.size())] = identPrefix;
            _maxPrefix = std::max(_maxPrefix, identPrefix);
        }
        // An iterator that hits an error becomes invalid, which ends the loop above without
        // ever reaching the in-loop check. Make sure the scan stopped for a benign reason.
        invariantRocksOK(iter->status());
    }

    // just to be extra sure. we need this if last collection is oplog -- in that case we
    // reserve prefix+1 for oplog key tracker
    ++_maxPrefix;

    // load dropped prefixes
    {
        rocksdb::WriteBatch wb;
        // we will use this iter to check if prefixes are still alive
        boost::scoped_ptr<rocksdb::Iterator> prefixIter(
            _db->NewIterator(rocksdb::ReadOptions()));
        for (iter->Seek(kDroppedPrefix);
             iter->Valid() && iter->key().starts_with(kDroppedPrefix);
             iter->Next()) {
            invariantRocksOK(iter->status());
            rocksdb::Slice prefix(iter->key());
            prefix.remove_prefix(kDroppedPrefix.size());

            prefixIter->Seek(prefix);
            // Check the iterator we just moved. Checking `iter` here would let a failed probe
            // look like "no keys left", and the dropped-prefix marker would be deleted while
            // data under that prefix may still exist.
            invariantRocksOK(prefixIter->status());

            if (prefixIter->Valid() && prefixIter->key().starts_with(prefix)) {
                // prefix is still alive, let's instruct the compaction filter to clear it up
                uint32_t int_prefix;
                bool ok = extractPrefix(prefix, &int_prefix);
                invariant(ok);
                {
                    boost::lock_guard<boost::mutex> lk(_droppedPrefixesMutex);
                    _droppedPrefixes.insert(int_prefix);
                }
            } else {
                // prefix is no longer alive. let's remove the prefix from our dropped prefixes
                // list
                wb.Delete(iter->key());
            }
        }
        // same reasoning as for the metadata scan: an errored iterator just looks exhausted
        invariantRocksOK(iter->status());

        if (wb.Count() > 0) {
            auto writeStatus = _db->Write(rocksdb::WriteOptions(), &wb);
            invariantRocksOK(writeStatus);
        }
    }
}