void BufferedLogForwarder::purge() {
  if (buffer_count_ <= FLAGS_buffered_log_max) {
    return;
  }
  size_t purge_count = buffer_count_ - FLAGS_buffered_log_max;

  // Collect purge_count indexes of each type (result/status) before
  // partitioning to find the oldest. Note this assumes that the indexes are
  // returned in ascending lexicographic order (true for RocksDB).
  std::vector<std::string> indexes;
  auto status =
      scanDatabaseKeys(kLogs, indexes, genIndexPrefix(true), purge_count);
  if (!status.ok()) {
    LOG(ERROR) << "Error scanning DB during buffered log purge";
    return;
  }

  LOG(WARNING) << "Purging buffered logs limit (" << FLAGS_buffered_log_max
               << ") exceeded: " << buffer_count_;

  std::vector<std::string> status_indexes;
  status = scanDatabaseKeys(
      kLogs, status_indexes, genIndexPrefix(false), purge_count);
  if (!status.ok()) {
    LOG(ERROR) << "Error scanning DB during buffered log purge";
    return;
  }

  indexes.insert(indexes.end(), status_indexes.begin(), status_indexes.end());
  if (indexes.size() < purge_count) {
    LOG(ERROR) << "Trying to purge " << purge_count << " logs but only found "
               << indexes.size();
    return;
  }

  size_t prefix_size = genIndexPrefix(true).size();

  // Partition the indexes so that the first purge_count elements are the
  // oldest indexes (the ones to be purged).
  std::nth_element(indexes.begin(),
                   indexes.begin() + purge_count - 1,
                   indexes.end(),
                   [&](const std::string& a, const std::string& b) {
                     // Skip the prefix when doing comparisons.
                     return a.compare(prefix_size,
                                      std::string::npos,
                                      b,
                                      prefix_size,
                                      std::string::npos) < 0;
                   });
  indexes.erase(indexes.begin() + purge_count, indexes.end());

  // Now only indexes of logs to be deleted remain.
  iterate(indexes, [this](const std::string& index) {
    if (!deleteValueWithCount(kLogs, index).ok()) {
      LOG(ERROR) << "Error deleting value during buffered log purge";
    }
  });
}
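// The prefix-skipping comparison in purge() uses the five-argument
// std::string::compare(pos1, count1, str, pos2, count2) overload. Below is a
// minimal, standalone sketch (not osquery code) of the same std::nth_element
// partitioning trick, assuming a fixed "r_" prefix and numeric suffixes.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Keys share a common prefix; ordering should be driven by the suffix only.
  std::vector<std::string> indexes = {"r_0009", "r_0002", "r_0007", "r_0001"};
  const size_t prefix_size = 2; // length of "r_"
  const size_t purge_count = 2; // number of oldest entries to isolate up front

  // Move the purge_count smallest suffixes to the front (their relative order
  // is unspecified), mirroring the partition performed in purge() above.
  std::nth_element(indexes.begin(),
                   indexes.begin() + purge_count - 1,
                   indexes.end(),
                   [&](const std::string& a, const std::string& b) {
                     return a.compare(prefix_size, std::string::npos,
                                      b, prefix_size, std::string::npos) < 0;
                   });

  // Drop everything except the oldest purge_count entries.
  indexes.erase(indexes.begin() + purge_count, indexes.end());
  for (const auto& index : indexes) {
    std::cout << index << "\n"; // prints r_0001 and r_0002 (either order)
  }
  return 0;
}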
TEST_F(EventsDatabaseTests, test_gentable) {
  auto sub = std::make_shared<DBFakeEventSubscriber>();
  // Lie about the tool type to enable optimizations.
  auto default_type = kToolType;
  kToolType = OSQUERY_TOOL_DAEMON;

  ASSERT_EQ(sub->optimize_time_, 0U);
  ASSERT_EQ(sub->expire_time_, 0U);

  sub->testAdd(getUnixTime() - 1);
  sub->testAdd(getUnixTime());
  sub->testAdd(getUnixTime() + 1);

  // Test the expire workflow by creating a short expiration time.
  FLAGS_events_expiry = 10;

  std::vector<std::string> keys;
  scanDatabaseKeys("events", keys);
  EXPECT_GT(keys.size(), 10U);

  // Perform a "select" equivalent.
  QueryContext context;
  auto results = sub->genTable(context);

  // Expect all non-expired results: 11, +
  EXPECT_EQ(results.size(), 9U);

  // The expiration time is now - events_expiry.
  EXPECT_GT(sub->expire_time_, getUnixTime() - (FLAGS_events_expiry * 2));
  EXPECT_LT(sub->expire_time_, getUnixTime());

  // The optimize time will be changed too.
  ASSERT_GT(sub->optimize_time_, 0U);

  // Restore the tool type.
  kToolType = default_type;

  results = sub->genTable(context);
  EXPECT_EQ(results.size(), 3U);

  results = sub->genTable(context);
  EXPECT_EQ(results.size(), 3U);

  // The optimize time should have been written to the database.
  // It should be the same as the current (relative) optimize time.
  std::string content;
  getDatabaseValue(
      "events", "optimize.DBFakePublisher.DBFakeSubscriber", content);
  EXPECT_EQ(std::to_string(sub->optimize_time_), content);

  keys.clear();
  scanDatabaseKeys("events", keys);
  EXPECT_LT(keys.size(), 30U);
}
TEST_F(DatabaseTests, test_scan_values) {
  setDatabaseValue(kLogs, "1", "0");
  setDatabaseValue(kLogs, "2", "0");
  setDatabaseValue(kLogs, "3", "0");

  std::vector<std::string> keys;
  auto s = scanDatabaseKeys(kLogs, keys);
  EXPECT_TRUE(s.ok());
  EXPECT_GT(keys.size(), 2U);

  keys.clear();
  s = scanDatabaseKeys(kLogs, keys, 2);
  EXPECT_TRUE(s.ok());
  EXPECT_EQ(keys.size(), 2U);
}
TEST_F(EventsDatabaseTests, test_expire_check) {
  auto sub = std::make_shared<DBFakeEventSubscriber>();
  // Set the max number of buffered events to something reasonably small.
  FLAGS_events_max = 10;
  auto t = 10000;

  // We are still at the mercy of the opaque EVENTS_CHECKPOINT define.
  for (size_t x = 0; x < 3; x++) {
    size_t num_events = 256 * x;
    for (size_t i = 0; i < num_events; i++) {
      sub->testAdd(t++);
    }

    // Since events tests are dependent, expect 257 + 3 events.
    QueryContext context;
    auto results = sub->genTable(context);
    if (x == 0) {
      // The first iteration is dependent on previous test state.
      continue;
    }

    // The number of events should remain constant.
    // In practice there may be an event still in the write queue.
    EXPECT_LT(results.size(), 60U);
  }

  // Try again, this time with a scan.
  for (size_t k = 0; k < 3; k++) {
    for (size_t x = 0; x < 3; x++) {
      size_t num_events = 256 * x;
      for (size_t i = 0; i < num_events; i++) {
        sub->testAdd(t++);
      }

      // Records hold the event_id + time indexes.
      // Data hosts the event_id + JSON content.
      auto record_key = "records." + sub->dbNamespace();
      auto data_key = "data." + sub->dbNamespace();

      std::vector<std::string> records, datas;
      scanDatabaseKeys(kEvents, records, record_key);
      scanDatabaseKeys(kEvents, datas, data_key);

      EXPECT_LT(records.size(), 20U);
      EXPECT_LT(datas.size(), 60U);
    }
  }
}
TEST_F(EventsDatabaseTests, test_gentable) {
  auto sub = std::make_shared<DBFakeEventSubscriber>();
  auto status = sub->testAdd(1);
  status = sub->testAdd(2);
  status = sub->testAdd(11);
  status = sub->testAdd(61);
  status = sub->testAdd((1 * 3600) + 1);
  status = sub->testAdd((2 * 3600) + 1);

  ASSERT_EQ(0U, sub->optimize_time_);
  ASSERT_EQ(0U, sub->expire_time_);
  ASSERT_EQ(0U, sub->min_expiration_);

  auto t = getUnixTime();
  sub->testAdd(t - 1);
  sub->testAdd(t);
  sub->testAdd(t + 1);

  // Test the expire workflow by creating a short expiration time.
  sub->setEventsExpiry(10);

  std::vector<std::string> keys;
  scanDatabaseKeys("events", keys);
  // 9 data records, 1 eid counter, 3 indexes, 15 index records.
  // Depending on the moment, an additional 3 indexes may be introduced.
  EXPECT_LE(16U, keys.size());

  // Perform a "select" equivalent.
  auto results = genRows(sub.get());

  // Expect all non-expired results: 11, +
  EXPECT_EQ(9U, results.size());

  // The expiration time is now - events_expiry +/- 60.
  EXPECT_LT(t - (sub->getEventsExpiry() * 2), sub->expire_time_ + 60);
  EXPECT_GT(t, sub->expire_time_);

  // The optimize time will not be changed.
  ASSERT_EQ(0U, sub->optimize_time_);

  results = genRows(sub.get());
  EXPECT_EQ(3U, results.size());

  results = genRows(sub.get());
  EXPECT_EQ(3U, results.size());

  keys.clear();
  scanDatabaseKeys("events", keys);
  EXPECT_LE(6U, keys.size());
}
void EventSubscriberPlugin::expireCheck(bool cleanup) {
  auto data_key = "data." + dbNamespace();
  auto eid_key = "eid." + dbNamespace();
  // Min key will be the last surviving key.
  size_t min_key = 0;

  {
    auto limit = getEventsMax();
    std::vector<std::string> keys;
    scanDatabaseKeys(kEvents, keys, data_key);
    if (keys.size() <= limit) {
      return;
    }

    // There is an overflow of events buffered for this subscriber.
    LOG(WARNING) << "Expiring events for subscriber: " << getName()
                 << " (limit " << limit << ")";
    VLOG(1) << "Subscriber events " << getName() << " exceeded limit " << limit
            << " by: " << keys.size() - limit;
    // Inspect the N-FLAGS_events_max -th event's value and expire before the
    // time within the content.
    std::string last_key;
    getDatabaseValue(kEvents, eid_key, last_key);
    // The EID is the next-index.
    // EID - events_max is the oldest event to keep.
    min_key = boost::lexical_cast<size_t>(last_key) - getEventsMax();

    if (cleanup) {
      // Scan each of the keys in keys; if their ID portion is < min_key,
      // nix them. This requires lots of conversions, use with care.
      for (const auto& key : keys) {
        if (std::stoul(key.substr(key.rfind('.') + 1)) < min_key) {
          deleteDatabaseValue(kEvents, key);
        }
      }
    }
  }

  // Convert the key index into a time using the content.
  // The oldest surviving event is fetched and the corresponding time is used
  // as the expiration time for the subscriber.
  std::string content;
  getDatabaseValue(kEvents, data_key + "." + std::to_string(min_key), content);

  // Decode the value into a row structure to extract the time.
  Row r;
  if (!deserializeRowJSON(content, r) || r.count("time") == 0) {
    return;
  }

  // The last time will become the implicit expiration time.
  size_t last_time = boost::lexical_cast<size_t>(r.at("time"));
  if (last_time > 0) {
    expire_time_ = last_time;
  }

  // Finally, attempt an index query to trigger expirations.
  // In this case the result set is not used.
  getIndexes(expire_time_, 0);
}
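// The cleanup branch above keys off the numeric suffix of each data key. A
// standalone sketch (not osquery code) of that suffix parse, assuming keys
// shaped like "data.<namespace>.<eid>" with a hypothetical namespace:
#include <iostream>
#include <string>

int main() {
  // Hypothetical key and threshold, for illustration only.
  const std::string key = "data.DBFakePublisher.DBFakeSubscriber.42";
  const size_t min_key = 100;

  // Extract the numeric event ID after the final '.' and compare it.
  size_t eid = std::stoul(key.substr(key.rfind('.') + 1));
  if (eid < min_key) {
    std::cout << key << " would be deleted (eid " << eid << ")\n";
  }
  return 0;
}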
TEST_F(TLSLoggerTests, test_database) {
  // Start a server.
  TLSServerRunner::start();
  TLSServerRunner::setClientConfig();

  auto forwarder = std::make_shared<TLSLogForwarder>();
  std::string expected = "{\"new_json\": true}";
  forwarder->logString(expected);

  StatusLogLine status;
  status.message = "{\"status\": \"bar\"}";
  forwarder->logStatus({status});

  // Stop the server.
  TLSServerRunner::unsetClientConfig();
  TLSServerRunner::stop();

  std::vector<std::string> indexes;
  scanDatabaseKeys(kLogs, indexes);
  EXPECT_EQ(2U, indexes.size());

  // Iterate using an unordered search, and search for the expected string
  // that was just logged.
  bool found_string = false;
  for (const auto& index : indexes) {
    std::string value;
    getDatabaseValue(kLogs, index, value);
    found_string = (found_string || value == expected);
    deleteDatabaseValue(kLogs, index);
  }
  EXPECT_TRUE(found_string);
}
Status ViewsConfigParserPlugin::update(const std::string& source,
                                       const ParserConfig& config) {
  auto cv = config.find("views");
  if (cv == config.end()) {
    return Status(1);
  }

  auto obj = data_.getObject();
  data_.copyFrom(cv->second.doc(), obj);
  data_.add("views", obj);

  const auto& views = data_.doc()["views"];

  // We use a restricted scope below to change the data structure from
  // an array to a set. This lets us do deletes much more efficiently.
  std::vector<std::string> created_views;
  std::set<std::string> erase_views;
  {
    std::vector<std::string> old_views_vec;
    scanDatabaseKeys(kQueries, old_views_vec, kConfigViews);
    for (const auto& view : old_views_vec) {
      erase_views.insert(view.substr(kConfigViews.size()));
    }
  }

  QueryData r;
  for (const auto& view : views.GetObject()) {
    std::string name = view.name.GetString();
    std::string query = view.value.GetString();
    if (query.empty()) {
      continue;
    }

    std::string old_query = "";
    getDatabaseValue(kQueries, kConfigViews + name, old_query);
    erase_views.erase(name);
    if (old_query == query) {
      continue;
    }

    // The view has been updated.
    osquery::query("DROP VIEW " + name, r);
    auto s = osquery::query("CREATE VIEW " + name + " AS " + query, r);
    if (s.ok()) {
      setDatabaseValue(kQueries, kConfigViews + name, query);
    } else {
      LOG(INFO) << "Error creating view (" << name << "): " << s.getMessage();
    }
  }

  // Any views left are views that don't exist in the new configuration file,
  // so we tear them down and remove them from the database.
  for (const auto& old_view : erase_views) {
    osquery::query("DROP VIEW " + old_view, r);
    deleteDatabaseValue(kQueries, kConfigViews + old_view);
  }

  return Status(0, "OK");
}
/// Get all ATC tables that should be registered from the database.
std::set<std::string> ATCConfigParserPlugin::registeredATCTables() {
  std::vector<std::string> tables;
  scanDatabaseKeys(kPersistentSettings, tables, kDatabaseKeyPrefix);
  std::set<std::string> set_tables;

  for (const auto& table : tables) {
    set_tables.insert(table.substr(kDatabaseKeyPrefix.size()));
  }
  return set_tables;
}
Status BufferedLogForwarder::setUp() {
  // Initialize buffer_count_ by scanning the DB.
  std::vector<std::string> indexes;
  auto status = scanDatabaseKeys(kLogs, indexes, index_name_, 0);
  if (!status.ok()) {
    return Status(1, "Error scanning for buffered log count");
  }
  buffer_count_ = indexes.size();
  return Status(0);
}
Status ATCConfigParserPlugin::setUp() {
  VLOG(1) << "Removing stale ATC entries";
  std::vector<std::string> keys;
  scanDatabaseKeys(kPersistentSettings, keys, kDatabaseKeyPrefix);
  for (const auto& key : keys) {
    auto s = deleteDatabaseValue(kPersistentSettings, key);
    if (!s.ok()) {
      LOG(INFO) << "Could not clear ATC key " << key << " from database";
    }
  }
  return Status();
}
void Config::purge() {
  // The first use of purge is removing expired query results.
  std::vector<std::string> saved_queries;
  scanDatabaseKeys(kQueries, saved_queries);

  const auto& schedule = this->schedule_;
  auto queryExists = [&schedule](const std::string& query_name) {
    for (const auto& pack : schedule->packs_) {
      const auto& pack_queries = pack->getSchedule();
      if (pack_queries.count(query_name)) {
        return true;
      }
    }
    return false;
  };

  RecursiveLock lock(config_schedule_mutex_);
  // Iterate over each result set in the database.
  for (const auto& saved_query : saved_queries) {
    if (queryExists(saved_query)) {
      continue;
    }

    std::string content;
    getDatabaseValue(kPersistentSettings, "timestamp." + saved_query, content);
    if (content.empty()) {
      // No timestamp is set for this query, perhaps this is the first time
      // query results expiration is applied.
      setDatabaseValue(kPersistentSettings,
                       "timestamp." + saved_query,
                       std::to_string(getUnixTime()));
      continue;
    }

    // Parse the timestamp and compare.
    size_t last_executed = 0;
    try {
      last_executed = boost::lexical_cast<size_t>(content);
    } catch (const boost::bad_lexical_cast& /* e */) {
      // Erase the timestamp as it is potentially corrupt.
      deleteDatabaseValue(kPersistentSettings, "timestamp." + saved_query);
      continue;
    }

    if (last_executed < getUnixTime() - 592200) {
      // Query has not run in the last week, expire results and interval.
      deleteDatabaseValue(kQueries, saved_query);
      deleteDatabaseValue(kPersistentSettings, "interval." + saved_query);
      deleteDatabaseValue(kPersistentSettings, "timestamp." + saved_query);
      VLOG(1) << "Expiring results for scheduled query: " << saved_query;
    }
  }
}
void TLSLogForwarderRunner::check() {
  // Get a list of all the buffered log items, with a max of 1024 lines.
  std::vector<std::string> indexes;
  auto status = scanDatabaseKeys(kLogs, indexes, kTLSMaxLogLines);

  // For each index, accumulate the log line into the result or status set.
  std::vector<std::string> results, statuses;
  iterate(indexes, ([&results, &statuses](std::string& index) {
            std::string value;
            auto& target = ((index.at(0) == 'r') ? results : statuses);
            if (getDatabaseValue(kLogs, index, value)) {
              // Enforce a max log line size for TLS logging.
              if (value.size() > FLAGS_logger_tls_max) {
                LOG(WARNING) << "Line exceeds TLS logger max: " << value.size();
              } else {
                target.push_back(std::move(value));
              }
            }
          }));

  // If any results/statuses were found in the flushed buffer, send.
  if (results.size() > 0) {
    status = send(results, "result");
    if (!status.ok()) {
      VLOG(1) << "Could not send results to logger URI: " << uri_ << " ("
              << status.getMessage() << ")";
    } else {
      // Clear the results logs once they were sent.
      iterate(indexes, ([&results](std::string& index) {
                if (index.at(0) != 'r') {
                  return;
                }
                deleteDatabaseValue(kLogs, index);
              }));
    }
  }

  if (statuses.size() > 0) {
    status = send(statuses, "status");
    if (!status.ok()) {
      VLOG(1) << "Could not send status logs to logger URI: " << uri_ << " ("
              << status.getMessage() << ")";
    } else {
      // Clear the status logs once they were sent.
      iterate(indexes, ([&results](std::string& index) {
                if (index.at(0) != 's') {
                  return;
                }
                deleteDatabaseValue(kLogs, index);
              }));
    }
  }
}
DistributedQueryRequest Distributed::popRequest() {
  // Read all pending queries.
  std::vector<std::string> queries;
  scanDatabaseKeys(kQueries, queries, kDistributedQueryPrefix);

  // Set the last-most-recent query as the request, and delete it.
  DistributedQueryRequest request;
  const auto& next = queries.front();
  request.id = next.substr(kDistributedQueryPrefix.size());
  getDatabaseValue(kQueries, next, request.query);
  deleteDatabaseValue(kQueries, next);
  return request;
}
void BufferedLogForwarder::check() {
  // Get a list of all the buffered log items, with a max of 1024 lines.
  std::vector<std::string> indexes;
  auto status = scanDatabaseKeys(kLogs, indexes, index_name_, max_log_lines_);

  // For each index, accumulate the log line into the result or status set.
  std::vector<std::string> results, statuses;
  iterate(indexes, ([&results, &statuses, this](std::string& index) {
            std::string value;
            auto& target = isResultIndex(index) ? results : statuses;
            if (getDatabaseValue(kLogs, index, value)) {
              target.push_back(std::move(value));
            }
          }));

  // If any results/statuses were found in the flushed buffer, send.
  if (results.size() > 0) {
    status = send(results, "result");
    if (!status.ok()) {
      VLOG(1) << "Error sending results to logger: " << status.getMessage();
    } else {
      // Clear the results logs once they were sent.
      iterate(indexes, ([this](std::string& index) {
                if (!isResultIndex(index)) {
                  return;
                }
                deleteValueWithCount(kLogs, index);
              }));
    }
  }

  if (statuses.size() > 0) {
    status = send(statuses, "status");
    if (!status.ok()) {
      VLOG(1) << "Error sending status to logger: " << status.getMessage();
    } else {
      // Clear the status logs once they were sent.
      iterate(indexes, ([this](std::string& index) {
                if (!isStatusIndex(index)) {
                  return;
                }
                deleteValueWithCount(kLogs, index);
              }));
    }
  }

  // Purge any logs exceeding the max after our send attempt.
  if (FLAGS_buffered_log_max > 0) {
    purge();
  }
}
void dumpDatabase() {
  for (const auto& domain : kDomains) {
    std::vector<std::string> keys;
    if (!scanDatabaseKeys(domain, keys)) {
      continue;
    }
    for (const auto& key : keys) {
      std::string value;
      if (!getDatabaseValue(domain, key, value)) {
        continue;
      }
      fprintf(
          stdout, "%s[%s]: %s\n", domain.c_str(), key.c_str(), value.c_str());
    }
  }
}
Status init() override {
  // Before starting our subscription, purge any residual db entries, as it's
  // unlikely we'll finish re-assembling them.
  std::vector<std::string> keys;
  scanDatabaseKeys(kEvents, keys, kScriptBlockPrefix);
  for (const auto& k : keys) {
    auto s = deleteDatabaseValue(kEvents, k);
    if (!s.ok()) {
      VLOG(1) << "Failed to delete stale script block from the database " << k;
    }
  }

  auto wc = createSubscriptionContext();
  wc->sources.insert(kPowershellEventsChannel);

  subscribe(&PowershellEventSubscriber::Callback, wc);
  return Status();
}
void EventSubscriberPlugin::expireCheck() {
  auto data_key = "data." + dbNamespace();
  auto eid_key = "eid." + dbNamespace();

  std::vector<std::string> keys;
  scanDatabaseKeys(kEvents, keys, data_key);
  if (keys.size() <= FLAGS_events_max) {
    return;
  }

  // There is an overflow of events buffered for this subscriber.
  LOG(WARNING) << "Expiring events for subscriber: " << getName()
               << " limit (" << FLAGS_events_max
               << ") exceeded: " << keys.size();
  // Inspect the N-FLAGS_events_max -th event's value and expire before the
  // time within the content.
  std::string last_key;
  getDatabaseValue(kEvents, eid_key, last_key);
  // The EID is the next-index.
  size_t max_key =
      boost::lexical_cast<size_t>(last_key) - FLAGS_events_max - 1;

  // Convert the key index into a time using the content.
  std::string content;
  getDatabaseValue(kEvents, data_key + "." + std::to_string(max_key), content);

  // Decode the value into a row structure to extract the time.
  Row r;
  if (!deserializeRowJSON(content, r) || r.count("time") == 0) {
    return;
  }

  // The last time will become the implicit expiration time.
  size_t last_time = boost::lexical_cast<size_t>(r.at("time"));
  if (last_time > 0) {
    expire_time_ = last_time;
  }

  // Finally, attempt an index query to trigger expirations.
  // In this case the result set is not used.
  getIndexes(expire_time_ - 1, -1);
}
static Status migrateV0V1(void) {
  std::vector<std::string> keys;
  auto s = scanDatabaseKeys(kQueries, keys);
  if (!s.ok()) {
    return Status(1, "Failed to lookup legacy query data from database");
  }

  for (const auto& key : keys) {
    // Skip over epoch and counter entries, as 0 is parsed by ptree.
    if (boost::algorithm::ends_with(key, kDbEpochSuffix) ||
        boost::algorithm::ends_with(key, kDbCounterSuffix) ||
        boost::algorithm::starts_with(key, "query.")) {
      continue;
    }

    std::string value{""};
    if (!getDatabaseValue(kQueries, key, value)) {
      LOG(WARNING) << "Failed to get value from database " << key;
      continue;
    }

    std::string out;
    s = ptreeToRapidJSON(value, out);
    if (!s.ok()) {
      LOG(WARNING) << "Conversion from ptree to RapidJSON failed for '" << key
                   << ": " << value << "': " << s.what() << ". Dropping key!";
      continue;
    }

    if (!setDatabaseValue(kQueries, key, out)) {
      LOG(WARNING) << "Failed to update value in database " << key << ": "
                   << value;
    }
  }

  return Status();
}
Status scanDatabaseKeys(const std::string& domain,
                        std::vector<std::string>& keys,
                        size_t max) {
  return scanDatabaseKeys(domain, keys, "", max);
}
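// A minimal usage sketch of the scanDatabaseKeys overloads shown in these
// snippets. Assumptions: the declarations live in <osquery/database.h>, the
// "demo." prefix is hypothetical, and a max of 0 means no limit, as the
// BufferedLogForwarder::setUp snippet above suggests.
#include <osquery/database.h>

#include <string>
#include <vector>

namespace osquery {

void scanExample() {
  std::vector<std::string> keys;

  // Scan every key in the logs domain (empty prefix, no limit).
  auto status = scanDatabaseKeys(kLogs, keys);

  // Scan at most 16 keys that share a hypothetical "demo." prefix.
  keys.clear();
  status = scanDatabaseKeys(kLogs, keys, "demo.", 16);
  if (!status.ok()) {
    // Handle the failure; the keys vector may be incomplete.
  }
}

} // namespace osquery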
Status PowershellEventSubscriber::Callback(const ECRef& ec, const SCRef& sc) {
  // For script block logging we only care about events with script blocks.
  auto eid = ec->eventRecord.get("Event.System.EventID", -1);
  if (eid != kScriptBlockLoggingEid) {
    return Status();
  }

  Row results;
  for (const auto& node : ec->eventRecord.get_child("Event", pt::ptree())) {
    if (node.first == "System" || node.first == "<xmlattr>") {
      continue;
    }
    // #4357: This should make use of RapidJSON
    parseTree(node.second, results);
  }

  FILETIME etime;
  GetSystemTimeAsFileTime(&etime);
  results["time"] = BIGINT(filetimeToUnixtime(etime));
  results["datetime"] =
      ec->eventRecord.get("Event.System.TimeCreated.<xmlattr>.SystemTime", "");

  // If there's only one script block, no reassembly is needed.
  if (results["MessageTotal"] == "1") {
    addScriptResult(results);
    return Status();
  }

  // Add the script content to the DB for later reassembly.
  auto s = setDatabaseValue(kEvents,
                            kScriptBlockPrefix + results["ScriptBlockId"] +
                                "." + results["MessageNumber"],
                            results["ScriptBlockText"]);
  if (!s.ok()) {
    LOG(WARNING)
        << "Failed to add new Powershell block to database for script "
        << results["ScriptBlockId"];
  }

  // If we expect more blocks, bail out early.
  if (results["MessageNumber"] != results["MessageTotal"]) {
    return Status();
  }

  // Otherwise all script blocks should be accounted for, so reconstruct.
  std::vector<std::string> keys;
  s = scanDatabaseKeys(
      kEvents, keys, kScriptBlockPrefix + results["ScriptBlockId"]);
  if (!s.ok()) {
    LOG(WARNING) << "Failed to look up powershell script blocks for "
                 << results["ScriptBlockId"];
    return Status(1);
  }

  std::string powershell_script{""};
  for (const auto& key : keys) {
    std::string val{""};
    s = getDatabaseValue(kEvents, key, val);
    if (!s.ok()) {
      LOG(WARNING) << "Failed to retrieve script block " << key;
      continue;
    }

    powershell_script += val;

    s = deleteDatabaseValue(kEvents, key);
    if (!s.ok()) {
      LOG(WARNING) << "Failed to delete script block key from db " << key;
    }
  }

  results["ScriptBlockText"] = powershell_script;
  addScriptResult(results);
  return Status();
}
std::vector<std::string> Query::getStoredQueryNames() {
  std::vector<std::string> results;
  scanDatabaseKeys(kQueries, results);
  return results;
}