// If we're using more than 256K of memory for log entries, prune
// at least 10% of the log entries.
//
// mLogElementsLock must be held when this function is called.
void LogBuffer::maybePrune(log_id_t id) {
    size_t sizes = stats.sizes(id);
    if (sizes > log_buffer_size(id)) {
        size_t sizeOver90Percent = sizes - ((log_buffer_size(id) * 9) / 10);
        size_t elements = stats.elements(id);
        unsigned long pruneRows = elements * sizeOver90Percent / sizes;
        elements /= 10;
        if (pruneRows <= elements) {
            pruneRows = elements;
        }
        prune(id, pruneRows);
    }
}
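// Worked example (editorial; figures invented for illustration): with
// log_buffer_size(id) = 256 KiB, stats.sizes(id) = 300 KiB and
// stats.elements(id) = 1000 entries, sizeOver90Percent = 300 KiB - 230 KiB
// (90% of capacity) ≈ 70 KiB, so pruneRows = 1000 * 70 / 300 ≈ 230. The
// elements / 10 = 100 floor does not kick in, and prune(id, ~230) runs.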
// set the total space allocated to "id"
int LogBuffer::setSize(log_id_t id, unsigned long size) {
    // Reasonable limits ...
    if (!valid_size(size)) {
        return -1;
    }
    pthread_mutex_lock(&mLogElementsLock);
    log_buffer_size(id) = size;
    pthread_mutex_unlock(&mLogElementsLock);
    return 0;
}
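// Editorial usage sketch ("logBuf" and "requested" are invented names): a
// caller handling a "logcat -G"-style resize request could forward the parsed
// byte count and rely on the -1 return to reject values that valid_size()
// refuses, e.g.
//
//     unsigned long requested = 2 * 1024 * 1024;          // 2 MiB
//     if (logBuf.setSize(LOG_ID_MAIN, requested) < 0) {
//         // report the rejection back to the requesting client
//     }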
// Prune at most 10% of the log entries or maxPrune, whichever is less.
//
// mLogElementsLock must be held when this function is called.
void LogBuffer::maybePrune(log_id_t id) {
    size_t sizes = stats.sizes(id);
    unsigned long maxSize = log_buffer_size(id);
    if (sizes > maxSize) {
        size_t sizeOver = sizes - ((maxSize * 9) / 10);
        size_t elements = stats.realElements(id);
        size_t minElements = elements / 100;
        if (minElements < minPrune) {
            minElements = minPrune;
        }
        unsigned long pruneRows = elements * sizeOver / sizes;
        if (pruneRows < minElements) {
            pruneRows = minElements;
        }
        if (pruneRows > maxPrune) {
            pruneRows = maxPrune;
        }
        prune(id, pruneRows);
    }
}
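// Worked example (editorial; figures invented): with maxSize = 1 MiB,
// sizes = 1.2 MiB and realElements = 4000, sizeOver = 1.2 MiB - 0.9 MiB =
// 0.3 MiB and pruneRows = 4000 * 0.3 / 1.2 = 1000. minElements = 40 (or
// minPrune, if that is larger) is comfortably exceeded, so the maxPrune cap
// (256, per the prune() comment below) is what actually limits the batch.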
int base_get(struct base *base, const char *key, unsigned key_sz,
             char *buf, unsigned buf_sz) {
    uint128_t key_hash = md5(key, key_sz);
    uint64_t log_remno;
    int hpos;
    int r = itree_get2(base->itree, key_hash, &log_remno, &hpos);
    if (r < 0) {
        return -1;
    }

    struct log *log = log_by_remno(base->logs, log_remno);

    /* TODO: get rid of the awful malloc */
    unsigned data_sz = log_buffer_size(log, hpos);
    char *data = malloc(data_sz);

    struct keyvalue kv;
    r = log_read(log, hpos, data, data_sz, &kv);
    if (r < 0) {
        free(data);
        return -2;
    }
    if (kv.value_sz > buf_sz) {
        free(data);
        return -3;
    }
    if (key_sz != kv.key_sz || memcmp(key, kv.key, key_sz) != 0) {
        /* Keys are length-bounded, not NUL-terminated, so print them with
         * "%.*s" (precision), not "%*s" (field width). */
        log_error(base->db, "Congratulations! You just found a "
                  "collision! Apparently key %.*s has the same md5 hash as %.*s!",
                  (int) key_sz, key, (int) kv.key_sz, kv.key);
        free(data);
        return -1;
    }
    memcpy(buf, kv.value, kv.value_sz);
    free(data);
    return kv.value_sz;
}
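/* Editorial usage sketch (key and buffer size invented): the return
 * convention visible above is the value length on success, -1 on a missing
 * key or hash mismatch, -2 on a failed log_read(), and -3 when the caller's
 * buffer is too small, e.g.
 *
 *     char value[4096];
 *     int sz = base_get(base, "user:42", strlen("user:42"), value, sizeof(value));
 *     if (sz >= 0) {
 *         // value[0..sz) holds the stored bytes; they are not NUL-terminated
 *     }
 */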
// get the total space allocated to "id"
unsigned long LogBuffer::getSize(log_id_t id) {
    pthread_mutex_lock(&mLogElementsLock);
    size_t retval = log_buffer_size(id);
    pthread_mutex_unlock(&mLogElementsLock);
    return retval;
}
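// Editorial usage sketch ("logBuf" is an invented instance name): a
// "logcat -g"-style query would report this value, e.g.
//
//     unsigned long capacity = logBuf.getSize(LOG_ID_MAIN);   // bytes allocated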
// prune "pruneRows" of type "id" from the buffer.
//
// mLogElementsLock must be held when this function is called.
void LogBuffer::prune(log_id_t id, unsigned long pruneRows) {
    LogTimeEntry *oldest = NULL;

    LogTimeEntry::lock();

    // Region locked?
    LastLogTimes::iterator t = mTimes.begin();
    while(t != mTimes.end()) {
        LogTimeEntry *entry = (*t);
        if (entry->owned_Locked() &&
                (!oldest || (oldest->mStart > entry->mStart))) {
            oldest = entry;
        }
        t++;
    }

    LogBufferElementCollection::iterator it;

    // prune by worst offender by uid
    while (pruneRows > 0) {
        // recalculate the worst offender on every batched pass
        uid_t worst = (uid_t) -1;
        size_t worst_sizes = 0;
        size_t second_worst_sizes = 0;

        if ((id != LOG_ID_CRASH) && mPrune.worstUidEnabled()) {
            LidStatistics &l = stats.id(id);
            l.sort();
            UidStatisticsCollection::iterator iu = l.begin();
            if (iu != l.end()) {
                UidStatistics *u = *iu;
                worst = u->getUid();
                worst_sizes = u->sizes();
                if (++iu != l.end()) {
                    second_worst_sizes = (*iu)->sizes();
                }
            }
        }

        bool kick = false;
        for(it = mLogElements.begin(); it != mLogElements.end();) {
            LogBufferElement *e = *it;

            if (oldest && (oldest->mStart <= e->getMonotonicTime())) {
                break;
            }

            if (e->getLogId() != id) {
                ++it;
                continue;
            }

            uid_t uid = e->getUid();

            if (uid == worst) {
                it = mLogElements.erase(it);
                unsigned short len = e->getMsgLen();
                stats.subtract(len, id, worst, e->getPid());
                delete e;
                kick = true;
                pruneRows--;
                if ((pruneRows == 0) || (worst_sizes < second_worst_sizes)) {
                    break;
                }
                worst_sizes -= len;
            } else if (mPrune.naughty(e)) { // BlackListed
                it = mLogElements.erase(it);
                stats.subtract(e->getMsgLen(), id, uid, e->getPid());
                delete e;
                pruneRows--;
                if (pruneRows == 0) {
                    break;
                }
            } else {
                ++it;
            }
        }

        if (!kick || !mPrune.worstUidEnabled()) {
            break; // the following loop will ask bad clients to skip/drop
        }
    }

    bool whitelist = false;
    it = mLogElements.begin();
    while((pruneRows > 0) && (it != mLogElements.end())) {
        LogBufferElement *e = *it;
        if (e->getLogId() == id) {
            if (oldest && (oldest->mStart <= e->getMonotonicTime())) {
                if (!whitelist) {
                    if (stats.sizes(id) > (2 * log_buffer_size(id))) {
                        // kick a misbehaving log reader client off the island
                        oldest->release_Locked();
                    } else {
                        oldest->triggerSkip_Locked(pruneRows);
                    }
                }
                break;
            }

            if (mPrune.nice(e)) { // WhiteListed
                whitelist = true;
                it++;
                continue;
            }

            it = mLogElements.erase(it);
            stats.subtract(e->getMsgLen(), id, e->getUid(), e->getPid());
            delete e;
            pruneRows--;
        } else {
            it++;
        }
    }

    if (whitelist && (pruneRows > 0)) {
        it = mLogElements.begin();
        while((it != mLogElements.end()) && (pruneRows > 0)) {
            LogBufferElement *e = *it;
            if (e->getLogId() == id) {
                if (oldest && (oldest->mStart <= e->getMonotonicTime())) {
                    if (stats.sizes(id) > (2 * log_buffer_size(id))) {
                        // kick a misbehaving log reader client off the island
                        oldest->release_Locked();
                    } else {
                        oldest->triggerSkip_Locked(pruneRows);
                    }
                    break;
                }

                it = mLogElements.erase(it);
                stats.subtract(e->getMsgLen(), id, e->getUid(), e->getPid());
                delete e;
                pruneRows--;
            } else {
                it++;
            }
        }
    }

    LogTimeEntry::unlock();
}
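// Worked example for the worst-offender pass above (editorial; figures
// invented): suppose uid A holds 80 KiB of this buffer and the runner-up
// uid B holds 30 KiB. The inner loop erases A's entries until either
// pruneRows is exhausted or A's remaining total drops below B's 30 KiB
// (worst_sizes < second_worst_sizes); the outer loop then re-sorts and picks
// the worst offender again, so no single uid is pruned far beyond its peers
// in one batched pass.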
// prune "pruneRows" of type "id" from the buffer.
//
// This garbage collection task is used to expire log entries. It is called to
// remove all logs (clear), all UID logs (unprivileged clear), or every
// 256 or 10% of the total logs (whichever is less) to prune the logs.
//
// First there is a prep phase where we discover the reader region lock that
// acts as a backstop to any pruning activity: pruning stops there and goes no
// further.
//
// There are three major pruning loops that follow. All expire from the oldest
// entries. Since there are multiple log buffers, the Android logging facility
// will appear to drop entries 'in the middle' when looking at multiple log
// sources and buffers. This effect is slightly more prominent when we prune
// the worst offender by logging source. Thus the logs slowly lose content
// and value as you move back in time. This is preferred since chatty sources
// invariably drive the logs' value down faster, whereas less chatty sources
// would be expired in the noise.
//
// The first loop performs blacklisting and worst offender pruning. It falls
// through when there are no notable worst offenders and we have not hit the
// region lock preventing further worst offender pruning. This loop also looks
// after managing and merging the chatty log entries to help provide a
// statistical basis for blame. The chatty entries are not a notification of
// how many logs you may have, but instead represent how many logs you would
// have had in a virtual log buffer that is extended to cover all the in-memory
// logs without loss. They last much longer than the pruned logs they represent
// since they get multiplied by the gains in the non-chatty log sources.
//
// The second loop gets complicated because an algorithm of watermarks and
// history is maintained to reduce the order and keep processing time
// down to a minimum at scale. These algorithms can be costly in the face
// of larger log buffers, or severely limited processing time granted to a
// background task at the lowest priority.
//
// This second loop does straight-up expiration from the end of the logs
// (again, remember, for the specified log buffer id) but does some whitelist
// preservation. Thus the whitelist is a Hail Mary low priority; blacklists and
// spam filtration take priority over it. This second loop also checks if a
// region lock is causing us to buffer too much in the logs to help the
// reader(s), and will tell the slowest reader thread to skip log entries; if
// that persists and hits a further threshold, it kills the reader thread.
//
// The third loop is optional, and only gets hit if there was a whitelist
// and more needs to be pruned against the backstop of the region lock.
//
// mLogElementsLock must be held when this function is called.
//
bool LogBuffer::prune(log_id_t id, unsigned long pruneRows, uid_t caller_uid) {
    LogTimeEntry *oldest = NULL;
    bool busy = false;
    bool clearAll = pruneRows == ULONG_MAX;

    LogTimeEntry::lock();

    // Region locked?
    LastLogTimes::iterator t = mTimes.begin();
    while(t != mTimes.end()) {
        LogTimeEntry *entry = (*t);
        if (entry->owned_Locked() && entry->isWatching(id) &&
                (!oldest || (oldest->mStart > entry->mStart))) {
            oldest = entry;
        }
        t++;
    }

    LogBufferElementCollection::iterator it;

    if (caller_uid != AID_ROOT) {
        // Only here if clearAll condition (pruneRows == ULONG_MAX)
        for(it = mLogElements.begin(); it != mLogElements.end();) {
            LogBufferElement *e = *it;

            if ((e->getLogId() != id) || (e->getUid() != caller_uid)) {
                ++it;
                continue;
            }

            if (oldest && (oldest->mStart <= e->getSequence())) {
                oldest->triggerSkip_Locked(id, pruneRows);
                busy = true;
                break;
            }

            it = erase(it);
            pruneRows--;
        }
        LogTimeEntry::unlock();
        return busy;
    }

    // prune by worst offender by uid
    bool hasBlacklist = mPrune.naughty();
    while (!clearAll && (pruneRows > 0)) {
        // recalculate the worst offender on every batched pass
        uid_t worst = (uid_t) -1;
        size_t worst_sizes = 0;
        size_t second_worst_sizes = 0;

        if (worstUidEnabledForLogid(id) && mPrune.worstUidEnabled()) {
            std::unique_ptr<const UidEntry *[]> sorted = stats.sort(2, id);

            if (sorted.get()) {
                if (sorted[0] && sorted[1]) {
                    worst_sizes = sorted[0]->getSizes();
                    // Calculate threshold as 12.5% of available storage
                    size_t threshold = log_buffer_size(id) / 8;
                    if (worst_sizes > threshold) {
                        worst = sorted[0]->getKey();
                        second_worst_sizes = sorted[1]->getSizes();
                        if (second_worst_sizes < threshold) {
                            second_worst_sizes = threshold;
                        }
                    }
                }
            }
        }

        // skip if we have neither worst nor naughty filters
        if ((worst == (uid_t) -1) && !hasBlacklist) {
            break;
        }

        bool kick = false;
        bool leading = true;
        it = mLogElements.begin();
        // Perform at least one mandatory garbage collection cycle in following
        // - clear leading chatty tags
        // - coalesce chatty tags
        // - check age-out of preserved logs
        bool gc = pruneRows <= 1;
        if (!gc && (worst != (uid_t) -1)) {
            LogBufferIteratorMap::iterator f = mLastWorstUid[id].find(worst);
            if ((f != mLastWorstUid[id].end()) &&
                    (f->second != mLogElements.end())) {
                leading = false;
                it = f->second;
            }
        }
        static const timespec too_old = {
            EXPIRE_HOUR_THRESHOLD * 60 * 60, 0
        };
        LogBufferElementCollection::iterator lastt;
        lastt = mLogElements.end();
        --lastt;
        LogBufferElementLast last;
        while (it != mLogElements.end()) {
            LogBufferElement *e = *it;

            if (oldest && (oldest->mStart <= e->getSequence())) {
                busy = true;
                break;
            }

            if (e->getLogId() != id) {
                ++it;
                continue;
            }

            unsigned short dropped = e->getDropped();

            // remove any leading drops
            if (leading && dropped) {
                it = erase(it);
                continue;
            }

            if (dropped && last.coalesce(e, dropped)) {
                it = erase(it, true);
                continue;
            }

            if (hasBlacklist && mPrune.naughty(e)) {
                last.clear(e);
                it = erase(it);
                if (dropped) {
                    continue;
                }

                pruneRows--;
                if (pruneRows == 0) {
                    break;
                }

                if (e->getUid() == worst) {
                    kick = true;
                    if (worst_sizes < second_worst_sizes) {
                        break;
                    }
                    worst_sizes -= e->getMsgLen();
                }
                continue;
            }

            if ((e->getRealTime() < ((*lastt)->getRealTime() - too_old)) ||
                    (e->getRealTime() > (*lastt)->getRealTime())) {
                break;
            }

            if (dropped) {
                last.add(e);
                if ((!gc && (e->getUid() == worst)) ||
                        (mLastWorstUid[id].find(e->getUid()) ==
                            mLastWorstUid[id].end())) {
                    mLastWorstUid[id][e->getUid()] = it;
                }
                ++it;
                continue;
            }

            if (e->getUid() != worst) {
                leading = false;
                last.clear(e);
                ++it;
                continue;
            }

            pruneRows--;
            if (pruneRows == 0) {
                break;
            }

            kick = true;

            unsigned short len = e->getMsgLen();

            // do not create any leading drops
            if (leading) {
                it = erase(it);
            } else {
                stats.drop(e);
                e->setDropped(1);
                if (last.coalesce(e, 1)) {
                    it = erase(it, true);
                } else {
                    last.add(e);
                    if (!gc ||
                            (mLastWorstUid[id].find(worst) ==
                                mLastWorstUid[id].end())) {
                        mLastWorstUid[id][worst] = it;
                    }
                    ++it;
                }
            }
            if (worst_sizes < second_worst_sizes) {
                break;
            }
            worst_sizes -= len;
        }
        last.clear();

        if (!kick || !mPrune.worstUidEnabled()) {
            break; // the following loop will ask bad clients to skip/drop
        }
    }

    bool whitelist = false;
    bool hasWhitelist = mPrune.nice() && !clearAll;
    it = mLogElements.begin();
    while((pruneRows > 0) && (it != mLogElements.end())) {
        LogBufferElement *e = *it;

        if (e->getLogId() != id) {
            it++;
            continue;
        }

        if (oldest && (oldest->mStart <= e->getSequence())) {
            busy = true;
            if (whitelist) {
                break;
            }

            if (stats.sizes(id) > (2 * log_buffer_size(id))) {
                // kick a misbehaving log reader client off the island
                oldest->release_Locked();
            } else {
                oldest->triggerSkip_Locked(id, pruneRows);
            }
            break;
        }

        if (hasWhitelist && !e->getDropped() && mPrune.nice(e)) { // WhiteListed
            whitelist = true;
            it++;
            continue;
        }

        it = erase(it);
        pruneRows--;
    }

    // Do not save the whitelist if we are reader range limited
    if (whitelist && (pruneRows > 0)) {
        it = mLogElements.begin();
        while((it != mLogElements.end()) && (pruneRows > 0)) {
            LogBufferElement *e = *it;

            if (e->getLogId() != id) {
                ++it;
                continue;
            }

            if (oldest && (oldest->mStart <= e->getSequence())) {
                busy = true;
                if (stats.sizes(id) > (2 * log_buffer_size(id))) {
                    // kick a misbehaving log reader client off the island
                    oldest->release_Locked();
                } else {
                    oldest->triggerSkip_Locked(id, pruneRows);
                }
                break;
            }

            it = erase(it);
            pruneRows--;
        }
    }

    LogTimeEntry::unlock();

    return (pruneRows > 0) && busy;
}
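// Worked example for the worst-offender threshold above (editorial; figures
// invented): with log_buffer_size(id) = 256 KiB, threshold = 256 KiB / 8 =
// 32 KiB. A uid is only selected as "worst" if it holds more than 32 KiB of
// this buffer, and second_worst_sizes is raised to at least 32 KiB, so
// chatty-uid pruning stops once the offender has been squeezed back down to
// roughly that threshold rather than being wiped out entirely.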