static int core_backtrace_is_duplicate(struct sr_stacktrace *bt1, const char *bt2_text) { struct sr_thread *thread1 = sr_stacktrace_find_crash_thread(bt1); if (thread1 == NULL) { log_notice("New stacktrace has no crash thread, disabling core stacktrace deduplicate"); dup_corebt_fini(); return 0; } int result; char *error_message; struct sr_stacktrace *bt2 = sr_stacktrace_parse(sr_abrt_type_from_type(type), bt2_text, &error_message); if (bt2 == NULL) { log_notice("Failed to parse backtrace, considering it not duplicate: %s", error_message); free(error_message); return 0; } struct sr_thread *thread2 = sr_stacktrace_find_crash_thread(bt2); if (thread2 == NULL) { log_notice("Failed to get crash thread, considering it not duplicate"); result = 0; goto end; } int length2 = sr_thread_frame_count(thread2); if (length2 <= 0) { log_notice("Core backtrace has zero frames, considering it not duplicate"); result = 0; goto end; } float distance = sr_distance(SR_DISTANCE_DAMERAU_LEVENSHTEIN, thread1, thread2); log_info("Distance between backtraces: %f", distance); result = (distance <= BACKTRACE_DUP_THRESHOLD); end: sr_stacktrace_free(bt2); return result; }
/*
 * Jaro-Winkler comparison of the frame sequences of two threads.
 *
 * Despite the "distance" in the name, a larger value means the threads
 * are more alike: two empty threads yield 1.0 and threads without any
 * matching frame yield 0.
 *
 * thread1, thread2 - threads to compare; both must have the same type
 *                    (enforced with assert()).
 *
 * Frames are compared with sr_frame_cmp_distance(); a result of 0 is
 * treated as "equal".  A frame of thread1 matches when an equal frame of
 * thread2 lies at most `match_window` positions away.  A frame of thread2
 * is not marked as used, so it may serve as the match for several frames
 * of thread1.
 */
float distance_jaro_winkler(struct sr_thread *thread1, struct sr_thread *thread2)
{
    assert(thread1->type == thread2->type);

    int frame1_count = sr_thread_frame_count(thread1);
    int frame2_count = sr_thread_frame_count(thread2);

    if (frame1_count == 0 && frame2_count == 0)
        return 1.0;

    int max_frame_count = (frame1_count > frame2_count) ? frame1_count : frame2_count;

    /* Matching window.  The raw formula gives -1 when the longer thread
     * has a single frame, which would make even identical one-frame
     * threads score 0, so the window is clamped to 0 (same position). */
    int match_window = max_frame_count / 2 - 1;
    if (match_window < 0)
        match_window = 0;

    /* Length of the common prefix, capped at 4.  It is computed by walking
     * both frame lists in lockstep so that it stops at the first differing
     * position or at the end of the shorter thread. */
    int prefix_len = 0;
    struct sr_frame *prefix1 = sr_thread_frames(thread1);
    struct sr_frame *prefix2 = sr_thread_frames(thread2);
    while (prefix_len < 4 && prefix1 && prefix2
           && 0 == sr_frame_cmp_distance(prefix1, prefix2))
    {
        ++prefix_len;
        prefix1 = sr_frame_next(prefix1);
        prefix2 = sr_frame_next(prefix2);
    }

    float trans_count = 0, match_count = 0;

    struct sr_frame *curr_frame = sr_thread_frames(thread1);
    for (int i = 1; curr_frame; ++i)
    {
        bool match = false;

        /* Take the first frame of thread2 inside the window that equals
         * the current frame of thread1. */
        struct sr_frame *curr_frame2 = sr_thread_frames(thread2);
        for (int j = 1; !match && curr_frame2; ++j)
        {
            if (abs(i - j) <= match_window
                && 0 == sr_frame_cmp_distance(curr_frame, curr_frame2))
            {
                match = true;
                if (i != j)
                    ++trans_count; /* matched out of position */
            }

            curr_frame2 = sr_frame_next(curr_frame2);
        }

        if (match)
            ++match_count;

        curr_frame = sr_frame_next(curr_frame);
    }

    /* Each out-of-position match is counted as half a transposition. */
    trans_count /= 2;

    if (0 == match_count)
        return 0; /* also avoids dividing by zero below */

    float dist_jaro = (match_count / (float)frame1_count +
                       match_count / (float)frame2_count +
                       (match_count - trans_count) / match_count) / 3;

    /* Weight given to a common prefix.  With the prefix capped at 4 it
     * must stay below 0.25 so that the bonus never exceeds the remaining
     * (1 - dist_jaro). */
    const float k = 0.2f;

    return dist_jaro + (float)prefix_len * k * (1 - dist_jaro);
}
float distance_levenshtein(struct sr_thread *thread1, struct sr_thread *thread2, bool transposition) { assert(thread1->type == thread2->type); int frame_count1 = sr_thread_frame_count(thread1); int frame_count2 = sr_thread_frame_count(thread2); int max_frame_count = frame_count2; if (max_frame_count < frame_count1) max_frame_count = frame_count1; /* Avoid division by zero in case we get two empty threads */ if (max_frame_count == 0) return 0.0; int m = frame_count1 + 1; int n = frame_count2 + 1; // store only two last rows and columns instead of whole 2D array SR_ASSERT(n <= SIZE_MAX - 1); SR_ASSERT(m <= SIZE_MAX - (n + 1)); int *dist = sr_malloc_array(sizeof(int), m + n + 1); int *dist1 = sr_malloc_array(sizeof(int), m + n + 1); // first row and column having distance equal to their position for (int i = m; i > 0; --i) dist[m - i] = i; for (int i = 0; i <= n; ++i) dist[m + i] = i; struct sr_frame *curr_frame2 = sr_thread_frames(thread2); struct sr_frame *prev_frame = NULL; struct sr_frame *prev_frame2 = NULL; for (int j = 1; curr_frame2; ++j) { struct sr_frame *curr_frame = sr_thread_frames(thread1); for (int i = 1; curr_frame; ++i) { int l = m + j - i; int dist2 = dist1[l]; dist1[l] = dist[l]; int cost; /*similar characters have distance equal to the previous one diagonally, "??" functions aren't taken as similar */ if (0 == sr_frame_cmp_distance(curr_frame, curr_frame2)) cost = 0; else { // different ones takes the lowest value of all // previous distances cost = 1; dist[l] += 1; if (dist[l] > dist[l - 1] + 1) dist[l] = dist[l - 1] + 1; if (dist[l] > dist[l + 1] + 1) dist[l] = dist[l + 1] + 1; } /*checking for transposition of two characters in both ways taking into account that "??" 
functions are not similar*/ if (transposition && (i >= 2 && j >= 2 && dist[l] > dist2 + cost && 0 == sr_frame_cmp_distance(curr_frame, prev_frame2) && 0 == sr_frame_cmp_distance(prev_frame, curr_frame2))) { dist[l] = dist2 + cost; } prev_frame = curr_frame; curr_frame = sr_frame_next(curr_frame); } prev_frame2 = curr_frame2; curr_frame2 = sr_frame_next(curr_frame2); } int result = dist[n]; free(dist); free(dist1); return (float)result / max_frame_count; }
static int core_backtrace_is_duplicate(struct sr_stacktrace *bt1, const char *bt2_text) { struct sr_thread *thread1 = sr_stacktrace_find_crash_thread(bt1); if (thread1 == NULL) { log_notice("New stacktrace has no crash thread, disabling core stacktrace deduplicate"); dup_corebt_fini(); return 0; } int result; char *error_message; struct sr_stacktrace *bt2 = sr_stacktrace_parse(sr_abrt_type_from_analyzer(analyzer), bt2_text, &error_message); if (bt2 == NULL) { log_notice("Failed to parse backtrace, considering it not duplicate: %s", error_message); free(error_message); return 0; } struct sr_thread *thread2 = sr_stacktrace_find_crash_thread(bt2); if (thread2 == NULL) { log_notice("Failed to get crash thread, considering it not duplicate"); result = 0; goto end; } int length2 = sr_thread_frame_count(thread2); if (length2 <= 0) { log_notice("Core backtrace has zero frames, considering it not duplicate"); result = 0; goto end; } /* This is an ugly workaround for https://github.com/abrt/btparser/issues/6 */ /* int length1 = sr_core_thread_get_frame_count(thread1); if (length1 <= 2 || length2 <= 2) { log_notice("Backtraces too short, falling back on full comparison"); result = (sr_core_thread_cmp(thread1, thread2) == 0); goto end; } */ float distance = sr_distance(SR_DISTANCE_DAMERAU_LEVENSHTEIN, thread1, thread2); log_info("Distance between backtraces: %f", distance); result = (distance <= BACKTRACE_DUP_THRESHOLD); end: sr_stacktrace_free(bt2); return result; }