Пример #1
0
/*
 * Levenshtein distance between two threads, scaled by the larger of the
 * two frame counts.
 *
 * The raw distance is obtained from btp_thread_levenshtein_distance_custom()
 * with transpositions enabled and btp_frame_cmp_simple as the frame
 * comparator. When the larger frame count is zero the function returns 1.0
 * without computing any distance.
 */
float
btp_thread_levenshtein_distance_f(struct btp_thread *thread1, struct btp_thread *thread2)
{
    const int len1 = btp_thread_get_frame_count(thread1);
    const int len2 = btp_thread_get_frame_count(thread2);

    int longest = len2;
    if (len1 > len2)
        longest = len1;

    /* Nothing to scale by: avoid dividing by zero. */
    if (longest == 0)
        return 1.0;

    int edits = btp_thread_levenshtein_distance_custom(thread1, thread2, true,
                                                       btp_frame_cmp_simple);
    return (float)edits / longest;
}
Пример #2
0
void
btp_thread_append_to_str(struct btp_thread *thread,
                         struct strbuf *str,
                         bool verbose)
{
    int framecount = btp_thread_get_frame_count(thread);
    if (verbose)
    {
        strbuf_append_strf(str, "Thread no. %d (%d frames)\n",
                               thread->number, framecount);
    }
    else
        strbuf_append_str(str, "Thread\n");

    struct btp_frame *frame = thread->frames;
    while (frame)
    {
        btp_frame_append_to_str(frame, str, verbose);
        frame = frame->next;
    }
}
Пример #3
0
/*
 * Jaro-Winkler style similarity of two threads, using compare_func to
 * decide whether two frames are equal (compare_func returns 0 for equal
 * frames).
 *
 * Returns 1.0 when both threads have no frames and 0 when no frame of
 * thread1 finds a match in thread2; otherwise the Jaro score boosted by
 * the length of the common frame prefix (at most 4 frames).
 *
 * A frame of thread1 at position i matches the first frame of thread2 at
 * position j that compares equal and satisfies
 * |i - j| <= max(0, max_frame_count / 2 - 1). Matches found at a different
 * position (i != j) are counted as half a transposition each.
 */
float
btp_thread_jarowinkler_distance_custom(struct btp_thread *thread1, struct btp_thread *thread2,
                                       btp_frame_cmp_type compare_func)
{
    int frame1_count = btp_thread_get_frame_count(thread1);
    int frame2_count = btp_thread_get_frame_count(thread2);

    if (frame1_count == 0 && frame2_count == 0)
    {
        return 1.0;
    }

    int max_frame_count = (frame2_count > frame1_count ? frame2_count : frame1_count);

    /* The window would be -1 when the longer thread has a single frame,
       which made even identical one-frame threads unmatched. Clamp it so
       that frames at the same position can always match. */
    int match_window = max_frame_count / 2 - 1;
    if (match_window < 0)
        match_window = 0;

    int i, j, prefix_len = 0;
    bool match;
    float k, trans_count = 0, match_count = 0, dist_jaro, dist;

    struct btp_frame *curr_frame = thread1->frames;
    for (i = 1; curr_frame; i++)
    {
        match = false;
        struct btp_frame *curr_frame2 = thread2->frames;
        for (j = 1; !match && curr_frame2; j++)
        {
            /* getting a match only if the frames are not too far away from
               each other and compare_func considers them equal */
            if (abs(i - j) <= match_window &&
                    0 == compare_func(curr_frame, curr_frame2))
            {
                match = true;
                if (i != j)
                    trans_count++;  /* transposition in place */
            }
            curr_frame2 = curr_frame2->next;
        }
        if (match)
            match_count++;

        curr_frame = curr_frame->next;
    }
    trans_count = trans_count / 2;

    if (match_count == 0)
        return 0;  /* so as not to divide by 0 */

    /* Length of the common prefix, capped at 4. Walk both lists in lock
       step: tracking the prefix inside the matching loop over-counted it
       when the inner loop stopped on an earlier match before reaching
       position i, or when thread2 was shorter than thread1. */
    struct btp_frame *prefix1 = thread1->frames;
    struct btp_frame *prefix2 = thread2->frames;
    while (prefix_len < 4 && prefix1 && prefix2 &&
           0 == compare_func(prefix1, prefix2))
    {
        prefix_len++;
        prefix1 = prefix1->next;
        prefix2 = prefix2->next;
    }

    dist_jaro = (match_count / (float)frame1_count +
                 match_count / (float)frame2_count +
                 (match_count - trans_count) / match_count) / 3;

    k = 0.2;  /* how much weight we give to having common prefixes (always k<0.25) */
    dist = dist_jaro + (float)prefix_len * k * (1 - dist_jaro);

    return dist;
}
Пример #4
0
/*
 * Edit distance between the frame lists of two threads.
 *
 * Frames are compared with compare_func (0 means equal). When
 * transposition is true, swapping two adjacent frames is also considered
 * as an edit. With an empty thread on either side the result is the frame
 * count of the other thread.
 *
 * Only the diagonals of the distance matrix are stored: index m + j - i
 * holds the most recent value on the diagonal through cell (i, j).
 * dist keeps the latest value, dist1 the value one step earlier on the
 * same diagonal (needed for the transposition check).
 */
int
btp_thread_levenshtein_distance_custom(struct btp_thread *thread1, struct btp_thread *thread2,
                                       bool transposition, btp_frame_cmp_type compare_func)
{
    int m = btp_thread_get_frame_count(thread1) + 1;
    int n = btp_thread_get_frame_count(thread2) + 1;

    // store only two last rows and columns instead of whole 2D array
    int dist[m + n + 1], dist1[m + n + 1], dist2;

    int i, j, l, cost = 0;

    // first row and column having distance equal to their position
    for (i = m; i > 0; i--)
        dist[m - i] = i;
    for (; i <= n; i++)
        dist[m + i] = i;

    /* dist1 is read on the first visit to every diagonal before anything
       was stored there; give it defined contents so no indeterminate
       value is ever loaded. Those values are never used in the result. */
    for (l = 0; l <= m + n; l++)
        dist1[l] = dist[l];

    struct btp_frame *curr_frame2 = thread2->frames;
    struct btp_frame *prev_frame = NULL;
    struct btp_frame *prev_frame2 = NULL;

    for (j = 1; curr_frame2; j++)
    {

        struct btp_frame *curr_frame = thread1->frames;
        for (i = 1; curr_frame; i++)
        {
            l = m + j - i;

            /* dist2 = value two steps back on this diagonal,
               dist1[l] = value one step back (current dist[l]) */
            dist2 = dist1[l];
            dist1[l] = dist[l];

            /* equal frames keep the distance of the previous cell on the
               diagonal; equality is decided solely by compare_func */
            if (0 == compare_func(curr_frame, curr_frame2))
            {
                cost = 0;
            }
            // different ones takes the lowest value of all previous distances
            else
            {
                cost = 1;
                dist[l] += 1;
                if (dist[l] > dist[l - 1] + 1)
                    dist[l] = dist[l - 1] + 1;
                if (dist[l] > dist[l + 1] + 1)
                    dist[l] = dist[l + 1] + 1;
            }

            /* checking for transposition of two adjacent frames in both
               ways; prev_frame and prev_frame2 are valid here because
               i >= 2 and j >= 2 are tested first */
            if (transposition &&
                    (i >= 2 && j >= 2 && dist[l] > dist2 + cost &&
                     0 == compare_func(curr_frame, prev_frame2) &&
                     0 == compare_func(prev_frame, curr_frame2)))
                dist[l] = dist2 + cost;

            prev_frame = curr_frame;
            curr_frame = curr_frame->next;
        }

        prev_frame2 = curr_frame2;
        curr_frame2 = curr_frame2->next;
    }

    /* diagonal through the last cell (m - 1, n - 1) has index n */
    return dist[n];
}