Exemplo n.º 1
0
/*
 * Score a single matched haystack character.
 *
 * The base value m->max_score_per_char is scaled by a multiplier chosen from
 * the character that precedes the match (`last`):
 *   - `last` is in m->level1                      -> 0.9
 *   - `last` is in m->level2                      -> 0.8
 *   - `last` is lowercase and `current` uppercase -> 0.8 (CamelCase boundary)
 *   - `last` is in m->level3                      -> 0.7
 *   - otherwise                                   -> 0.75 / distance_from_last_match
 * The checks are made in that order and the first one that applies wins.
 *
 * distance_from_last_match is used as a divisor in the fallback case; the
 * function does not validate it.
 */
static double calc_score_for_char(MatchInfo *m, UChar32 last, UChar32 current, int32_t distance_from_last_match) {
    const double base = m->max_score_per_char;

    if (u_strchr32(m->level1, last) != NULL) {
        return base * 0.9;
    }
    if (u_strchr32(m->level2, last) != NULL) {
        return base * 0.8;
    }
    if (u_isULowercase(last) && u_isUUppercase(current)) {
        // Lower -> upper transition, i.e. the start of a CamelCase word
        return base * 0.8;
    }
    if (u_strchr32(m->level3, last) != NULL) {
        return base * 0.7;
    }

    // The preceding character is not special: the further this match is
    // from the previous matched character, the less it is worth.
    return base * ((1.0 / distance_from_last_match) * 0.75);
}
Exemplo n.º 2
0
/*
 * Search for the best-scoring way to match the characters of m->needle, in
 * order, against m->haystack.
 *
 * The search is iterative rather than recursive. Pending states are kept on
 * `stack`; each state consists of:
 *   hidx      - haystack index from which the search continues
 *   nidx      - needle index of the character to match next
 *   last_idx  - haystack index of the previously matched character
 *   score     - score accumulated before this state
 *   positions - haystack index recorded for each needle index
 * All indices are UTF-16 code unit offsets (they are advanced with the U16_*
 * macros and compared against needle_len / haystack_len).
 *
 * m               - match context: needle, haystack, their lengths, the memo
 *                   table and the scoring parameters.
 * stack           - work stack for pending states; the loop runs until
 *                   stack->pos drops below 0.
 * final_positions - receives the positions of the best-scoring match. The
 *                   working `positions` array is taken from the same buffer at
 *                   offset m->needle_len, so the buffer presumably holds at
 *                   least 2 * m->needle_len entries - TODO confirm with caller.
 *
 * Returns the highest score found, or 0.0 if no explored path scored above 0.0
 * (in which case final_positions is not written by the final update step).
 */
static double process_item(MatchInfo *m, Stack *stack, int32_t *final_positions) {
    UChar32 nc, hc, lc;
    UChar *p;
    double final_score = 0.0, score = 0.0, score_for_char = 0.0;
    int32_t pos, i, j, hidx, nidx, last_idx, distance, *positions = final_positions + m->needle_len;
    MemoryItem mem = {0};

    // Seed the search: start of haystack, start of needle, no score yet
    stack_push(stack, 0, 0, 0, 0.0, final_positions);

    while (stack->pos >= 0) {
        // Take the next pending state off the stack
        stack_pop(stack, &hidx, &nidx, &last_idx, &score, positions);
        // mem is a by-value copy of the memo entry for this state
        mem = m->memo[hidx][nidx][last_idx];
        if (mem.score == DBL_MAX) {
            // No memoized result, calculate the score
            // Each remaining needle character is matched at its first
            // occurrence at or after hidx; later occurrences are explored via
            // the alternative states pushed below.
            for (i = nidx; i < m->needle_len;) {
                nidx = i;
                U16_NEXT(m->needle, i, m->needle_len, nc); // i now points to next char in needle 
                // Fewer haystack code units remain than needle code units: this path cannot match
                if (m->haystack_len - hidx < m->needle_len - nidx) { score = 0.0; break; }
                p = u_strchr32(m->haystack + hidx, nc);  // TODO: Use primary collation for the find
                // Needle character not found in the rest of the haystack: this path scores nothing
                if (p == NULL) { score = 0.0; break; }
                pos = (int32_t)(p - m->haystack);
                // Number of code points between the previous match and this one
                distance = u_countChar32(m->haystack + last_idx, pos - last_idx);  
                // A match at or directly after last_idx gets the full per-char score
                if (distance <= 1) score_for_char = m->max_score_per_char;
                else {
                    // hc is the matched haystack character at pos
                    U16_GET(m->haystack, 0, pos, m->haystack_len, hc); 
                    j = pos;
                    U16_PREV(m->haystack, 0, j, lc); // lc is the prev character
                    score_for_char = calc_score_for_char(m, lc, hc, distance);
                }
                // Step hidx past the matched character (hc is only a by-product here)
                j = pos;
                U16_NEXT(m->haystack, j, m->haystack_len, hc); 
                hidx = j;
                // Queue an alternative: look for this same needle character further
                // along the haystack. It is pushed with last_idx and score as they
                // were BEFORE this match is applied, and only if enough haystack remains.
                if (m->haystack_len - hidx >= m->needle_len - nidx) stack_push(stack, hidx, nidx, last_idx, score, positions);
                // Commit this match on the current path
                last_idx = pos; 
                positions[nidx] = pos; 
                score += score_for_char;
            } // for(i) iterate over needle
            // NOTE(review): mem is a local copy, so this assignment to mem.score does
            // not modify m->memo; it looks like the computed score is never memoized.
            // Whether the memcpy reaches shared storage depends on how MemoryItem
            // declares `positions`, which is not visible here - confirm.
            mem.score = score; memcpy(mem.positions, positions, sizeof(*positions) * m->needle_len);

        } else {
            // Reuse the memoized score and positions for this state
            score = mem.score; memcpy(positions, mem.positions, sizeof(*positions) * m->needle_len);
        }
        // We have calculated the score for this hidx, nidx, last_idx combination, update final_score and final_positions, if needed
        if (score > final_score) {
            final_score = score;
            memcpy(final_positions, positions, sizeof(*positions) * m->needle_len);
        }
    }
    return final_score;
}