int ldistance( char* requested, char* found) { int i,j; int r_len, f_len; #if SAFETY > 0 if( strlen(requested)>COMP_LEN || strlen(found)>COMP_LEN ) cout << "*** The length is longer than expected! ***" << endl; #endif r_len = (strlen(requested)>COMP_LEN ? COMP_LEN : strlen(requested)); f_len = (strlen(found)>COMP_LEN ? COMP_LEN : strlen(found)); distnce[0][0] = 0; for (j = 1; j <= ARR_SIZE; j++) distnce[0][j] = distnce[0][j-1] + addition; for (j = 1; j <= ARR_SIZE; j++) distnce[j][0] = distnce[j-1][0] + deletion; for (i = 1; i <= r_len; i++) for (j = 1; j <= f_len; j++) distnce[i][j] = SMALLEST_OF( (distnce[i-1][j-1] + ZERO_IF_EQUAL(i,j)), (distnce[i][j-1] + addition), (distnce[i-1][j] + deletion) ); return( distnce[r_len][f_len] ); }
/******************************
 * Compute Levenshtein distance
 *****************************/
/*
 * Weighted edit distance between the strings *S and *T, with support for
 * swapping two adjacent characters.
 *
 * result          receives the distance
 * S, T            pointers to the two NUL-terminated strings
 * insdel_cost     cost of inserting or deleting one character
 * replace_cost    cost of replacing one character by a different one
 * transpose_cost  cost of swapping two adjacent characters
 *
 * Returns MAL_SUCCEED on success. Throws a MAL exception carrying
 * MAL_MALLOC_FAIL when the work matrix cannot be allocated, which includes
 * inputs so long that the matrix size is not representable.
 *
 * The work matrix holds (n + 1) * (m + 1) ints, where n and m are the byte
 * lengths of *S and *T; it is allocated here and released before returning.
 * Cells are read and written through levenshtein_GetAt / levenshtein_PutAt,
 * which are defined elsewhere and receive n as their last argument.
 */
str
levenshtein_impl(int *result, str *S, str *T, int *insdel_cost, int *replace_cost, int *transpose_cost)
{
	char *s = *S;
	char *t = *T;
	const int indel = *insdel_cost;
	const int subst = *replace_cost;
	const int transp = *transpose_cost;
	/* Largest cell count whose byte size still fits in size_t. */
	const size_t max_cells_mem = (size_t) -1 / sizeof(int);
	/* Largest cell count addressable with int coordinates (INT_MAX). */
	const size_t max_cells_int = (size_t) (~0U >> 1);
	size_t max_cells;
	size_t rows;		/* n + 1 */
	size_t cols;		/* m + 1 */
	int *d;			/* pointer to matrix */
	int n;			/* length of s */
	int m;			/* length of t */
	int i;			/* iterates through s */
	int j;			/* iterates through t */

	/* Step 1: lengths and trivial cases */
	n = (int) strlen(s);	/* 64bit: assume strings are less than 2 GB */
	m = (int) strlen(t);
	/* A negative length means the cast above wrapped: the input is too long. */
	if (n < 0 || m < 0)
		throw(MAL,"levenshtein", MAL_MALLOC_FAIL);
	/* Against an empty string the distance is all insertions/deletions. */
	if (n == 0) {
		*result = m * indel;
		return MAL_SUCCEED;
	}
	if (m == 0) {
		*result = n * indel;
		return MAL_SUCCEED;
	}

	/*
	 * Size the matrix in size_t and refuse sizes that would overflow either
	 * the byte count or an int cell index, instead of allocating a
	 * truncated buffer and writing past its end.
	 * NOTE(review): the int bound assumes the cell accessors compute their
	 * offset in int - confirm against their definitions.
	 */
	max_cells = max_cells_int < max_cells_mem ? max_cells_int : max_cells_mem;
	rows = (size_t) n + 1;
	cols = (size_t) m + 1;
	if (cols > max_cells / rows)
		throw(MAL,"levenshtein", MAL_MALLOC_FAIL);
	d = (int *) GDKmalloc(rows * cols * sizeof(int));
	if (d == NULL)
		throw(MAL,"levenshtein", MAL_MALLOC_FAIL);

	/* Step 2: borders - turning a prefix into the empty string (or back)
	 * costs one insertion/deletion per character */
	for (i = 0; i <= n; i++) {
		levenshtein_PutAt(d, i, 0, n, i * indel);
	}
	for (j = 0; j <= m; j++) {
		levenshtein_PutAt(d, 0, j, n, j * indel);
	}

	/* Steps 3-6: fill the interior */
	for (i = 1; i <= n; i++) {
		char s_i = s[i - 1];	/* ith character of s */

		for (j = 1; j <= m; j++) {
			char t_j = t[j - 1];	/* jth character of t */
			int cost = (s_i == t_j) ? 0 : subst;
			int above = levenshtein_GetAt(d, i - 1, j, n) + indel;
			int left = levenshtein_GetAt(d, i, j - 1, n) + indel;
			int diag = levenshtein_GetAt(d, i - 1, j - 1, n) + cost;
			int cell;

			/*
			 * The two-step diagonal is a candidate only when the last
			 * two characters really are swapped. Offering it at a
			 * fixed price otherwise would undercut the configured
			 * replace/insert/delete costs.
			 */
			if (i >= 2 && j >= 2 && s[i - 2] == t_j && s_i == t[j - 2]) {
				int diag2 = levenshtein_GetAt(d, i - 2, j - 2, n) + transp;

				cell = SMALLEST_OF4(above, left, diag, diag2);
			} else {
				cell = SMALLEST_OF(above, left, diag);
			}
			levenshtein_PutAt(d, i, j, n, cell);
		}
	}

	/* Step 7: the distance between the full strings is in the last cell */
	*result = levenshtein_GetAt(d, n, m, n);
	GDKfree(d);
	return MAL_SUCCEED;
}