Example #1
0
/*
 * Compute an edit distance between `requested` and `found`, using the
 * file-level scratch matrix `distnce`.
 *
 * Only the first COMP_LEN characters of each string take part in the
 * comparison; longer inputs are clamped (and, when SAFETY > 0, a warning
 * is printed).  Consuming one character of `found` costs `addition`,
 * consuming one character of `requested` costs `deletion`, and the
 * diagonal step costs whatever ZERO_IF_EQUAL(i,j) yields for that pair of
 * 1-based positions (presumably 0 when the characters match -- confirm
 * against the macro definition).
 *
 * Returns distnce[r_len][f_len], where r_len / f_len are the clamped
 * lengths of `requested` / `found`.
 *
 * NOTE(review): the border loops write indices 0..ARR_SIZE inclusive, so
 * `distnce` needs at least ARR_SIZE+1 rows and columns, and COMP_LEN must
 * not exceed ARR_SIZE -- confirm against the declarations.
 */
int ldistance( char* requested, char* found)
{
   int i,j;
   int r_len, f_len;
   /* Measure each string exactly once instead of re-scanning it for the
      safety check and for both arms of the clamp below. */
   size_t r_full = strlen(requested);
   size_t f_full = strlen(found);

#if SAFETY > 0
   if( r_full>COMP_LEN || f_full>COMP_LEN )
     cout << "*** The length is longer than expected! ***" << endl;
#endif  

   /* Clamp both lengths to COMP_LEN. */
   r_len = (int)(r_full>COMP_LEN ? COMP_LEN : r_full);
   f_len = (int)(f_full>COMP_LEN ? COMP_LEN : f_full);
   
   /* Border cells: row 0 accumulates `addition`, column 0 accumulates
      `deletion`.  The full ARR_SIZE extent is (re)initialised on every
      call, independent of the input lengths. */
   distnce[0][0] = 0;
   for (j = 1; j <= ARR_SIZE; j++)
      distnce[0][j] = distnce[0][j-1] + addition;
   for (j = 1; j <= ARR_SIZE; j++)
      distnce[j][0] = distnce[j-1][0] + deletion;

   /* Fill the interior: each cell is the cheapest of the diagonal step,
      a step along the row (addition) and a step along the column
      (deletion). */
   for (i = 1; i <= r_len; i++)
      for (j = 1; j <= f_len; j++)
         distnce[i][j] = SMALLEST_OF(
	     (distnce[i-1][j-1] + ZERO_IF_EQUAL(i,j)),
	     (distnce[i][j-1]   + addition),
	     (distnce[i-1][j]   + deletion) );

    return( distnce[r_len][f_len] );
}
Example #2
0
/******************************
 * Compute a weighted Levenshtein distance with adjacent-transposition
 * support between the NUL-terminated strings *S and *T.
 *
 *   result         out: receives the computed distance
 *   S, T           pointers to the two strings to compare
 *   insdel_cost    pointer to the cost of inserting or deleting one character
 *   replace_cost   pointer to the cost of replacing one character
 *   transpose_cost pointer to the cost of swapping two adjacent characters
 *
 * Returns MAL_SUCCEED on success.  Raises a MAL exception through
 * throw(...) with MAL_MALLOC_FAIL when the work matrix cannot be
 * allocated (including when its size cannot be represented).
 *
 * The distance is computed over an (n+1) x (m+1) matrix of ints that is
 * allocated here and released before returning.
 *****************************/
str
levenshtein_impl(int *result, str *S, str *T, int *insdel_cost, int *replace_cost, int *transpose_cost)
{
	char *s = *S;
	char *t = *T;
	int *d;			/* pointer to matrix */
	int n;			/* length of s */
	int m;			/* length of t */
	int i;			/* iterates through s */
	int j;			/* iterates through t */
	int cost;		/* replacement cost for the current pair */
	int cell;		/* contents of target cell */
	int above;		/* contents of cell immediately above */
	int left;		/* contents of cell immediately to left */
	int diag;		/* contents of cell immediately above and to left */
	size_t max_cells;	/* upper bound on the number of matrix cells */
	size_t sz;		/* size of the matrix in bytes */

	/* Step 1: lengths and trivial cases */
	n = (int) strlen(s);	/* 64bit: assume strings are less than 2 GB */
	m = (int) strlen(t);
	/* Turning an empty string into the other one takes one insertion per
	 * character, so the result must be scaled by the insert/delete cost. */
	if (n == 0) {
		*result = m * *insdel_cost;
		return MAL_SUCCEED;
	}
	if (m == 0) {
		*result = n * *insdel_cost;
		return MAL_SUCCEED;
	}

	/* The byte count is computed in size_t: keeping it in an int truncates
	 * for long inputs and can under-allocate the matrix.  The cell count is
	 * additionally capped at the largest int (~0U >> 1), because the matrix
	 * accessors are driven with int indices and presumably compute int
	 * offsets -- TODO confirm against levenshtein_GetAt/PutAt. */
	max_cells = (size_t) (~0U >> 1);
	if (max_cells > (size_t) -1 / sizeof(int))
		max_cells = (size_t) -1 / sizeof(int);
	if ((size_t) m + 1 > max_cells / ((size_t) n + 1))
		throw(MAL,"levenshtein", MAL_MALLOC_FAIL);
	sz = ((size_t) n + 1) * ((size_t) m + 1) * sizeof(int);
	d = (int *) GDKmalloc(sz);
	if ( d == NULL)
		throw(MAL,"levenshtein", MAL_MALLOC_FAIL);

	/* Step 2: border cells -- reaching position i (or j) from the empty
	 * prefix costs i (or j) insert/delete operations. */
	for (i = 0; i <= n; i++) {
		levenshtein_PutAt(d, i, 0, n, i * *insdel_cost);
	}

	for (j = 0; j <= m; j++) {
		levenshtein_PutAt(d, 0, j, n, j * *insdel_cost);
	}

	/* Step 3: fill the interior, one character of s at a time */
	for (i = 1; i <= n; i++) {

		/* Step 4: ... against every character of t */
		for (j = 1; j <= m; j++) {

			/* Step 5: matching characters are free, otherwise pay for a replacement */
			cost = (s[i - 1] == t[j - 1]) ? 0 : *replace_cost;

			/* Step 6: cheapest of delete / insert / replace-or-match */
			above = levenshtein_GetAt(d, i - 1, j, n);
			left = levenshtein_GetAt(d, i, j - 1, n);
			diag = levenshtein_GetAt(d, i - 1, j - 1, n);
			cell = SMALLEST_OF(above + *insdel_cost, left + *insdel_cost, diag + cost);

			/* Transposition: only consult the cell two steps back on the
			 * diagonal when the last two characters really are swapped.
			 * Offering that cell at a fixed cost for non-swapped pairs
			 * would undercut the true minimum when the configured costs
			 * are larger than that constant. */
			if (i >= 2 && j >= 2 && s[i - 2] == t[j - 1] && s[i - 1] == t[j - 2]) {
				int swapped = levenshtein_GetAt(d, i - 2, j - 2, n) + *transpose_cost;

				if (swapped < cell)
					cell = swapped;
			}
			levenshtein_PutAt(d, i, j, n, cell);
		}
	}

	/* Step 7: the bottom-right cell holds the distance between the full strings */
	*result = levenshtein_GetAt(d, n, m, n);
	GDKfree(d);
	return MAL_SUCCEED;
}