Exemple #1
0
/*
 * Binary soundex distance between two integer-encoded strings.
 *
 * Both inputs are passed through soundex() and the two 4-element codes are
 * compared element by element.
 *
 * a, b         input strings as arrays of unsigned int
 * a_len, b_len number of elements in a and b
 * nfail        accumulator; the return value of each soundex() call is
 *              added to *nfail
 *
 * Returns 0.0 when the codes are identical and 1.0 otherwise.
 */
static double soundex_dist(unsigned int *a, unsigned int *b, unsigned int a_len, 
    unsigned int b_len, unsigned int *nfail) {
  enum { CODE_LEN = 4 };
  unsigned int code_a[CODE_LEN];
  unsigned int code_b[CODE_LEN];
  unsigned int k = 0;

  *nfail += soundex(a, a_len, code_a);
  *nfail += soundex(b, b_len, code_b);

  /* Advance past the common prefix; a full-length prefix means equality. */
  while (k < CODE_LEN && code_a[k] == code_b[k]) {
    ++k;
  }
  return (k == CODE_LEN) ? 0.0 : 1.0;
}
Exemple #2
0
/*
 * Similarity of two strings measured on their soundex codes.
 *
 * Each input is encoded with soundex(); the two codes are then compared with
 * smith_waterman_gotoh_similarity() and that score is returned. The codes
 * returned by soundex() are released with free() before returning.
 */
float soundex_similarity(const char *str1, const char *str2) {
	char *code1 = soundex(str1);
	char *code2 = soundex(str2);
	float score;

	score = smith_waterman_gotoh_similarity(code1, code2);

	free(code1);
	free(code2);
	return score;
}
/*
 * Soundex with a pre-pass that rewrites the leading letter of `text` before
 * delegating to soundex().
 *
 * Leading-letter rewrites (only lowercase letters are matched):
 *   w + o/a    -> v
 *   k + a/o/u  -> c
 *   y          -> i
 *   c + e/i    -> s
 *   g + e/i    -> j
 *   h          -> dropped (the rest of the string is shifted left)
 *
 * text    NUL-terminated input; it is not modified.
 * buffer  output passed through to soundex().
 * len     passed through to soundex().
 *
 * If the working copy cannot be allocated, soundex() is not called.
 * NOTE(review): in that case an empty string is stored in `buffer` on the
 * assumption that it is a writable char buffer of at least one byte when
 * len > 0 -- confirm against soundex()'s contract.
 */
void brsoundex(char *text, char *buffer, size_t len) {
	char *aux = strdup(text);

	if (aux == NULL) {
		if (buffer != NULL && len > 0) {
			buffer[0] = '\0';
		}
		return;
	}

	switch (text[0]) {
		case 'w':
			if (text[1] == 'o' || text[1] == 'a') {
				aux[0] = 'v';
			}
			break;
		case 'k':
			if (text[1] == 'a' || text[1] == 'o' || text[1] == 'u') {
				aux[0] = 'c';
			}
			break;
		case 'y':
			aux[0] = 'i';
			/* The original fell through into case 'c' here, so "ye"/"yi"
			 * ended up with a leading 's' instead of 'i'. */
			break;
		case 'c':
			if (text[1] == 'e' || text[1] == 'i') {
				aux[0] = 's';
			}
			break;
		case 'g':
			if (text[1] == 'e' || text[1] == 'i') {
				aux[0] = 'j';
			}
			break;
		case 'h':
			/* Drop the leading 'h'; source and destination are distinct
			 * buffers, so strcpy is safe. */
			strcpy(aux, text + 1);
			break;
		default:
			break;
	}

	soundex(aux, buffer, len);
	free(aux);
}
Exemple #4
0
/*
 * Python entry point: soundex(obj) -> str.
 *
 * Parses a single object argument, normalizes it with normalize(), computes
 * the soundex code of the normalized value's UTF8_BYTES() and returns it as
 * a Python string built with Py_BuildValue("s", ...).
 *
 * Returns NULL when argument parsing or normalization fails, or (with
 * MemoryError set) when soundex() returns NULL.
 */
static PyObject* jellyfish_soundex(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *norm;
    PyObject *py_code;
    char *code;

    if (!PyArg_ParseTuple(args, "O", &input))
        return NULL;

    norm = normalize(self, input);
    if (norm == NULL)
        return NULL;

    code = soundex(UTF8_BYTES(norm));
    Py_DECREF(norm);

    /* Per the original author's note, soundex only fails on bad malloc. */
    if (code == NULL)
        return PyErr_NoMemory();

    py_code = Py_BuildValue("s", code);
    free(code);
    return py_code;
}
Exemple #5
0
Fichier : main.c Projet : kaydol/C
/*
 * Reads the whole of "input.txt" into memory, computes its soundex code and
 * prints both the text and the code.
 *
 * Returns 0 on success, -1 if the file cannot be opened or memory runs out.
 */
int main()
{
    size_t len = 0;
    size_t cap = 64;
    char *str;
    char B[5] = { 0 };  /* 4 code characters + terminating NUL */
    int ch;             /* int, so EOF is distinguishable from any byte */

    FILE *fin = fopen("input.txt", "r");
    if (fin == NULL) { printf("Can't find input file! --main!\n"); return -1; }

    str = malloc(cap);
    if (str == NULL) {
        printf("Out of memory! --main!\n");
        fclose(fin);
        return -1;
    }

    /* Read until fgetc reports EOF instead of polling feof(), and grow the
     * buffer geometrically rather than by one byte per character. */
    while ((ch = fgetc(fin)) != EOF)
    {
        if (len + 1 >= cap)
        {
            char *grown = realloc(str, cap * 2);
            if (grown == NULL) {
                printf("Out of memory! --main!\n");
                free(str);
                fclose(fin);
                return -1;
            }
            str = grown;
            cap *= 2;
        }
        str[len++] = (char) ch;
    }
    str[len] = '\0';
    fclose(fin);

    soundex(str, B);
    printf("1. %s \n2. %s", str, B);

    free(str);
    return 0;
}
Exemple #6
0
        /* Register tagstr as a string to be matched against tag.
         * The tag list is grown first when tag lies beyond its current
         * length. When soundex matching is enabled and applies to this
         * tag, the soundex form of tagstr is stored instead of the plain
         * text. The given operator is recorded with the stored string.
         * A tag that is still out of range is reported on the log file.
         */
void
add_tag_to_list(int tag,const char *tagstr,TagOperator operator)
{
    const char *stored;
    int slot;

    if(tag >= tag_list_length){
        /* A new tag - one without a pre-defined _TAG #define.
         * Make sure there is room for it in TagList.
         */
        extend_tag_list_length(tag+1);
    }
    if((tag < 0) || (tag >= tag_list_length)){
        fprintf(GlobalState.logfile,
                "Illegal tag number %d in add_tag_to_list.\n",tag);
        return;
    }

    stored = tagstr;
    if(GlobalState.use_soundex && soundex_tag(tag)){
        stored = soundex(tagstr);
    }
    slot = add_to_taglist(stored,&TagLists[tag]);
    if(slot >= 0){
        TagLists[tag].tag_strings[slot].operator = operator;
    }
    /* Ensure that we know we are checking tags. */
    GlobalState.check_tags = TRUE;
}
Exemple #7
0
// Load the entries of "nhap.txt" into the bt2 B-tree.
//
// Each record is a whitespace-delimited key followed by the rest of the line
// as its value. Records are stored under the soundex code of the key: a code
// that is not yet in the tree gets a new dict2 record, an existing record has
// the new key/value appended and is written back unless the key repeats the
// previous one.
//
// Uses the file-level p1, p2, sound, bt2 and rsize2; p1 and p2 are freed
// before returning.
void add2()        
{
   FILE *f;
   f=fopen("nhap.txt","r");
   if(f==NULL)
   {
      perror("nhap.txt");
      return;
   }
   for(;;)
   {
      p2=(dict2*)malloc(sizeof(dict2));
      p1=(dict1*)malloc(sizeof(dict1));
      // Stop on allocation failure or when no further key can be read.
      // Testing fscanf's result (rather than feof before the read) avoids
      // processing one bogus record at end of file.
      // NOTE(review): "%s" is unbounded; limit it once the size of
      // dict1.key is confirmed.
      if(p1==NULL || p2==NULL || fscanf(f,"%s",p1->key)!=1)
      {
         free(p2);
         free(p1);
         break;
      }
      // A key with nothing after it gets an empty value.
      if(fgets(p1->value,256,f)==NULL)
      {
         p1->value[0]='\0';
      }
      p2->i=0;
      soundex(sound,p1->key,1,1);
      if(btsel(bt2,sound,(char*)p2,sizeof(dict2),&rsize2)!=0)
      {
         // No record for this code yet: insert a fresh one.
         strcpy(p2->key[p2->i],p1->key);
         strcpy(p2->value[p2->i],p1->value);
         btins(bt2,sound,(char*)p2,sizeof(dict2));
      }
      else
      {
         // Record exists: append, then update unless the key repeats.
         // NOTE(review): p2->i is not checked against the capacity of
         // dict2.key / dict2.value -- confirm the bound and guard it.
         p2->i++;
         strcpy(p2->key[p2->i],p1->key);
         strcpy(p2->value[p2->i],p1->value);
         if(strcmp(p2->key[p2->i],p2->key[p2->i-1])!=0)
         {
            btupd(bt2,sound,(char*)p2,sizeof(dict2));
         }
      }
      free(p2);
      free(p1);
   }
   fclose(f);
} 
Exemple #8
0
/*
 * Phonetic directory lookup on standard input.
 *
 * Input format:
 *   - directory lines, terminated by a blank line (or end of input);
 *   - then one requested name per line until end of input.
 *
 * For every requested name its soundex code is printed, followed by every
 * directory line whose first space-delimited word has the same code, or
 * "NO ENTRIES FOUND" when there is none.
 *
 * Over-long lines and names are truncated to fit their buffers, and
 * directory lines beyond the table capacity are read but not stored.
 *
 * NOTE(review): the return type is kept as in the original; standard C
 * expects `int main`.
 * NOTE(review): c1/c2 are sized for 4 code characters plus NUL (the
 * original had 4 bytes) -- confirm against what soundex() writes.
 */
void main(int argc, char *argv[]) {
  char c1[5];
  char directory[100][71];
  char name[30];
  char req_name[30];
  char c2[5];
  const int max_entries = (int)(sizeof directory / sizeof directory[0]);
  const int max_line = (int)sizeof directory[0];
  const int max_name = (int)sizeof name;
  const int max_req = (int)sizeof req_name;
  int t,s,len,total_dir,found;
  int ch;   /* int, so EOF can be told apart from any character */

  /* Read the directory up to the first blank line or end of input. */
  t=0; s=0; len=0;
  for (;;) {
    ch=getchar();
    if (ch!=EOF && ch!='\n') {
      len++;
      if (t<max_entries && s<max_line-1) directory[t][s++]=(char)ch;
      continue;
    }
    if (t<max_entries) directory[t][s]=0;
    if (len>0 && t<max_entries) t++;
    if (len==0 || ch==EOF) break;
    s=0; len=0;
  }
  total_dir=t-1;   /* index of the last stored entry, -1 when empty */

  /* Process the requested names, one per line. */
  while ((ch=getchar())!=EOF) {
    /* As in the original, the first character is stored unconditionally. */
    req_name[0]=(char)ch;
    t=1;
    while ((ch=getchar())!=EOF && ch!='\n') {
      if (t<max_req-1) req_name[t++]=(char)ch;
    }
    req_name[t]=0;

    found=0;
    soundex(req_name,c1);
    printf("%s\t\t\t%s\n",req_name,c1);
    for (t=0;t<=total_dir;t++) {
      /* The surname is the text before the first space; also stop at the
         end of the line and at the capacity of name[]. */
      for (s=0; s<max_name-1 && directory[t][s]!=' ' && directory[t][s]!=0; s++)
        name[s]=directory[t][s];
      name[s]=0;
      soundex(name,c2);
      if (!strcmp(c1,c2)) {
        printf("%s\n",directory[t]);
        found=1;
      }
    }
    if (!found) printf("NO ENTRIES FOUND\n");
    printf("\n");
  }
}
Exemple #9
0
/*  Finishes the current alphabetic token and appends a search criterion for
 *  it to cur_scrit. The criterion text is the first two characters of the
 *  token followed by the result of soundex (token) with its first character
 *  skipped.                                                                 */
MODULE store_alpha_token (void)
{
    /*  We store a prefix+phonetic textual token                             */
    /*  A one-character token is padded with '-' so that token [1] holds a
        real character when it is used as part of the prefix below           */
    if (tok_size == 1)
        token [tok_size++] = '-';
        
    token [tok_size] = '\0';
    /*  Criterion type is 't'; "+ 1" drops the leading character of the
        value returned by soundex ()                                         */
    cur_scrit [scrit_count++] = scrit_add ('t',
        strprintf ("%c%c%s", token [0], token [1], soundex (token) + 1));
}
Exemple #10
0
/*
 * Reads lines from standard input and, for each line, prints the non-zero
 * soundex(char) codes of its characters, skipping a code that equals the
 * code of the character immediately before it. One output line is written
 * per input line.
 *
 * When TKD is defined, stdin/stdout are redirected to input.txt/output.txt.
 *
 * Input is read with fgets in fixed-size chunks (gets() cannot bound its
 * write and was removed in C11), so lines longer than the buffer are handled
 * across several chunks without overflowing it.
 */
int main()
{
    #ifdef TKD
    freopen("input.txt", "r", stdin);
    freopen("output.txt", "w", stdout);
    #endif
    char str[30];
    int prev = 0;       /* code of the previous character; 0 at line start */
    int open_line = 0;  /* output pending for a line not yet terminated */

    while(fgets(str, sizeof str, stdin)!=NULL) {
        size_t len=strlen(str), i;
        int eol=(len>0 && str[len-1]=='\n');

        if(eol) {
            str[--len]='\0';
        }
        for(i=0; i<len; i++) {
            int value=soundex(str[i]);
            /* prev==0 at the start of a line, so the first non-zero code is
             * always printed, as with the original i==0 special case. */
            if(value!=0 && value!=prev) {
                printf("%d", value);
            }
            prev=value;
        }
        open_line=1;
        if(eol) {
            printf("\n");
            prev=0;
            open_line=0;
        }
    }
    /* Last line had no trailing newline: still finish its output line. */
    if(open_line) {
        printf("\n");
    }

    return 0;
}
Exemple #11
0
/*
 * Reads whitespace-delimited words into the file-level buffer c until scanf
 * reports EOF. For every position i of the word, soundex(i) is evaluated and
 * printed when it is positive and differs from the previous position's
 * value; a newline ends each word's output.
 *
 * Works entirely on the file-level variables c, i, a and prev.
 * NOTE(review): soundex() receives the index i, not c[i] -- confirm that it
 * looks the character up itself.
 */
int main(void){
    for(;;){
        if(scanf("%s",&c) == EOF){
            break;
        }
        prev = -1;
        i = 0;
        while(c[i] != '\0'){
            a = soundex(i);
            if(a > 0 && a != prev){
                printf("%d",a);
            }
            prev = a;
            i++;
        }
        printf("\n");
    }
    return 0;
}
Exemple #12
0
// Search.
//
// Reads a word from the keyboard (terminated by TAB or newline) into the
// file-level `key`, looks up its soundex code in the bt2 B-tree and lists
// every key stored under that code. The user then types one of the listed
// words and its value is printed.
//
// Uses the file-level key, s1, p2, bt2 and rsize2. The scratch record p2 is
// allocated here and released before returning.
// NOTE(review): p2 is freed and set to NULL at the end on the assumption
// that no other function reads it after search2() -- confirm.
void search2()
{   
  int j,kt=0,c,i=0;
  char key1[40] = "";   // stays empty if scanf fails to read a word

  p2=(dict2*)malloc(sizeof(dict2));
  if(p2==NULL)
  {
    return;
  }
  printf("\nNhap tu ban muon tim:");mygetch();
  // Echo every keystroke and collect the word; the terminating TAB/newline
  // is echoed but not stored. Also stop on EOF so a closed input cannot spin
  // forever.
  // NOTE(review): assumes mygetch() reports end of input as EOF, and the
  // size of `key` is not visible here so the loop is not bounded -- confirm.
  for(;;)
  {
    c = mygetch();
    if(c==EOF) break;
    printf("%c",c);
    if((c=='\t')||(c=='\n')) break;
    key[i]=c;
    i++;
  }
  key[i]='\0';

  soundex(s1,key,1,1);
  if(btsel(bt2,s1,(char*)p2,sizeof(dict2),&rsize2)!=0)
  {
    printf("\n=>\033[31mKhong co tu nay\033[0m");
    mygetch();
  }
  else 
  {
    printf("\nTim kiem nang cao(bang thu vien soundex):\n");
    for(j=0;j<=p2->i;j++)
    {
      printf("%s=>\t",p2->key[j]);
    }
    mygetch();
    // Width-limited so the typed word cannot overflow key1.
    printf("\n\n=>Nhap vao y nghia:");scanf("%39s",key1);
    for(j=0;j<=p2->i;j++)
    {
      if(strcmp(key1,p2->key[j])==0) 
      {
        printf("\n%s\t%s",key1,p2->value[j]);
        kt=1;
        break;
      }
    }
    if (kt==0) printf("\n\n\033[31mKhong co tu nay\033[0m");
    mygetch();
  }
  mygetch();

  // The original never released this record, leaking one per call.
  free(p2);
  p2=NULL;
}
Exemple #13
0
/*
 * Ruby method: returns the soundex code of its first argument as a String.
 *
 * args  number of arguments supplied by the caller
 * argv  the arguments; argv[0] must be a String
 * self  receiver (unused)
 *
 * Raises TypeError ("expects a string") when no argument is given or the
 * first argument is not a String. The buffer returned by soundex() is copied
 * into a new Ruby string and then released with free().
 */
VALUE method_soundex(int args, VALUE *argv, VALUE self) {
    const char *str;
    char *result;
    VALUE rb_res;   /* a VALUE, not a pointer to one: it is returned as-is */

    /* rb_raise does not return, so the code below only runs for a String. */
    if (args < 1 || TYPE(*argv) != T_STRING) {
        rb_raise(rb_eTypeError, "expects a string");
    }

    str = RSTRING(*argv)->ptr;
    result = soundex(str);
    rb_res = rb_str_new2(result);
    free(result);

    return rb_res;
}
Exemple #14
0
        /* Test tag_string against the strings held in list.
         * Return TRUE as soon as one list string equals the leading
         * characters of the search string, FALSE if none does.
         * The search string is the soundex form of tag_string when
         * soundex matching is enabled and applies to tag, otherwise
         * tag_string itself.
         */
static Boolean
CheckList(int tag,const char *tag_string,StringArray *list)
{
    const char *needle = tag_string;
    unsigned ix;

    if(GlobalState.use_soundex && soundex_tag(tag)){
        needle = soundex(tag_string);
    }
    for(ix = 0; ix < list->num_used_elements; ix++){
        const char *candidate = list->tag_strings[ix].tag_string;

        /* Only the first strlen(candidate) characters are compared. */
        if(strncmp(needle,candidate,strlen(candidate)) == 0){
            return TRUE;
        }
    }
    return FALSE;
}
Exemple #15
0
// Interactive loop: prompts for a surname, prints its soundex code, and
// repeats. An empty line ends the program, as does end of input (after
// emitting a newline). Any other read failure prints a message, clears the
// stream state and discards the rest of the pending line.
int main(int argc, const char * argv[]) {
    std::string surname = "", soundex_code = "";
    for (;;) {
        std::cout << "Enter surname (RETURN to quit): ";
        if (getline(std::cin, surname)) {
            if (surname.empty()) {
                return 0;
            }
            soundex_code = soundex(surname);
            std::cout << "Soundex code for " << surname << " is " << soundex_code << std::endl;
            continue;
        }
        // Read failed: distinguish end of input from a recoverable error.
        if (std::cin.eof()) {
            std::cout << std::endl;
            break;
        }
        std::cout << "Please enter a valid surname" << std::endl;
        std::cin.clear();
        std::cin.ignore(10000, '\n');
    }
    return 0;
}
/*
 * Command-line driver: <SimMetric> <string1> <string2>.
 *
 * With any other argument count, prints the usage text with the metric names
 * from SIMMETS and returns 1. With the metric "all", re-invokes main() once
 * per metric by rewriting argv[1]. Otherwise computes the selected metric
 * for the two strings and prints its raw value together with the similarity
 * as a percentage; an unknown metric name prints an error and returns 1.
 *
 * All formatting into the fixed-size `compare` and `metrics` buffers goes
 * through snprintf, so long command-line strings are truncated instead of
 * overflowing the buffers.
 */
int main(int argc, char *argv[]) {
    if (argc != 4) {
        printf("usage: \n\t$ %s <SimMetric> <string1> <string2>\n", basename(argv[0]));
        printf("\nWhere SimMetric is one of:\n");
        int i;
        for (i=0; i < SIMMETC; i++) {
            if (i > 0)
                printf(",");
            printf(" %s", SIMMETS[i]);
        }
        printf("\n");
        return (1);
    }
    else if (strcmp(argv[1], "all") == 0) {
        argv[1] = "block_distance"; main(argc, argv);
        argv[1] = "cosine"; main(argc, argv);
        argv[1] = "dice"; main(argc, argv);
        argv[1] = "euclidean_distance"; main(argc, argv);
        argv[1] = "jaccard"; main(argc, argv);
        argv[1] = "jaro"; main(argc, argv);
        argv[1] = "jaro_winkler"; main(argc, argv);
        argv[1] = "levenshtein"; main(argc, argv);
        argv[1] = "matching_coefficient"; main(argc, argv);
        argv[1] = "monge_elkan"; main(argc, argv);
        argv[1] = "needleman_wunch"; main(argc, argv);
        argv[1] = "overlap_coefficient"; main(argc, argv);
        argv[1] = "qgrams_distance"; main(argc, argv);
        argv[1] = "smith_waterman"; main(argc, argv);
        argv[1] = "smith_waterman_gotoh"; main(argc, argv);
        argv[1] = "soundex"; main(argc, argv);
        argv[1] = "metaphone"; main(argc, argv);
        argv[1] = "double_metaphone"; main(argc, argv);
        return (EXIT_SUCCESS);
    }
    else {
        float similarity = 0;
        char *sm_name, metrics[50], compare[50];

        /* argv strings are of arbitrary length: bound every write. */
        snprintf(compare, sizeof compare, "%10s & %-10s", argv[2], argv[3]);
        switch (which_type(argv[1])) {
            case 0:
            case 1:
                sm_name = "Block Distance";
                snprintf(metrics, sizeof metrics, "%d", block_distance(argv[2], argv[3]));
                similarity = block_distance_similarity(argv[2], argv[3]);
                break;
            case 2:
            case 3:
                sm_name = "Cosine Similarity";
                similarity = cosine_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 4:
                sm_name = "Dice Similarity";
                similarity = dice_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 5:
            case 6:
                sm_name = "Euclidean Distance";
                snprintf(metrics, sizeof metrics, "%3.2f", euclidean_distance(argv[2], argv[3]));
                similarity = euclidean_distance_similarity(argv[2], argv[3]);
                break;
            case 7:
            case 8:
                sm_name = "Jaccard Similarity";
                similarity = jaccard_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 9:
            case 10:
                sm_name = "Jaro Similarity";
                similarity = jaro_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 11:
            case 12:
                sm_name = "Jaro Winkler Similarity";
                similarity = jaro_winkler_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 13:
            case 14:
                sm_name = "Levenshtein Distance";
                snprintf(metrics, sizeof metrics, "%d", levenshtein(argv[2], argv[3]));
                similarity = levenshtein_similarity(argv[2], argv[3]);
                break;
            case 15:
            case 16:
                sm_name = "Matching Coefficient SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", matching_coefficient(argv[2], argv[3]));
                similarity = matching_coefficient_similarity(argv[2], argv[3]);
                break;
            case 17:
            case 18:
                sm_name = "Monge Elkan Similarity";
                similarity = monge_elkan_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 19:
            case 20:
                sm_name = "Needleman Wunch SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", needleman_wunch(argv[2], argv[3]));
                similarity = needleman_wunch_similarity(argv[2], argv[3]);
                break;
            case 21:
            case 22:
                sm_name = "Overlap Coefficient Similarity";
                similarity = overlap_coefficient_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 23:
            case 24:
                sm_name = "QGrams Distance";
                snprintf(metrics, sizeof metrics, "%d", qgrams_distance(argv[2], argv[3]));
                similarity = qgrams_distance_similarity(argv[2], argv[3]);
                break;
            case 25:
            case 26:
                sm_name = "Smith Waterman SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", smith_waterman(argv[2], argv[3]));
                similarity = smith_waterman_similarity(argv[2], argv[3]);
                break;
            case 27:
            case 28:
                sm_name = "Smith Waterman Gotoh SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", smith_waterman_gotoh(argv[2], argv[3]));
                similarity = smith_waterman_gotoh_similarity(argv[2], argv[3]);
                break;
            case 29:
            case 30:
                sm_name = "Soundex Phonetics";
                char *s1 = soundex(argv[2]);
                char *s2 = soundex(argv[3]);
                snprintf(metrics, sizeof metrics, "%s & %s", s1, s2);
                free(s1);
                free(s2);
                similarity = soundex_similarity(argv[2], argv[3]);
                break;
            case 31:
            case 32:
                sm_name = "Metaphone Phonetics";
                char *m1 = metaphone(argv[2]);
                char *m2 = metaphone(argv[3]);
                snprintf(metrics, sizeof metrics, "%s & %s", m1, m2);
                free(m1);
                free(m2);
                similarity = metaphone_similarity(argv[2], argv[3]);
                break;
            case 33:
            case 34:
                sm_name = "Double Metaphone Phonetics";
                char *dm1 = double_metaphone(argv[2]);
                char *dm2 = double_metaphone(argv[3]);
                snprintf(metrics, sizeof metrics, "%s & %s", dm1, dm2);
                free(dm1);
                free(dm2);
                similarity = double_metaphone_similarity(argv[2], argv[3]);
                break;
            default:
               printf("Unknown SimMetric %s, not found.\n", argv[1]);
               return (1);
        }

        printf("%-31s between %-25s is %12s ", sm_name, compare, metrics);
        printf("and yields a %3.0f%% similarity\n", similarity * 100);

        return (EXIT_SUCCESS);
    }
}
Exemple #17
0
    // Self-test for the soundex implementation.
    //
    // Runs c_soundex() over a table of names with their expected 4-character
    // codes, prints one "ok"/"not ok" line per name and asserts on every
    // mismatch. Afterwards prints the 7-character soundex() codes of two
    // longer sample strings. Always returns 0.
    int
    test() {
        // Each row is {name, expected code}; a {0, 0} row ends the table.
        // Commented-out rows record alternative expected codes left by the
        // original author.
        const char *names[][2] = {
            {"Soundex",	"S532"},
            {"Example",	"E251"},
            {"Sownteks",	"S532"},
            {"Ekzampul",	"E251"},
            {"Euler",	"E460"},
            {"Gauss",	"G200"},
            {"Hilbert",	"H416"},
            {"Knuth",	"K530"},
            {"Lloyd",	"L300"},
            // {"Lukasiewicz",	"L202"},
            {"Lukasiewicz",	"L200"},
            {"Ellery",	"E460"},
            {"Ghosh",	"G200"},
            {"Heilbronn",	"H416"},
            {"Kant",	"K530"},
            {"Ladd",	"L300"},
            // {"Lissajous",	"L222"},
            {"Lissajous",	"L200"},
            {"Wheaton",	"W350"},
            {"Burroughs",	"B620"},
            {"Burrows",	"B620"},
            {"O'Hara",	"O600"},
            {"Washington",	"W252"},
            {"Lee",		"L000"},
            {"Gutierrez",	"G362"},
            {"Pfister",	"P236"},
            {"Jackson",	"J250"},
            // {"Tymczak",	"T522"},
            {"Tymczak",	"T520"},
            {"VanDeusen",	"V532"},
            {"Ashcraft",	"A261"},
            {"XPfizer", "X126"},
            {"XFizer", "X126"},
            {"XSandesh", "X532"},
            {"XSondesh", "X532"},
            {0, 0}
        };
        char sdx[5] = { 0 };
        int row = 0;

        init();

        puts("  Test name  Code  Got\n----------------------");
        while (names[row][0]) {
            const char *input = names[row][0];
            const char *expected = names[row][1];

            c_soundex(input, sdx, 4);
            printf("%11s  %s  %s ", input, expected, sdx);
            printf("%s\n", strcmp(sdx, expected) ? "not ok" : "ok");
            assert(!strcmp(sdx, expected));
            ++row;
        }

        nw::string s1 = "Xwholetthedogsout";
        nw::string s2 = "Xwholatethedocsout";
        printf("soundex(%s, %s) = (%s, %s)\n", s1.c_str(), s2.c_str(), soundex(s1, 7).c_str(), soundex(s2, 7).c_str());

        // ltdgst, ltdcst
        // 433223, 433223
        // 4323, 4323

        return 0;
    }
Exemple #18
0
/*  Test driver for the string helpers. Exercises, in order: xstrcpy () into
 *  a caller buffer and into a returned string, xstrcat (), the
 *  strt2descr () / descr2strt () round trip, soundex () against the
 *  surname / soundex_value tables, and match_pattern () against
 *  pattern_table. Results are printed on standard output.
 *  Always returns EXIT_SUCCESS.                                             */
int main (int argc, char *argv [])
{
    char  *table [10] = { "One", "Two", "Three", "Four", "Five",
                          "Six", "Seven", "Eight", "Nine", NULL };
    char   dest [128];
    char  *alloc;
    char **new_table;
    DESCR *descr;
    Bool   result;
    int    index;
    int    string;

    /*  Concatenate into a caller-supplied buffer                            */
    puts ("Testing xstrcpy():");
    xstrcpy (dest, "This ", "Is ", "A ", "String", NULL);
    puts (dest);

    /*  Same call with a NULL destination; the returned string is printed    */
    puts ("Testing xstrcpy():");
    alloc = xstrcpy (NULL, "This ", "Is ", "A ", "String", NULL);
    puts (alloc);

    puts ("Testing xstrcat():");
    xstrcat (dest, "1", "2", "3", NULL);
    puts (dest);

    /*  String table -> descriptor -> string table                           */
    puts ("Testing strt2descr():");
    descr = strt2descr (table);
    printf ("Descriptor size=%ld\n", (long) descr-> size);

    new_table = descr2strt (descr);
    printf ("Contents of table: ");
    string = 0;
    while (new_table [string])
      {
        printf ("[%s] ", new_table [string]);
        string++;
      }
    puts ("");

    /*  Compare each computed soundex code with its expected value           */
    printf ("Testing soundex():\n");
    index = 0;
    while (index < NAME_TABLE_SIZE)
      {
        alloc = soundex (surname [index]);
        printf ("%20s -> %5s %s\n", 
                surname [index],
                alloc,
                streq (alloc, soundex_value [index])? "OK": "FAIL");
        index++;
      }

    printf ("Testing match_pattern():\n\n");
    
    printf ("%-20s %-20s %-10s = %-10s => %s\n",
            "String", "Pattern", "Check case", "Result", "Test");
    printf ("---------------------------------------------------------------"
            "----------\n");
    /*  The table ends at the first entry whose string is NULL               */
    for (index = 0; pattern_table [index].string; index++)
      {
        result = match_pattern (pattern_table [index].string,
                                pattern_table [index].pattern,
                                pattern_table [index].ignore_case);
        printf ("%-20s %-20s %-10s = %-10s => %s\n",
                pattern_table [index].string,
                pattern_table [index].pattern,
                pattern_table [index].ignore_case? "No": "Yes",
                result? "Ok": "Fail",
                (result == pattern_table [index].result)? "Ok": "Fail");
      }
    return (EXIT_SUCCESS);
}
Exemple #19
0
/*
 * R entry point: soundex codes for every element of x.
 *
 * If x is a character vector, returns a character vector (STRSXP) of the
 * same length holding the 4-character codes, with NA_character_ for NA
 * input. Otherwise returns a list (VECSXP) whose elements are integer
 * vectors of length 4 holding the codes, or a single NA_integer_ for NA
 * input.
 *
 * The return values of the soundex() calls are summed and handed to
 * check_fail() before returning.
 */
SEXP R_soundex(SEXP x) {
  int n = length(x);
  int bytes = IS_CHARACTER(x);

  if (bytes) {
    // create output variable first, so that an allocation error raised by R
    // here cannot leak the malloc'ed buffer below
    SEXP y = allocVector(STRSXP, n);
    PROTECT(y);

    // x is a character vector: allocate the unsigned int buffer into which
    // its elements are copied one at a time. One spare element is requested
    // so the size is never zero: malloc(0) may legitimately return NULL,
    // which the check below would otherwise report as an allocation failure
    // for empty input.
    int ml = max_length(x);
    unsigned int *s = (unsigned int *) malloc(((size_t) ml + 1) * sizeof(unsigned int));
    if (s == NULL) {
       error("Unable to allocate enough memory");
    }

    // compute soundexes, skipping NA's
    unsigned int nfail = 0;
    int len_s, isna_s;
    char sndx[5];
    unsigned int sndx_int[4];
    for (int i = 0; i < n; ++i) {
      s = get_elem(x, i, bytes, &len_s, &isna_s, s);
      if (isna_s) {
        SET_STRING_ELT(y, i, R_NaString);
      } else { 
        nfail += soundex(s, len_s, sndx_int);
        // narrow the integer code to a NUL-terminated C string
        for (unsigned int j = 0; j < 4; ++j) sndx[j] = sndx_int[j];
        sndx[4] = 0;
        SET_STRING_ELT(y, i, mkChar(sndx));
      } 
    }
    // cleanup and return
    check_fail(nfail);
    free(s);
    UNPROTECT(1);
    return y;
  } else {
    // no buffer is allocated on this path; get_elem is called with s == NULL,
    // as in the original
    unsigned int *s = NULL;
    // create output variable
    SEXP y = allocVector(VECSXP, n);
    PROTECT(y);
    // compute soundexes, skipping NA's
    unsigned int nfail = 0;
    int len_s, isna_s;
    for (int i = 0; i < n; ++i) {
      s = get_elem(x, i, bytes, &len_s, &isna_s, s);
      if (isna_s) {
        SEXP sndx = allocVector(INTSXP, 1);
        PROTECT(sndx);
        INTEGER(sndx)[0] = NA_INTEGER;
        SET_VECTOR_ELT(y, i, sndx);
        UNPROTECT(1);
      } else { 
        SEXP sndx = allocVector(INTSXP, 4);
        PROTECT(sndx);
        // soundex writes its 4 codes straight into the integer vector
        nfail += soundex(s, len_s, (unsigned int *)INTEGER(sndx));
        SET_VECTOR_ELT(y, i, sndx);
        UNPROTECT(1);
      } 
    }
    // cleanup and return
    check_fail(nfail);
    UNPROTECT(1);
    return y;
  }
}