/*
 * Soundex distance between two integer-encoded strings.
 *
 * Each input is passed to soundex() to obtain a 4-element code, and the
 * value returned by each soundex() call is added to *nfail.
 *
 * Returns 0.0 when the two codes agree in every position, 1.0 otherwise.
 */
static double soundex_dist(unsigned int *a, unsigned int *b, unsigned int a_len, unsigned int b_len, unsigned int *nfail)
{
    enum { CODE_LEN = 4 };
    unsigned int code_a[CODE_LEN];
    unsigned int code_b[CODE_LEN];
    unsigned int pos = 0;

    *nfail += soundex(a, a_len, code_a);
    *nfail += soundex(b, b_len, code_b);

    /* Walk both codes in lock-step and stop at the first mismatch. */
    while (pos < CODE_LEN && code_a[pos] == code_b[pos]) {
        ++pos;
    }

    return (pos == CODE_LEN) ? 0.0 : 1.0;
}
/*
 * Similarity of two strings measured on their soundex() codes.
 *
 * Both inputs are encoded with soundex(), the two codes are compared with
 * smith_waterman_gotoh_similarity(), and the codes are then released with
 * free().
 *
 * Returns the value produced by smith_waterman_gotoh_similarity().
 */
float soundex_similarity(const char *str1, const char *str2)
{
    char *code1 = soundex(str1);
    char *code2 = soundex(str2);
    const float score = smith_waterman_gotoh_similarity(code1, code2);

    free(code1);
    free(code2);

    return score;
}
/*
 * Rewrite the leading letter of `text` and hand the result to soundex().
 *
 * A private copy of `text` is adjusted depending on its first one or two
 * characters:
 *   w + (o|a)    -> v
 *   k + (a|o|u)  -> c
 *   y            -> i
 *   c + (e|i)    -> s
 *   g + (e|i)    -> j
 *   h            -> dropped (the rest of the string is shifted left)
 * The adjusted copy is then passed to soundex(aux, buffer, len).
 *
 * text   - NUL-terminated input; never modified.
 * buffer - output buffer forwarded to soundex().
 * len    - forwarded to soundex() unchanged.
 *
 * If the temporary copy cannot be allocated, soundex() is not called and,
 * when len > 0, buffer is set to the empty string.
 */
void brsoundex(char *text, char *buffer, size_t len)
{
    char *aux = strdup(text);

    if (aux == NULL) {
        if (buffer != NULL && len > 0) {
            buffer[0] = '\0';
        }
        return;
    }

    switch (text[0]) {
    case 'w':
        if (text[1] == 'o' || text[1] == 'a') {
            aux[0] = 'v';
        }
        break;
    case 'k':
        if (text[1] == 'a' || text[1] == 'o' || text[1] == 'u') {
            aux[0] = 'c';
        }
        break;
    case 'y':
        aux[0] = 'i';
        /* The original fell through into the 'c' case here, which could
         * turn "ye..."/"yi..." into "se..."/"si...". */
        break;
    case 'c':
        if (text[1] == 'e' || text[1] == 'i') {
            aux[0] = 's';
        }
        break;
    case 'g':
        if (text[1] == 'e' || text[1] == 'i') {
            aux[0] = 'j';
        }
        break;
    case 'h':
        /* Source and destination are distinct buffers, so strcpy is safe. */
        strcpy(aux, text + 1);
        break;
    default:
        break;
    }

    soundex(aux, buffer, len);
    free(aux);
}
/*
 * Python entry point: soundex code of a single argument.
 *
 * Parses one object from `args`, runs it through normalize(), feeds the
 * UTF8_BYTES() view of the normalized object to soundex() and returns the
 * result built with Py_BuildValue("s", ...).
 *
 * Returns NULL when argument parsing or normalize() fails, or, with
 * PyErr_NoMemory() raised, when soundex() returns NULL.
 */
static PyObject* jellyfish_soundex(PyObject *self, PyObject *args)
{
    PyObject *input = NULL;
    PyObject *normalized;
    PyObject *py_code;
    char *code;

    if (!PyArg_ParseTuple(args, "O", &input)) {
        return NULL;
    }

    normalized = normalize(self, input);
    if (normalized == NULL) {
        return NULL;
    }

    code = soundex(UTF8_BYTES(normalized));
    /* The normalized object is no longer needed once the code exists. */
    Py_DECREF(normalized);

    if (code == NULL) {
        /* soundex only fails on bad malloc */
        PyErr_NoMemory();
        return NULL;
    }

    py_code = Py_BuildValue("s", code);
    free(code);

    return py_code;
}
/*
 * Read the whole of "input.txt" into memory, compute its soundex code into
 * a 5-byte buffer and print both.
 *
 * Returns 0 on success, -1 if the file cannot be opened or memory runs out.
 */
int main()
{
    FILE *fin = fopen("input.txt", "r");
    size_t len = 0;
    size_t cap = 64;
    char *str;
    char *B;
    int ch;

    if (fin == NULL) {
        printf("Can't find input file! --main!\n");
        return -1;
    }

    str = malloc(cap);
    if (str == NULL) {
        fprintf(stderr, "Out of memory\n");
        fclose(fin);
        return -1;
    }

    /* fgetc() returns int so EOF can be told apart from every byte value;
     * the loop ends on end-of-file and on a read error alike. */
    while ((ch = fgetc(fin)) != EOF) {
        if (len + 1 >= cap) {
            char *grown = realloc(str, cap * 2);
            if (grown == NULL) {
                fprintf(stderr, "Out of memory\n");
                free(str);
                fclose(fin);
                return -1;
            }
            str = grown;
            cap *= 2;
        }
        str[len++] = (char)ch;
    }
    str[len] = '\0';
    fclose(fin);

    /* Zero-filled so the buffer is a valid string whatever soundex()
     * writes into its first four bytes. */
    B = calloc(5, sizeof *B);
    if (B == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(str);
        return -1;
    }

    soundex(str, B);
    printf("1. %s \n2. %s", str, B);

    free(B);
    free(str);
    return 0;
}
/* Register tagstr as a string to be matched against the given tag.
 * When soundex matching is enabled and soundex_tag(tag) holds, the
 * soundex() form of tagstr is stored in place of the plain text.
 * The supplied operator is recorded on the stored entry, and tag
 * checking is switched on in GlobalState.
 * A tag number that is still out of range after the table has been
 * extended is reported on GlobalState.logfile and otherwise ignored.
 */
void add_tag_to_list(int tag,const char *tagstr,TagOperator operator)
{
    const char *stored;
    int slot;

    /* A new tag - one without a pre-defined _TAG #define - needs
     * room in TagLists before it can be used.
     */
    if(tag >= tag_list_length){
        extend_tag_list_length(tag+1);
    }

    if((tag < 0) || (tag >= tag_list_length)){
        fprintf(GlobalState.logfile,
                "Illegal tag number %d in add_tag_to_list.\n",tag);
        return;
    }

    if(GlobalState.use_soundex && soundex_tag(tag)){
        stored = soundex(tagstr);
    }
    else{
        stored = tagstr;
    }

    slot = add_to_taglist(stored,&TagLists[tag]);
    if(slot >= 0){
        TagLists[tag].tag_strings[slot].operator = operator;
    }
    /* Ensure that we know we are checking tags. */
    GlobalState.check_tags = TRUE;
}
//dua du lieu vao cay btree2 void add2() { FILE *f; f=fopen("nhap.txt","r"); int kt,i,j; while(!feof(f)) { p2=(dict2*)malloc(sizeof(dict2)); p1=(dict1*)malloc(sizeof(dict1)); fscanf(f,"%s",p1->key); fgets(p1->value,256,f); p2->i=0; soundex(sound,p1->key,1,1); if(btsel(bt2,sound,(char*)p2,sizeof(dict2),&rsize2)!=0) { strcpy(p2->key[p2->i],p1->key); strcpy(p2->value[p2->i],p1->value); btins(bt2,sound,(char*)p2,sizeof(dict2)); } else { p2->i++; strcpy(p2->key[p2->i],p1->key); strcpy(p2->value[p2->i],p1->value); if(strcmp(p2->key[p2->i],p2->key[p2->i-1])!=0) btupd(bt2,sound,(char*)p2,sizeof(dict2)); } free(p2); free(p1); } fclose(f); }
/*
 * Directory lookup by soundex code, driven entirely from standard input.
 *
 * Input layout:
 *   1. directory entries, one per line, ended by an empty line;
 *   2. requested names, one per line, until end of input.
 * For every requested name the name and its soundex code are printed,
 * followed by each directory entry whose leading word (the text before the
 * first space) has the same code, or "NO ENTRIES FOUND".
 *
 * Compared with the earlier version, input is read through an int so that
 * EOF is detected reliably, every buffer write is bounds-checked (over-long
 * lines are truncated, entries beyond the table size are dropped), and
 * empty request lines are skipped.
 *
 * argc and argv are unused. The non-standard `void main` signature is kept
 * so the interface is unchanged.
 */
void main(int argc, char *argv[])
{
    enum { MAX_ENTRIES = 100, ENTRY_SIZE = 71, NAME_SIZE = 30 };
    char directory[MAX_ENTRIES][ENTRY_SIZE];
    char name[NAME_SIZE];
    char req_name[NAME_SIZE];
    /* NOTE(review): enlarged from 4 to 5 bytes on the assumption that
     * soundex() writes four characters plus a terminator - confirm. */
    char c1[5];
    char c2[5];
    int ch, t, s, seen, total_dir, found;

    (void)argc;
    (void)argv;

    /* Phase 1: read directory entries up to the first empty line. */
    total_dir = 0;   /* number of stored entries */
    s = 0;           /* write position inside the current entry */
    seen = 0;        /* non-zero once the current line has any character */
    for (;;) {
        ch = getchar();
        if (ch != EOF && ch != '\n') {
            if (total_dir < MAX_ENTRIES && s < ENTRY_SIZE - 1)
                directory[total_dir][s++] = (char)ch;
            seen = 1;
            continue;
        }
        if (!seen)
            break;                      /* empty line, or EOF at line start */
        if (total_dir < MAX_ENTRIES) {
            directory[total_dir][s] = 0;
            total_dir++;
        }
        s = 0;
        seen = 0;
        if (ch == EOF)
            break;
    }

    /* Phase 2: answer one request per input line. */
    for (;;) {
        s = 0;
        while ((ch = getchar()) != EOF && ch != '\n') {
            if (s < NAME_SIZE - 1)
                req_name[s++] = (char)ch;
        }
        if (s == 0) {
            if (ch == EOF)
                break;
            continue;                   /* nothing to look up on this line */
        }
        req_name[s] = 0;

        found = 0;
        soundex(req_name, c1);
        printf("%s\t\t\t%s\n", req_name, c1);

        for (t = 0; t < total_dir; t++) {
            /* The leading word ends at the first space or at end of entry. */
            for (s = 0;
                 s < NAME_SIZE - 1 && directory[t][s] != ' ' && directory[t][s] != 0;
                 s++)
                name[s] = directory[t][s];
            name[s] = 0;

            soundex(name, c2);
            if (!strcmp(c1, c2)) {
                printf("%s\n", directory[t]);
                found = 1;
            }
        }
        if (!found)
            printf("NO ENTRIES FOUND\n");
        printf("\n");

        if (ch == EOF)
            break;
    }
}
MODULE store_alpha_token (void)
{
    /*  The stored criterion is a textual token made of the first two
     *  characters of the token followed by its soundex() value with the
     *  first character of that value skipped.  A one-character token is
     *  padded with '-' so that a second character always exists.
     */
    if (tok_size == 1)
      {
        token [tok_size] = '-';
        tok_size++;
      }
    token [tok_size] = '\0';

    cur_scrit [scrit_count++] =
        scrit_add ('t',
                   strprintf ("%c%c%s",
                              token [0],
                              token [1],
                              soundex (token) + 1));
}
/*
 * For every input line, print the soundex() values of its characters,
 * skipping characters whose value is 0 and characters whose value equals
 * that of the character immediately before them on the same line. One
 * output line is produced per input line.
 *
 * Input is consumed as a stream with getchar() instead of gets(), so lines
 * of any length are handled without a fixed-size buffer.
 *
 * NOTE(review): soundex() is now called once per character rather than
 * twice; this assumes it has no side effects.
 */
int main()
{
#ifdef TKD
    freopen("input.txt", "r", stdin);
    freopen("output.txt", "w", stdout);
#endif
    int ch;
    int line_open = 0;   /* non-zero once the current line has a character */
    int prev = 0;        /* value of the previous character on this line */

    while ((ch = getchar()) != EOF) {
        if (ch == '\n') {
            printf("\n");
            line_open = 0;
            continue;
        }

        /* Convert through char so soundex() receives the same value it got
         * when characters were read from a char buffer. */
        int value = soundex((char)ch);
        if (value != 0 && (!line_open || value != prev)) {
            printf("%d", value);
        }
        prev = value;
        line_open = 1;
    }

    /* A final line without a trailing newline still gets its newline. */
    if (line_open) {
        printf("\n");
    }
    return 0;
}
/*
 * Read whitespace-delimited words into `c` until scanf() reports EOF.
 * For each index of the word, soundex(i) is evaluated; a value is printed
 * when it is positive and differs from the value obtained for the previous
 * index. A newline follows every word.
 *
 * c, prev, i and a are defined outside this function.
 */
int main(void)
{
    while (scanf("%s",&c) != EOF) {
        prev = -1;
        i = 0;
        while (c[i] != '\0') {
            a = soundex(i);
            if (!(a <= 0 || a == prev)) {
                printf("%d",a);
            }
            prev = a;
            i++;
        }
        printf("\n");
    }
    return 0;
}
//tim` kiem void search2() { int j,kt=0,c,i=0; char key1[40]; p2=(dict2*)malloc(sizeof(dict2)); printf("\nNhap tu ban muon tim:");mygetch(); do { c = mygetch();printf("%c",c); key[i]=c; i++; } while((c!='\t')&&(c!='\n')); key[i-1]='\0'; /*scanf("%*c");gets(key); if(btsel(bt1,key,(char*)p1,sizeof(dict1),&rsize1)!=0) { printf("\n\033[31m=>Khong co tu nay\033[0m"); mygetch(); }*/ soundex(s1,key,1,1); if(btsel(bt2,s1,(char*)p2,sizeof(dict2),&rsize2)!=0) { printf("\n=>\033[31mKhong co tu nay\033[0m"); mygetch(); } else { printf("\nTim kiem nang cao(bang thu vien soundex):\n"); for(j=0;j<=p2->i;j++) { printf("%s=>\t",p2->key[j]); } mygetch(); printf("\n\n=>Nhap vao y nghia:");scanf("%s",key1); for(j=0;j<=p2->i;j++) { if(strcmp(key1,p2->key[j])==0) { printf("\n%s\t%s",key1,p2->value[j]); kt=1; break; } } if (kt==0) printf("\n\n\033[31mKhong co tu nay\033[0m"); mygetch(); } // } mygetch(); }
/*
 * Ruby method: return the soundex code of a String argument.
 *
 * args - number of arguments supplied by the caller.
 * argv - the arguments; only the first one is used.
 * self - receiver (unused).
 *
 * Raises TypeError ("expects a string") when no argument is given or the
 * first argument is not a String. rb_raise() does not return.
 *
 * The buffer returned by soundex() is copied into a new Ruby string and
 * then released with free().
 */
VALUE method_soundex(int args, VALUE *argv, VALUE self)
{
    const char *str;
    char *result;
    VALUE rb_res;   /* rb_str_new2() yields a VALUE, not a pointer to one */

    /* Check the count first so *argv is never read when nothing was passed. */
    if (args < 1 || TYPE(*argv) != T_STRING) {
        rb_raise(rb_eTypeError, "expects a string");
    }

    str = RSTRING(*argv)->ptr;
    result = soundex(str);
    rb_res = rb_str_new2(result);
    free(result);

    return rb_res;
}
/* Decide whether tag_string is wanted according to list.
 * The string compared is the soundex() form of tag_string when
 * soundex matching is enabled and soundex_tag(tag) holds, and
 * tag_string itself otherwise.
 * Return TRUE as soon as some list entry is a prefix of that
 * string (the comparison covers only the length of the list
 * entry), FALSE if no entry matches.
 */
static Boolean
CheckList(int tag,const char *tag_string,StringArray *list)
{
    const char *search_str = tag_string;
    unsigned list_index;

    if(GlobalState.use_soundex && soundex_tag(tag)){
        search_str = soundex(tag_string);
    }

    for(list_index = 0; list_index < list->num_used_elements; list_index++){
        const char *candidate = list->tag_strings[list_index].tag_string;

        if(strncmp(search_str,candidate,strlen(candidate)) == 0){
            return TRUE;
        }
    }
    return FALSE;
}
// Interactive loop: prompt for a surname, print its soundex code, repeat.
// An empty line ends the program. End of input prints a newline and leaves
// the loop. Any other read failure prints a hint, clears the stream state
// and discards the rest of the pending line.
int main(int argc, const char * argv[])
{
    std::string surname = "";
    std::string soundex_code = "";

    for (;;) {
        std::cout << "Enter surname (RETURN to quit): ";

        if (getline(std::cin, surname)) {
            if (surname.empty()) {
                return 0;
            }
            soundex_code = soundex(surname);
            std::cout << "Soundex code for " << surname << " is "
                      << soundex_code << std::endl;
            continue;
        }

        // Reading failed: tell end of input apart from a recoverable error.
        if (std::cin.eof()) {
            std::cout << std::endl;
            break;
        }
        std::cout << "Please enter a valid surname" << std::endl;
        std::cin.clear();
        std::cin.ignore(10000, '\n');
    }

    return 0;
}
/*
 * Command-line front end: <SimMetric> <string1> <string2>.
 *
 * With the wrong number of arguments, prints usage and the metric names in
 * SIMMETS and returns 1. With "all" as the metric, re-invokes main() once
 * per metric (argv[1] is overwritten each time). Otherwise runs the metric
 * selected by which_type(argv[1]) on the two strings and prints the raw
 * metric value and the similarity as a percentage.
 *
 * The `compare` and `metrics` text buffers are filled with snprintf(), so
 * long arguments are truncated instead of overrunning the 50-byte buffers.
 *
 * Returns EXIT_SUCCESS on success, 1 on usage error or unknown metric.
 */
int main(int argc, char *argv[])
{
    if (argc != 4) {
        int i;

        printf("usage: \n\t$ %s <SimMetric> <string1> <string2>\n", basename(argv[0]));
        printf("\nWhere SimMetric is one of:\n");
        for (i=0; i < SIMMETC; i++) {
            if (i > 0)
                printf(",");
            printf(" %s", SIMMETS[i]);
        }
        printf("\n");
        return (1);
    }

    if (strcmp(argv[1], "all") == 0) {
        /* Same metrics, in the same order, as the former unrolled calls. */
        static char *all_metrics[] = {
            "block_distance", "cosine", "dice", "euclidean_distance",
            "jaccard", "jaro", "jaro_winkler", "levenshtein",
            "matching_coefficient", "monge_elkan", "needleman_wunch",
            "overlap_coefficient", "qgrams_distance", "smith_waterman",
            "smith_waterman_gotoh", "soundex", "metaphone",
            "double_metaphone"
        };
        size_t k;

        for (k = 0; k < sizeof all_metrics / sizeof all_metrics[0]; k++) {
            argv[1] = all_metrics[k];
            main(argc, argv);
        }
        return (EXIT_SUCCESS);
    }

    {
        float similarity = 0;
        char *sm_name, metrics[50], compare[50];

        snprintf(compare, sizeof compare, "%10s & %-10s", argv[2], argv[3]);

        switch (which_type(argv[1])) {
            case 0:
            case 1:
                sm_name = "Block Distance";
                snprintf(metrics, sizeof metrics, "%d", block_distance(argv[2], argv[3]));
                similarity = block_distance_similarity(argv[2], argv[3]);
                break;
            case 2:
            case 3:
                sm_name = "Cosine Similarity";
                similarity = cosine_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 4:
                sm_name = "Dice Similarity";
                similarity = dice_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 5:
            case 6:
                sm_name = "Euclidean Distance";
                snprintf(metrics, sizeof metrics, "%3.2f", euclidean_distance(argv[2], argv[3]));
                similarity = euclidean_distance_similarity(argv[2], argv[3]);
                break;
            case 7:
            case 8:
                sm_name = "Jaccard Similarity";
                similarity = jaccard_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 9:
            case 10:
                sm_name = "Jaro Similarity";
                similarity = jaro_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 11:
            case 12:
                sm_name = "Jaro Winkler Similarity";
                similarity = jaro_winkler_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 13:
            case 14:
                sm_name = "Levenshtein Distance";
                snprintf(metrics, sizeof metrics, "%d", levenshtein(argv[2], argv[3]));
                similarity = levenshtein_similarity(argv[2], argv[3]);
                break;
            case 15:
            case 16:
                sm_name = "Matching Coefficient SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", matching_coefficient(argv[2], argv[3]));
                similarity = matching_coefficient_similarity(argv[2], argv[3]);
                break;
            case 17:
            case 18:
                sm_name = "Monge Elkan Similarity";
                similarity = monge_elkan_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 19:
            case 20:
                sm_name = "Needleman Wunch SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", needleman_wunch(argv[2], argv[3]));
                similarity = needleman_wunch_similarity(argv[2], argv[3]);
                break;
            case 21:
            case 22:
                sm_name = "Overlap Coefficient Similarity";
                similarity = overlap_coefficient_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof metrics, "%f", similarity);
                break;
            case 23:
            case 24:
                sm_name = "QGrams Distance";
                snprintf(metrics, sizeof metrics, "%d", qgrams_distance(argv[2], argv[3]));
                similarity = qgrams_distance_similarity(argv[2], argv[3]);
                break;
            case 25:
            case 26:
                sm_name = "Smith Waterman SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", smith_waterman(argv[2], argv[3]));
                similarity = smith_waterman_similarity(argv[2], argv[3]);
                break;
            case 27:
            case 28:
                sm_name = "Smith Waterman Gotoh SimMetrics";
                snprintf(metrics, sizeof metrics, "%3.2f", smith_waterman_gotoh(argv[2], argv[3]));
                similarity = smith_waterman_gotoh_similarity(argv[2], argv[3]);
                break;
            case 29:
            case 30: {
                /* The phonetic codes are heap strings owned by this block. */
                char *s1 = soundex(argv[2]);
                char *s2 = soundex(argv[3]);

                sm_name = "Soundex Phonetics";
                snprintf(metrics, sizeof metrics, "%s & %s", s1, s2);
                free(s1);
                free(s2);
                similarity = soundex_similarity(argv[2], argv[3]);
                break;
            }
            case 31:
            case 32: {
                char *m1 = metaphone(argv[2]);
                char *m2 = metaphone(argv[3]);

                sm_name = "Metaphone Phonetics";
                snprintf(metrics, sizeof metrics, "%s & %s", m1, m2);
                free(m1);
                free(m2);
                similarity = metaphone_similarity(argv[2], argv[3]);
                break;
            }
            case 33:
            case 34: {
                char *dm1 = double_metaphone(argv[2]);
                char *dm2 = double_metaphone(argv[3]);

                sm_name = "Double Metaphone Phonetics";
                snprintf(metrics, sizeof metrics, "%s & %s", dm1, dm2);
                free(dm1);
                free(dm2);
                similarity = double_metaphone_similarity(argv[2], argv[3]);
                break;
            }
            default:
                printf("Unknown SimMetric %s, not found.\n", argv[1]);
                return (1);
        }

        printf("%-31s between %-25s is %12s ", sm_name, compare, metrics);
        printf("and yields a %3.0f%% similarity\n", similarity * 100);
        return (EXIT_SUCCESS);
    }
}
int test() { int i; char sdx[5] = { 0 }; const char *names[][2] = { {"Soundex", "S532"}, {"Example", "E251"}, {"Sownteks", "S532"}, {"Ekzampul", "E251"}, {"Euler", "E460"}, {"Gauss", "G200"}, {"Hilbert", "H416"}, {"Knuth", "K530"}, {"Lloyd", "L300"}, // {"Lukasiewicz", "L202"}, {"Lukasiewicz", "L200"}, {"Ellery", "E460"}, {"Ghosh", "G200"}, {"Heilbronn", "H416"}, {"Kant", "K530"}, {"Ladd", "L300"}, // {"Lissajous", "L222"}, {"Lissajous", "L200"}, {"Wheaton", "W350"}, {"Burroughs", "B620"}, {"Burrows", "B620"}, {"O'Hara", "O600"}, {"Washington", "W252"}, {"Lee", "L000"}, {"Gutierrez", "G362"}, {"Pfister", "P236"}, {"Jackson", "J250"}, // {"Tymczak", "T522"}, {"Tymczak", "T520"}, {"VanDeusen", "V532"}, {"Ashcraft", "A261"}, {"XPfizer", "X126"}, {"XFizer", "X126"}, {"XSandesh", "X532"}, {"XSondesh", "X532"}, {0, 0} }; init(); puts(" Test name Code Got\n----------------------"); for (i = 0; names[i][0]; i++) { c_soundex(names[i][0], sdx, 4); printf("%11s %s %s ", names[i][0], names[i][1], sdx); printf("%s\n", strcmp(sdx, names[i][1]) ? "not ok" : "ok"); assert(!strcmp(sdx, names[i][1])); } nw::string s1 = "Xwholetthedogsout"; nw::string s2 = "Xwholatethedocsout"; printf("soundex(%s, %s) = (%s, %s)\n", s1.c_str(), s2.c_str(), soundex(s1, 7).c_str(), soundex(s2, 7).c_str()); // ltdgst, ltdcst // 433223, 433223 // 4323, 4323 return 0; }
int main (int argc, char *argv []) { char *alloc, dest [128], *table [10] = { "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", NULL }, **new_table; DESCR *descr; int index, string; Bool result; puts ("Testing xstrcpy():"); xstrcpy (dest, "This ", "Is ", "A ", "String", NULL); puts (dest); puts ("Testing xstrcpy():"); alloc = xstrcpy (NULL, "This ", "Is ", "A ", "String", NULL); puts (alloc); puts ("Testing xstrcat():"); xstrcat (dest, "1", "2", "3", NULL); puts (dest); puts ("Testing strt2descr():"); descr = strt2descr (table); printf ("Descriptor size=%ld\n", (long) descr-> size); new_table = descr2strt (descr); printf ("Contents of table: "); for (string = 0; new_table [string]; string++) printf ("[%s] ", new_table [string]); puts (""); printf ("Testing soundex():\n"); for (index = 0; index < NAME_TABLE_SIZE; index++) { alloc = soundex (surname [index]); printf ("%20s -> %5s %s\n", surname [index], alloc, streq (alloc, soundex_value [index])? "OK": "FAIL"); } printf ("Testing match_pattern():\n\n"); printf ("%-20s %-20s %-10s = %-10s => %s\n", "String", "Pattern", "Check case", "Result", "Test"); printf ("---------------------------------------------------------------" \ "----------\n"); index = 0; while (pattern_table [index].string) { result = match_pattern (pattern_table [index].string, pattern_table [index].pattern, pattern_table [index].ignore_case); printf ("%-20s %-20s %-10s = %-10s => %s\n", pattern_table [index].string, pattern_table [index].pattern, pattern_table [index].ignore_case? "No": "Yes", result? "Ok": "Fail", (result == pattern_table [index].result)? "Ok": "Fail"); index++; } return (EXIT_SUCCESS); }
/*
 * Soundex codes for every element of x.
 *
 * When x is a character vector, the result is a character vector of the
 * same length holding the 4-character codes, with NA elements passed
 * through as NA. Otherwise the result is a list whose elements are integer
 * vectors: the 4 code values, or a single NA_INTEGER for NA input.
 *
 * The values returned by soundex() are summed and handed to check_fail()
 * before returning.
 */
SEXP R_soundex(SEXP x)
{
    int n = length(x);
    int bytes = IS_CHARACTER(x);

    /* For character input, get_elem() is given a scratch buffer of
     * unsigned ints sized from the longest element of x. */
    unsigned int *s = NULL;
    if (bytes) {
        int ml = max_length(x);
        /* One extra element keeps the request non-zero, so a NULL result
         * always means failure rather than a legitimate malloc(0). */
        s = (unsigned int *) malloc(((size_t) ml + 1) * sizeof(unsigned int));
        if (s == NULL) {
            error("Unable to allocate enough memory");
        }
    }

    if (bytes) {
        // create output variable
        SEXP y = allocVector(STRSXP, n);
        PROTECT(y);

        // compute soundexes, skipping NA's
        unsigned int nfail = 0;
        int len_s, isna_s;
        char sndx[5];
        unsigned int sndx_int[4];

        for (int i = 0; i < n; ++i) {
            s = get_elem(x, i, bytes, &len_s, &isna_s, s);
            if (isna_s) {
                SET_STRING_ELT(y, i, R_NaString);
            } else {
                nfail += soundex(s, len_s, sndx_int);
                // narrow the integer code to a NUL-terminated C string
                for (unsigned int j = 0; j < 4; ++j) {
                    sndx[j] = sndx_int[j];
                }
                sndx[4] = 0;
                SET_STRING_ELT(y, i, mkChar(sndx));
            }
        }

        // cleanup and return
        check_fail(nfail);
        free(s);
        UNPROTECT(1);
        return y;
    } else {
        // create output variable
        SEXP y = allocVector(VECSXP, n);
        PROTECT(y);

        // compute soundexes, skipping NA's
        unsigned int nfail = 0;
        int len_s, isna_s;

        for (int i = 0; i < n; ++i) {
            s = get_elem(x, i, bytes, &len_s, &isna_s, s);
            if (isna_s) {
                SEXP sndx = allocVector(INTSXP, 1);
                PROTECT(sndx);
                INTEGER(sndx)[0] = NA_INTEGER;
                SET_VECTOR_ELT(y, i, sndx);
                UNPROTECT(1);
            } else {
                SEXP sndx = allocVector(INTSXP, 4);
                PROTECT(sndx);
                // the code is written straight into the result vector
                nfail += soundex(s, len_s, (unsigned int *)INTEGER(sndx));
                SET_VECTOR_ELT(y, i, sndx);
                UNPROTECT(1);
            }
        }

        // cleanup and return
        check_fail(nfail);
        UNPROTECT(1);
        return y;
    }
}