int main(int argc, char** argv) { if (argc < 3) { std::cout << "Usage: " << argv[0] << " string1 string2" << std::endl; return 1; } std::cout << "Matching " << argv[1] << " and " << argv[2] << "; " << std::flush; // Convert input to UTF32 int-arrays std::vector<uint32_t> s2, t2; utf8to32(argv[1], s2); utf8to32(argv[2], t2); std::cout << "Levenshtein distance is " << levenshtein(s2, t2) << std::endl; }
/*
 * Compute the Levenshtein distance between cmd and every entry of
 * sugg->binaries (iteration stops at the first null entry), store each
 * distance in sugg->tab_dist at the same index, and keep the smallest one in
 * sugg->dist_min.
 *
 * When sugg->binaries has no entries, sugg->dist_min is left untouched.
 */
void calc_all_distances(t_suggest *sugg, char *cmd)
{
	int idx;

	for (idx = 0; sugg->binaries[idx]; idx++)
	{
		sugg->tab_dist[idx] = levenshtein(cmd, sugg->binaries[idx]);
		/* The first distance seeds the minimum; later ones only lower it. */
		if (idx == 0 || sugg->tab_dist[idx] < sugg->dist_min)
			sugg->dist_min = sugg->tab_dist[idx];
	}
}
int main(int argc, char* argv[]) { auto command_line = qflags::command_line(argc, argv); auto parser = qflags::parser(); auto ignore_case = qflags::flag("ignore_case", "i"); auto print_matrix = qflags::flag("print_matrix", "p"); std::string errors; bool result = true; result &= parser.add_argument(&ignore_case); result &= parser.add_argument(&print_matrix); result &= parser.parse(command_line, &errors); fprintf(stdout, "%s", errors.c_str()); // Compare two strings and print their edit distance. if (parser.remaining_argc() == 3) { unsigned long long distance = levenshtein(parser.remaining_argv()[1], parser.remaining_argv()[2], ignore_case, print_matrix); fprintf(stdout, "%llu\n", distance); } // Compare multiple strings and find the two nearest strings. else if (parser.remaining_argc() > 3) { find_nearest(parser.remaining_argc() - 1, parser.remaining_argv() + 1, ignore_case, print_matrix); } // Print usage string else { fprintf(stdout, "usage: levenshtein %s <string> <string> [<string>...]\n", parser.usage_string().c_str()); } return result ? 0 : 1; }
//-------------------------------------------------------------- string Autocorrect::find_similars(string word){ string similars_f = ""; int ld_min = 3; int ld_thresh = 2; char c = word.at(0); int it_begin = alpha_indices[get_index(c)]; int it_end = alpha_indices[get_next_index(c)]; for (int it = it_begin; it < it_end; ++it){ // get next word in dictionary string line = dictionary[it]; // calculate Levenshtein distance int ld = levenshtein(word, line); if (ld == 0){ // if similar word found, matching 100% similars_f = line; break; }else if (ld < ld_min && ld <= ld_thresh){ // if word is found with a smaller LD than found before similars_f = line; ld_min = ld; }else if (ld == ld_min && similars_f.empty() && ld <= ld_thresh){ // if word with similar LD distance is found similars_f = line; ld_min = ld; }else if (ld == ld_min && !similars_f.empty() && ld <= ld_thresh){ // if equal LDs and already words in similar: append word to string // ! separate words with comma --> word_in_string checks amount of commas + 1 ! similars_f.append(","); similars_f.append(ofToString(line)); } } return similars_f; }
const char *help_unknown_cmd(const char *cmd) { int i, n, best_similarity = 0; struct cmdnames main_cmds, other_cmds; memset(&main_cmds, 0, sizeof(main_cmds)); memset(&other_cmds, 0, sizeof(other_cmds)); memset(&aliases, 0, sizeof(aliases)); git_config(git_unknown_cmd_config, NULL); load_command_list("git-", &main_cmds, &other_cmds); add_cmd_list(&main_cmds, &aliases); add_cmd_list(&main_cmds, &other_cmds); qsort(main_cmds.names, main_cmds.cnt, sizeof(*main_cmds.names), cmdname_compare); uniq(&main_cmds); /* This abuses cmdname->len for levenshtein distance */ for (i = 0, n = 0; i < main_cmds.cnt; i++) { int cmp = 0; /* avoid compiler stupidity */ const char *candidate = main_cmds.names[i]->name; /* * An exact match means we have the command, but * for some reason exec'ing it gave us ENOENT; probably * it's a bad interpreter in the #! line. */ if (!strcmp(candidate, cmd)) die(_(bad_interpreter_advice), cmd, cmd); /* Does the candidate appear in common_cmds list? */ while (n < ARRAY_SIZE(common_cmds) && (cmp = strcmp(common_cmds[n].name, candidate)) < 0) n++; if ((n < ARRAY_SIZE(common_cmds)) && !cmp) { /* Yes, this is one of the common commands */ n++; /* use the entry from common_cmds[] */ if (starts_with(candidate, cmd)) { /* Give prefix match a very good score */ main_cmds.names[i]->len = 0; continue; } } main_cmds.names[i]->len = levenshtein(cmd, candidate, 0, 2, 1, 3) + 1; } qsort(main_cmds.names, main_cmds.cnt, sizeof(*main_cmds.names), levenshtein_compare); if (!main_cmds.cnt) die(_("Uh oh. Your system reports no Git commands at all.")); /* skip and count prefix matches */ for (n = 0; n < main_cmds.cnt && !main_cmds.names[n]->len; n++) ; /* still counting */ if (main_cmds.cnt <= n) { /* prefix matches with everything? 
that is too ambiguous */ best_similarity = SIMILARITY_FLOOR + 1; } else { /* count all the most similar ones */ for (best_similarity = main_cmds.names[n++]->len; (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len); n++) ; /* still counting */ } if (autocorrect && n == 1 && SIMILAR_ENOUGH(best_similarity)) { const char *assumed = main_cmds.names[0]->name; main_cmds.names[0] = NULL; clean_cmdnames(&main_cmds); fprintf_ln(stderr, _("WARNING: You called a Git command named '%s', " "which does not exist.\n" "Continuing under the assumption that you meant '%s'"), cmd, assumed); if (autocorrect > 0) { fprintf_ln(stderr, _("in %0.1f seconds automatically..."), (float)autocorrect/10.0); poll(NULL, 0, autocorrect * 100); } return assumed; } fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); if (SIMILAR_ENOUGH(best_similarity)) { fprintf_ln(stderr, Q_("\nDid you mean this?", "\nDid you mean one of these?", n)); for (i = 0; i < n; i++) fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); } exit(1); }
/**
 * Checks levenshtein() against a table of string pairs with known distances:
 * identical strings, empty operands, single insert/delete/substitute edits,
 * rotations of "12345678" and reversals of "123" .. "12345678".
 */
void FunctionsTest::testLevenshtein()
{
    struct DistanceCase
    {
        const char *left;
        const char *right;
        int expected;
    };

    static const DistanceCase cases[] = {
        // identical strings
        { "", "", 0 },
        { "1", "1", 0 },
        { "12", "12", 0 },
        // one operand empty
        { "", "1", 1 },
        { "", "12", 2 },
        { "1", "", 1 },
        { "12", "", 2 },
        // a single edit
        { "password", "password1", 1 },
        { "password", "assword", 1 },
        { "password", "Xassword", 1 },
        { "password", "passXord", 1 },
        // rotations
        { "12345678", "23456781", 2 },
        { "12345678", "34567812", 4 },
        { "12345678", "45678123", 6 },
        { "12345678", "56781234", 8 },
        { "12345678", "67812345", 6 },
        { "12345678", "78123456", 4 },
        { "12345678", "81234567", 2 },
        // reversals
        { "123", "321", 2 },
        { "1234", "4321", 4 },
        { "12345", "54321", 4 },
        { "123456", "654321", 6 },
        { "1234567", "7654321", 6 },
        { "12345678", "87654321", 8 },
    };

    const int case_count = sizeof(cases) / sizeof(cases[0]);
    for (int idx = 0; idx < case_count; ++idx)
    {
        QCOMPARE(levenshtein(cases[idx].left, cases[idx].right), cases[idx].expected);
    }
}
int main(int argc, char *argv[]) { FILE *bron,*aspell; char **lexicon; unsigned long *freqs; unsigned long *hash; char word[1024]; char capword[1024]; char commandline[1024]; char closestword[MAXNRCLOSEST+1][1024]; int closest[MAXNRCLOSEST+1]; unsigned long closestfreq[MAXNRCLOSEST+1]; unsigned long thishash; int i,j,k,l,thislev,nrlex=0,nrclosest=0,nraspell,readnr,wordlen,lexlen; FILE *context; char inlex,cap,hyp,inflection,tokstatus; char aspellline[32768]; char memline[32768]; char *part; char aspellword[1024]; /* allocate lexicon */ lexicon=malloc(sizeof(char*)); freqs=malloc(sizeof(unsigned long)); hash=malloc(sizeof(unsigned long)); /* read lexicon */ nrlex=0; bron=fopen(argv[1],"r"); while (!feof(bron)) { fscanf(bron,"%d %s ", &readnr,word); lexicon[nrlex]=malloc((strlen(word)+1)*sizeof(char)); strcpy(lexicon[nrlex],word); freqs[nrlex]=readnr; hash[nrlex]=sdbm(word); nrlex++; lexicon=realloc(lexicon,(nrlex+1)*sizeof(char*)); freqs=realloc(freqs,(nrlex+1)*sizeof(unsigned long)); hash=realloc(hash,(nrlex+1)*sizeof(unsigned long)); } fclose(bron); if (DEBUG2) fprintf(stderr,"read %d words from lexicon\n", nrlex); /* run aspell on input.tok.txt */ sprintf(commandline,"aspell --lang=en --pipe < %s > input.tok.aspell\n", argv[2]); if (DEBUG) fprintf(stderr,"executing: %s", commandline); system(commandline); context=fopen(argv[2],"r"); aspell=fopen("input.tok.aspell","r"); fgets(aspellline,32768,aspell); while (!feof(context)) { fscanf(context,"%s ",word); wordlen=strlen(word); nrclosest=0; tokstatus=1; if (strstr(",.!?:;\'\"(){}[]\\/|€",word)) tokstatus=0; if (tokstatus) if (isNumber(word)) tokstatus=0; if (strstr(word,"http://")) tokstatus=0; if (strstr(word,"@")) tokstatus=0; if (tokstatus) { fgets(aspellline,32768,aspell); strcpy(memline,aspellline); part=strtok(aspellline," \n"); if (strcmp(part,"&")==0) { // read alternatives from aspellline part=strtok(NULL," \n"); if (strcmp(part,word)==0) { part=strtok(NULL," \n"); sscanf(part,"%d",&nraspell); if (DEBUG2) 
fprintf(stderr,"[%s] [%d options]\n", word,nraspell); inlex=0; thishash=sdbm(word); for (k=0; ((k<nrlex)&&(!inlex)); k++) { if (thishash==hash[k]) { inlex=1; if (DEBUG2) fprintf(stderr,"word [%s] in lexicon, with frequency %ld\n", word,freqs[k]); } } // read extra string (offset) part=strtok(NULL," \n"); for (k=0; k<nraspell; k++) { part=strtok(NULL," \n"); strcpy(aspellword,""); for (l=0; l<strlen(part)-1; l++) { strcat(aspellword," "); aspellword[l]=part[l]; } if (!strstr(aspellword,"-")) { lexlen=strlen(aspellword); if ((lexlen>wordlen+MAXLD)|| (wordlen>lexlen+MAXLD)) thislev=MAXLD+1; else thislev=levenshtein(aspellword,word); if (thislev<=MAXLD) { inflection=0; // check: plural? if ((((aspellword[strlen(aspellword)-1]=='s')&& (word[strlen(word)-1]!='s')))|| (((aspellword[strlen(aspellword)-1]!='s')&& (word[strlen(word)-1]=='s')))) inflection=1; if ((((aspellword[strlen(aspellword)-1]=='e')&& (word[strlen(word)-1]!='e')))|| (((aspellword[strlen(aspellword)-1]!='e')&& (word[strlen(word)-1]=='e')))) inflection=1; if ((((aspellword[strlen(aspellword)-1]=='n')&& (word[strlen(word)-1]=='t')))|| (((aspellword[strlen(aspellword)-1]=='t')&& (word[strlen(word)-1]=='n')))) inflection=1; if (!inflection) { j=0; while ((j<nrclosest)&& (freqs[k]<closestfreq[j])) j++; if (j<nrclosest) { // move up for (l=nrclosest; l>j; l--) { strcpy(closestword[l],closestword[l-1]); closest[l]=closest[l-1]; closestfreq[l]=closestfreq[l-1]; } } // insert strcpy(closestword[j],aspellword); closest[j]=thislev; closestfreq[j]=freqs[k]; if (nrclosest<MAXNRCLOSEST) nrclosest++; } } } } if (DEBUG2) { fprintf(stderr,"closest to %s:\n", word); for (i=0; i<nrclosest; i++) fprintf(stderr," %2d %s\n", i,closestword[i]); } } } } fprintf(stdout,"%s", word); // post-filter (too bad for all the work done before): // don't correct capitalized words, don't correct words with // hyphens cap=0; if ((word[0]>='A')&& (word[0]<='Z')) cap=1; hyp=0; if (strstr(word,"-")) hyp=1; if ((nrclosest>0)&& 
(wordlen>=MINLENGTH)&& (!cap)&& (!hyp)) { for (i=0; i<nrclosest; i++) fprintf(stdout," %s", closestword[i]); if (DEBUG) { fprintf(stderr,"correction suggestions for %s: ", word); for (i=0; i<nrclosest; i++) fprintf(stderr," %s", closestword[i]); fprintf(stderr,"\n"); } fprintf(stdout," 0.6"); } fprintf(stdout,"\n"); } fclose(aspell); fclose(context); return 0; }
const char *help_unknown_cmd(const char *cmd) { unsigned int i, n = 0, best_similarity = 0; struct cmdnames main_cmds, other_cmds; memset(&main_cmds, 0, sizeof(main_cmds)); memset(&other_cmds, 0, sizeof(main_cmds)); memset(&aliases, 0, sizeof(aliases)); perf_config(perf_unknown_cmd_config, NULL); load_command_list("perf-", &main_cmds, &other_cmds); add_cmd_list(&main_cmds, &aliases); add_cmd_list(&main_cmds, &other_cmds); qsort(main_cmds.names, main_cmds.cnt, sizeof(main_cmds.names), cmdname_compare); uniq(&main_cmds); if (main_cmds.cnt) { /* This reuses cmdname->len for similarity index */ for (i = 0; i < main_cmds.cnt; ++i) main_cmds.names[i]->len = levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); qsort(main_cmds.names, main_cmds.cnt, sizeof(*main_cmds.names), levenshtein_compare); best_similarity = main_cmds.names[0]->len; n = 1; while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) ++n; } if (autocorrect && n == 1) { const char *assumed = main_cmds.names[0]->name; main_cmds.names[0] = NULL; clean_cmdnames(&main_cmds); fprintf(stderr, "WARNING: You called a perf program named '%s', " "which does not exist.\n" "Continuing under the assumption that you meant '%s'\n", cmd, assumed); if (autocorrect > 0) { fprintf(stderr, "in %0.1f seconds automatically...\n", (float)autocorrect/10.0); poll(NULL, 0, autocorrect * 100); } return assumed; } fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); if (main_cmds.cnt && best_similarity < 6) { fprintf(stderr, "\nDid you mean %s?\n", n < 2 ? "this": "one of these"); for (i = 0; i < n; i++) fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); } exit(1); }
int main() { FILE *fpin; FILE *fpout; FILE *fpresult; if(fpin=fopen("E:\\PassRankDataSet\\phpbb.txt","r")) puts("phpbb read Open Ready\n"); else puts("Open Failed\n"); if(fpout=fopen("E:\\PassRankDataSet\\phpbb_Distance_Matrix.csv","w")) puts("phpbb write Open Ready\n"); else puts("Open Failed\n"); if(fpresult=fopen("E:\\PassRankDataSet\\phpbb_Analyse_Result.txt","w")) puts("phpbb result Open Ready\n"); else puts("Open Failed\n"); int i,j; int distCnt[maxDist]; for(i=0;i<maxDist;i++){ distCnt[i]=0; } int totalNum=0; int maxLen =0; //To Detect any passwd longer than 30 while(fscanf(fpin,"%s",pswd[totalNum]) != EOF){ //Read in the password set psLen[totalNum]=strlen(pswd[totalNum]); if(psLen[totalNum]>maxLen){ maxLen=psLen[totalNum]; printf("Line: %d \n",totalNum+1); printf("%s :Lenth is %d\n",pswd[totalNum],psLen[totalNum]); } //printf("%s : Lenth is %d\n",pswd[totalNum],psLen[totalNum]); totalNum++; if(totalNum>psNum){ printf("Warning: The password has exceed the Total Number\n"); return -1; } } fprintf(fpresult,"The totalNum is: %d\n",totalNum); fprintf(fpresult,"The maxLen is: %d\n",maxLen); for(i=0;i<totalNum;i++){ int k; for(k=0;k<i;k++){ fprintf(fpout,"0"); if(i<totalNum-1) fprintf(fpout,","); else fprintf(fpout,"\r\n"); } //printf("Cmping Line:%d \n",i); for(j=i+1;j<totalNum;j++){ //printf("Cmping:%d and %d\n",i,j); int dist_Temp= levenshtein(pswd[i],pswd[j]); //printf("The Dist of (%s,%s): %d\n",pswd[i],pswd[j],dist); if(dist_Temp>maxDist){ printf("Warning: The dist_Temp has exceed the maxDist expected\n"); return -1; } distCnt[dist_Temp]++; fprintf(fpout,"%d",dist_Temp); if(j<totalNum-1) fprintf(fpout,","); else fprintf(fpout,"\r\n"); } } int totalMatch=0; for(i=0;i<maxDist;i++){ fprintf(fpresult,"%d,",distCnt[i]); } fprintf(fpresult,"\r\n"); for(i=0;i<maxDist;i++){ fprintf(fpresult,"Distance:%d Ratio: %lf\r\n",i,distCnt[i]/(double)totalNum); } fclose(fpin); fclose(fpout); }
const char *help_unknown_cmd(const char *cmd) { int i, n, best_similarity = 0; struct cmdnames main_cmds, other_cmds; memset(&main_cmds, 0, sizeof(main_cmds)); memset(&other_cmds, 0, sizeof(other_cmds)); memset(&aliases, 0, sizeof(aliases)); git_config(git_unknown_cmd_config, NULL); load_command_list("git-", &main_cmds, &other_cmds); add_cmd_list(&main_cmds, &aliases); add_cmd_list(&main_cmds, &other_cmds); qsort(main_cmds.names, main_cmds.cnt, sizeof(main_cmds.names), cmdname_compare); uniq(&main_cmds); /* This reuses cmdname->len for similarity index */ for (i = 0; i < main_cmds.cnt; ++i) main_cmds.names[i]->len = levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); qsort(main_cmds.names, main_cmds.cnt, sizeof(*main_cmds.names), levenshtein_compare); if (!main_cmds.cnt) die ("Uh oh. Your system reports no Git commands at all."); best_similarity = main_cmds.names[0]->len; n = 1; while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) ++n; if (autocorrect && n == 1 && SIMILAR_ENOUGH(best_similarity)) { const char *assumed = main_cmds.names[0]->name; main_cmds.names[0] = NULL; clean_cmdnames(&main_cmds); fprintf(stderr, "WARNING: You called a Git command named '%s', " "which does not exist.\n" "Continuing under the assumption that you meant '%s'\n", cmd, assumed); if (autocorrect > 0) { fprintf(stderr, "in %0.1f seconds automatically...\n", (float)autocorrect/10.0); poll(NULL, 0, autocorrect * 100); } return assumed; } fprintf(stderr, "git: '%s' is not a git command. See 'git --help'.\n", cmd); if (SIMILAR_ENOUGH(best_similarity)) { fprintf(stderr, "\nDid you mean %s?\n", n < 2 ? "this": "one of these"); for (i = 0; i < n; i++) fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); } exit(1); }
/**
 * Test driver for the helper routines.
 *
 * Runs, in order, checks of: command-template expansion
 * (helper_parse_setup), path expansion (rofi_expand_path), collation keys
 * (token_collate_key), escape parsing (helper_parse_char), tokenizing
 * (tokenize) and levenshtein().
 *
 * NOTE(review): TASSERT and TASSERTE are defined outside this block; how a
 * failed check is reported is not visible here.
 *
 * Returns EXIT_FAILURE when the locale cannot be set; otherwise control runs
 * off the end of main.
 */
int main ( int argc, char ** argv )
{
    cmd_set_arguments ( argc, argv );

    if ( setlocale ( LC_ALL, "" ) == NULL ) {
        fprintf ( stderr, "Failed to set locale.\n" );
        return EXIT_FAILURE;
    }

    /**
     * Command template expansion.
     * The template is expected to split into 6 arguments, with {host} and
     * {terminal} replaced by the values passed below and the quoted part kept
     * as a single argument.
     * NOTE(review): {ssh-client} is not among the substitutions passed here,
     * yet "ssh" is expected in the output - presumably a default supplied by
     * helper_parse_setup; confirm.
     */
    char **list = NULL;
    int llength = 0;
    char * test_str = "{host} {terminal} -e bash -c \"{ssh-client} {host}; echo '{terminal} {host}'\"";
    helper_parse_setup ( test_str, &list, &llength, "{host}", "chuck", "{terminal}", "x-terminal-emulator", NULL );

    TASSERT ( llength == 6 );
    TASSERT ( strcmp ( list[0], "chuck" ) == 0 );
    TASSERT ( strcmp ( list[1], "x-terminal-emulator" ) == 0 );
    TASSERT ( strcmp ( list[2], "-e" ) == 0 );
    TASSERT ( strcmp ( list[3], "bash" ) == 0 );
    TASSERT ( strcmp ( list[4], "-c" ) == 0 );
    TASSERT ( strcmp ( list[5], "ssh chuck; echo 'x-terminal-emulator chuck'" ) == 0 );

    g_strfreev ( list );

    /**
     * Test some path functions. Not easy as not sure what is right output on travis.
     */
    // Test if root is preserved.
    char *str = rofi_expand_path ( "/" );
    TASSERT ( strcmp ( str, "/" ) == 0 );
    g_free ( str );
    // Test if relative path is preserved.
    str = rofi_expand_path ( "../AUTHORS" );
    TASSERT ( strcmp ( str, "../AUTHORS" ) == 0 );
    g_free ( str );
    // Test another one.
    str = rofi_expand_path ( "/bin/false" );
    TASSERT ( strcmp ( str, "/bin/false" ) == 0 );
    g_free ( str );
    // See if user paths get expanded in full path.
    str = rofi_expand_path ( "~/" );
    const char *hd = g_get_home_dir ();
    TASSERT ( strcmp ( str, hd ) == 0 );
    g_free ( str );
    // Another user's home: only checked to expand to an absolute path.
    str = rofi_expand_path ( "~root/" );
    TASSERT ( str[0] == '/' );
    g_free ( str );

    /**
     * Collating.
     * With FALSE as second argument the expected key is lower-cased; with TRUE
     * the case is kept. In the TRUE example the superscript three is expected
     * to come back as a plain '3'.
     */
    char *res = token_collate_key ( "€ Sign", FALSE );
    TASSERT ( strcmp ( res, "€ sign" ) == 0 );
    g_free ( res );

    res = token_collate_key ( "éÉêèë Sign", FALSE );
    TASSERT ( strcmp ( res, "ééêèë sign" ) == 0 );
    g_free ( res );
    res = token_collate_key ( "éÉêèë³ Sign", TRUE );
    TASSERT ( strcmp ( res, "éÉêèë3 Sign" ) == 0 );
    g_free ( res );

    /**
     * Char function
     * Each two-character (or \xNN) escape sequence is expected to parse to the
     * corresponding character.
     */
    TASSERT ( helper_parse_char ( "\\n" ) == '\n' );
    TASSERT ( helper_parse_char ( "\\a" ) == '\a' );
    TASSERT ( helper_parse_char ( "\\b" ) == '\b' );
    TASSERT ( helper_parse_char ( "\\t" ) == '\t' );
    TASSERT ( helper_parse_char ( "\\v" ) == '\v' );
    TASSERT ( helper_parse_char ( "\\f" ) == '\f' );
    TASSERT ( helper_parse_char ( "\\r" ) == '\r' );
    TASSERT ( helper_parse_char ( "\\\\" ) == '\\' );
    TASSERT ( helper_parse_char ( "\\0" ) == 0 );
    TASSERT ( helper_parse_char ( "\\x77" ) == 'w' );
    TASSERT ( helper_parse_char ( "\\x0A" ) == '\n' );

    /**
     * tokenize
     * Run with regex and glob matching switched off in the global config.
     */
    config.regex = FALSE;
    config.glob = FALSE;
    // Second argument FALSE: tokens are expected lower-cased.
    char ** retv = tokenize ( "aAp nOoT MieS 12", FALSE );
    TASSERT ( retv[0] && strcmp ( retv[0], "aap" ) == 0 );
    TASSERT ( retv[1] && strcmp ( retv[1], "noot" ) == 0 );
    TASSERT ( retv[2] && strcmp ( retv[2], "mies" ) == 0 );
    TASSERT ( retv[3] && strcmp ( retv[3], "12" ) == 0 );
    tokenize_free ( retv );
    // Second argument TRUE: the case of the tokens is expected to be kept.
    retv = tokenize ( "blub³ bOb bEp bEE", TRUE );
    TASSERT ( retv[0] && strcmp ( retv[0], "blub3" ) == 0 );
    TASSERT ( retv[1] && strcmp ( retv[1], "bOb" ) == 0 );
    TASSERT ( retv[2] && strcmp ( retv[2], "bEp" ) == 0 );
    TASSERT ( retv[3] && strcmp ( retv[3], "bEE" ) == 0 );
    tokenize_free ( retv );

    /**
     * Levenshtein distance: identical strings, one trailing character added
     * or removed, and strings embedded in longer ones.
     */
    TASSERT ( levenshtein ( "aap", "aap" ) == 0 );
    TASSERT ( levenshtein ( "aap", "aap " ) == 1 );
    TASSERT ( levenshtein ( "aap ", "aap" ) == 1 );
    TASSERTE ( levenshtein ( "aap", "aap noot" ), 5 );
    TASSERTE ( levenshtein ( "aap", "noot aap" ), 5 );
    TASSERTE ( levenshtein ( "aap", "noot aap mies" ), 10 );
    TASSERTE ( levenshtein ( "noot aap mies", "aap" ), 10 );
    TASSERTE ( levenshtein ( "otp", "noot aap" ), 5 );
}
/**
 * Test driver for the helper routines.
 *
 * Runs, in order, checks of: collation keys (token_collate_key), escape
 * parsing (helper_parse_char), tokenizing (tokenize) and levenshtein().
 *
 * NOTE(review): TASSERT and TASSERTE are defined outside this block; how a
 * failed check is reported is not visible here.
 *
 * Returns EXIT_FAILURE when the locale cannot be set; otherwise control runs
 * off the end of main.
 */
int main ( int argc, char ** argv )
{
    cmd_set_arguments ( argc, argv );

    if ( setlocale ( LC_ALL, "" ) == NULL ) {
        fprintf ( stderr, "Failed to set locale.\n" );
        return EXIT_FAILURE;
    }

    /**
     * Collating.
     * With FALSE as second argument the expected key is lower-cased; with TRUE
     * the case is kept. In the TRUE example the superscript three is expected
     * to come back as a plain '3'.
     */
    char *res = token_collate_key ( "€ Sign", FALSE );
    TASSERT ( strcmp ( res, "€ sign" ) == 0 );
    g_free ( res );

    res = token_collate_key ( "éÉêèë Sign", FALSE );
    TASSERT ( strcmp ( res, "ééêèë sign" ) == 0 );
    g_free ( res );
    res = token_collate_key ( "éÉêèë³ Sign", TRUE );
    TASSERT ( strcmp ( res, "éÉêèë3 Sign" ) == 0 );
    g_free ( res );

    /**
     * Char function
     * Each two-character (or \xNN) escape sequence is expected to parse to the
     * corresponding character.
     */
    TASSERT ( helper_parse_char ( "\\n" ) == '\n' );
    TASSERT ( helper_parse_char ( "\\a" ) == '\a' );
    TASSERT ( helper_parse_char ( "\\b" ) == '\b' );
    TASSERT ( helper_parse_char ( "\\t" ) == '\t' );
    TASSERT ( helper_parse_char ( "\\v" ) == '\v' );
    TASSERT ( helper_parse_char ( "\\f" ) == '\f' );
    TASSERT ( helper_parse_char ( "\\r" ) == '\r' );
    TASSERT ( helper_parse_char ( "\\\\" ) == '\\' );
    TASSERT ( helper_parse_char ( "\\0" ) == 0 );
    TASSERT ( helper_parse_char ( "\\x77" ) == 'w' );
    TASSERT ( helper_parse_char ( "\\x0A" ) == '\n' );

    /**
     * tokenize
     * Run with regex and glob matching switched off in the global config.
     */
    config.regex = FALSE;
    config.glob = FALSE;
    // Second argument FALSE: tokens are expected lower-cased.
    char ** retv = tokenize ( "aAp nOoT MieS 12", FALSE );
    TASSERT ( retv[0] && strcmp ( retv[0], "aap" ) == 0 );
    TASSERT ( retv[1] && strcmp ( retv[1], "noot" ) == 0 );
    TASSERT ( retv[2] && strcmp ( retv[2], "mies" ) == 0 );
    TASSERT ( retv[3] && strcmp ( retv[3], "12" ) == 0 );
    tokenize_free ( retv );
    // Second argument TRUE: the case of the tokens is expected to be kept.
    retv = tokenize ( "blub³ bOb bEp bEE", TRUE );
    TASSERT ( retv[0] && strcmp ( retv[0], "blub3" ) == 0 );
    TASSERT ( retv[1] && strcmp ( retv[1], "bOb" ) == 0 );
    TASSERT ( retv[2] && strcmp ( retv[2], "bEp" ) == 0 );
    TASSERT ( retv[3] && strcmp ( retv[3], "bEE" ) == 0 );
    tokenize_free ( retv );

    /**
     * Levenshtein distance: identical strings, one trailing character added
     * or removed, and strings embedded in longer ones.
     */
    TASSERT ( levenshtein ( "aap", "aap" ) == 0 );
    TASSERT ( levenshtein ( "aap", "aap " ) == 1 );
    TASSERT ( levenshtein ( "aap ", "aap" ) == 1 );
    TASSERTE ( levenshtein ( "aap", "aap noot" ), 5 );
    TASSERTE ( levenshtein ( "aap", "noot aap" ), 5 );
    TASSERTE ( levenshtein ( "aap", "noot aap mies" ), 10 );
    TASSERTE ( levenshtein ( "noot aap mies", "aap" ), 10 );
    TASSERTE ( levenshtein ( "otp", "noot aap" ), 5 );
}
int fixed_match_notes (int fnotes, int mbar_number, int ibar_number, int delta_pitch) { int i,j, notes; int ioffset, moffset; int tplastnote,lastnote; /* for contour matching */ int deltapitch,deltapitchtp; int string1[32],string2[32]; ioffset = ibarlineptr[ibar_number]; moffset = tpbarlineptr[mbar_number]; /*printf("ioffset = %d moffset = %d\n",ioffset,moffset);*/ i = j = 0; notes = 0; lastnote = 0; tplastnote =0; while (notes < fnotes) { /*printf("%d %d\n",imidipitch[j+ioffset],tpmidipitch[i+moffset]);*/ if (imidipitch[j + ioffset] == RESTNOTE || imidipitch[j + ioffset] == BAR) { j++; continue; } /* pass over RESTS or BARS */ if (tpmidipitch[i + moffset] == RESTNOTE || tpmidipitch[i + moffset] == BAR) { i++; continue; } /* pass over RESTS or BARS */ if (imidipitch[j + ioffset] == -1 || tpmidipitch[i + moffset] == -1) { printf("unexpected negative pitch at %d or %d for xref %d\n",i+ioffset,i+moffset,xrefno); i++; j++; continue; } /* unknown contour note */ if (norhythm == 1) { inotelength[j + ioffset] = 0; tpnotelength[i + moffset] = 0; } if (con == 1) { /* contour matching */ if (tplastnote !=0) { deltapitchtp = tpmidipitch[i+moffset] - tplastnote; deltapitch = imidipitch[j+ioffset] - lastnote; tplastnote = tpmidipitch[i+moffset]; lastnote = imidipitch[j+ioffset]; if (qntflag > 0) { deltapitch = quantize7 (deltapitch); deltapitchtp = quantize7(deltapitchtp); } string1[notes] = 256*deltapitch + inotelength[j + ioffset]; string2[notes] = 256*deltapitchtp + tpnotelength[i + moffset]; if (notes < 32) notes++; else printf("notes > 32\n"); /* printf("deltapitch %d %d\n",deltapitch,deltapitchtp); printf("length %d %d\n",inotelength[j + ioffset],tpnotelength[i+moffset]); */ if (deltapitch != deltapitchtp) return -1; /* match succeeded */ /* printf("%d %d\n",deltapitch,deltapitchtp);*/ } else { /* first note in bar - no lastnote */ tplastnote = tpmidipitch[i+moffset]; lastnote = imidipitch[j+ioffset]; } } else { /* absolute matching (with transposition) */ /*printf("%d 
%d\n",imidipitch[j+ioffset],tpmidipitch[i+moffset]-delta_pitch); printf("%d %d\n",inotelength[j+ioffset],tpnotelength[i+moffset]); */ string1[notes] = 256*imidipitch[j+ioffset] + inotelength[j + ioffset]; string2[notes] = 256*(tpmidipitch[i+moffset] - delta_pitch) + tpnotelength[i + moffset]; if (notes < 32) notes++; else printf("notes > 32\n"); } i++; j++; } if (notes < 2) return -1; /*printf("ioffset = %d moffset = %d\n",ioffset,moffset);*/ if (levdist == 0) return perfect_match(string1,string2,notes); else return levenshtein(string1,string2,notes,notes); }
/**
 * Test driver for the helper routines.
 *
 * Runs, in order, checks of: escape parsing (helper_parse_char),
 * levenshtein() with explicit character counts, Latin-1/forced UTF-8
 * conversion, and pid-file creation and locking.
 *
 * NOTE(review): TASSERT and TASSERTE are defined outside this block; how a
 * failed check is reported is not visible here.
 *
 * Returns EXIT_FAILURE when the locale cannot be set; otherwise control runs
 * off the end of main.
 */
int main ( int argc, char ** argv )
{
    cmd_set_arguments ( argc, argv );

    if ( setlocale ( LC_ALL, "" ) == NULL ) {
        fprintf ( stderr, "Failed to set locale.\n" );
        return EXIT_FAILURE;
    }

    /**
     * Char function
     * Each two-character (or \xNN) escape sequence is expected to parse to the
     * corresponding character.
     */
    TASSERT ( helper_parse_char ( "\\n" ) == '\n' );
    TASSERT ( helper_parse_char ( "\\a" ) == '\a' );
    TASSERT ( helper_parse_char ( "\\b" ) == '\b' );
    TASSERT ( helper_parse_char ( "\\t" ) == '\t' );
    TASSERT ( helper_parse_char ( "\\v" ) == '\v' );
    TASSERT ( helper_parse_char ( "\\f" ) == '\f' );
    TASSERT ( helper_parse_char ( "\\r" ) == '\r' );
    TASSERT ( helper_parse_char ( "\\\\" ) == '\\' );
    TASSERT ( helper_parse_char ( "\\0" ) == 0 );
    TASSERT ( helper_parse_char ( "\\x77" ) == 'w' );
    TASSERT ( helper_parse_char ( "\\x0A" ) == '\n' );

    /**
     * Levenshtein distance
     * Each string is passed together with its length in UTF-8 characters as
     * reported by g_utf8_strlen().
     */
    TASSERT ( levenshtein ( "aap", g_utf8_strlen ( "aap", -1), "aap", g_utf8_strlen ( "aap", -1) ) == 0 );
    TASSERT ( levenshtein ( "aap", g_utf8_strlen ( "aap", -1), "aap ", g_utf8_strlen ( "aap ", -1) ) == 1 );
    TASSERT ( levenshtein ( "aap ", g_utf8_strlen ( "aap ", -1), "aap", g_utf8_strlen ( "aap", -1) ) == 1 );
    TASSERTE ( levenshtein ( "aap", g_utf8_strlen ( "aap", -1), "aap noot", g_utf8_strlen ( "aap noot", -1) ), 5 );
    TASSERTE ( levenshtein ( "aap", g_utf8_strlen ( "aap", -1), "noot aap", g_utf8_strlen ( "noot aap", -1) ), 5 );
    TASSERTE ( levenshtein ( "aap", g_utf8_strlen ( "aap", -1), "noot aap mies", g_utf8_strlen ( "noot aap mies", -1) ), 10 );
    TASSERTE ( levenshtein ( "noot aap mies", g_utf8_strlen ( "noot aap mies", -1), "aap", g_utf8_strlen ( "aap", -1) ), 10 );
    TASSERTE ( levenshtein ( "otp", g_utf8_strlen ( "otp", -1), "noot aap", g_utf8_strlen ( "noot aap", -1) ), 5 );

    /**
     * Quick conversion check.
     */
    {
        // The two bytes 0xA1 0xB5 are expected to convert to the UTF-8 text "¡µ".
        char *str = rofi_latin_to_utf8_strdup ( "\xA1\xB5", 2 );
        TASSERT ( g_utf8_collate ( str, "¡µ" ) == 0 );
        g_free ( str );
    }

    {
        // Input that is already valid is expected to come back unchanged.
        char *str = rofi_force_utf8 ( "Valid utf8", 10 );
        TASSERT ( g_utf8_collate ( str, "Valid utf8" ) == 0 );
        g_free ( str );
        // \xc3\x28 is an invalid sequence: the input must fail validation, the
        // forced result must pass it, with the bad byte replaced by U+FFFD.
        char in[] = "Valid utf8 until \xc3\x28 we continue here";
        TASSERT ( g_utf8_validate ( in, -1, NULL ) == FALSE );
        str = rofi_force_utf8 ( in, strlen ( in ) );
        TASSERT ( g_utf8_validate ( str, -1, NULL ) == TRUE );
        TASSERT ( g_utf8_collate ( str, "Valid utf8 until �( we continue here" ) == 0 );
        g_free ( str );
    }
    // Pid test.
    // Tests basic functionality of writing it, locking, seeing if I can write same again
    // And close/reopen it again.
    {
        const char *path = "/tmp/rofi-test.pid";
        // A NULL path must be rejected.
        TASSERT ( create_pid_file ( NULL ) == -1 );
        int fd = create_pid_file ( path );
        TASSERT ( fd >= 0 );
        // While the first file is still held, a second creation must fail.
        int fd2 = create_pid_file ( path );
        TASSERT ( fd2 < 0 );

        // After removal the same path must be creatable again.
        remove_pid_file ( fd );
        fd = create_pid_file ( path );
        TASSERT ( fd >= 0 );
        remove_pid_file ( fd );
    }
}
/*
 * Command-line front end for the similarity metrics.
 *
 * Usage: <program> <SimMetric> <string1> <string2>
 *
 * With any other argument count the usage text and the list of metric names
 * in SIMMETS[] are printed and 1 is returned.  With "all" as metric, main
 * calls itself once per metric name in the table below.  Otherwise the metric
 * is selected through which_type(argv[1]); its raw result (or, for the
 * phonetic metrics, the two codes) and the similarity percentage are printed
 * on one line.
 *
 * Returns EXIT_SUCCESS after a comparison (or after the "all" run) and 1 for
 * a usage error or an unknown metric.
 */
int main(int argc, char *argv[])
{
    if (argc != 4) {
        int i;

        printf("usage: \n\t$ %s <SimMetric> <string1> <string2>\n", basename(argv[0]));
        printf("\nWhere SimMetric is one of:\n");
        for (i = 0; i < SIMMETC; i++) {
            if (i > 0)
                printf(",");
            printf(" %s", SIMMETS[i]);
        }
        printf("\n");
        return (1);
    }

    if (strcmp(argv[1], "all") == 0) {
        /* every metric, in the order in which it is reported */
        static char *const every_metric[] = {
            "block_distance",
            "cosine",
            "dice",
            "euclidean_distance",
            "jaccard",
            "jaro",
            "jaro_winkler",
            "levenshtein",
            "matching_coefficient",
            "monge_elkan",
            "needleman_wunch",
            "overlap_coefficient",
            "qgrams_distance",
            "smith_waterman",
            "smith_waterman_gotoh",
            "soundex",
            "metaphone",
            "double_metaphone"
        };
        size_t m;

        for (m = 0; m < sizeof(every_metric) / sizeof(every_metric[0]); m++) {
            argv[1] = every_metric[m];
            main(argc, argv);
        }
        return (EXIT_SUCCESS);
    }

    {
        float similarity = 0;
        char *sm_name, metrics[50], compare[50];

        /*
         * The inputs are arbitrary user strings: snprintf truncates them to
         * the buffer size instead of writing past the end of the buffer.
         */
        snprintf(compare, sizeof(compare), "%10s & %-10s", argv[2], argv[3]);

        switch (which_type(argv[1])) {
            case 0:
            case 1:
                sm_name = "Block Distance";
                snprintf(metrics, sizeof(metrics), "%d", block_distance(argv[2], argv[3]));
                similarity = block_distance_similarity(argv[2], argv[3]);
                break;
            case 2:
            case 3:
                sm_name = "Cosine Similarity";
                similarity = cosine_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof(metrics), "%f", similarity);
                break;
            case 4:
                sm_name = "Dice Similarity";
                similarity = dice_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof(metrics), "%f", similarity);
                break;
            case 5:
            case 6:
                sm_name = "Euclidean Distance";
                snprintf(metrics, sizeof(metrics), "%3.2f", euclidean_distance(argv[2], argv[3]));
                similarity = euclidean_distance_similarity(argv[2], argv[3]);
                break;
            case 7:
            case 8:
                sm_name = "Jaccard Similarity";
                similarity = jaccard_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof(metrics), "%f", similarity);
                break;
            case 9:
            case 10:
                sm_name = "Jaro Similarity";
                similarity = jaro_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof(metrics), "%f", similarity);
                break;
            case 11:
            case 12:
                sm_name = "Jaro Winkler Similarity";
                similarity = jaro_winkler_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof(metrics), "%f", similarity);
                break;
            case 13:
            case 14:
                sm_name = "Levenshtein Distance";
                snprintf(metrics, sizeof(metrics), "%d", levenshtein(argv[2], argv[3]));
                similarity = levenshtein_similarity(argv[2], argv[3]);
                break;
            case 15:
            case 16:
                sm_name = "Matching Coefficient SimMetrics";
                snprintf(metrics, sizeof(metrics), "%3.2f", matching_coefficient(argv[2], argv[3]));
                similarity = matching_coefficient_similarity(argv[2], argv[3]);
                break;
            case 17:
            case 18:
                sm_name = "Monge Elkan Similarity";
                similarity = monge_elkan_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof(metrics), "%f", similarity);
                break;
            case 19:
            case 20:
                sm_name = "Needleman Wunch SimMetrics";
                snprintf(metrics, sizeof(metrics), "%3.2f", needleman_wunch(argv[2], argv[3]));
                similarity = needleman_wunch_similarity(argv[2], argv[3]);
                break;
            case 21:
            case 22:
                sm_name = "Overlap Coefficient Similarity";
                similarity = overlap_coefficient_similarity(argv[2], argv[3]);
                snprintf(metrics, sizeof(metrics), "%f", similarity);
                break;
            case 23:
            case 24:
                sm_name = "QGrams Distance";
                snprintf(metrics, sizeof(metrics), "%d", qgrams_distance(argv[2], argv[3]));
                similarity = qgrams_distance_similarity(argv[2], argv[3]);
                break;
            case 25:
            case 26:
                sm_name = "Smith Waterman SimMetrics";
                snprintf(metrics, sizeof(metrics), "%3.2f", smith_waterman(argv[2], argv[3]));
                similarity = smith_waterman_similarity(argv[2], argv[3]);
                break;
            case 27:
            case 28:
                sm_name = "Smith Waterman Gotoh SimMetrics";
                snprintf(metrics, sizeof(metrics), "%3.2f", smith_waterman_gotoh(argv[2], argv[3]));
                similarity = smith_waterman_gotoh_similarity(argv[2], argv[3]);
                break;
            case 29:
            case 30: {
                /* the phonetic codes are released with free() once printed */
                char *s1 = soundex(argv[2]);
                char *s2 = soundex(argv[3]);

                sm_name = "Soundex Phonetics";
                snprintf(metrics, sizeof(metrics), "%s & %s", s1, s2);
                free(s1);
                free(s2);
                similarity = soundex_similarity(argv[2], argv[3]);
                break;
            }
            case 31:
            case 32: {
                char *m1 = metaphone(argv[2]);
                char *m2 = metaphone(argv[3]);

                sm_name = "Metaphone Phonetics";
                snprintf(metrics, sizeof(metrics), "%s & %s", m1, m2);
                free(m1);
                free(m2);
                similarity = metaphone_similarity(argv[2], argv[3]);
                break;
            }
            case 33:
            case 34: {
                char *dm1 = double_metaphone(argv[2]);
                char *dm2 = double_metaphone(argv[3]);

                sm_name = "Double Metaphone Phonetics";
                snprintf(metrics, sizeof(metrics), "%s & %s", dm1, dm2);
                free(dm1);
                free(dm2);
                similarity = double_metaphone_similarity(argv[2], argv[3]);
                break;
            }
            default:
                printf("Unknown SimMetric %s, not found.\n", argv[1]);
                return (1);
        }

        printf("%-31s between %-25s is %12s ", sm_name, compare, metrics);
        printf("and yields a %3.0f%% similarity\n", similarity * 100);
        return (EXIT_SUCCESS);
    }
}