/*
 * KMP substring search: the target index never moves backwards; on a
 * mismatch only the pattern index falls back through the next[] table.
 *
 * S   - NUL-terminated target string.
 * T   - NUL-terminated pattern string.
 * pos - 0-based index in S at which the search starts.
 *
 * Returns the 0-based index in S of the first occurrence of T found at or
 * after pos, or -1 when there is none or when pos lies outside S.
 *
 * NOTE(review): the table is a fixed 255-entry buffer filled by
 * get_nextval(), which is defined elsewhere; patterns that need more
 * entries than that would overflow it -- confirm the limit with the callee.
 */
int index_KMP(char S[], char T[], int pos)
{
    int next[255];
    int i;
    int j = 0;

    /* Reject start positions that are negative or past the end of S. */
    if (pos < 0)
        return -1;
    for (i = 0; i < pos; i++) {
        if (S[i] == '\0')
            return -1;
    }

    get_nextval(T, next);

    i = pos;
    while (S[i] != '\0' && T[j] != '\0') {
        if (S[i] == T[j]) {
            /* Characters agree: compare the following pair. */
            i++;
            j++;
        } else if (j == 0) {
            /* Nothing matched yet, so the only option is the next target char. */
            i++;
        } else {
            /*
             * Fall back in the pattern but keep i: S[i] still has to be
             * compared against T[next[j]].  Advancing i here (as the code
             * previously did) skips that comparison and misses matches,
             * e.g. "ab" inside "aab".
             */
            j = next[j];
            if (j < 0) {
                /*
                 * A negative entry is treated as "no prefix can be reused
                 * and S[i] cannot start a match": restart the pattern on
                 * the next target character.
                 */
                j = 0;
                i++;
            }
        }
    }

    if (T[j] == '\0')
        return i - j;   /* whole pattern consumed: match starts j chars back */
    return -1;          /* no occurrence of T in S from pos onwards */
}
/*
 * KMP search of pattern t inside string s, both scanned from index 0.
 *
 * nextval is caller-provided storage; it is filled here by get_nextval()
 * before the scan begins.
 *
 * Returns the 0-based start index of the first match, or -1 if t does not
 * occur in s.
 */
int index_KMP2(PSString s, PSString t, int *nextval)
{
    int si = 0;   /* position in s */
    int ti = 0;   /* position in t; -1 means "restart the pattern" */

    get_nextval(t, nextval);

    while (si < s->length && ti < t->length) {
        if (ti != -1 && s->data[si] != t->data[ti]) {
            /* Mismatch: only the pattern position falls back. */
            ti = nextval[ti];
            continue;
        }
        /* Either the characters agree or the pattern restarts: step both. */
        si++;
        ti++;
    }

    return (ti >= t->length) ? si - t->length : -1;
}
char *kmp_strstr(const char *haystack, const char *needle) { // 通俗易懂版!!! int find_idx(0); int *N = get_nextval(needle); int i(0), j(0); while (haystack[i] != '\0' && needle[j] != '\0') { if (haystack[i] == needle[j]) { i++; j++; } else { if (N[j] == 0) { // needle[0:j] 所有子串的最长公共子串长度为0, needle[j] != needle[0], 模式串必须从头再来,一夜回到解放前! // 若needle[0:len(needle)] 所有子串的最长公共子串长度为0, 那KMP毛用都没有 find_idx = i; // 等价于 find_idx += j(前一次匹配了多长); 等价于 find_idx += j - N[j]; j = 0; // j = N[j] } else if (N[j] == -1) { // N[j] == -1 代表 needle[j] 和 needle[0]是一样的, // 现在needle[j] != haystack[i], // so, haystack可以往前走一步, 模式串要从头再开始匹配 ++i; find_idx = i; // 等价于 find_idx += j+1; 等价于 find_idx += j - N[j]; 你妹的,原来next定义一个-1就是因为这 j = 0; } else { find_idx = i - N[j]; //模式串的前N[j]个字符串和haystack的前N[j]个字符串是一样的 // 等价于 find_idx += j - N[j]; // 模式串位置 j = N[j]; } } } delete []N; if (needle[j] == '\0') return const_cast<char *>(&haystack[find_idx]); // 其实 find_idx = j - strlen(needle), 那么麻烦有毛用 else return NULL; }
/*
 * Stores the NUL-terminated text in dst as a length-prefixed string:
 * dst[0] holds the length, dst[1..len] the characters, followed by a NUL.
 * Returns 0 on success, -1 if the text does not fit.
 * NOTE(review): assumes the destination has at least MAXSTRLEN + 1
 * elements -- confirm against the SString typedef.
 */
static int load_sstring(unsigned char *dst, const char *text)
{
    int len = 0;

    while (text[len] != '\0') {
        /* Keep room for the terminator and keep the length in one byte. */
        if (len + 1 >= MAXSTRLEN || len >= 255)
            return -1;
        dst[len + 1] = (unsigned char)text[len];
        len++;
    }
    dst[0] = (unsigned char)len;
    dst[len + 1] = '\0';
    return 0;
}

/*
 * Demo driver: builds a main string and a pattern, prints the pattern's
 * next[] and nextval[] tables (highest index first) and the position that
 * Index_KMP reports with each table.
 *
 * The main string used to be initialised straight from a string literal,
 * which put the character 'a' (97) into the length slot [0] and shifted
 * the text by one.  Both strings are now built the same way, with the
 * length in [0].
 */
int main()
{
    SString mainstr;
    SString paternstr;
    int *next = NULL;
    int *nextval = NULL;
    int len;
    int k;
    int status = 1;

    if (load_sstring(mainstr, "aaabaaaab") != 0 ||
        load_sstring(paternstr, "aaaab") != 0)
        return 1;
    len = paternstr[0];

    next = (int *)malloc(MAXSTRLEN * sizeof(int));
    nextval = (int *)malloc(MAXSTRLEN * sizeof(int));
    if (next == NULL || nextval == NULL)
        goto cleanup;

    get_next(paternstr, next);
    for (k = len; k > 0; k--)
        printf("%d\n", next[k]);
    printf("\n%d\n", Index_KMP(mainstr, paternstr, 1, next));

    get_nextval(paternstr, nextval);
    for (k = len; k > 0; k--)
        printf("%d\n", nextval[k]);
    printf("\n%d\n", Index_KMP(mainstr, paternstr, 1, nextval));

    status = 0;

cleanup:
    /* free(NULL) is a no-op, so both calls are safe on every path. */
    free(nextval);
    free(next);
    return status;
}
int main(int argc, char *argv[]) { int haystack_len = 0; int pattern_len = 0; int i; int pos; if (argc > 1 && argv[1][0]) { snprintf(haystack, STR_BUF_LEN, "%s", argv[1]); } if (argc > 2 && argv[2][0]) { snprintf(pattern, STR_BUF_LEN, "%s", argv[2]); } printf("haystack: %s\n", haystack); printf("pattern: %s\n", pattern); haystack_len = strlen(haystack); pattern_len = strlen(pattern); get_nextval(pattern, pattern_len, next_map); for (i = 0; i < pattern_len; i++) { printf("next_map[%d] = %d\n", i, next_map[i]); } if (-1 != (pos = kmp_search(haystack, haystack_len, pattern, pattern_len, next_map, 0))) { printf("OK, the pattern:[%s] is substr of haystack:[%s], position is:%d.\n", pattern, haystack, pos); } else { printf("Sorry, the pattern:[%s] is NOT substr of haystack:[%s].\n", pattern, haystack); } return 0; }
/*
 * KMP search using the table produced by get_nextval().
 *
 * S and T are indexed from 1; the loop bounds read S[0] and T[0] as the
 * respective lengths.  pos is the position in S where scanning starts.
 *
 * Returns the position in S where T first matches, or 0 if it does not.
 */
int Index_KMP1(String S, String T, int pos)
{
    int next[255];     /* fallback table for T */
    int s_at = pos;    /* current position in S */
    int t_at = 1;      /* current position in T; 0 means "restart pattern" */

    get_nextval(T, next);

    while (s_at <= S[0] && t_at <= T[0]) {
        if (t_at != 0 && S[s_at] != T[t_at]) {
            /* Mismatch: only the pattern position falls back, s_at stays. */
            t_at = next[t_at];
        } else {
            /* Match, or pattern restart: step both positions. */
            ++s_at;
            ++t_at;
        }
    }

    return (t_at > T[0]) ? s_at - T[0] : 0;
}
/*
 * Scans main string S for pattern T, starting at position pos, and returns
 * the position of the first match.
 *
 * S and T are indexed from 1, with S[0] and T[0] read as their lengths.
 * Returns the position in S where T first matches, or 0 when T is not found.
 */
int Index(String S, String T, int pos)
{
    int i = pos;        /* current position in S */
    int j = 1;          /* current position in T; 0 means "restart pattern" */
    int nextval[255];   /* fallback table for T, filled by get_nextval() */

    get_nextval(T, nextval);

    /*
     * Bound i by the stored length S[0].  The previous bound,
     * S[0] - '0', subtracted 48 from the length and ended the scan far too
     * early (T[0] was already used unadjusted in the same condition).
     */
    while (i <= S[0] && j <= T[0]) {
        if (j == 0 || S[i] == T[j]) {
            ++i;
            ++j;
        } else {
            /* Mismatch: only the pattern position falls back, i stays. */
            j = nextval[j];
        }
    }

    /* j ran past the end of T exactly when the whole pattern matched. */
    if (j > T[0])
        return i - T[0];
    return 0;
}
/*
 * Prints one pattern together with its next[] table and, when with_nextval
 * is non-zero, its nextval[] table.  The table is allocated per call and
 * freed before returning.  Returns 0 on success, -1 if allocation fails.
 */
static int show_tables(char *text, const char *pattern_label,
                       const char *next_label, int with_nextval)
{
    String s;
    int len;
    int *table;

    StrAssign(s, text);
    printf("%s", pattern_label);
    StrPrint(s);

    len = StrLength(s);
    table = (int *)malloc((len + 1) * sizeof(int));
    if (table == NULL) {
        fprintf(stderr, "out of memory\n");
        return -1;
    }

    get_next(s, table);
    printf("%s", next_label);
    NextPrint(table, len);

    if (with_nextval) {
        get_nextval(s, table);
        printf("NextVal为: ");
        NextPrint(table, len);
    }
    printf("\n");

    free(table);
    return 0;
}

/*
 * Demo driver: prints next[] (and for two patterns also nextval[]) tables
 * for several sample patterns, then compares the positions reported by
 * Index, Index_KMP and Index_KMP1 on a sample main string.
 *
 * Each table used to be malloc'ed into the same pointer and never freed;
 * the per-pattern work now lives in show_tables(), which releases it.
 */
int main()
{
    String s1, s2;

    StrAssign(s1, "ababaaaba");
    StrAssign(s2, "ababaaaba");
    Index_KMP(s1, s2, 0);   /* return value is not used here */

    if (show_tables("abcdex", "子串为: ", "Next为: ", 0) != 0 ||
        show_tables("abcabx", "子串为: ", "Next为: ", 0) != 0 ||
        show_tables("ababaaaba", "子串为: ", "Next为: ", 0) != 0 ||
        show_tables("aaaaaaaab", "子串为: ", "Next为: ", 0) != 0 ||
        show_tables("ababaaaba", " 子串为: ", " Next为: ", 1) != 0 ||
        show_tables("aaaaaaaab", " 子串为: ", " Next为: ", 1) != 0)
        return 1;

    StrAssign(s1, "00000000000000000000000000000000000000000000000001");
    printf("主串为: ");
    StrPrint(s1);
    StrAssign(s2, "0000000001");
    printf("子串为: ");
    StrPrint(s2);
    printf("\n");

    printf("主串和子串在第%d个字符处首次匹配(朴素模式匹配算法)\n", Index(s1, s2, 1));
    printf("主串和子串在第%d个字符处首次匹配(KMP算法) \n", Index_KMP(s1, s2, 1));
    printf("主串和子串在第%d个字符处首次匹配(KMP改良算法) \n", Index_KMP1(s1, s2, 1));

    return 0;
}