/*
 * Boyer-Moore substring search over NUL-terminated strings.
 *
 * haystack: text to search in.
 * needle:   pattern to look for.
 *
 * Returns a pointer to the start of the first match found inside haystack,
 * or NULL when there is no match. An empty haystack always yields NULL
 * (even for an empty needle); otherwise an empty needle matches at the
 * start of haystack. A needle longer than the haystack cannot match and
 * yields NULL.
 */
const char* boyermoore_search(const char *haystack, const char *needle)
{
    /* Calc string sizes */
    size_t needle_len = strlen(needle);
    size_t haystack_len = strlen(haystack);

    /* Simple checks */
    if (haystack_len == 0)
        return NULL;
    if (needle_len == 0)
        return haystack;
    /*
     * Without this guard haystack_len - needle_len wraps around (both are
     * unsigned) and the search loop would read far past the haystack.
     */
    if (needle_len > haystack_len)
        return NULL;

    /* Initialize heuristics */
    int badcharacter[ALPHABET_SIZE];
    int goodsuffix[needle_len + 1];
    prepare_badcharacter_heuristic(needle, needle_len, badcharacter);
    prepare_goodsuffix_heuristic(needle, needle_len, goodsuffix);

    /* Boyer-Moore search: s is the current alignment of needle in haystack */
    const size_t last_start = haystack_len - needle_len;
    size_t s = 0;
    while (s <= last_start) {
        /* Compare right to left; j ends as the count of unmatched chars. */
        size_t j = needle_len;
        while (j > 0 && needle[j - 1] == haystack[s + j - 1])
            j--;

        if (j == 0)
            return haystack + s;

        /*
         * Index through unsigned char: plain char may be signed, and a
         * negative value converted straight to size_t becomes a huge index.
         * NOTE(review): assumes ALPHABET_SIZE >= UCHAR_MAX + 1 and that the
         * table was filled with the same indexing scheme -- confirm against
         * prepare_badcharacter_heuristic.
         */
        int k = badcharacter[(unsigned char) haystack[s + j - 1]];
        int m;
        /* Take the larger of the bad-character and good-suffix shifts. */
        if (k < (int)j && (m = j - k - 1) > goodsuffix[j])
            s += m;
        else
            s += goodsuffix[j];
    }

    /* not found */
    return NULL;
}
/*
 * Boyer-Moore search algorithm
 *
 * Scans haystack_array for needle_array and prints
 * "Pattern found at <offset>" to stdout for each match the scan reaches;
 * after a match the alignment advances by goodsuffix[0]. Nothing is
 * returned to the caller.
 *
 * The function prints the needle length up front and returns early (after
 * that print) when either array is empty or the needle is longer than the
 * haystack. It also emits a debugging trace for alignments strictly between
 * 6250 and 6260.
 */
void boyermoore_search(c_array haystack_array, c_array needle_array)
{
    /* Calc string sizes */
    size_t needle_len = needle_array.len;
    size_t haystack_len = haystack_array.len;
    byte* haystack = haystack_array.data;
    byte* needle = needle_array.data;

    /* %zu is the matching conversion for size_t (was %lu). */
    printf("needle length = %zu\n", needle_len);

    /** Simple checks */
    if (haystack_len == 0)
        return;
    if (needle_len == 0)
        return;
    if (needle_len > haystack_len)
        return;

    printf("boyer_moore search\n");

    /** Initialize heuristics */
    int badcharacter[ALPHABET_SIZE];
    int goodsuffix[needle_len + 1];
    prepare_badcharacter_heuristic(needle, needle_len, badcharacter);
    prepare_goodsuffix_heuristic(needle, needle_len, goodsuffix);

    /** Boyer-Moore search: s is the current alignment of needle in haystack */
    size_t s = 0, j = 0;
    while (s <= (haystack_len - needle_len)) {
        /* Compare right to left; j ends as the count of unmatched bytes. */
        j = needle_len;
        if (s > 6250 && s < 6260) {
            /* Debug trace: log every byte that matched at this alignment. */
            while (j > 0 && needle[j - 1] == haystack[s + j - 1]) {
                printf("%zu: %d %d\n", s + j - 1, needle[j - 1], haystack[s + j - 1]);
                j--;
            }
            /*
             * Log the mismatching pair. Only valid when a mismatch exists:
             * on a full match j is 0 and needle[j - 1] would read before
             * the start of the needle buffer.
             */
            if (j > 0)
                printf("=%zu: %d %d\n", s + j - 1, needle[j - 1], haystack[s + j - 1]);
        } else {
            while (j > 0 && needle[j - 1] == haystack[s + j - 1])
                j--;
        }

        if (j > 0) {
            /*
             * NOTE(review): the index is used as-is; this is only in range
             * if `byte` is an unsigned 8-bit type and ALPHABET_SIZE covers
             * all its values -- confirm against the typedef.
             */
            int k = badcharacter[haystack[s + j - 1]];
            int m;
            /* Take the larger of the bad-character and good-suffix shifts. */
            if (k < (int)j && (m = j - k - 1) > goodsuffix[j]) {
                s += m;
                /* The trace window is tested against the already-shifted s. */
                if (s > 6250 && s < 6260)
                    printf("adding m = %d\n", m);
            } else {
                s += goodsuffix[j];
                if (s > 6250 && s < 6260)
                    printf("adding goodsuffix[%zu] = %d\n", j, goodsuffix[j]);
            }
        } else {
            printf("Pattern found at %zu\n", s);
            s += goodsuffix[0];
        }
    }
}