/*
 * Boyer-Moore search for the first occurrence of `wanted` (wlen bytes)
 * inside `s` (slen bytes).
 *
 * Returns a pointer to the first match inside `s`, or NULL when an argument
 * is invalid, when either preprocessing step reports -1, or when there is no
 * match.
 *
 * The bad-character shift formula is selected at compile time through
 * BM_VERSION; the two forms presumably correspond to two different table
 * layouts produced by preBmBc() -- confirm against its definition.
 */
unsigned char *BM(const unsigned char *s, int slen, const unsigned char *wanted, int wlen)
{
    int bmGs[BM_XSIZE];
    int bmBc[BM_ASIZE];
    int last_start = slen - wlen;   /* highest offset at which a window still fits */
    int pos = 0;

    if (s == NULL || wanted == NULL || wlen < 0 || wlen > slen)
        return NULL;

    /* The bad-character table is only built once the good-suffix table succeeded. */
    if (preBmGs(wanted, wlen, bmGs) == -1)
        return NULL;
    if (preBmBc(wanted, wlen, bmBc) == -1)
        return NULL;

    while (pos <= last_start) {
        int i = wlen - 1;

        /* Compare the window right to left. */
        while (i >= 0 && wanted[i] == s[pos + i])
            i--;

        if (i < 0)
            return (unsigned char *)(s + pos);

#if BM_VERSION == 1
        pos += MAX(bmGs[i], i - bmBc[s[pos + i]]);
#else
        pos += MAX(bmGs[i], bmBc[s[pos + i]] - wlen + 1 + i);
#endif
    }

    return NULL;
}
/*
 * Initialise a Boyer-Moore search context for the pattern x[0..m-1].
 *
 * bmp   - context to initialise; fully zeroed first
 * x     - pattern bytes
 * m     - pattern length
 * icase - non-zero: the stored copy of the pattern is lower-cased
 *
 * Returns 0 on success, -1 if the good-suffix table cannot be allocated,
 * -2 if the pattern copy cannot be allocated, -3 if preBmGs() fails.
 * On any failure the buffers allocated here are released again and the
 * corresponding pointers are reset to NULL, so nothing is leaked.
 */
int bm_init(BM *bmp, unsigned char *x, int m, int icase)
{
    int i;

    /* sizeof(*bmp), not sizeof(bmp): clear the whole structure, not just
       pointer-size bytes of it. */
    memset(bmp, 0, sizeof(*bmp));
    bmp->icase = icase;

    bmp->bmGs = (int *) calloc(m, sizeof(int));
    if (bmp->bmGs == NULL)
        return -1;

    bmp->saved_m = m;
    bmp->saved_x = (unsigned char *) malloc(m);
    if (bmp->saved_x == NULL) {
        free(bmp->bmGs);
        bmp->bmGs = NULL;
        return -2;
    }

    /* Keep a private (optionally lower-cased) copy of the pattern. */
    for (i = 0; i < m; i++)
        bmp->saved_x[i] = icase ? tolower(x[i]) : x[i];

    /* Preprocessing */
    if (preBmGs(bmp->saved_x, m, bmp->bmGs) < 0) {
        free(bmp->saved_x);
        bmp->saved_x = NULL;
        free(bmp->bmGs);
        bmp->bmGs = NULL;
        return -3;
    }

    preBmBc((unsigned char *) bmp->saved_x, m, bmp->bmBc);

    return 0;
}
//T为文本串,P为模式串,成功返回找到次数,失败返回0 int bmsearch(char *T, char *P){ int i, j, bmGs[PSIZE], bmBc[ASIZE]; //初始化坏字符数组和好前缀数组 preBmGs(P, bmGs); preBmBc(P, bmBc); int m = strlen(P); int tLen = strlen(T); int ret = 0; j = 0; while(j <= tLen - m){ //j从前往后遍历文本串 for(i = m - 1; i >= 0; --i){ //i从后往前遍历模式串 cmp_count++; if(T[j + i] != P[i]){ break; } } if(i == -1){ //表示匹配成功 found[ret++] = j; j += m; }else{ j += (bmGs[i] > (bmBc[P[i]] - m + 1 + i) ? bmGs[i] : (bmBc[P[i]] - m + 1 + i)); } } return ret; }
// x - pointer to pattern // m - len(x) // y - pointer to string to search // n - len(y) // modified to return location of first match, or -1 int boyer_moore( unsigned char *x, int m, unsigned char *y, int n) { int i, j, bmBc[ASIZE]; int *bmGs = safe_malloc(m * sizeof(int)); /* Preprocessing */ preBmGs(x, m, bmGs); preBmBc(x, m, bmBc); /* Searching */ j = 0; while (j <= n - m) { for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i) ; if (i < 0) { free(bmGs); return j; //j += bmGs[0]; // just returning the first match } else { j += MAX(bmGs[i], bmBc[y[i + j]] - m + 1 + i); } } free(bmGs); return -1; }
/*
 * Search haystack[0..n-1] for needle[0..m-1] using a Boyer-Moore variant
 * that also takes a "turbo" shift into account.
 *
 * Returns the 1-based position of the first match, or 0 if there is none.
 */
HB_ISIZ hb_strAtTBM( const char * needle, HB_ISIZ m, const char * haystack, HB_ISIZ n )
{
   HB_ISIZ bmBc[ ASIZE ];
   HB_ISIZ * bmGs = ( HB_ISIZ * ) hb_xgrab( m * sizeof( HB_ISIZ ) );
   HB_ISIZ result = 0;
   HB_ISIZ pos = 0;        /* start of the current window in haystack */
   HB_ISIZ remembered = 0; /* length of the stretch skipped during the next comparison */
   HB_ISIZ shift = m;
   HB_ISIZ matched, turboShift, bcShift;

   /* Preprocessing */
   preBmGs( needle, m, bmGs );
   preBmBc( needle, m, bmBc );

   /* Searching */
   while( pos <= n - m )
   {
      HB_ISIZ i = m - 1;

      /* Compare right to left, jumping over the remembered stretch. */
      for( ;; )
      {
         if( i < 0 || needle[ i ] != haystack[ i + pos ] )
            break;
         --i;
         if( remembered != 0 && i == m - 1 - shift )
            i -= remembered;
      }

      if( i < 0 )
      {
         /* First match only. To keep searching, one would instead set
            shift = bmGs[ 0 ] and remembered = m - shift and carry on. */
         result = pos + 1;
         break;
      }

      matched = m - 1 - i;   /* characters matched before the mismatch */
      turboShift = remembered - matched;
      bcShift = bmBc[ ( HB_UCHAR ) haystack[ i + pos ] ] - m + 1 + i;

      shift = HB_MAX( turboShift, bcShift );
      shift = HB_MAX( shift, bmGs[ i ] );

      if( shift == bmGs[ i ] )
         remembered = HB_MIN( m - shift, matched );
      else
      {
         if( turboShift < bcShift )
            shift = HB_MAX( shift, remembered + 1 );
         remembered = 0;
      }

      pos += shift;
   }

   hb_xfree( bmGs );

   return result;
}
/**
 * @brief String matching with the Boyer-Moore algorithm.
 *
 * Every offset in y at which the pattern starts is reported through
 * OUTPUT(); if nothing is reported, the pattern does not occur in y.
 *
 * Note: to avoid malloc, the good-suffix table can be kept on the stack
 * (a fixed int[] array, like the ASIZE-sized bad-character table, instead
 * of an int*), falling back to strstr() when m >= XSIZE. The "QDBM"
 * database project uses that approach.
 *
 * @param x pattern.
 * @param m pattern length.
 * @param y search string.
 * @param n search string length.
 */
void string_matching_with_bm_algorithm(const char* x, int m, const char* y, int n) {
    int i = 0, j = 0, bmBc[ASIZE] = {0}, *bmGs = NULL, XSIZE = m+1;

    /* An empty (or negative-length) pattern has no meaningful shift tables. */
    if (m <= 0) {
        return;
    }

    if (!(bmGs = (int*) malloc(sizeof(int)*XSIZE))) {
        fprintf(stderr, "malloc err!\n");
        return;
    }

    /* preprocessing */
    preBmGs(x, m, bmGs, XSIZE);
    preBmBc(x, m, bmBc);

    /* searching */
    while (j <= n-m) {
        for (i = m-1; i >= 0 && x[i] == y[j+i]; i --)
            ;
        if (i < 0) {
            OUTPUT(j);
            j += bmGs[0];
        } else {
            /* Index the bad-character table by unsigned byte value: plain
               char may be signed, and a byte >= 0x80 would otherwise give a
               negative index.
               NOTE(review): assumes preBmBc() fills the table by unsigned
               byte value -- confirm against its definition. */
            j += MAX(bmGs[i], bmBc[(unsigned char)y[j+i]]-((m-1)-i));
        }
    }

    free(bmGs);
}
void * boyer_moore_init( unsigned char *x, int m) { boyer_moore_data_t *bm = safe_malloc(sizeof(boyer_moore_data_t)); bm->x = safe_malloc(m * sizeof(*x)); memcpy(bm->x, x, m * sizeof(*x)); bm->m = m; bm->bmGs = safe_malloc(m * sizeof(int)); // Pre-process preBmGs(x, m, bm->bmGs); preBmBc(x, m, bm->bmBc); return (void *) bm; }
int search(unsigned char *x, int m, unsigned char *y, int n) { int j, bmBc[SIGMA], qsBc[SIGMA], count; /* Preprocessing */ BEGIN_PREPROCESSING preBmBc(x, m, bmBc); preQsBc(x, m, qsBc); END_PREPROCESSING count = 0; /* Searching */ BEGIN_SEARCHING j = 0; while (j<= n - m) { if (memcmp(x, y + j, m) == 0) OUTPUT(j); j += MAX(bmBc[y[j + m - 1]], qsBc[y[j + m]]); } END_SEARCHING return count; }
char* Ssmith2(char * textt,char *patt,int n, int m)//smith ╦сие { int j,bmBc[ASIZE],qsBc[ASIZE]; unsigned char * text,*pat; text = (unsigned char*)textt; pat = (unsigned char*)patt; if(*pat == '\0') { OUTPUT(0);return textt;}; /* preprocessing */ preBmBc((char*)pat,m,bmBc); preQsBc((char*)pat,m,qsBc); /* searching */ j=0; while (j<=n-m) { if (memcmp(pat,text+j,m)==0) OUTPUT(j); j+=MAX(bmBc[text[j+m-1]],qsBc[text[j+m]]); } SRET(j); }