Example #1
0
/*
 * Boyer-Moore search for the first occurrence of wanted[0..wlen) inside
 * s[0..slen).
 *
 * Returns a pointer to the start of the match inside s, or NULL when:
 *   - s or wanted is NULL, wlen is negative, or the pattern is longer
 *     than the text;
 *   - preBmGs() or preBmBc() reports -1;
 *   - the pattern does not occur.
 *
 * The shift tables live on the stack and are sized by BM_XSIZE / BM_ASIZE.
 */
unsigned char *BM(const unsigned char *s, int slen, const unsigned char *wanted, int wlen)
{
    int bmBc[BM_ASIZE];
    int bmGs[BM_XSIZE];
    int last_start = slen - wlen;   /* right-most window position to try */
    int pos = 0;                    /* current window position in s */
    int k;                          /* pattern index, scanned right to left */

    if (s == NULL || wanted == NULL || wlen < 0 || wlen > slen) {
        return NULL;
    }

    /* Good-suffix table first, then bad-character table. */
    if (-1 == preBmGs(wanted, wlen, bmGs)) {
        return NULL;
    }
    if (-1 == preBmBc(wanted, wlen, bmBc)) {
        return NULL;
    }

    while (pos <= last_start) {
        k = wlen - 1;
        while (k >= 0 && wanted[k] == s[k + pos]) {
            k--;
        }

        if (k < 0) {
            /* Every pattern byte matched. */
            return (unsigned char *)(s + pos);
        }

        /* Mismatch at k: advance by the larger of the two heuristics. */
#if BM_VERSION == 1
        pos += MAX(bmGs[k], k - bmBc[s[k + pos]]);
#else
        pos += MAX(bmGs[k], bmBc[s[k + pos]] - wlen + 1 + k);
#endif
    }

    return NULL;
}
Example #2
0
/*
 * Initialise a Boyer-Moore search context for the pattern x[0..m).
 *
 * The whole structure is zeroed first, then a private copy of the pattern
 * is stored in bmp->saved_x (lower-cased byte by byte when icase is
 * non-zero) and the good-suffix (bmp->bmGs) and bad-character (bmp->bmBc)
 * tables are built from that copy.
 *
 * Returns:
 *    0  on success
 *   -1  if the good-suffix table could not be allocated
 *   -2  if the pattern copy could not be allocated
 *   -3  if preBmGs() reported an error
 *
 * On any failure everything allocated here has been released again and the
 * corresponding pointers are NULL, so the context owns no memory.
 */
int
bm_init(BM *bmp,
	unsigned char *x,
	int m,
	int icase)
{
    int i;


    /* sizeof(*bmp), not sizeof(bmp): clear the structure, not just
       pointer-size bytes of it. */
    memset(bmp, 0, sizeof(*bmp));

    bmp->icase = icase;
    bmp->bmGs = (int *) calloc(m, sizeof(int));
    if (bmp->bmGs == NULL)
	return -1;
    
    bmp->saved_m = m;
    bmp->saved_x = (unsigned char *) malloc(m);
    if (bmp->saved_x == NULL) {
	/* Do not leak the table allocated above. */
	free(bmp->bmGs);
	bmp->bmGs = NULL;
	return -2;
    }
    
    for (i = 0; i < m; i++)
	bmp->saved_x[i] = icase ? tolower(x[i]) : x[i];
    
    /* Preprocessing */
    if (preBmGs(bmp->saved_x, m, bmp->bmGs) < 0) {
	free(bmp->saved_x);
	bmp->saved_x = NULL;
	free(bmp->bmGs);
	bmp->bmGs = NULL;
	return -3;
    }
    
    preBmBc((unsigned char *) bmp->saved_x, m, bmp->bmBc);

    return 0;
}    
Example #3
0
// Boyer-Moore search of pattern P in text T (both NUL-terminated).
// Returns the number of matches recorded, or 0 when there are none or P is
// empty. The start offset of every match is stored in found[], and cmp_count
// is incremented once per character comparison.
// After a match the window advances by the full pattern length, so
// overlapping occurrences are not reported.
int bmsearch(char *T, char *P){
	int i, j, bmGs[PSIZE], bmBc[ASIZE];
	int m = strlen(P);
	int tLen = strlen(T);
	int ret = 0;

	// An empty pattern would "match" without ever advancing j.
	if(m == 0){
		return 0;
	}

	// Build the good-suffix and bad-character shift tables.
	preBmGs(P, bmGs);
	preBmBc(P, bmBc);

	j = 0;
	while(j <= tLen - m){				// j walks the text left to right
		for(i = m - 1; i >= 0; --i){	// i walks the pattern right to left
			cmp_count++;
			if(T[j + i] != P[i]){
				break;
			}
		}

		if(i == -1){	// whole pattern matched at offset j
			found[ret++] = j;
			j += m;
		}else{
			// The bad-character rule is keyed on the mismatching TEXT
			// character, not on the pattern character at that position.
			// NOTE(review): the unsigned char cast assumes preBmBc indexes
			// bmBc by unsigned byte value as well - confirm.
			int bcShift = bmBc[(unsigned char)T[j + i]] - m + 1 + i;
			j += (bmGs[i] > bcShift ? bmGs[i] : bcShift);
		}
	}
	
	return ret;
}
Example #4
0
// Boyer-Moore search for the first occurrence of a pattern in a buffer.
//   x - pattern bytes
//   m - number of bytes in x
//   y - buffer to search
//   n - number of bytes in y
// Returns the offset in y of the first match, or -1 if there is none.
// The good-suffix table is allocated with safe_malloc() and freed before
// returning on every path.
int
boyer_moore(
    unsigned char *x,
    int m,
    unsigned char *y,
    int n)
{
    int bmBc[ASIZE];
    int *bmGs = safe_malloc(m * sizeof(int));
    int result = -1;
    int pos = 0;
    int k;

    /* Build both shift tables. */
    preBmGs(x, m, bmGs);
    preBmBc(x, m, bmBc);

    /* Slide the window over y, comparing right to left. */
    while (pos <= n - m) {
        k = m - 1;
        while (k >= 0 && x[k] == y[k + pos]) {
            --k;
        }

        if (k < 0) {
            /* Only the first match is wanted. */
            result = pos;
            break;
        }

        pos += MAX(bmGs[k], bmBc[y[k + pos]] - m + 1 + k);
    }

    free(bmGs);
    return result;
}
Example #5
0
/*
 * Boyer-Moore search with a "turbo" shift refinement: finds the first
 * occurrence of needle[0..m) in haystack[0..n).
 *
 * Returns the 1-based position of the first match (window offset + 1), or 0
 * when the needle does not occur.
 *
 * The good-suffix table is allocated with hb_xgrab() and released with
 * hb_xfree() before returning; the bad-character table lives on the stack.
 *
 * NOTE(review): m is used directly as the allocation count and as the
 * initial shift - presumably callers guarantee m > 0; confirm.
 */
HB_ISIZ hb_strAtTBM( const char * needle, HB_ISIZ m, const char * haystack, HB_ISIZ n )
{
   HB_ISIZ r = 0;   /* result: stays 0 unless a match is found */
   HB_ISIZ bcShift, j, shift, u, v, turboShift;
   HB_ISIZ bmBc[ ASIZE ];
   HB_ISIZ * bmGs;

   bmGs = ( HB_ISIZ * ) hb_xgrab( m * sizeof( HB_ISIZ ) );

   /* Preprocessing: build the good-suffix and bad-character tables */
   preBmGs( needle, m, bmGs );
   preBmBc( needle, m, bmBc );

   /* Searching: j is the window offset, u the length of the segment
      remembered from the previous good-suffix shift (0 = nothing) */
   j = u = 0;
   shift = m;
   while( j <= n - m )
   {
      /* Compare the window right to left */
      HB_ISIZ i = m - 1;
      while( i >= 0 && needle[ i ] == haystack[ i + j ] )
      {
         --i;
         /* On reaching the remembered segment, jump over its u characters
            instead of comparing them again */
         if( u != 0 && i == m - 1 - shift )
            i -= u;
      }

      if( i < 0 )
      {
         /* Full match: report the 1-based position and stop */
         r = j + 1;
         break;
#if 0 /* To continue search */
         shift = bmGs[ 0 ];
         u = m - shift;
#endif
      }
      else
      {
         /* v = number of characters matched in this attempt */
         v = m - 1 - i;
         turboShift = u - v;
         /* Bad-character shift, keyed on the mismatching haystack byte */
         bcShift = bmBc[ ( HB_UCHAR ) haystack[ i + j ] ] - m + 1 + i;
         /* Take the largest of the turbo, bad-character and good-suffix shifts */
         shift = HB_MAX( turboShift, bcShift );
         shift = HB_MAX( shift, bmGs[ i ] );
         if( shift == bmGs[ i ] )
            /* Good-suffix shift chosen: remember a segment for the next attempt */
            u = HB_MIN( m - shift, v );
         else
         {
            /* Another shift chosen: when the bad-character shift beat the
               turbo shift, move at least u + 1; nothing is remembered */
            if( turboShift < bcShift )
               shift = HB_MAX( shift, u + 1 );
            u = 0;
         }
      }
      j += shift;
   }

   hb_xfree( bmGs );

   return r;
}
Example #6
0
/**
* @brief String matching with the Boyer-Moore algorithm.
* Every offset in y at which the pattern x occurs is reported through
* OUTPUT(); if the pattern does not occur, nothing is reported.
* Nothing is reported either when x or y is NULL, when m <= 0, or when the
* pattern is longer than the text.
* Note: to avoid malloc, the good-suffix table could be a fixed-size stack
* array (like the ASIZE-sized bad-character table), falling back to strstr()
* when m >= XSIZE. The original comment says this approach is used in the
* "QDBM" database code.
*
* @param x pattern.
* @param m pattern length.
* @param y search string.
* @param n search string length.
*/
void string_matching_with_bm_algorithm(const char* x, int m, 
    const char* y, int n) {
    int i = 0, j = 0, bmBc[ASIZE] = {0}, *bmGs = NULL, XSIZE = 0;
    // Reject inputs that would give a zero/negative table size or a window
    // that never advances.
    if (x == NULL || y == NULL || m <= 0 || n < m) { return; }
    XSIZE = m+1;
    if (!(bmGs = (int*) malloc(sizeof(int)*(size_t)XSIZE))) {
        fprintf(stderr, "malloc err!\n"); return;
    }
    preBmGs(x, m, bmGs, XSIZE); preBmBc(x, m, bmBc);        // preprocessing
    while (j <= n-m) {                                      // searching
        for (i = m-1; i >= 0 && x[i] == y[j+i]; i --) ;
        if (i < 0) { OUTPUT(j); j += bmGs[0]; }
        else {
            // Index by unsigned byte value: a plain char may be negative
            // for bytes >= 0x80 and would read before the start of bmBc.
            // NOTE(review): assumes preBmBc indexes bmBc the same way - confirm.
            j += MAX(bmGs[i], bmBc[(unsigned char)(y[j+i])]-((m-1)-i));
        }
    }
    free(bmGs);
}
Example #7
0
/*
 * Build a reusable Boyer-Moore context for the pattern x[0..m).
 *
 * The context gets its own copy of the pattern, the pattern length, and the
 * good-suffix and bad-character tables computed from x. All storage comes
 * from safe_malloc(). The context is returned as an opaque void pointer.
 */
void *
boyer_moore_init(
    unsigned char *x,
    int m)
{
    boyer_moore_data_t *ctx;

    ctx = safe_malloc(sizeof(boyer_moore_data_t));
    ctx->m = m;

    /* Private copy of the pattern. */
    ctx->x = safe_malloc(m * sizeof(*x));
    memcpy(ctx->x, x, m * sizeof(*x));

    /* Shift tables: good-suffix on the heap, bad-character inside ctx. */
    ctx->bmGs = safe_malloc(m * sizeof(int));
    preBmGs(x, m, ctx->bmGs);
    preBmBc(x, m, ctx->bmBc);

    return ctx;
}
Example #8
0
int search(unsigned char *x, int m, unsigned char *y, int n) {
   int j, bmBc[SIGMA], qsBc[SIGMA], count;

   /* Preprocessing */
   BEGIN_PREPROCESSING
   preBmBc(x, m, bmBc);
   preQsBc(x, m, qsBc);
   END_PREPROCESSING

   count = 0;
   /* Searching */
   BEGIN_SEARCHING
   j = 0;
   while (j<= n - m) {
      if (memcmp(x, y + j, m) == 0)
         OUTPUT(j);
      j += MAX(bmBc[y[j + m - 1]], qsBc[y[j + m]]);
   }
   END_SEARCHING
   return count;
}
Example #9
0
/*
 * Smith algorithm: search for pattern patt (m bytes) in text textt (n bytes).
 *
 * Both strings are scanned as unsigned bytes. An empty pattern is reported
 * through OUTPUT(0) and textt is returned immediately. Otherwise each window
 * is compared with memcmp(), matches are reported through OUTPUT(j), and the
 * window advances by the larger of bmBc[last byte of the window] and
 * qsBc[byte just after the window]. The function finishes through SRET(j).
 *
 * Note: at the last alignment (j == n - m) the lookup reads text[n], so that
 * byte must be readable (e.g. the terminating NUL of a C string).
 */
char*  Ssmith2(char * textt,char *patt,int n, int m)
{
	int bmBc[ASIZE], qsBc[ASIZE];
	int j;
	unsigned char *text = (unsigned char *)textt;
	unsigned char *pat = (unsigned char *)patt;

	if (pat[0] == '\0') {
		OUTPUT(0);
		return textt;
	}

	/* preprocessing */
	preBmBc((char *)pat, m, bmBc);
	preQsBc((char *)pat, m, qsBc);

	/* searching */
	for (j = 0; j <= n - m; ) {
		int lastShift, nextShift;

		if (memcmp(pat, text + j, m) == 0)
			OUTPUT(j);

		lastShift = bmBc[text[j + m - 1]];
		nextShift = qsBc[text[j + m]];
		j += MAX(lastShift, nextShift);
	}
	SRET(j);
}