/* * wm->patt[0]=NULL; wm->patt[n_pat+1]=NULL recommended, * wm->patt[1]...wm->patt[n_pat] are valid entries */ void wm_search_prep_pat(struct WuManber *wm, int n_pat, unsigned char **pat_p, int nocase) { #if HAVE_WORDBOUND_OR_WHOLELINE wm->match_word_boundaries = 0; #endif wm->nocase = nocase; wm->patt = pat_p-1; wm->n_pat = n_pat; wm->use_bs3 = 0; wm->use_bs1 = 0; # if HAVE_WORDBOUND_OR_WHOLELINE i = 0; p=1; while(i<length) { patt[p] = pat_ptr; if(wm->match_word_boundaries) *pat_ptr++ = W_DELIM; if(wm->match_whole_line) *pat_ptr++ = L_DELIM; while((*pat_ptr = buf[i++]) != '\n') pat_ptr++; if(wm->match_word_boundaries) *pat_ptr++ = W_DELIM; if(wm->match_whole_line) *pat_ptr++ = L_DELIM; /* Can't be both on */ *pat_ptr++ = 0; p++; } #endif unsigned Mask = 15; int i; for(i=0; i< N_SYMB; i++) wm->tr[i] = i; if(wm->nocase) { for(i='A'; i<= 'Z'; i++) wm->tr[i] = i + 'a' - 'A'; } #if HAVE_WORDBOUND_OR_WHOLELINE if(wm->match_word_boundaries) { for(i=0; i<128; i++) if(!isalnum(i)) wm->tr[i] = W_DELIM; } #endif for(i=0; i< N_SYMB; i++) wm->tr1[i] = wm->tr[i]&Mask; wm->pat_len = (unsigned int *)calloc(n_pat+2, sizeof(unsigned int)); wm->p_size = 255; // max that fits in shift_min[] entries. for(i=1 ; i <= wm->n_pat; i++) { int l = strlen((char *)wm->patt[i]); wm->pat_len[i] = l; if (l!=0 && l < wm->p_size) wm->p_size = l; } if (wm->p_size == 0) { fprintf(stderr, "%s: the pattern file contains an empty string\n", wm->progname); exit(2); } if (n_pat > 100 && wm->p_size > 2) wm->use_bs3 = 1; if (wm->p_size == 1) wm->use_bs1 = 1; for (i=0; i<SHIFT_SZ; i++) wm->shift_min[i] = wm->p_size - 2; for (i=0; i<PAT_HASH_SZ; i++) wm->pat_hash[i] = 0; for (i=1; i<= n_pat; i++) f_prep(wm, i, wm->patt[i]); }
int prepf(int fp, struct pattern_image **ppatt_img, size_t * patt_image_len) { int length = 0, i, p = 1, num_pat; struct pattern_image *patt_img; unsigned char *pat_ptr; unsigned Mask = 15; int num_read; *ppatt_img = (struct pattern_image *) malloc(sizeof (struct pattern_image)); patt_img = *ppatt_img; *patt_image_len = sizeof (*patt_img); bzero(patt_img, *patt_image_len); pat_ptr = patt_img->pat_spool; patt_img->LONG = 0; patt_img->SHORT = 0; patt_img->p_size = 0; while ((num_read = read(fp, patt_img->buf + length, BLOCKSIZE)) > 0) { length = length + num_read; if (length > MAXPATFILE) { errlog("maximum pattern file size is %d\n", MAXPATFILE); return -1; } } patt_img->buf[length] = '\n'; i = 0; p = 1; while (i < length) { patt_img->patt[p] = pat_ptr - patt_img->pat_spool; if (WORDBOUND) *pat_ptr++ = W_DELIM; if (WHOLELINE) *pat_ptr++ = L_DELIM; while ((*pat_ptr = patt_img->buf[i++]) != '\n') pat_ptr++; if (WORDBOUND) *pat_ptr++ = W_DELIM; if (WHOLELINE) *pat_ptr++ = L_DELIM; /* Can't be both on */ *pat_ptr++ = 0; p++; } if (p > max_num) { errlog("maximum number of patterns is %d\n", max_num); return -1; } for (i = 1; i < 20; i++) *pat_ptr = i; /* boundary safety zone */ for (i = 0; i < MAXSYM; i++) patt_img->tr[i] = i; if (NOUPPER) { for (i = 'A'; i <= 'Z'; i++) patt_img->tr[i] = i + 'a' - 'A'; } if (WORDBOUND) { for (i = 0; i < 128; i++) if (!isalnum(i)) patt_img->tr[i] = W_DELIM; } for (i = 0; i < MAXSYM; i++) patt_img->tr1[i] = patt_img->tr[i] & Mask; num_pat = p - 1; patt_img->p_size = MAXPAT; for (i = 1; i <= num_pat; i++) { p = strlen(patt_img->pat_spool + patt_img->patt[i]); patt_img->pat_len[i] = p; if (p != 0 && p < patt_img->p_size) patt_img->p_size = p; } if (patt_img->p_size == 0) { errlog("the pattern file is empty\n"); return -1; } if (length > 400 && patt_img->p_size > 2) patt_img->LONG = 1; if (patt_img->p_size == 1) patt_img->SHORT = 1; for (i = 0; i < MAXMEMBER1; i++) patt_img->SHIFT1[i] = patt_img->p_size - 2; for (i = 0; i < MAXHASH; i++) { patt_img->HASH[i] = 0; } for (i = 1; i <= num_pat; i++) f_prep(i, patt_img->pat_spool + patt_img->patt[i], patt_img); return 0; }