int evaluate_fast_regex( struct fast_regex * fre_t, char * str, size_t len ) { char * sub ; struct _fregex * fre = (struct _fregex *)( fre_t->data ) ; if( fre->kwset ) { struct kwsmatch kwsm ; sub = kwsexec( fre->kwset, (char *)str, len, &kwsm) ; if( sub == NULL ) return 0 ; if( kwsm.index < fre->num_exact_kws ) { return 1 ; } } if( HAS_DFA(fre_t->options) ) { int backref = 0 ; sub = dfaexec( &(fre->dfa), str, (str+len), 0, NULL, &backref) ; if( sub == NULL ) return 0 ; if ( !backref || (fre_t->options & FRE_NO_REGEX) ) return 1 ; } return re_match( &fre->regex , str, len, 0, NULL ) > 0 ; }
static int fixmatch(struct grep_pat *p, char *line, char *eol, regmatch_t *match) { struct kwsmatch kwsm; size_t offset = kwsexec(p->kws, line, eol - line, &kwsm); if (offset == -1) { match->rm_so = match->rm_eo = -1; return REG_NOMATCH; } else { match->rm_so = offset; match->rm_eo = match->rm_so + kwsm.size[0]; return 0; } }
/*
 * Count occurrences of a pattern in the buffer described by MF.
 *
 * If REGEXP is non-NULL the buffer is scanned with regexec(); this path
 * asserts that the byte just past the buffer (mf->ptr[mf->size]) is NUL,
 * because regexec() works on NUL-terminated strings.  Otherwise the
 * keyword set KWS is used for exact string matching over mf->size bytes.
 *
 * O is not used by this function.
 *
 * Returns the number of non-overlapping matches found.
 */
static unsigned int contains(mmfile_t *mf, struct diff_options *o,
			     regex_t *regexp, kwset_t kws)
{
	unsigned int cnt = 0;
	unsigned long sz = mf->size;
	const char *data = mf->ptr;

	if (regexp) {
		regmatch_t regmatch;
		int flags = 0;

		assert(data[sz] == '\0');
		while (*data && !regexec(regexp, data, 1, &regmatch, flags)) {
			/* Past the first match we are no longer at line start. */
			flags |= REG_NOTBOL;
			data += regmatch.rm_eo;
			/* Step over one byte after an empty match to guarantee progress. */
			if (*data && regmatch.rm_so == regmatch.rm_eo)
				data++;
			cnt++;
		}
	} else { /* Classic exact string match */
		while (sz) {
			struct kwsmatch kwsm;
			size_t offset = kwsexec(kws, data, sz, &kwsm);

			if (offset == (size_t)-1)
				break;
			/* Resume right after the keyword that was just found. */
			sz -= offset + kwsm.size[0];
			data += offset + kwsm.size[0];
			cnt++;
		}
	}
	return cnt;
}
/*
 * Count occurrences of a pattern in the buffer described by MF.
 *
 * If REGEXP is non-NULL the buffer is scanned with regexec_buf(), which
 * is handed the remaining byte count on every call; scanning also stops
 * at the first NUL byte.  Otherwise the keyword set KWS is used for
 * exact string matching over mf->size bytes.
 *
 * Returns the number of non-overlapping matches found.
 */
static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
{
	unsigned int cnt = 0;
	unsigned long sz = mf->size;
	const char *data = mf->ptr;

	if (regexp) {
		regmatch_t regmatch;
		int flags = 0;

		while (sz && *data &&
		       !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
			/* Past the first match we are no longer at line start. */
			flags |= REG_NOTBOL;
			data += regmatch.rm_eo;
			sz -= regmatch.rm_eo;
			/* Step over one byte after an empty match to guarantee progress. */
			if (sz && *data && regmatch.rm_so == regmatch.rm_eo) {
				data++;
				sz--;
			}
			cnt++;
		}
	} else { /* Classic exact string match */
		while (sz) {
			struct kwsmatch kwsm;
			size_t offset = kwsexec(kws, data, sz, &kwsm);

			if (offset == (size_t)-1)
				break;
			/* Resume right after the keyword that was just found. */
			sz -= offset + kwsm.size[0];
			data += offset + kwsm.size[0];
			cnt++;
		}
	}
	return cnt;
}
/* Search the SIZE bytes at BUF for text matching the compiled patterns
   (file-level state: kwset, dfa, patterns[], pcount, match_* flags).

   START_PTR selects the mode:
     - NULL: find the first line containing a match.  On success the
       return value is the offset of the start of that line and
       *MATCH_SIZE is the line length (END - BEG, where END is one past
       the line's EOL byte or the buffer limit).
     - non-NULL: search from START_PTR to the end of the buffer for the
       leftmost, then longest, match across all patterns; the return
       value is the offset of the match and *MATCH_SIZE its length.

   Returns (size_t) -1 when nothing matches.  Calls xalloc_die () when
   re_search/re_match report a value below -1, or when the region is
   longer than regoff_t can express.

   In a multibyte locale with match_icase set, the search runs on the
   buffer returned by mbtolower (); the resulting offset and length are
   passed through mb_case_map_apply () before being reported --
   presumably to translate them back to the caller's buffer (confirm
   against that helper).  */
size_t
EGexecute (char const *buf, size_t size, size_t *match_size,
           char const *start_ptr)
{
  char const *buflim, *beg, *end, *match, *best_match, *mb_start;
  char eol = eolbyte;
  int backref;
  regoff_t start;
  size_t len, best_len;
  struct kwsmatch kwsm;
  size_t i, ret_val;
  mb_len_map_t *map = NULL;

  if (MB_CUR_MAX > 1)
    {
      if (match_icase)
        {
          /* mbtolower adds a NUL byte at the end.  That will provide
             space for the sentinel byte dfaexec may add.  */
          char *case_buf = mbtolower (buf, &size, &map);
          /* Keep START_PTR at the same offset inside the new buffer.  */
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
          buf = case_buf;
        }
    }

  mb_start = buf;
  buflim = buf + size;

  /* Each iteration examines one candidate region [BEG, END) and then
     resumes at END.  */
  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!start_ptr)
        {
          /* We don't care about an exact match.  */
          if (kwset)
            {
              /* Find a possible match using the KWset matcher.  */
              size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                goto failure;
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA.  */
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              /* Remember where the keyword itself was found before BEG
                 is moved back to the start of its line.  */
              match = beg;
              while (beg > buf && beg[-1] != eol)
                --beg;
              if (kwsm.index < kwset_exact_matches)
                {
                  /* The keyword alone decides the match, unless
                     is_mb_middle () rejects the hit in a multibyte
                     locale.  */
                  if (!MBS_SUPPORT)
                    goto success;
                  if (mb_start < beg)
                    mb_start = beg;
                  if (MB_CUR_MAX == 1
                      || !is_mb_middle (&mb_start, match, buflim,
                                        kwsm.size[0]))
                    goto success;
                }
              if (dfaexec (dfa, beg, (char *) end, 0, NULL, &backref) == NULL)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA.  */
              char const *next_beg = dfaexec (dfa, beg, (char *) buflim,
                                              0, NULL, &backref);
              /* If there's no match, or if we've matched the sentinel,
                 we're done.  */
              if (next_beg == NULL || next_beg == buflim)
                break;
              /* Narrow down to the line we've found.  */
              beg = next_beg;
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }

          /* Successful, no backreferences encountered!  */
          if (!backref)
            goto success;
        }
      else
        {
          /* We are looking for the leftmost (then longest) exact match.
             We will go through the outer loop only once.  */
          beg = start_ptr;
          end = buflim;
        }

      /* If the "line" is longer than the maximum regexp offset,
         die as if we've run out of memory.  */
      if (TYPE_MAXIMUM (regoff_t) < end - buf - 1)
        xalloc_die ();

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex.  */
      /* BEST_MATCH == END means "nothing found yet" for this region.  */
      best_match = end;
      best_len = 0;
      for (i = 0; i < pcount; i++)
        {
          patterns[i].regexbuf.not_eol = 0;
          /* The searched text stops at END - 1, i.e. the last byte of
             the region is left out of the regex search.  */
          start = re_search (&(patterns[i].regexbuf),
                             buf, end - buf - 1,
                             beg - buf, end - beg - 1,
                             &(patterns[i].regs));
          if (start < -1)
            xalloc_die ();
          else if (0 <= start)
            {
              len = patterns[i].regs.end[0] - start;
              match = buf + start;
              /* A match to the right of the best one so far cannot
                 win; try the next pattern.  */
              if (match > best_match)
                continue;
              if (start_ptr && !match_words)
                goto assess_pattern_match;
              if ((!match_lines && !match_words)
                  || (match_lines && len == end - beg - 1))
                {
                  /* Report the whole region rather than the submatch.  */
                  match = beg;
                  len = end - beg;
                  goto assess_pattern_match;
                }
              /* If -w, check if the match aligns with word boundaries.
                 We do this iteratively because:
                 (a) the line may contain more than one occurrence of the
                 pattern, and
                 (b) Several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one to
                 find a word boundary.  */
              if (match_words)
                while (match <= best_match)
                  {
                    regoff_t shorter_len = 0;
                    /* Accept when neither neighbour of the match is a
                       WCHAR byte (or the match touches the search
                       limits).  */
                    if ((match == buf || !WCHAR ((unsigned char) match[-1]))
                        && (start + len == end - buf - 1
                            || !WCHAR ((unsigned char) match[len])))
                      goto assess_pattern_match;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place.  */
                        --len;
                        patterns[i].regexbuf.not_eol = 1;
                        shorter_len = re_match (&(patterns[i].regexbuf),
                                                buf, match + len - beg,
                                                match - buf,
                                                &(patterns[i].regs));
                        if (shorter_len < -1)
                          xalloc_die ();
                      }
                    if (0 < shorter_len)
                      len = shorter_len;
                    else
                      {
                        /* Try looking further on.  */
                        if (match == end - 1)
                          break;
                        match++;
                        patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(patterns[i].regexbuf),
                                           buf, end - buf - 1,
                                           match - buf, end - match - 1,
                                           &(patterns[i].regs));
                        if (start < 0)
                          {
                            if (start < -1)
                              xalloc_die ();
                            break;
                          }
                        len = patterns[i].regs.end[0] - start;
                        match = buf + start;
                      }
                  } /* while (match <= best_match) */
              /* No acceptable match for this pattern; try the next.  */
              continue;
            assess_pattern_match:
              if (!start_ptr)
                {
                  /* Good enough for a non-exact match.
                     No need to look at further patterns, if any.  */
                  goto success;
                }
              if (match < best_match || (match == best_match && len > best_len))
                {
                  /* Best exact match:  leftmost, then longest.  */
                  best_match = match;
                  best_len = len;
                }
            } /* if re_search >= 0 */
        } /* for Regex patterns.  */
      if (best_match < end)
        {
          /* We have found an exact match.  We were just
             waiting for the best one (leftmost then longest).  */
          beg = best_match;
          len = best_len;
          goto success_in_len;
        }
    } /* for (beg = end ..) */

 failure:
  /* Stored into a size_t, so callers see (size_t) -1.  */
  ret_val = -1;
  goto out;

 success:
  /* Line mode: report the whole region [BEG, END).  */
  len = end - beg;
 success_in_len:;
  size_t off = beg - buf;
  /* MAP is still NULL unless mbtolower () filled it in above.  */
  mb_case_map_apply (map, &off, &len);
  *match_size = len;
  ret_val = off;
 out:
  return ret_val;
}
/* Search the BUF_SIZE bytes at BUF using the struct compiled_regex that
   COMPILED_PATTERN points to.

   With EXACT false, the function looks for the first line containing a
   match: candidates are located with the keyword set (if any) or the
   DFA, narrowed to the enclosing line, and confirmed with re_search ()
   only when needed.  On success it returns the offset of the start of
   the line and stores the line length in *MATCH_SIZE.

   With EXACT true, the whole buffer is handed to re_search () once per
   pattern; on the first hit the function returns the match offset and
   stores the match length in *MATCH_SIZE.

   Returns (size_t) -1 when nothing matches.

   The multibyte property table (built only when MBS_SUPPORT is defined,
   MB_CUR_MAX > 1 and a keyword set exists) is released on every exit
   path, including the EXACT early return.  */
static size_t
EGexecute (const void *compiled_pattern,
           const char *buf, size_t buf_size,
           size_t *match_size, bool exact)
{
  struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
  const char *buflim, *beg, *end;
  char eol = cregex->eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  size_t i;
  size_t result = (size_t) -1;
#ifdef MBS_SUPPORT
  char *mb_properties = NULL;

  if (MB_CUR_MAX > 1 && cregex->ckwset.kwset)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  buflim = buf + buf_size;

  /* Each iteration examines one candidate region [BEG, END) and then
     resumes at END.  */
  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!exact)
        {
          if (cregex->ckwset.kwset)
            {
              /* Find a possible match using the KWset matcher.  */
              size_t offset = kwsexec (cregex->ckwset.kwset, beg,
                                       buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                goto done;
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA.  */
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
#ifdef MBS_SUPPORT
              /* A zero entry in the property table rejects this hit;
                 resume after the current line.  */
              if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
                continue;
#endif
              while (beg > buf && beg[-1] != eol)
                --beg;
              if (kwsm.index < cregex->kwset_exact_matches)
                goto success;
              if (dfaexec (&cregex->dfa, beg, end - beg, &backref)
                  == (size_t) -1)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA.  */
              size_t offset = dfaexec (&cregex->dfa, beg, buflim - beg,
                                       &backref);
              if (offset == (size_t) -1)
                break;
              /* Narrow down to the line we've found.  */
              beg += offset;
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }
          /* Successful, no backreferences encountered!  */
          if (!backref)
            goto success;
        }
      else
        end = beg + buf_size;

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex.  */
      for (i = 0; i < cregex->pcount; i++)
        {
          cregex->patterns[i].regexbuf.not_eol = 0;
          start = re_search (&(cregex->patterns[i].regexbuf), beg,
                             end - beg - 1, 0, end - beg - 1,
                             &(cregex->patterns[i].regs));
          if (start < 0)
            continue;

          len = cregex->patterns[i].regs.end[0] - start;
          if (exact)
            {
              /* Go through the common exit so mb_properties is freed;
                 returning directly here used to leak it.  */
              *match_size = len;
              result = start;
              goto done;
            }
          if ((!cregex->match_lines && !cregex->match_words)
              || (cregex->match_lines && len == end - beg - 1))
            goto success;
          /* If -w, check if the match aligns with word boundaries.
             We do this iteratively because:
             (a) the line may contain more than one occurrence of the
             pattern, and
             (b) Several alternatives in the pattern might be valid at a
             given point, and we may need to consider a shorter one to
             find a word boundary.  */
          if (cregex->match_words)
            while (start >= 0)
              {
                if ((start == 0
                     || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1]))
                    && (len == end - beg - 1
                        || !IS_WORD_CONSTITUENT ((unsigned char)
                                                 beg[start + len])))
                  goto success;
                if (len > 0)
                  {
                    /* Try a shorter length anchored at the same place.  */
                    --len;
                    cregex->patterns[i].regexbuf.not_eol = 1;
                    len = re_match (&(cregex->patterns[i].regexbuf), beg,
                                    start + len, start,
                                    &(cregex->patterns[i].regs));
                  }
                if (len <= 0)
                  {
                    /* Try looking further on.  */
                    if (start == end - beg - 1)
                      break;
                    ++start;
                    cregex->patterns[i].regexbuf.not_eol = 0;
                    start = re_search (&(cregex->patterns[i].regexbuf), beg,
                                       end - beg - 1, start,
                                       end - beg - 1 - start,
                                       &(cregex->patterns[i].regs));
                    len = cregex->patterns[i].regs.end[0] - start;
                  }
              }
        } /* for Regex patterns.  */
    } /* for (beg = end ..) */

  /* Nothing matched; RESULT still holds (size_t) -1.  */
  goto done;

 success:
  *match_size = end - beg;
  result = beg - buf;

 done:
#ifdef MBS_SUPPORT
  /* free (NULL) is a no-op, so no guard is needed.  */
  free (mb_properties);
#endif /* MBS_SUPPORT */
  return result;
}
/* Fixed-string search of the BUF_SIZE bytes at BUF using the struct
   compiled_kwset that COMPILED_PATTERN points to.

   With EXACT true, the first keyword hit is reported directly: the
   return value is its offset and *MATCH_SIZE its length.

   With EXACT false, a hit must additionally satisfy match_lines (the
   keyword is delimited by EOL bytes or the buffer edges) or match_words
   (no IS_WORD_CONSTITUENT byte on either side), when those options are
   set.  On success the return value is the offset of the start of the
   line containing the hit and *MATCH_SIZE is the length of that line,
   including its EOL byte if present.

   Returns (size_t) -1 when nothing matches.  */
static size_t
Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
          size_t *match_size, bool exact)
{
  struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
  const char *buflim = buf + buf_size;
  const char *beg, *curr, *end;
  size_t len;
  size_t result = (size_t) -1;
  char eol = ckwset->eolbyte;
  struct kwsmatch kwsmatch;
#ifdef MBS_SUPPORT
  char *mb_properties = NULL;

  if (MB_CUR_MAX > 1)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  for (beg = buf; beg <= buflim; ++beg)
    {
      size_t offset = kwsexec (ckwset->kwset, beg, buflim - beg, &kwsmatch);
      if (offset == (size_t) -1)
        goto done;
#ifdef MBS_SUPPORT
      if (MB_CUR_MAX > 1 && mb_properties[offset + beg - buf] == 0)
        continue; /* It is a part of multibyte character.  */
#endif /* MBS_SUPPORT */
      beg += offset;
      len = kwsmatch.size[0];
      if (exact)
        {
          *match_size = len;
          result = beg - buf;
          goto done;
        }
      if (ckwset->match_lines)
        {
          if (beg > buf && beg[-1] != eol)
            continue;
          if (beg + len < buflim && beg[len] != eol)
            continue;
          goto success;
        }
      else if (ckwset->match_words)
        {
          for (curr = beg; len; )
            {
              if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
                break;
              if (curr + len < buflim
                  && IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
                {
                  /* A word byte follows: look for a shorter keyword
                     starting in the same place.  */
                  offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
                  if (offset == (size_t) -1)
                    {
                      /* Nothing shorter fits here.  Give up on this
                         position only and let the outer loop keep
                         scanning; returning failure here would hide
                         later matches (e.g. "foo" in "foobar foo").  */
                      break;
                    }
                  curr = beg + offset;
                  len = kwsmatch.size[0];
                }
              else
                goto success;
            }
        }
      else
        goto success;
    }

  /* Scanned the whole buffer without success.  */
  goto done;

 success:
  /* Widen the hit to its whole line.  The final line may lack an EOL
     byte, in which case the line ends at the buffer limit.  */
  end = memchr (beg + len, eol, buflim - (beg + len));
  if (end != NULL)
    end++;
  else
    end = buflim;
  while (buf < beg && beg[-1] != eol)
    --beg;
  *match_size = end - beg;
  result = beg - buf;

 done:
#ifdef MBS_SUPPORT
  /* free (NULL) is a no-op, so no guard is needed.  */
  free (mb_properties);
#endif /* MBS_SUPPORT */
  return result;
}