/* Search BUF (BUF_SIZE bytes) using the compiled regex set passed as
   COMPILED_PATTERN (a struct compiled_regex).

   When EXACT is false the buffer is scanned line by line (lines are
   delimited by the pattern's eolbyte).  Candidates are located with the
   kwset matcher when one is available, otherwise with the DFA, and are
   confirmed with the regex matcher only when needed.  On success the
   offset of the start of the matching line is returned and its length
   (including the end-of-line byte when one is present) is stored in
   *MATCH_SIZE.

   When EXACT is true the whole buffer, minus its final byte, is handed
   straight to the regex patterns; the offset of the match is returned
   and the length of the match itself is stored in *MATCH_SIZE.

   Returns (size_t) -1 when nothing matches; *MATCH_SIZE is then left
   untouched.  */
static size_t
EGexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
           size_t *match_size, bool exact)
{
  struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
  const char *buflim = buf + buf_size;
  const char *beg, *end;
  char eol = cregex->eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  size_t result = (size_t) -1;
  size_t i;
#ifdef MBS_SUPPORT
  /* Only allocated (and only consulted) when a kwset is in use in a
     multibyte locale; stays NULL otherwise so it can always be freed.  */
  char *mb_properties = NULL;

  if (MB_CUR_MAX > 1 && cregex->ckwset.kwset)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!exact)
        {
          if (cregex->ckwset.kwset)
            {
              /* Find a possible match using the KWset matcher.  */
              size_t offset = kwsexec (cregex->ckwset.kwset, beg,
                                       buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                goto done;
              beg += offset;

              /* Narrow down to the line containing the candidate, and
                 run it through DFA.  */
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
#ifdef MBS_SUPPORT
              /* A zero entry marks a candidate that must be skipped
                 (the sibling matcher treats it as a byte inside a
                 multibyte character); resume after this line.  */
              if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
                continue;
#endif /* MBS_SUPPORT */
              while (beg > buf && beg[-1] != eol)
                --beg;

              /* Keywords below this index need no further checking.  */
              if (kwsm.index < cregex->kwset_exact_matches)
                goto success;
              if (dfaexec (&cregex->dfa, beg, end - beg, &backref)
                  == (size_t) -1)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA.  */
              size_t offset = dfaexec (&cregex->dfa, beg, buflim - beg,
                                       &backref);
              if (offset == (size_t) -1)
                break;

              /* Narrow down to the line we've found.  */
              beg += offset;
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }

          /* Successful, no backreferences encountered!  */
          if (!backref)
            goto success;
        }
      else
        end = beg + buf_size;

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex.  */
      for (i = 0; i < cregex->pcount; i++)
        {
          cregex->patterns[i].regexbuf.not_eol = 0;
          start = re_search (&(cregex->patterns[i].regexbuf), beg,
                             end - beg - 1, 0, end - beg - 1,
                             &(cregex->patterns[i].regs));
          if (start < 0)
            continue;

          len = cregex->patterns[i].regs.end[0] - start;
          if (exact)
            {
              /* Leave through the common exit so that mb_properties is
                 released on this path as well.  */
              *match_size = len;
              result = start;
              goto done;
            }
          if ((!cregex->match_lines && !cregex->match_words)
              || (cregex->match_lines && len == end - beg - 1))
            goto success;

          /* If -w, check if the match aligns with word boundaries.
             We do this iteratively because:
             (a) the line may contain more than one occurrence of the
                 pattern, and
             (b) several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one
                 to find a word boundary.  */
          if (cregex->match_words)
            while (start >= 0)
              {
                if ((start == 0
                     || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1]))
                    && (len == end - beg - 1
                        || !IS_WORD_CONSTITUENT
                             ((unsigned char) beg[start + len])))
                  goto success;
                if (len > 0)
                  {
                    /* Try a shorter length anchored at the same place.  */
                    --len;
                    cregex->patterns[i].regexbuf.not_eol = 1;
                    len = re_match (&(cregex->patterns[i].regexbuf), beg,
                                    start + len, start,
                                    &(cregex->patterns[i].regs));
                  }
                if (len <= 0)
                  {
                    /* Try looking further on.  */
                    if (start == end - beg - 1)
                      break;
                    ++start;
                    cregex->patterns[i].regexbuf.not_eol = 0;
                    start = re_search (&(cregex->patterns[i].regexbuf), beg,
                                       end - beg - 1, start,
                                       end - beg - 1 - start,
                                       &(cregex->patterns[i].regs));
                    len = cregex->patterns[i].regs.end[0] - start;
                  }
              }
        } /* for Regex patterns.  */
    } /* for (beg = end ..) */

  /* Nothing matched.  */
  goto done;

 success:
  *match_size = end - beg;
  result = beg - buf;

 done:
#ifdef MBS_SUPPORT
  free (mb_properties);
#endif /* MBS_SUPPORT */
  return result;
}
/* Search BUF (BUF_SIZE bytes) for any of the fixed strings held in the
   kwset of COMPILED_PATTERN (a struct compiled_kwset).

   When EXACT is true, the offset of the first keyword occurrence is
   returned and the keyword length is stored in *MATCH_SIZE.

   Otherwise the occurrence is additionally required to span a whole
   line when match_lines is set, or to be delimited by non-word
   characters when match_words is set.  On success the offset of the
   start of the line containing the match is returned and the length of
   that line (including the end-of-line byte when one is present) is
   stored in *MATCH_SIZE.

   Returns (size_t) -1 when nothing matches; *MATCH_SIZE is then left
   untouched.  */
static size_t
Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
          size_t *match_size, bool exact)
{
  struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
  const char *buflim = buf + buf_size;
  const char *beg, *curr, *end;
  size_t len;
  char eol = ckwset->eolbyte;
  struct kwsmatch kwsmatch;
  size_t result = (size_t) -1;
#ifdef MBS_SUPPORT
  /* Stays NULL in single-byte locales so it can always be freed.  */
  char *mb_properties = NULL;

  if (MB_CUR_MAX > 1)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  for (beg = buf; beg <= buflim; ++beg)
    {
      size_t offset = kwsexec (ckwset->kwset, beg, buflim - beg, &kwsmatch);
      if (offset == (size_t) -1)
        goto done;
#ifdef MBS_SUPPORT
      if (MB_CUR_MAX > 1 && mb_properties[offset + beg - buf] == 0)
        continue; /* It is a part of multibyte character.  */
#endif /* MBS_SUPPORT */
      beg += offset;
      len = kwsmatch.size[0];

      if (exact)
        {
          *match_size = len;
          result = beg - buf;
          goto done;
        }

      if (ckwset->match_lines)
        {
          /* The keyword must be the entire line.  */
          if (beg > buf && beg[-1] != eol)
            continue;
          if (beg + len < buflim && beg[len] != eol)
            continue;
          goto success;
        }
      else if (ckwset->match_words)
        {
          for (curr = beg; len; )
            {
              if (curr > buf
                  && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
                break;
              if (curr + len < buflim
                  && IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
                {
                  /* The match runs into a word character; look for a
                     shorter keyword inside the same span.  */
                  offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
                  if (offset == (size_t) -1)
                    /* No shorter keyword here.  Keep scanning the rest
                       of the buffer instead of reporting failure for
                       all of it: a later line may still match.  */
                    break;
                  curr = beg + offset;
                  len = kwsmatch.size[0];
                }
              else
                goto success;
            }
        }
      else
        goto success;
    }

  /* Nothing matched.  */
  goto done;

 success:
  /* Widen the match to the enclosing line.  The buffer is not assumed
     to end with an end-of-line byte: fall back to the buffer limit
     when none follows the match.  */
  end = memchr (beg + len, eol, buflim - (beg + len));
  if (end != NULL)
    end++;
  else
    end = buflim;
  while (buf < beg && beg[-1] != eol)
    --beg;
  *match_size = end - beg;
  result = beg - buf;

 done:
#ifdef MBS_SUPPORT
  free (mb_properties);
#endif /* MBS_SUPPORT */
  return result;
}