/*
 * Find the first position in the subject at which the pattern matches.
 *
 * doh() is tried at every offset from 0 up to and including s_len (so an
 * attempt against the empty tail is made too).  The first non-negative
 * result wins.
 *
 *   s, s_len  subject buffer and its length
 *   info      pattern state; info->brackets[0].ptr is read to see whether
 *             the pattern begins with '^'
 *
 * Returns doh()'s result plus the offset at which it succeeded.  When no
 * attempt succeeds, the negative value from the last doh() call is
 * returned, or -1 if no attempt was made at all (s_len < 0).
 */
static int baz(const char *s, int s_len, struct regex_info *info) {
  const int anchored = (info->brackets[0].ptr[0] == '^');
  int outcome = -1;
  int offset = 0;

  while (offset <= s_len) {
    outcome = doh(s + offset, s_len - offset, info, 0);
    if (outcome >= 0) {
      return outcome + offset;
    }
    /* A pattern starting with '^' is only ever tried at offset 0. */
    if (anchored) {
      return outcome;
    }
    offset++;
  }

  return outcome;
}
static int foo(const char *re, int re_len, const char *s, int s_len, struct slre_cap *caps, struct regex_info *info) { int result, i, step, depth = 0; const char *stack[ARRAY_SIZE(info->brackets)]; stack[0] = re; /* First bracket captures everything */ info->brackets[0].ptr = re; info->brackets[0].len = re_len; info->num_brackets = 1; /* Make a single pass over regex string, memorize brackets and branches */ for (i = 0; i < re_len; i += step) { step = get_op_len(re + i, re_len - i); if (re[i] == '|') { FAIL_IF(info->num_branches >= ARRAY_SIZE(info->branches), "Too many |. Increase MAX_BRANCHES"); info->branches[info->num_branches].bracket_index = info->brackets[info->num_brackets - 1].len == -1 ? info->num_brackets - 1 : depth; info->branches[info->num_branches].schlong = &re[i]; info->num_branches++; } else if (re[i] == '(') { FAIL_IF(info->num_brackets >= ARRAY_SIZE(info->brackets), "Too many (. Increase MAX_BRACKETS"); depth++; /* Order is important here. Depth increments first. */ stack[depth] = &re[i]; info->brackets[info->num_brackets].ptr = re + i + 1; info->brackets[info->num_brackets].len = -1; info->num_brackets++; } else if (re[i] == ')') { int ind = info->brackets[info->num_brackets - 1].len == -1 ? info->num_brackets - 1 : depth; info->brackets[ind].len = &re[i] - info->brackets[ind].ptr; DBG(("SETTING BRACKET %d [%.*s]\n", ind, info->brackets[ind].len, info->brackets[ind].ptr)); depth--; FAIL_IF(depth < 0, static_error_unbalanced_brackets); FAIL_IF(i > 0 && re[i - 1] == '(', static_error_no_match); } } FAIL_IF(depth != 0, static_error_unbalanced_brackets); setup_branch_points(info); /* Scan the string from left to right, applying the regex. Stop on match. */ result = 0; for (i = 0; i < s_len; i++) { result = doh(s + i, s_len - i, caps, info, 0); DBG((" (iter %d) -> %d [%.*s] [%.*s] [%s]\n", i, result, re_len, re, s_len - i, s + i, info->error_msg)); if (result > 0 || re[0] == '^') { result += i; break; } } return result; }
static int bar(const char *re, int re_len, const char *s, int s_len, struct regex_info *info, int bi) { /* i is offset in re, j is offset in s, bi is brackets index */ int i, j, n, step; for (i = j = 0; i < re_len && j <= s_len; i += step) { /* Handle quantifiers. Get the length of the chunk. */ step = re[i] == '(' ? info->brackets[bi + 1].len + 2 : get_op_len(re + i, re_len - i); DBG(("%s [%.*s] [%.*s] re_len=%d step=%d i=%d j=%d\n", __func__, re_len - i, re + i, s_len - j, s + j, re_len, step, i, j)); FAIL_IF(is_quantifier(&re[i]), SLRE_UNEXPECTED_QUANTIFIER); FAIL_IF(step <= 0, SLRE_INVALID_CHARACTER_SET); if (i + step < re_len && is_quantifier(re + i + step)) { DBG(("QUANTIFIER: [%.*s]%c [%.*s]\n", step, re + i, re[i + step], s_len - j, s + j)); if (re[i + step] == '?') { int result = bar(re + i, step, s + j, s_len - j, info, bi); j += result > 0 ? result : 0; i++; } else if (re[i + step] == '+' || re[i + step] == '*') { int j2 = j, nj = j, n1, n2 = -1, ni, non_greedy = 0; /* Points to the regexp code after the quantifier */ ni = i + step + 1; if (ni < re_len && re[ni] == '?') { non_greedy = 1; ni++; } do { if ((n1 = bar(re + i, step, s + j2, s_len - j2, info, bi)) > 0) { j2 += n1; } if (re[i + step] == '+' && n1 < 0) break; if (ni >= re_len) { /* After quantifier, there is nothing */ nj = j2; } else if ((n2 = bar(re + ni, re_len - ni, s + j2, s_len - j2, info, bi)) >= 0) { /* Regex after quantifier matched */ nj = j2 + n2; } if (nj > j && non_greedy) break; } while (n1 > 0); if (n1 < 0 && re[i + step] == '*' && (n2 = bar(re + ni, re_len - ni, s + j, s_len - j, info, bi)) > 0) { nj = j + n2; } DBG(("STAR/PLUS END: %d %d %d %d %d\n", j, nj, re_len - ni, n1, n2)); FAIL_IF(re[i + step] == '+' && nj == j, SLRE_NO_MATCH); /* If while loop body above was not executed for the * quantifier, */ /* make sure the rest of the regex matches */ FAIL_IF(nj == j && ni < re_len && n2 < 0, SLRE_NO_MATCH); /* Returning here cause we've matched the rest of RE already */ return nj; 
} continue; } if (re[i] == '[') { n = match_set(re + i + 1, re_len - (i + 2), s + j, info); DBG(("SET %.*s [%.*s] -> %d\n", step, re + i, s_len - j, s + j, n)); FAIL_IF(n <= 0, SLRE_NO_MATCH); j += n; } else if (re[i] == '(') { n = SLRE_NO_MATCH; bi++; FAIL_IF(bi >= info->num_brackets, SLRE_INTERNAL_ERROR); DBG(("CAPTURING [%.*s] [%.*s] [%s]\n", step, re + i, s_len - j, s + j, re + i + step)); if (re_len - (i + step) <= 0) { /* Nothing follows brackets */ n = doh(s + j, s_len - j, info, bi); } else { int j2; for (j2 = 0; j2 <= s_len - j; j2++) { if ((n = doh(s + j, s_len - (j + j2), info, bi)) >= 0 && bar(re + i + step, re_len - (i + step), s + j + n, s_len - (j + n), info, bi) >= 0) break; } } DBG(("CAPTURED [%.*s] [%.*s]:%d\n", step, re + i, s_len - j, s + j, n)); FAIL_IF(n < 0, n); if (info->caps != NULL) { info->caps[bi - 1].ptr = s + j; info->caps[bi - 1].len = n; } j += n; } else if (re[i] == '^') { FAIL_IF(j != 0, SLRE_NO_MATCH); } else if (re[i] == '$') { FAIL_IF(j != s_len, SLRE_NO_MATCH); } else { FAIL_IF(j >= s_len, SLRE_NO_MATCH); n = match_op((unsigned char *) (re + i), (unsigned char *) (s + j), info); FAIL_IF(n <= 0, n); j += n; } } return j; }
static int bar(const char *re, int re_len, const char *s, int s_len, struct slre_cap *caps, struct regex_info *info, int bi) { /* i is offset in re, j is offset in s, bi is brackets index */ int i, j, n, step; DBG(("%s [%.*s] [%.*s]\n", __func__, re_len, re, s_len, s)); for (i = j = 0; i < re_len && j < s_len; i += step) { /* Handle quantifiers. Get the length of the chunk. */ step = re[i] == '(' ? info->brackets[bi + 1].len + 2 : get_op_len(re + i, re_len - i); DBG(("%s [%.*s] [%.*s] re_len=%d step=%d i=%d j=%d\n", __func__, re_len - i, re + i, s_len - j, s + j, re_len, step, i, j)); FAIL_IF(is_quantifier(&re[i]), static_error_unexpected_quantifier); FAIL_IF(step <= 0, static_error_invalid_set); if (i + step < re_len && is_quantifier(re + i + step)) { DBG(("QUANTIFIER: [%.*s] %c\n", step, re + i, re[i + step])); if (re[i + step] == '?') { j += bar(re + i, step, s + j, s_len - j, caps, info, bi); i++; continue; } else if (re[i + step] == '+' || re[i + step] == '*') { int j2 = j, nj = 0, n1, n2, ni, non_greedy = 0; /* Points to the regexp code after the quantifier */ ni = i + step + 1; if (ni < re_len && re[ni] == '?') { non_greedy = 1; ni++; } while ((n1 = bar(re + i, step, s + j2, s_len - j2, caps, info, bi)) > 0) { if (ni >= re_len) { /* After quantifier, there is nothing */ nj = j2 + n1; } else if ((n2 = bar(re + ni, re_len - ni, s + j2 + n1, s_len - (j2 + n1), caps, info, bi)) > 0) { nj = j2 + n1 + n2; } if (nj > 0 && non_greedy) break; j2 += n1; } FAIL_IF(re[i + step] == '+' && nj == 0, static_error_no_match); return nj; } } if (re[i] == '[') { n = match_set(re + i + 1, re_len - (i + 2), s + j, info); DBG(("SET %.*s [%.*s] -> %d\n", step, re + i, s_len - j, s + j, n)); FAIL_IF(n <= 0, static_error_no_match); j += n; } else if (re[i] == '(') { bi++; FAIL_IF(bi >= info->num_brackets, static_error_internal); DBG(("CAPTURING [%.*s] [%.*s]\n", step, re + i, s_len - j, s + j)); n = doh(s + j, s_len - j, caps, info, bi); DBG(("CAPTURED [%.*s] [%.*s]:%d\n", step, re + 
i, s_len - j, s + j, n)); FAIL_IF(n <= 0, info->error_msg); if (caps != NULL) { caps[bi - 1].ptr = s + j; caps[bi - 1].len = n; } j += n; } else if (re[i] == '^') { FAIL_IF(j != 0, static_error_no_match); } else { n = match_op((unsigned char *) (re + i), (unsigned char *) (s + j), info); FAIL_IF(n <= 0, info->error_msg); j += n; } } /* * Process $ anchor here. If we've reached the end of the string, * but did not exhaust regexp yet, this is no match. */ FAIL_IF(i < re_len && !(re[i] == '$' && i + 1 == re_len), static_error_no_match); return j; }