/*
 * Compares the compiled literal in fg against the input at data, walking
 * from the last pattern position down to the first.  The wide pattern,
 * length and escape map are used when type is STR_WIDE; the byte-oriented
 * ones are used for every other type.
 *
 * Returns:	REG_OK when every position matches
 *		-(i + 1) when position i is the first (from the end) that
 *		does not match
 */
static inline int
fastcmp(const fastmatch_t *fg, const void *data, tre_str_type_t type)
{
  const bool wide = (type == STR_WIDE);
  const char *text_b = data;
  const tre_char_t *text_w = data;
  const char *lit_b = fg->pattern;
  const tre_char_t *lit_w = fg->wpattern;
  const bool *esc;
  size_t count;
  int idx;

  if (wide)
    {
      esc = fg->wescmap;
      count = fg->wlen;
    }
  else
    {
      esc = fg->escmap;
      count = fg->len;
    }

  for (idx = count - 1; idx >= 0; idx--)
    {
      bool same;

      if (wide)
        {
          /* An unescaped dot accepts anything, except a newline when
             fg->newline is set. */
          if (fg->hasdot && lit_w[idx] == TRE_CHAR('.')
              && (!esc || !esc[idx])
              && (!fg->newline || (text_w[idx] != TRE_CHAR('\n'))))
            continue;

          if (fg->icase)
            same = (towlower(lit_w[idx]) == towlower(text_w[idx]));
          else
            same = (lit_w[idx] == text_w[idx]);
        }
      else
        {
          /* Same dot rule for the byte-oriented pattern. */
          if (fg->hasdot && lit_b[idx] == '.'
              && (!esc || !esc[idx])
              && (!fg->newline || (text_b[idx] != '\n')))
            continue;

          if (fg->icase)
            same = (tolower((unsigned char)lit_b[idx])
                    == tolower((unsigned char)text_b[idx]));
          else
            same = (lit_b[idx] == text_b[idx]);
        }

      if (same)
        continue;

      /* First mismatch ends the comparison; encode its position. */
      DPRINT(("fastcmp: mismatch at position %d\n", idx));
      return -(idx + 1);
    }

  return REG_OK;
}
/*
 * Tries to compile the n-character pattern pat into fg as a literal for
 * the fast matcher.  A leading '^' and an unescaped trailing '$' become
 * the fg->bol / fg->eol flags, escapes are stripped, and unescaped dots
 * are kept as wildcards.  Any construct this routine cannot express makes
 * it give up with REG_BADPAT.
 *
 * Returns:	REG_OK on success
 *		REG_BADPAT when the pattern is not usable here
 *		REG_ESPACE when an allocation fails
 *
 * NOTE(review): INIT_COMP, CHECK_MATCHALL, SAVE_PATTERN, STORE_MBS_PAT and
 * the FILL_* macros are defined elsewhere and may refer to the locals
 * below by name, so the local names are kept as they are.
 */
int
tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
                 int cflags)
{
  tre_char_t *tmp;
  size_t pos = 0, hasdot = 0, whasdot = 0;
  ssize_t firstdot = -1, wfirstdot = -1;
  bool escaped = false;
  bool *_escmap = NULL;

  INIT_COMP;

  /* Remove beginning-of-line character ('^').  The length is checked
     first so that an empty pattern can never make n wrap around. */
  if ((n > 0) && (pat[0] == TRE_CHAR('^')))
    {
      fg->bol = true;
      n--;
      pat++;
    }

  CHECK_MATCHALL(false);

  /* Handle word-boundary matching when GNU extensions are enabled */
  if ((cflags & REG_GNU) && (n >= 14) &&
      (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
      (memcmp(pat + n - 7, TRE_CHAR("[[:>:]]"),
              7 * sizeof(tre_char_t)) == 0))
    {
      n -= 14;
      pat += 7;
      fg->word = true;
    }

  /* Cannot handle word boundaries with MB string */
  if (fg->word && (TRE_MB_CUR_MAX > 1))
    return REG_BADPAT;

  tmp = xmalloc((n + 1) * sizeof(tre_char_t));
  if (tmp == NULL)
    return REG_ESPACE;

/*
 * Copies the char into the stored pattern and clears the escape state.
 * The macro does not leave the enclosing loop iteration by itself (a
 * `continue' inside the do/while would only end the do/while), so every
 * switch case below ends with its own `continue'.
 */
#define STORE_CHAR                                                      \
  do                                                                    \
    {                                                                   \
      tmp[pos++] = pat[i];                                              \
      escaped = false;                                                  \
    } while (0)

  /* Traverse the input pattern for processing */
  for (unsigned int i = 0; i < n; i++)
    {
      /* NOTE(review): the XOR tests below combine (cflags & REG_EXTENDED)
         with a bool, which presumably relies on REG_EXTENDED being 1 --
         confirm against the header. */
      switch (pat[i])
        {
          case TRE_CHAR('\\'):
            if (escaped)
              STORE_CHAR;
            else if (i == n - 1)
              goto badpat;
            else
              escaped = true;
            continue;
          case TRE_CHAR('['):
            if (escaped)
              STORE_CHAR;
            else
              goto badpat;
            continue;
          case TRE_CHAR('*'):
            if (escaped || (!(cflags & REG_EXTENDED) && (i == 0)))
              STORE_CHAR;
            else
              goto badpat;
            continue;
          case TRE_CHAR('+'):
          case TRE_CHAR('?'):
            if ((cflags & REG_EXTENDED) && (i == 0))
              continue;
            else if ((cflags & REG_EXTENDED) ^ !escaped)
              STORE_CHAR;
            else
              goto badpat;
            continue;
          case TRE_CHAR('.'):
            if (escaped)
              {
                if (!_escmap)
                  {
                    _escmap = xmalloc(n * sizeof(bool));
                    if (!_escmap)
                      {
                        xfree(tmp);
                        return REG_ESPACE;
                      }
                    /* Every entry is read by the matcher, so none may be
                       left indeterminate. */
                    memset(_escmap, 0, n * sizeof(bool));
                  }
                /* The map is indexed by position in the stored literal,
                   which lags behind i once an escape has been dropped. */
                _escmap[pos] = true;
                STORE_CHAR;
              }
            else
              {
                /* Dot positions are later compared against fg->wlen, so
                   they are recorded in stored-literal coordinates too. */
                whasdot = pos;
                if (wfirstdot == -1)
                  wfirstdot = pos;
                STORE_CHAR;
              }
            continue;
          case TRE_CHAR('^'):
            STORE_CHAR;
            continue;
          case TRE_CHAR('$'):
            if (!escaped && (i == n - 1))
              fg->eol = true;
            else
              STORE_CHAR;
            continue;
          case TRE_CHAR('('):
            if ((cflags & REG_EXTENDED) ^ escaped)
              goto badpat;
            else
              STORE_CHAR;
            continue;
          case TRE_CHAR('{'):
            if (!(cflags & REG_EXTENDED) ^ escaped)
              STORE_CHAR;
            else if (!(cflags & REG_EXTENDED) && (i == 0))
              STORE_CHAR;
            else if ((cflags & REG_EXTENDED) && (i == 0))
              continue;
            else
              goto badpat;
            continue;
          case TRE_CHAR('|'):
            if ((cflags & REG_EXTENDED) ^ escaped)
              goto badpat;
            else
              STORE_CHAR;
            continue;
          default:
            if (escaped)
              goto badpat;
            else
              STORE_CHAR;
            continue;
        }
      continue;
badpat:
      /* Release the escape map as well; it may already have been
         allocated by an earlier escaped dot. */
      if (_escmap != NULL)
        xfree(_escmap);
      xfree(tmp);
      DPRINT(("tre_compile_fast: compilation of pattern failed, falling"
              "back to NFA\n"));
      return REG_BADPAT;
    }

  fg->hasdot = wfirstdot > -1;

  /*
   * The pattern has been processed and copied to tmp as a literal string
   * with escapes, anchors (^$) and the word boundary match character
   * classes stripped out.
   */
#ifdef TRE_WCHAR
  SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen);
  fg->wescmap = _escmap;
  STORE_MBS_PAT;

  /*
   * The position of dots and escaped dots is different in the MB string
   * than in the wide string, so traverse the converted string as well to
   * store these positions.
   */
  if (fg->hasdot || (fg->wescmap != NULL))
    {
      size_t wi = 0;    /* cursor into the wide literal tmp[0..pos) */

      if (fg->wescmap != NULL)
        {
          fg->escmap = xmalloc(fg->len * sizeof(bool));
          if (!fg->escmap)
            {
              xfree(tmp);
              tre_free_fast(fg);
              return REG_ESPACE;
            }
          memset(fg->escmap, 0, fg->len * sizeof(bool));
        }

      /*
       * The escapes were already stripped, so the converted string holds
       * no backslash that marks an escaped dot.  Instead, pair the k-th
       * '.' byte with the k-th wide dot in tmp and take its escape state
       * from _escmap.
       * NOTE(review): assumes a '.' byte in the converted string always
       * comes from a wide '.' -- confirm for the supported encodings.
       */
      for (unsigned int i = 0; i < fg->len; i++)
        {
          if (fg->pattern[i] != '.')
            continue;

          while ((wi < pos) && (tmp[wi] != TRE_CHAR('.')))
            wi++;

          if ((wi < pos) && (_escmap != NULL) && _escmap[wi])
            fg->escmap[i] = true;
          else
            {
              hasdot = i;
              if (firstdot == -1)
                firstdot = i;
            }

          if (wi < pos)
            wi++;
        }
    }
#else
  SAVE_PATTERN(tmp, pos, fg->pattern, fg->len);
  fg->escmap = _escmap;
#endif

  xfree(tmp);

  DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, "
          "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len,
          fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
          fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
          fg->newline ? 'y' : 'n'));

  /* Check whether reverse QS algorithm is more efficient */
  if ((wfirstdot > -1) && (fg->wlen - whasdot + 1 < (size_t)wfirstdot) &&
      fg->nosub)
    {
      fg->reversed = true;
      DPRINT(("tre_compile_fast: using reverse QS algorithm\n"));
    }

  FILL_QSBC;
  FILL_BMGS;
#ifdef TRE_WCHAR
  FILL_QSBC_WIDE;
  FILL_BMGS_WIDE;
#endif

  return REG_OK;
}