/* Run every entry of the file-level `tests' table: select the entry's
   syntax, compile its regex, search its string from offset 0 over the
   whole length and compare re_search's result with the expected value.

   Returns 0 when every entry behaves as expected, 1 as soon as any
   compilation fails or any search result differs.  */
int
main (void)
{
  struct re_pattern_buffer r;
  size_t i;
  int ret = 0;

  for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
    {
      re_set_syntax (tests[i].syntax);
      memset (&r, 0, sizeof (r));
      if (re_compile_pattern (tests[i].regex, strlen (tests[i].regex), &r))
        {
          /* `i' is a size_t, so the matching conversion is %zu.  */
          printf ("re_compile_pattern %zu failed\n", i);
          ret = 1;
          continue;
        }

      size_t len = strlen (tests[i].string);
      int rv = re_search (&r, tests[i].string, len, 0, len, NULL);
      if (rv != tests[i].retval)
        {
          printf ("re_search %zu unexpected value %d != %d\n",
                  i, rv, tests[i].retval);
          ret = 1;
        }
      regfree (&r);
    }

  return ret;
}
/* Search the text of AS for the pattern S2 (S2SIZE bytes) between the
   offsets FROM and TO.

   REGEX selects regular-expression matching (otherwise RE_PLAIN is added
   to the syntax), ICASE adds RE_ICASE, and NOTBOL / NOTEOL are copied
   into the compiled pattern's not_bol / not_eol flags.  The compile
   error, if any, is stored in the file-level `re_find_err'.

   A forward search starts at FROM with range TO - FROM and yields the end
   offset of the match; a backward search starts at TO - 1 with range
   -(TO - 1 - FROM) and yields the start offset of the match.  When the
   pattern does not compile, or re_search reports no match, the negative
   value is returned unchanged (-1 unless re_search returned another
   negative code).  */
static int
find_substr (astr as, const char *s2, size_t s2size, size_t from, size_t to,
             bool forward, bool notbol, bool noteol, bool regex, bool icase)
{
  struct re_pattern_buffer compiled;
  struct re_registers match_regs;
  reg_syntax_t flags = RE_SYNTAX_EMACS;
  int found = -1;

  if (!regex)
    flags |= RE_PLAIN;
  if (icase)
    flags |= RE_ICASE;
  re_set_syntax (flags);

  memset (&compiled, 0, sizeof (compiled));
  match_regs.num_regs = 1;

  re_find_err = re_compile_pattern (s2, (int) s2size, &compiled);
  /* The anchoring flags are set only after compilation.  */
  compiled.not_bol = notbol;
  compiled.not_eol = noteol;

  if (!re_find_err)
    {
      if (forward)
        found = re_search (&compiled, astr_cstr (as), (int) astr_len (as),
                           from, to - from, &match_regs);
      else
        found = re_search (&compiled, astr_cstr (as), (int) astr_len (as),
                           to - 1, -(to - 1 - from), &match_regs);
    }

  if (found >= 0)
    {
      if (forward)
        found = match_regs.end[0];
      /* The register arrays are released only after a successful match.  */
      free (match_regs.start);
      free (match_regs.end);
    }

  regfree (&compiled);
  return found;
}
/* Count the lines of MEM (MEMLEN bytes, which must be followed by a NUL)
   that contain a match for EXPR, scanning from the end of the buffer
   towards its beginning.

   EXPR is compiled with POSIX basic syntax minus RE_DOT_NEWLINE, plus
   RE_HAT_LISTS_NOT_NEWLINE and, when ICASE is nonzero, RE_ICASE.  Each
   matching line is printed; after a hit the search resumes just before
   the start of the line that contained it.

   Returns 0 if exactly EXPECTED lines matched, 1 otherwise.  Allocation,
   compilation, fastmap and internal search errors terminate the program
   through error().  */
static int
run_test_backwards (const char *expr, const char *mem, size_t memlen,
                    int icase, int expected)
{
  regex_t compiled;
  reg_syntax_t syntax
    = (RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE) | RE_HAT_LISTS_NOT_NEWLINE;
  const char *compile_msg;
  size_t pos = memlen;
  int matches = 0;

  if (icase)
    syntax |= RE_ICASE;
  re_set_syntax (syntax);

  memset (&compiled, 0, sizeof (compiled));
  compiled.fastmap = malloc (256);
  if (compiled.fastmap == NULL)
    error (EXIT_FAILURE, errno, "cannot allocate fastmap");

  compile_msg = re_compile_pattern (expr, strlen (expr), &compiled);
  if (compile_msg != NULL)
    error (EXIT_FAILURE, 0, "cannot compile expression: %s", compile_msg);

  if (re_compile_fastmap (&compiled))
    error (EXIT_FAILURE, 0, "couldn't compile fastmap");

  assert (mem[memlen] == '\0');

  for (;;)
    {
      const char *line_begin;
      const char *line_end;
      int hit;

      /* Stepping back past the first line wraps POS around, which ends
         the scan.  */
      if (pos > memlen)
        break;

      hit = re_search (&compiled, mem, memlen, pos, -pos, NULL);
      if (hit == -1)
        break;
      if (hit == -2)
        error (EXIT_FAILURE, 0, "internal error in re_search");

      /* Widen the hit to the whole line that contains it.  */
      line_begin = mem + hit;
      while (line_begin > mem && line_begin[-1] != '\n')
        --line_begin;
      for (line_end = mem + hit;
           *line_end != '\0' && *line_end != '\n';
           ++line_end)
        continue;

      ++matches;
      printf ("match %d: \"%.*s\"\n", matches,
              (int) (line_end - line_begin), line_begin);

      pos = line_begin - 1 - mem;
    }

  regfree (&compiled);

  /* Report an error if the number of matches is not what we expect.  */
  return matches != expected;
}
/* Compile R->pattern->str into R->re with the GNU regex matcher.
 *
 * R->re is allocated on first use.  The global re_syntax_options is
 * switched to the libfa-compatible syntax for the duration of the
 * compilation only and restored afterwards.
 *
 * Returns 0 on success.  Returns -1 if the pattern buffer cannot be
 * allocated, or if the pattern is invalid; in the latter case the problem
 * is reported through syntax_error() first.
 */
int regexp_compile(struct regexp *r) {
    /* See the GNU regex manual or regex.h in gnulib for
     * an explanation of these flags. They are set so that the regex
     * matcher interprets regular expressions the same way that libfa
     * does */
    static const reg_syntax_t syntax =
        RE_CONTEXT_INDEP_OPS|RE_CONTEXT_INVALID_OPS|RE_DOT_NOT_NULL
        |RE_INTERVALS|RE_NO_BK_BRACES|RE_NO_BK_PARENS|RE_NO_BK_REFS
        |RE_NO_BK_VBAR|RE_NO_EMPTY_RANGES
        |RE_NO_POSIX_BACKTRACKING|RE_CONTEXT_INVALID_DUP|RE_NO_GNU_OPS;
    reg_syntax_t old_syntax = re_syntax_options;
    const char *c = NULL;

    if (r->re == NULL) {
        CALLOC(r->re, 1);
        /* Do not hand a NULL buffer to the regex compiler (or write
         * through it below) when the allocation failed. */
        if (r->re == NULL)
            return -1;
    }

    re_syntax_options = syntax;
    c = re_compile_pattern(r->pattern->str, strlen(r->pattern->str), r->re);
    re_syntax_options = old_syntax;

    r->re->regs_allocated = REGS_REALLOCATE;
    if (c != NULL) {
        char *p = escape(r->pattern->str, -1);
        syntax_error(r->info, "invalid regexp /%s/: %s", p, c);
        free(p);
        return -1;
    }
    return 0;
}
/* Compile a regexp and signal a Lisp error if anything goes wrong.

   PATTERN is either a Lisp string or a cons holding a pre-compiled
   regexp; BUFP receives the compiled form, TRANSLATE is the translation
   table to install, and BACKWARD selects which half of a pre-compiled
   pair is used.

   Nothing is recompiled when PATTERN is EQ to `last_regexp' and BUFP's
   translate table, mc_flag and syntax/category versions still agree with
   the current state.  On an invalid pattern Qinvalid_regexp is signalled
   (repeatedly, inside an endless loop).  */
void
compile_pattern (Lisp_Object pattern, struct re_pattern_buffer *bufp, char *translate, int backward)
{
  char *val;
  Lisp_Object dummy;

  /* Cache hit: same pattern object and unchanged compilation context.  */
  if (EQ (pattern, last_regexp)
      && translate == bufp->translate /* 92.4.10 by K.Handa */
      /* 93.7.13 by K.Handa */
      && NILP (current_buffer->mc_flag) == !bufp->mc_flag
      && (!bufp->syntax_version || bufp->syntax_version == syntax_table_version)
      && (!bufp->category_version || bufp->category_version == category_table_version))
    return;

  if (CONSP (pattern)) /* pre-compiled regexp */
    {
      Lisp_Object compiled;

      val = 0;
      pattern = XCONS (pattern)->car;
      /* The car must itself be a cons whose cdr (backward) or car
         (forward) is a vector of exactly four elements.  */
      if (CONSP (pattern)
          && (compiled = backward ? XCONS(pattern)->cdr : XCONS(pattern)->car)
          && XTYPE (compiled) == Lisp_Vector
          && XVECTOR (compiled)->size == 4)
        {
          /* set_pattern will set bufp->allocated to NULL */
          set_pattern (compiled, bufp, translate);
          return;
        }

      /* NOTE(review): "Invalied" looks like a typo for "Invalid"; left
         untouched because the text is emitted at run time.  */
      val = "Invalied pre-compiled regexp";
      goto invalid_regexp;
    }

  CHECK_STRING (pattern, 0);

  /* Forget the cached pattern until the new one compiles successfully.  */
  last_regexp = Qnil;
  bufp->translate = translate;
  bufp->syntax_version = bufp->category_version = 0; /* 93.7.13 by K.Handa */

  /* 92.7.10 by T.Enami
     'bufp->allocated == 0' means bufp->buffer points to pre-compiled
     pattern in a lisp string, which should not be 'realloc'ed. */
  if (bufp->allocated == 0)
    bufp->buffer = 0;

  val = re_compile_pattern (XSTRING (pattern)->data, XSTRING (pattern)->size, bufp);

  if (val)
    {
      /* Also entered by the goto above with VAL set to a fixed message.  */
    invalid_regexp:
      dummy = build_string (val);
      while (1)
        Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
    }

  last_regexp = pattern;

  return;
}
/*
 *  Search the string params[1] for the regular expression params[0].
 *
 *  Both parameters are string identifiers; they are resolved with
 *  string_get() and discarded before returning.
 *
 *  Returns the value of re_search() (the offset of the match when it is
 *  >= 0), or -1 when the expression does not compile or nothing matches.
 *  After a successful match the REGEX_REG global string slots 0..N, where
 *  N is the smaller of 15 and the number of sub-expressions, are replaced
 *  by the text of the corresponding registers (the empty string for a
 *  group that did not take part in the match).
 */
static int modregex_regex (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * str = string_get(params[1]);
    int result = -1;
    unsigned n;

    struct re_pattern_buffer pb;
    struct re_registers re;
    int start[16];
    int end[16];
    int * regex_reg;

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    /* If the initial buffer could not be obtained, advertise no buffer at
       all so that the regex compiler allocates its own. */
    pb.allocated = pb.buffer ? 4096 : 0;
    pb.fastmap = malloc(256);
    /* The register arrays live on the stack: the matcher has to be told
       that they are caller-owned and of fixed size.  The field only holds
       one of the REGS_* codes, not an element count. */
    pb.regs_allocated = REGS_FIXED;
    re.num_regs = 16;
    re.start = start;
    re.end = end;

    /* NOTE(review): REG_ICASE is a regcomp() flag, not a syntax bit; the
       intent was presumably RE_ICASE.  Left unchanged because correcting
       it alters how existing patterns are interpreted - confirm. */
    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED | REG_ICASE;

    /* Match the regex */

    if (re_compile_pattern (reg, strlen(reg), &pb) == 0)
    {
        result = re_search (&pb, str, strlen(str), 0, strlen(str), &re);

        /* Negative results (-1 no match, -2 internal error) leave the
           registers without meaningful contents. */
        if (result >= 0)
        {
            /* Fill the regex_reg global variables */

            regex_reg = (int *) &GLODWORD( mod_regex, REGEX_REG);

            for (n = 0 ; n < 16 && n <= pb.re_nsub ; n++)
            {
                int from = re.start[n];
                int size = re.end[n] - re.start[n];

                /* Unmatched groups are reported as -1/-1 */
                if (from < 0 || size < 0)
                {
                    from = 0;
                    size = 0;
                }

                string_discard (regex_reg[n]);
                regex_reg[n] = string_newa (str + from, size);
                string_use (regex_reg[n]);
            }
        }
    }

    /* Free the resources (pattern buffer and fastmap) */

    regfree (&pb);
    string_discard(params[0]);
    string_discard(params[1]);

    return result;
}
/* **---------------------------------------------------------------------- ** .Klasse: TBRegexp ** ** .Methode: compile ** ** .Beschreibung: Initialisierung der Searchengine ** ** .Parameter: const char*, pattern, I, das Suchmuster ** bool , use_fastmap, I, TRUE->benutze Fastmap ** ** .Rueckgabewert: ** ** --------------------------------------------------------------------- ** ** .Methodenbeschreibung: ** Compile the pattern `pattern` into an internal form. ** On Resumption: I am not usable until a valid pattern is set. **----------------------------------------------------------------- */ void TBRegexp::compile( const char* pattern, bool use_fastmap ) { free(); registers_ = new re_registers; pattern_buf_ = new re_pattern_buffer; ZeroMemory( pattern_buf_, zsizeof( re_pattern_buffer ) ); ZeroMemory( registers_, zsizeof( re_registers ) ); if ( use_fastmap ) pattern_buf_->fastmap = (char*) malloc( 256 ); // 256 :-). const char* error = re_compile_pattern( pattern, lstrlen( pattern ), pattern_buf_ ); }
void set_word_regexp (const char *regexp) { const char *msg; struct re_pattern_buffer new_word_regexp; if (!*regexp || STREQ (regexp, DEFAULT_WORD_REGEXP)) { default_word_regexp = true; return; } /* Dry run to see whether the new expression is compilable. */ init_pattern_buffer (&new_word_regexp, NULL); msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp); regfree (&new_word_regexp); if (msg != NULL) { M4ERROR ((warning_status, 0, "bad regular expression `%s': %s", regexp, msg)); return; } /* If compilation worked, retry using the word_regexp struct. We can't rely on struct assigns working, so redo the compilation. The fastmap can be reused between compilations, and will be freed by the final regfree. */ if (!word_regexp.fastmap) word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1); msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp); assert (!msg); re_set_registers (&word_regexp, ®s, regs.num_regs, regs.start, regs.end); if (re_compile_fastmap (&word_regexp)) assert (false); default_word_regexp = false; }
/* Regression driver: under the en_US.UTF-8 locale (if it can be
   selected), compile "insert into" case-insensitively with grep syntax
   and search a 15-byte buffer that contains an embedded NUL and bytes
   above 0x7F.  The search result is not inspected; the run only has to
   complete.

   Returns 0 normally, 1 if the pattern cannot be compiled.  */
int
main (void)
{
  struct re_pattern_buffer r;
  struct re_registers s;

  setlocale (LC_ALL, "en_US.UTF-8");
  memset (&r, 0, sizeof (r));
  memset (&s, 0, sizeof (s));
  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | RE_ICASE);

  /* Never hand a buffer that failed to compile to re_search.  */
  if (re_compile_pattern ("insert into", 11, &r) != NULL)
    return 1;

  re_search (&r, "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK", 15, 0, 15,
             &s);

  regfree (&r);
  return 0;
}
/* Run one table-driven search test.

   TEST supplies the syntax, pattern, subject string, start offset and
   expected re_search result; FAIL is a prefix printed in front of every
   diagnostic.  The pattern is compiled and searched for from TEST->start
   to the end of the string.  When the expected result is positive and
   the search started at 0, a second search starting exactly at the
   expected offset must yield the same offset.

   Returns 0 on success, 1 after printing a diagnostic on any failure.  */
int
do_one_test (const struct test_s *test, const char *fail)
{
  int res;
  const char *err;
  struct re_pattern_buffer regbuf;

  re_set_syntax (test->syntax);
  memset (&regbuf, '\0', sizeof (regbuf));
  err = re_compile_pattern (test->pattern, strlen (test->pattern), &regbuf);
  if (err != NULL)
    {
      printf ("%sre_compile_pattern \"%s\" failed: %s\n", fail, test->pattern,
              err);
      return 1;
    }

  res = re_search (&regbuf, test->string, strlen (test->string), test->start,
                   strlen (test->string) - test->start, NULL);
  if (res != test->res)
    {
      printf ("%sre_search \"%s\" \"%s\" failed: %d (expected %d)\n",
              fail, test->pattern, test->string, res, test->res);
      regfree (&regbuf);
      return 1;
    }

  if (test->res > 0 && test->start == 0)
    {
      /* Searching again from the match position must find it in place.  */
      res = re_search (&regbuf, test->string, strlen (test->string),
                       test->res, strlen (test->string) - test->res, NULL);
      if (res != test->res)
        {
          printf ("%sre_search from expected \"%s\" \"%s\" failed: %d (expected %d)\n",
                  fail, test->pattern, test->string, res, test->res);
          regfree (&regbuf);
          return 1;
        }
    }

  regfree (&regbuf);
  return 0;
}
/* Regression test: under the en_US.UTF-8 locale, compile "[^x]x" and
   search a string made of eight three-byte UTF-8 sequences followed by
   an ASCII 'x'.  The search result is not inspected; the search only has
   to complete.

   Returns 0 on completion, 1 if the locale cannot be selected or the
   pattern does not compile.  */
static int
do_test (void)
{
  struct re_pattern_buffer r;
  /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> then 'x'.  */
  const char *s = "\xe1\x80\x80\xe1\x80\xbb\xe1\x80\xbd\xe1\x80\x94\xe1\x80\xba\xe1\x80\xaf\xe1\x80\x95\xe1\x80\xbax";

  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
    {
      puts ("setlocale failed");
      return 1;
    }

  memset (&r, 0, sizeof (r));

  /* Never hand a buffer that failed to compile to re_search.  */
  if (re_compile_pattern ("[^x]x", 5, &r) != NULL)
    {
      puts ("re_compile_pattern failed");
      return 1;
    }

  /* This was triggering a buffer overflow.  */
  re_search (&r, s, strlen (s), 0, strlen (s), 0);

  regfree (&r);
  return 0;
}
/**
 * Compile the regular expression 're' into the pattern buffer stored in
 * _data, using the syntax bits carried by 'flags', and build its fastmap.
 *
 * @param re    the expression text (length taken from re.size())
 * @param flags provides the GNU regex syntax via flags.flags()
 * @return true when both the pattern and its fastmap were compiled;
 *         false (after logging a warning) when the fastmap could not be
 *         allocated, the expression is invalid, or the fastmap
 *         compilation failed.
 */
bool
Regexp::compile(vespalib::stringref re, Flags flags)
{
    re_set_syntax(flags.flags());
    regex_t *preg = (regex_t *)_data;
    preg->translate = NULL;
    preg->fastmap = static_cast<char *>(malloc(256));
    preg->buffer = NULL;
    preg->allocated = 0;
    if (preg->fastmap == NULL) {
        // re_compile_fastmap() below fills the fastmap, so it must exist.
        LOG(warning, "cannot allocate fastmap for regexp '%s'",
            vespalib::string(re).c_str());
        return false;
    }
    const char * error = re_compile_pattern(re.data(), re.size(), preg);
    if (error != 0) {
        LOG(warning, "invalid regexp '%s': %s",
            vespalib::string(re).c_str(), error);
        return false;
    }
    if (re_compile_fastmap(preg) != 0) {
        LOG(warning, "re_compile_fastmap failed for regexp '%s'",
            vespalib::string(re).c_str());
        return false;
    }
    return true;
}
int main (void) { struct re_pattern_buffer regbuf; const char *err; size_t i; int ret = 0; #ifdef HAVE_MCHECK_H mtrace (); #endif for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) { int start; re_set_syntax (tests[i].syntax); memset (®buf, '\0', sizeof (regbuf)); err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern), ®buf); if (err != NULL) { printf ("re_compile_pattern failed: %s\n", err); ret = 1; continue; } start = re_search (®buf, tests[i].string, strlen (tests[i].string), 0, strlen (tests[i].string), NULL); if (start != tests[i].start) { printf ("re_search failed %d\n", start); ret = 1; regfree (®buf); continue; } regfree (®buf); } return ret; }
void set_word_regexp (const char *regexp) { int i; char test[2]; const char *msg; if (!strcmp (regexp, DEFAULT_WORD_REGEXP)) { default_word_regexp = TRUE; return; } default_word_regexp = FALSE; msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp); if (msg != NULL) { M4ERROR ((warning_status, 0, "Bad regular expression `%s': %s", regexp, msg)); return; } if (word_start == NULL) word_start = xmalloc (256); word_start[0] = '\0'; test[1] = '\0'; for (i = 1; i < 256; i++) { test[0] = i; if (re_search (&word_regexp, test, 1, 0, 0, ®s) >= 0) strcat (word_start, test); } }
/* Check re_match with a non-zero start offset: "[abc]*d" matched against
   "foacabdxy" starting at offset 2 must consume 5 bytes and report
   register 0 as the range 2..7.

   Returns 0 on success, 1 after printing a diagnostic on failure.  */
int
main (void)
{
  struct re_pattern_buffer regex;
  struct re_registers regs;
  const char *s;
  int match;
  int result = 0;

  regs.num_regs = 1;
  memset (&regex, '\0', sizeof (regex));
  s = re_compile_pattern ("[abc]*d", 7, &regex);
  if (s != NULL)
    {
      puts ("re_compile_pattern return non-NULL value");
      result = 1;
    }
  else
    {
      match = re_match (&regex, "foacabdxy", 9, 2, &regs);
      if (match != 5)
        {
          printf ("re_match returned %d, expected 5\n", match);
          result = 1;
        }
      else if (regs.start[0] != 2 || regs.end[0] != 7)
        {
          printf ("re_match returned %d..%d, expected 2..7\n",
                  regs.start[0], regs.end[0]);
          result = 1;
        }
      else
        /* Announce success only when nothing was reported above.  */
        puts (" -> OK");

      regfree (&regex);
    }

  return result;
}
/* Searches buffer b for a match of a regular expression, starting at the
   cursor and proceeding in the direction given by b->opt.search_back.

   If regex is NULL, b->find_string is used and it is recompiled only when
   it changed, when the previous search was not a regexp search, or when
   the case-sensitivity setting no longer matches the compiled pattern.
   If skip_first is true the search starts one character past (or, going
   backwards, one character before) the cursor.

   Returns OK after moving the cursor to the match, NOT_FOUND, STOPPED if
   the file-level `stop' flag was raised, ERROR for an empty/absent regex
   or a compilation error (which is also printed), OUT_OF_MEMORY, or
   UTF8_REGEXP_CHARACTER_CLASS_NOT_SUPPORTED when a bracket list of a
   UTF-8 buffer contains a non-US-ASCII character. */

int find_regexp(buffer * const b, const char *regex, const bool skip_first) {

	const unsigned char * const up_case = b->encoding == ENC_UTF8 ? ascii_up_case : localised_up_case;
	bool recompile_string;

	if (!regex) {
		regex = b->find_string;
		recompile_string = b->find_string_changed || !b->last_was_regexp;
	}
	else recompile_string = true;

	if (!regex || !strlen(regex)) return ERROR;

	if (re_pb.buffer == NULL) {
		re_pb.buffer = malloc(START_BUFFER_SIZE);
		if (re_pb.buffer == NULL) return OUT_OF_MEMORY;
		re_pb.allocated = START_BUFFER_SIZE;
	}

	re_pb.fastmap = (void *)d;

	/* We have to be careful: even if the search string has not changed, it
	is possible that case sensitivity has. In this case, we force recompilation. */

	if (b->opt.case_search) {
		if (re_pb.translate != 0) recompile_string = true;
		re_pb.translate = 0;
	}
	else {
		if (re_pb.translate != up_case) recompile_string = true;
		re_pb.translate = (unsigned char *)up_case;
	}

	if (recompile_string) {
		const char *actual_regex = regex;

		/* If the buffer encoding is UTF-8, we need to replace dots with UTF8DOT,
			non-word-constituents (\W) with UTF8NONWORD, and embed complemented
			character classes in UTF8COMP, so that they do not match UTF-8
			subsequences. Moreover, we must compute the remapping from the virtual
			to the actual groups caused by the new groups thus introduced. */

		if (b->encoding == ENC_UTF8) {
			const char *s;
			char *q;
			bool escape = false;
			int virtual_group = 0, real_group = 0, dots = 0, comps = 0, nonwords = 0;

			s = regex;

			/* We first scan regex to compute the exact number of characters of
				the actual (i.e., after substitutions) regex. */

			do {
				if (!escape) {
					if (*s == '.') dots++;
					else if (*s == '[') {
						if (*(s+1) == '^') {
							comps++;
							s++;
						}

						if (*(s+1) == ']') s++; /* A literal ]. */

						/* We scan the list up to ] and check that no non-US-ASCII characters appear. */
						do if (utf8len(*(++s)) != 1) return UTF8_REGEXP_CHARACTER_CLASS_NOT_SUPPORTED; while(*s && *s != ']');

						/* An unclosed list leaves s on the terminator: stop here,
							otherwise the loop condition below would step past the
							end of the string. */
						if (!*s) break;
					}
					else if (*s == '\\') {
						escape = true;
						continue;
					}
				}
				else if (*s == 'W') nonwords++;
				escape = false;
			} while(*(++s));

			actual_regex = q = malloc(strlen(regex) + 1 + (strlen(UTF8DOT) - 1) * dots + (strlen(UTF8NONWORD) - 2) * nonwords + (strlen(UTF8COMP) - 1) * comps);
			if (!actual_regex) return OUT_OF_MEMORY;
			s = regex;
			escape = false;

			/* Second pass: copy regex into actual_regex performing the
				substitutions, and record in map_group the real group number of
				each group written by the user. The terminator is copied too. */

			do {
				if (escape || *s != '.' && *s != '(' && *s != '[' && *s != '\\') {
					if (escape && *s == 'W') {
						/* Overwrite the backslash already copied. */
						q--;
						strcpy(q, UTF8NONWORD);
						q += strlen(UTF8NONWORD);
						real_group++;
					}
					else *(q++) = *s;
				}
				else {
					if (*s == '\\') {
						escape = true;
						*(q++) = '\\';
						continue;
					}

					if (*s == '.') {
						strcpy(q, UTF8DOT);
						q += strlen(UTF8DOT);
						real_group++;
					}
					else if (*s == '(') {
						*(q++) = '(';
						if (virtual_group < RE_NREGS - 1) map_group[++virtual_group] = ++real_group;
					}
					else if (*s == '[') {
						if (*(s+1) == '^') {
							strcpy(q, UTF8COMP);
							q += strlen(UTF8COMP);
							s++;
							if (*(s+1) == ']') *(q++) = *(++s); /* A literal ]. */
							do *(q++) = *(++s); while (*s && *s != ']');
							if (*s) *(q++) = ')';
							real_group++;
						}
						else {
							*(q++) = '[';
							if (*(s+1) == ']') *(q++) = *(++s); /* A literal ]. */
							do *(q++) = *(++s); while (*s && *s != ']');
						}
					}
				}

				escape = false;
			} while(*(s++));

			/* This assert may be false if a [ is not closed. */
			assert(strlen(actual_regex) == strlen(regex) + (strlen(UTF8DOT) - 1) * dots + (strlen(UTF8NONWORD) - 2) * nonwords + (strlen(UTF8COMP) - 1) * comps);
		}

		const char * p = re_compile_pattern(actual_regex, strlen(actual_regex), &re_pb);

		if (b->encoding == ENC_UTF8) free((void*)actual_regex);

		if (p) {
			/* Here we have a very dirty hack: since we cannot return the error of
				regex, we print it here. Which means that we access term.c's
				functions. 8^( */

			print_message(p);
			alert();
			return ERROR;
		}

	}

	b->find_string_changed = 0;

	line_desc *ld = b->cur_line_desc;
	int64_t y = b->cur_line;
	stop = false;

	if (! b->opt.search_back) {

		int64_t start_pos = b->cur_pos + (skip_first ? 1 : 0);

		while(y < b->num_lines && !stop) {
			assert(ld->ld_node.next != NULL);

			int64_t pos;
			if (start_pos <= ld->line_len && (pos = re_search(&re_pb, ld->line ? ld->line : "", ld->line_len, start_pos, ld->line_len - start_pos, &re_reg)) >= 0) {
				goto_line(b, y);
				goto_pos(b, pos);
				return OK;
			}

			/* Every line after the first is searched from its beginning. */
			ld = (line_desc *)ld->ld_node.next;
			start_pos = 0;
			y++;
		}
	}
	else {

		int64_t start_pos = b->cur_pos + (skip_first ? -1 : 0);

		while(y >= 0 && !stop) {
			assert(ld->ld_node.prev != NULL);

			int64_t pos;
			if (start_pos >= 0 && (pos = re_search(&re_pb, ld->line ? ld->line : "", ld->line_len, start_pos, -start_pos - 1, &re_reg)) >= 0) {
				goto_line(b, y);
				goto_pos(b, pos);
				return OK;
			}

			/* Every line before the first is searched from its end. */
			ld = (line_desc *)ld->ld_node.prev;
			if (ld->ld_node.prev) start_pos = ld->line_len;
			y--;
		}
	}

	return stop ? STOPPED : NOT_FOUND;
}
/* Probe the regex implementation for a series of known bugs.

   Each check compiles a pattern into the (zeroed) static `regex' buffer
   and, where applicable, runs re_search or re_match on a short subject.
   The return value is 0 when every check passes; otherwise it is the
   bitwise OR of the codes (1, 2, 4, 8, 16, 32, 64) of the checks that
   failed.  1 is also returned directly if the "C" locale cannot be
   restored after the UTF-8 checks.  */
int
main (void)
{
  int result = 0;
  static struct re_pattern_buffer regex;
  unsigned char folded_chars[UCHAR_MAX + 1];
  int i;
  const char *s;
  struct re_registers regs;

#if HAVE_DECL_ALARM
  /* Some builds of glibc go into an infinite loop on this test.  */
  int alarm_value = 2;
  signal (SIGALRM, SIG_DFL);
  alarm (alarm_value);
#endif
  if (setlocale (LC_ALL, "en_US.UTF-8"))
    {
      {
        /* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
           This test needs valgrind to catch the bug on Debian
           GNU/Linux 3.1 x86, but it might catch the bug better
           on other platforms and it shouldn't hurt to try the
           test here.  */
        static char const pat[] = "insert into";
        static char const data[] =
          "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
        re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
                       | RE_ICASE);
        memset (&regex, 0, sizeof regex);
        s = re_compile_pattern (pat, sizeof pat - 1, &regex);
        if (s)
          result |= 1;
        else if (re_search (&regex, data, sizeof data - 1,
                            0, sizeof data - 1, &regs)
                 != -1)
          result |= 1;
      }

      /* Check whether it's really a UTF-8 locale.
         On mingw, the setlocale call succeeds but returns
         "English_United States.1252", with locale_charset() returning
         "CP1252".  */
      if (strcmp (locale_charset (), "UTF-8") == 0)
        {
          /* This test is from glibc bug 15078.
             The test case is from Andreas Schwab in
             <http://www.sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
             */
          static char const pat[] = "[^x]x";
          static char const data[] =
            /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
            "\xe1\x80\x80"
            "\xe1\x80\xbb"
            "\xe1\x80\xbd"
            "\xe1\x80\x94"
            "\xe1\x80\xba"
            "\xe1\x80\xaf"
            "\xe1\x80\x95"
            "\xe1\x80\xba"
            "x";
          re_set_syntax (0);
          memset (&regex, 0, sizeof regex);
          s = re_compile_pattern (pat, sizeof pat - 1, &regex);
          if (s)
            result |= 1;
          else
            {
              i = re_search (&regex, data, sizeof data - 1,
                             0, sizeof data - 1, 0);
              if (i != 0 && i != 21)
                result |= 1;
            }
        }

      if (! setlocale (LC_ALL, "C"))
        return 1;
    }

  /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
  re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("a[^x]b", 6, &regex);
  if (s)
    result |= 2;
  /* This should fail, but succeeds for glibc-2.5.  */
  else if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
    result |= 2;

  /* This regular expression is from Spencer ere test number 75
     in grep-2.3.  */
  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
  memset (&regex, 0, sizeof regex);
  for (i = 0; i <= UCHAR_MAX; i++)
    folded_chars[i] = i;
  regex.translate = folded_chars;
  /* NOTE(review): "@:>@" looks like an Autoconf quadrigraph for ']' left
     over from the m4 source of this test - confirm before changing.  */
  s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
  /* This should fail with _Invalid character class name_ error.  */
  if (!s)
    result |= 4;

  /* Ensure that [b-a] is diagnosed as invalid, when
     using RE_NO_EMPTY_RANGES. */
  re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("a[b-a]", 6, &regex);
  if (s == 0)
    result |= 8;

  /* This should succeed, but does not for glibc-2.1.3.  */
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("{1", 2, &regex);
  if (s)
    result |= 8;

  /* The following example is derived from a problem report
     against gawk from Jorge Stolfi <*****@*****.**>.  */
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("[an\371]*n", 7, &regex);
  if (s)
    result |= 8;
  /* This should match, but does not for glibc-2.2.1.  */
  else if (re_match (&regex, "an", 2, 0, &regs) != 2)
    result |= 8;

  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("x", 1, &regex);
  if (s)
    result |= 8;
  /* glibc-2.2.93 does not work with a negative RANGE argument.  */
  else if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
    result |= 8;

  /* The version of regex.c in older versions of gnulib
     ignored RE_ICASE.  Detect that problem too.  */
  re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("x", 1, &regex);
  if (s)
    result |= 16;
  else if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
    result |= 16;

  /* Catch a bug reported by Vin Shelton in
     http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html
     */
  re_set_syntax (RE_SYNTAX_POSIX_BASIC
                 & ~RE_CONTEXT_INVALID_DUP
                 & ~RE_NO_EMPTY_RANGES);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
  if (s)
    result |= 32;

  /* REG_STARTEND was added to glibc on 2004-01-15.
     Reject older versions.  */
  if (! REG_STARTEND)
    result |= 64;

#if 0
  /* It would be nice to reject hosts whose regoff_t values are too
     narrow (including glibc on hosts with 64-bit ptrdiff_t and
     32-bit int), but we should wait until glibc implements this
     feature.  Otherwise, support for equivalence classes and
     multibyte collation symbols would always be broken except
     when compiling --without-included-regex.   */
  if (sizeof (regoff_t) < sizeof (ptrdiff_t)
      || sizeof (regoff_t) < sizeof (ssize_t))
    result |= 64;
#endif

  return result;
}
void GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) { const char *err; const char *p, *sep; size_t total = size; char *motif; if (match_icase) syntax_bits |= RE_ICASE; re_set_syntax (syntax_bits); dfasyntax (syntax_bits, match_icase, eolbyte); /* For GNU regex compiler we have to pass the patterns separately to detect errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" GNU regex should have raise a syntax error. The same for backref, where the backref should have been local to each pattern. */ p = pattern; do { size_t len; sep = memchr (p, '\n', total); if (sep) { len = sep - p; sep++; total -= (len + 1); } else { len = total; total = 0; } patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns); patterns[pcount] = patterns0; if ((err = re_compile_pattern (p, len, &(patterns[pcount].regexbuf))) != NULL) error (EXIT_TROUBLE, 0, "%s", err); pcount++; p = sep; } while (sep && total != 0); /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. Then if DFA succeeds we do some hairy stuff using the regex matcher to decide whether the match should really count. */ if (match_words || match_lines) { static char const line_beg_no_bk[] = "^("; static char const line_end_no_bk[] = ")$"; static char const word_beg_no_bk[] = "(^|[^[:alnum:]_])("; static char const word_end_no_bk[] = ")([^[:alnum:]_]|$)"; static char const line_beg_bk[] = "^\\("; static char const line_end_bk[] = "\\)$"; static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\("; static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)"; int bk = !(syntax_bits & RE_NO_BK_PARENS); char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk); strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk) : (bk ? word_beg_bk : word_beg_no_bk)); total = strlen(n); memcpy (n + total, pattern, size); total += size; strcpy (n + total, match_lines ? (bk ? 
line_end_bk : line_end_no_bk) : (bk ? word_end_bk : word_end_no_bk)); total += strlen (n + total); pattern = motif = n; size = total; } else motif = NULL; dfa = dfaalloc (); dfacomp (pattern, size, dfa, 1); kwsmusts (); free(motif); }
/* Compile the expression held in NEW_REGEX (text in ->re, length in ->sz,
   POSIX flags in ->flags) into NEW_REGEX->pattern.

   NEEDED_SUB is zero when no sub-expression information is required;
   otherwise the check at the end compares it, minus one, with the number
   of groups in the compiled pattern.  Any compilation error is reported
   through bad_prog().

   With REG_PERL defined the work is delegated to regncomp(); otherwise a
   GNU regex syntax word is assembled and re_compile_pattern() is used.  */
static void
compile_regex_1 (struct regex *new_regex, int needed_sub)
{
#ifdef REG_PERL
  int errcode;
  /* REG_NOSUB is requested only when no sub-expressions are needed.  */
  errcode = regncomp(&new_regex->pattern, new_regex->re, new_regex->sz,
		     (needed_sub ? 0 : REG_NOSUB)
		     | new_regex->flags
		     | extended_regexp_flags);

  if (errcode)
    {
      char errorbuf[200];
      regerror(errcode, NULL, errorbuf, 200);
      bad_prog(gettext(errorbuf));
    }
#else
  const char *error;
  /* Start from the POSIX extended or basic syntax, as selected by
     extended_regexp_flags.  */
  int syntax = ((extended_regexp_flags & REG_EXTENDED)
		 ? RE_SYNTAX_POSIX_EXTENDED
		 : RE_SYNTAX_POSIX_BASIC);

  syntax &= ~RE_DOT_NOT_NULL;
  syntax |= RE_NO_POSIX_BACKTRACKING;

  /* Adjust the syntax bits according to the POSIX conformance mode.  */
  switch (posixicity)
    {
    case POSIXLY_EXTENDED:
      syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD;
      break;
    case POSIXLY_CORRECT:
      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD;
      break;
    case POSIXLY_BASIC:
      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_LIMITED_OPS | RE_NO_GNU_OPS;
      break;
    }

#ifdef RE_ICASE
  syntax |= (new_regex->flags & REG_ICASE) ? RE_ICASE : 0;
#endif
#ifdef RE_NO_SUB
  syntax |= needed_sub ? 0 : RE_NO_SUB;
#endif

  /* One fastmap entry per value representable in a char.
     NOTE(review): the result of malloc is not checked here.  */
  new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8));

  /* If REG_NEWLINE is set, newlines are treated differently.  */
  if (new_regex->flags & REG_NEWLINE)
    {
      /* REG_NEWLINE implies neither . nor [^...] match newline.  */
      syntax &= ~RE_DOT_NEWLINE;
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
    }

  re_set_syntax (syntax);
  error = re_compile_pattern (new_regex->re, new_regex->sz,
			      &new_regex->pattern);
  /* These fields are assigned after compilation.  */
  new_regex->pattern.newline_anchor = (new_regex->flags & REG_NEWLINE) != 0;

  new_regex->pattern.translate = NULL;
#ifndef RE_ICASE
  /* Without RE_ICASE, case folding is done with a shared tolower()
     translation table, refilled on each call that needs it.  */
  if (new_regex->flags & REG_ICASE)
    {
      static char translate[1 << (sizeof(char) * 8)];
      int i;
      for (i = 0; i < sizeof(translate) / sizeof(char); i++)
	translate[i] = tolower (i);

      new_regex->pattern.translate = translate;
    }
#endif

  if (error)
    bad_prog(error);
#endif

  /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */
  if (needed_sub
      && new_regex->pattern.re_nsub < needed_sub - 1
      && posixicity == POSIXLY_EXTENDED)
    {
      char buf[200];
      sprintf(buf, _("invalid reference \\%d on `s' command's RHS"),
	      needed_sub - 1);
      bad_prog(buf);
    }
}
/* Build a compiled Regexp from the LEN bytes at S.

   Escape sequences in the text are translated first into a temporary
   copy, which is then compiled with the GNU regex compiler and, when DFA
   is nonzero and the GAWK_NO_DFA environment variable is not set, with
   the dfa compiler as well.  IGNORECASE selects case-insensitive
   matching.  A compilation error is fatal.  The returned structure is
   allocated here; the temporary copy is freed before returning.  */
Regexp *
make_regexp(const char *s, size_t len, int ignorecase, int dfa)
{
	Regexp *rp;
	const char *rerr;
	const char *src = s;
	char *temp;
	const char *end = s + len;
	register char *dest;
	register int c, c2;
	static short first = TRUE;
	static short no_dfa = FALSE;
	int has_anchor = FALSE;

	/* The number of bytes in the current multibyte character.
	   It is 0, when the current character is a singlebyte character.  */
	size_t is_multibyte = 0;
#ifdef MBS_SUPPORT
	mbstate_t mbs;

	if (gawk_mb_cur_max > 1)
		memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize.  */
#endif

	/* The environment is consulted only on the first call.  */
	if (first) {
		first = FALSE;
		no_dfa = (getenv("GAWK_NO_DFA") != NULL);	/* for debugging and testing */
	}

	/* Handle escaped characters first. */

	/*
	 * Build a copy of the string (in dest) with the
	 * escaped characters translated, and generate the regex
	 * from that.
	 */
	emalloc(dest, char *, len + 2, "make_regexp");
	temp = dest;	/* remember the start of the copy */

	while (src < end) {
#ifdef MBS_SUPPORT
		if (gawk_mb_cur_max > 1 && ! is_multibyte) {
			/* The previous byte is a singlebyte character, or last byte
			   of a multibyte character.  We check the next character.  */
			is_multibyte = mbrlen(src, end - src, &mbs);
			if ((is_multibyte == 1) || (is_multibyte == (size_t) -1)
				|| (is_multibyte == (size_t) -2 || (is_multibyte == 0))) {
				/* We treat it as a singlebyte character.  */
				is_multibyte = 0;
			}
		}
#endif

		/* We skip multibyte character, since it must not be a special
		   character.  */
		if ((gawk_mb_cur_max == 1 || ! is_multibyte) &&
		    (*src == '\\')) {
			c = *++src;	/* the character following the backslash */
			switch (c) {
			case 'a':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
			case 'v':
			case 'x':
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				c2 = parse_escape(&src);
				if (c2 < 0)
					cant_happen();
				/*
				 * Unix awk treats octal (and hex?) chars
				 * literally in re's, so escape regexp
				 * metacharacters.
				 */
				if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x')
				    && strchr("()|*+?.^$\\[]", c2) != NULL)
					*dest++ = '\\';
				*dest++ = (char) c2;
				break;
			case '8':
			case '9':	/* a\9b not valid */
				*dest++ = c;
				src++;
				break;
			case 'y':	/* normally \b */
				/* gnu regex op */
				if (! do_traditional) {
					*dest++ = '\\';
					*dest++ = 'b';
					src++;
					break;
				}
				/* else, fall through */
			default:
				/* Any other escape is copied through unchanged. */
				*dest++ = '\\';
				*dest++ = (char) c;
				src++;
				break;
			} /* switch */
		} else {
			c = *src;
			/* Remember whether an unescaped anchor was seen. */
			if (c == '^' || c == '$')
				has_anchor = TRUE;
			*dest++ = *src++;	/* not '\\' */
		}
		if (gawk_mb_cur_max > 1 && is_multibyte)
			is_multibyte--;
	} /* while */

	*dest = '\0' ;	/* Only necessary if we print dest ? */
	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
	memset((char *) rp, 0, sizeof(*rp));
	rp->pat.allocated = 0;	/* regex will allocate the buffer */
	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");

	/*
	 * Lo these many years ago, had I known what a P.I.T.A. IGNORECASE
	 * was going to turn out to be, I wouldn't have bothered with it.
	 *
	 * In the case where we have a multibyte character set, we have no
	 * choice but to use RE_ICASE, since the casetable is for single-byte
	 * character sets only.
	 *
	 * On the other hand, if we do have a single-byte character set,
	 * using the casetable should give a performance improvement, since
	 * it's computed only once, not each time a regex is compiled.  We
	 * also think it's probably better for portability.  See the
	 * discussion by the definition of casetable[] in eval.c.
	 */

	if (ignorecase) {
		if (gawk_mb_cur_max > 1) {
			syn |= RE_ICASE;
			rp->pat.translate = NULL;
		} else {
			syn &= ~RE_ICASE;
			rp->pat.translate = (char *) casetable;
		}
	} else {
		rp->pat.translate = NULL;
		syn &= ~RE_ICASE;
	}

	/* The dfa matcher is always told about case-insensitivity through
	   RE_ICASE, whichever mechanism the regex matcher uses. */
	dfasyntax(syn | (ignorecase ? RE_ICASE : 0), ignorecase ? TRUE : FALSE, '\n');
	re_set_syntax(syn);

	len = dest - temp;	/* length of the translated copy */
	if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
		fatal("%s: /%s/", rerr, temp);	/* rerr already gettextized inside regex routines */

	/* gack. this must be done *after* re_compile_pattern */
	rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */
	if (dfa && ! no_dfa) {
		dfacomp(temp, len, &(rp->dfareg), TRUE);
		rp->dfa = TRUE;
	} else
		rp->dfa = FALSE;
	rp->has_anchor = has_anchor;

	free(temp);
	return rp;
}
/* Compile PATTERN (PATTERN_SIZE bytes, possibly several newline-separated
   sub-patterns) for both matchers used by this file.

   Each newline-separated sub-pattern is compiled on its own with the GNU
   regex compiler and stored in the returned structure's `patterns' array;
   the whole pattern (wrapped for whole-word / whole-line matching when
   MATCH_WORDS or MATCH_LINES is set) is then handed to dfacomp, and
   kwsmusts is called on the result.

   SYNTAX is installed with re_set_syntax and dfasyntax before compiling.
   A regex compilation error is reported through error () with status
   exit_failure.

   Returns the newly allocated struct compiled_regex.  */
static void *
compile (const char *pattern, size_t pattern_size,
         bool match_icase, bool match_words, bool match_lines, char eolbyte,
         reg_syntax_t syntax)
{
  struct compiled_regex *cregex;
  const char *err;
  const char *sep;
  size_t total = pattern_size;
  const char *motif = pattern;

  cregex = (struct compiled_regex *) xmalloc (sizeof (struct compiled_regex));
  memset (cregex, '\0', sizeof (struct compiled_regex));
  cregex->match_words = match_words;
  cregex->match_lines = match_lines;
  cregex->eolbyte = eolbyte;
  cregex->patterns = NULL;
  cregex->pcount = 0;

  re_set_syntax (syntax);
  dfasyntax (syntax, match_icase, eolbyte);

  /* For GNU regex compiler we have to pass the patterns separately to
     detect errors like "[\nallo\n]\n".  The patterns here are "[", "allo"
     and "]"; GNU regex should raise a syntax error for them.  The same
     goes for back-references, which should be local to each pattern.  */
  do
    {
      size_t len;

      sep = memchr (motif, '\n', total);
      if (sep)
        {
          len = sep - motif;
          sep++;
          total -= (len + 1);
        }
      else
        {
          len = total;
          total = 0;
        }

      cregex->patterns = xrealloc (cregex->patterns,
                                   (cregex->pcount + 1)
                                   * sizeof (struct patterns));
      memset (&cregex->patterns[cregex->pcount], '\0',
              sizeof (struct patterns));

      err = re_compile_pattern (motif, len,
                                &(cregex->patterns[cregex->pcount].regexbuf));
      if (err != NULL)
        /* The message comes from the regex library; never use it as the
           format string itself.  */
        error (exit_failure, 0, "%s", err);

      cregex->pcount++;
      motif = sep;
    }
  while (sep && total != 0);

  /* In the match_words and match_lines cases, we use a different pattern
     for the DFA matcher that will quickly throw out cases that won't work.
     Then if DFA succeeds we do some hairy stuff using the regex matcher
     to decide whether the match should really count.  */
  if (match_words || match_lines)
    {
      /* In the whole-word case, we use the pattern:
         (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
         In the whole-line case, we use the pattern:
         ^(userpattern)$.  */
      static const char line_beg[] = "^(";
      static const char line_end[] = ")$";
      static const char word_beg[] = "(^|[^[:alnum:]_])(";
      static const char word_end[] = ")([^[:alnum:]_]|$)";
      /* Sized for the longer (word) wrapper, which also covers the line
         wrapper.  */
      char *n = (char *) xmalloc (sizeof word_beg - 1 + pattern_size
                                  + sizeof word_end);
      size_t i;

      strcpy (n, match_lines ? line_beg : word_beg);
      i = strlen (n);
      memcpy (n + i, pattern, pattern_size);
      i += pattern_size;
      strcpy (n + i, match_lines ? line_end : word_end);
      i += strlen (n + i);
      /* NOTE(review): N is not freed here; confirm whether dfacomp keeps
         a reference to the text before adding a free.  */
      pattern = n;
      pattern_size = i;
    }

  dfacomp (pattern, pattern_size, &cregex->dfa, 1);
  kwsmusts (cregex, match_icase, match_words, match_lines, eolbyte);

  return cregex;
}
string *grep(char *regexp, char *line, int num_vars) { struct re_pattern_buffer *rc; struct re_registers *p; const_string ok; string *vars = NULL; string *lookup; int i; if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { fprintf(stderr, "Grep\n\t%s\n\tin\n\t%s\n", regexp, line); } if (test_file('z', line)) return NULL; /* This will retrieve the precompiled regexp or compile it and remember it. vars contains the strings matched, num_vars the number of these strings. */ #if 0 if ((lookup = hash_lookup(symtab, regexp))) rc = (struct re_pattern_buffer *)lookup[0]; else rc = NULL; if (rc == NULL) { #endif /* Compile the regexp and stores the result */ if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { fprintf(stderr, "\tCompiling the regexp\n"); } re_syntax_options = RE_SYNTAX_POSIX_EGREP; rc = (struct re_pattern_buffer *) calloc(1, sizeof(struct re_pattern_buffer)); rc->regs_allocated = REGS_UNALLOCATED; if ((ok = re_compile_pattern(regexp, strlen(regexp), rc)) != 0) FATAL1("Can't compile regex %s\n", regexp); #if 0 hash_remove_all(symtab, regexp); hash_insert(symtab, regexp, (char *)rc); } else if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { fprintf(stderr, "\tAlready compiled\n"); } #endif p = (struct re_registers *) calloc(1, sizeof(struct re_registers)); p->num_regs = num_vars; if ((re_match(rc, line, strlen(line), 0, p)) > 0) { vars = (char **) xmalloc ((num_vars+1) * sizeof(char *)); for (i = 0; i <= num_vars; i++) { vars[i] = malloc((p->end[i] - p->start[i] + 1)*sizeof(char)); strncpy(vars[i], line+p->start[i], p->end[i] - p->start[i]); vars[i][p->end[i] - p->start[i]] = '\0'; } } free (p); if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { if (vars) for(i = 0; i <= num_vars; i++) fprintf(stderr, "String %d matches %s\n", i, vars[i]); } return vars; }
/*
 * Append LEN bytes of DATA to the NUL-terminated buffer *BUF, whose
 * capacity is *ALLOCATED and whose current text length is *USED.  The
 * buffer grows in 128-byte steps.  Returns 1 on success, 0 when memory
 * could not be obtained (the buffer is then left unchanged).
 */
static int modregex_result_append (char ** buf, unsigned * allocated, unsigned * used, const char * data, unsigned len)
{
    unsigned needed = *used + len + 1;

    if (needed > *allocated)
    {
        unsigned wanted = (needed + 127) & ~127u;
        char * grown = realloc (*buf, wanted);

        if (!grown) return 0;
        *buf = grown;
        *allocated = wanted;
    }

    if (len) memcpy (*buf + *used, data, len);
    *used += len;
    (*buf)[*used] = 0;
    return 1;
}

/*
 * Replace every match of a regular expression inside a string.
 *
 * params[0] is the pattern, params[1] the replacement and params[2] the
 * subject string (all string ids).  In the replacement, a backslash
 * followed by a digit N is substituted by the text of sub-expression N
 * (0 is the whole match).  The REGEX_REG global array is filled from the
 * first match found.
 *
 * Returns the id of a new string holding the result.  If the pattern does
 * not compile, the subject is returned unchanged.  If memory runs out, the
 * text built so far is returned.
 *
 * An empty match copies the character at the match position and resumes
 * after it, so no input text is lost and the scan always advances.
 */
static int modregex_regex_replace (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * rep = string_get(params[1]);
    const char * str = string_get(params[2]);

    unsigned reg_len = strlen(reg);
    unsigned str_len = strlen(str);
    unsigned rep_len = strlen(rep);

    int fixed_replacement = strchr(rep, '\\') ? 0 : 1;

    struct re_pattern_buffer pb;
    struct re_registers re;

    unsigned startpos = 0;
    int regex_filled = 0;

    char * result;
    unsigned result_allocated = 128;
    unsigned result_len = 0;
    int result_string;
    unsigned n;
    int * regex_reg;
    int ok = 1;

    /* Alloc a buffer for the resulting string */

    result = malloc(result_allocated);
    if (result) *result = 0;
    else result_allocated = 0;      /* the append helper will retry */

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    pb.allocated = pb.buffer ? 4096 : 0;
    pb.used = 0;
    pb.fastmap = malloc(256);
    pb.translate = NULL;
    pb.fastmap_accurate = 0;
    /* regs_allocated is a mode flag, not a register count: let the
       matcher allocate the register arrays and release them below. */
    pb.regs_allocated = REGS_UNALLOCATED;

    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

    /* Run the regex */

    if (re_compile_pattern (reg, reg_len, &pb) == 0)
    {
        while (ok && startpos < str_len)
        {
            unsigned nextpos;
            unsigned matchend;
            int found = re_search (&pb, str, str_len, startpos, str_len - startpos, &re);

            if (found < 0) break;
            nextpos = (unsigned) found;
            /* Register 0 holds the bounds of the whole match. */
            matchend = (unsigned) re.end[0];

            /* Fill the REGEX_REG global variables */

            if (regex_filled == 0)
            {
                regex_filled = 1;
                regex_reg = (int *)&GLODWORD( mod_regex, REGEX_REG);
                for (n = 0 ; n < 16 && n <= pb.re_nsub && n < re.num_regs ; n++)
                {
                    int group_ok = re.start[n] >= 0 && re.end[n] >= re.start[n];

                    string_discard (regex_reg[n]);
                    regex_reg[n] = string_newa (group_ok ? str + re.start[n] : str,
                                                group_ok ? re.end[n] - re.start[n] : 0);
                    string_use (regex_reg[n]);
                }
            }

            /* Copy the text that precedes the match */

            ok = modregex_result_append (&result, &result_allocated, &result_len,
                                         str + startpos, nextpos - startpos);

            /* Append the replacement, expanding \N references */

            if (ok && fixed_replacement)
            {
                ok = modregex_result_append (&result, &result_allocated, &result_len,
                                             rep, rep_len);
            }
            else if (ok)
            {
                const char * bptr = rep;
                const char * ptr = strchr (rep, '\\');

                while (ok && ptr)
                {
                    if (ptr[1] >= '0' && ptr[1] <= '9')
                    {
                        unsigned group = (unsigned)(ptr[1] - '0');

                        /* Literal text before the reference */
                        ok = modregex_result_append (&result, &result_allocated, &result_len,
                                                     bptr, (unsigned)(ptr - bptr));

                        /* Text of the group, if it took part in the match */
                        if (ok && group < re.num_regs &&
                            re.start[group] >= 0 && re.end[group] > re.start[group])
                            ok = modregex_result_append (&result, &result_allocated, &result_len,
                                                         str + re.start[group],
                                                         (unsigned)(re.end[group] - re.start[group]));
                        bptr = ptr + 2;
                    }
                    ptr = strchr (ptr + 1, '\\');
                }
                if (ok)
                    ok = modregex_result_append (&result, &result_allocated, &result_len,
                                                 bptr, (unsigned) strlen (bptr));
            }
            if (!ok) break;

            /* Continue the search after the match */

            if (matchend > nextpos)
            {
                startpos = matchend;
            }
            else if (nextpos < str_len)
            {
                /* Empty match: keep the character we step over */
                ok = modregex_result_append (&result, &result_allocated, &result_len,
                                             str + nextpos, 1);
                startpos = nextpos + 1;
            }
            else
            {
                /* Empty match at the very end: nothing is left to scan */
                startpos = str_len;
            }
        }
    }

    /* Copy remaining characters (startpos never exceeds str_len) */

    if (ok)
        modregex_result_append (&result, &result_allocated, &result_len,
                                str + startpos, str_len - startpos);

    /* Free resources */

    free (re.start);
    free (re.end);
    free (pb.buffer);
    free (pb.fastmap);
    string_discard(params[0]);
    string_discard(params[1]);
    string_discard(params[2]);

    /* Return the new string */

    result_string = string_new(result ? result : "");
    string_use(result_string);
    free(result);

    return result_string;
}
/*
 * Split a string at every match of a regular expression.
 *
 * params[0] is the pattern and params[1] the subject (string ids);
 * params[2] is used as a pointer to an int array that receives the ids of
 * the pieces, and params[3] is the number of slots in that array.
 *
 * Each piece is the text between the end of the previous match and the
 * start of the next one; the text after the last match is stored as a
 * final piece if a slot is still free.  Returns the number of pieces
 * stored, which is 0 when the pattern does not compile or the array has
 * no slots.
 */
static int modregex_split (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * str = string_get(params[1]);
    int * result_array = (int *)params[2];
    int result_array_size = params[3];
    int count = 0;
    int pos, lastpos = 0;
    int str_len = (int) strlen(str);

    struct re_pattern_buffer pb;
    struct re_registers re;

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    pb.allocated = pb.buffer ? 4096 : 0;
    pb.fastmap = malloc(256);
    /* regs_allocated is a mode flag, not a register count: let the
       matcher allocate the register arrays and release them below. */
    pb.regs_allocated = REGS_UNALLOCATED;

    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

    /* Match the regex */

    if (result_array_size > 0 && re_compile_pattern (reg, strlen(reg), &pb) == 0)
    {
        for (;;)
        {
            pos = re_search (&pb, str, str_len, lastpos, str_len - lastpos, &re);
            if (pos < 0) break;     /* no match (-1) or internal error (-2) */

            *result_array = string_newa (str + lastpos, pos - lastpos);
            string_use(*result_array);
            result_array++;
            count++;
            result_array_size--;
            if (result_array_size == 0) break;

            /* Resume after the match; step over one character when the
               match is empty so that the scan always advances. */
            lastpos = re.end[0];
            if (lastpos < pos) break;
            if (lastpos == pos) lastpos++;
            if (lastpos > str_len)
            {
                /* Empty match at the end of the subject */
                lastpos = str_len;
                break;
            }
        }

        if (result_array_size > 0)
        {
            *result_array = string_new (str + lastpos);
            string_use (*result_array);
            count++;
        }
    }

    /* Free the resources */

    free (re.start);
    free (re.end);
    free (pb.buffer);
    free (pb.fastmap);
    string_discard(params[0]);
    string_discard(params[1]);

    return count;
}
// Strips from a mail subject the parts matched by PATTERN (leading
// blanks, short "xx:"-style prefixes, a trailing "(fwd)"), keeping a
// leading "[...]" tag, then trims surrounding white space.  The string is
// modified in place.
//
// The pattern is compiled once, by the first caller that increments
// gLocker; other callers wait for gRebuf to be published, polling up to
// 200 times with snooze(10000).  If the pattern is not available, or the
// register arrays cannot be allocated, only the white space trimming is
// done.
_EXPORT void SubjectToThread (BString &string)
{
// a regex that matches a non-ASCII UTF8 character:
#define U8C \
	"[\302-\337][\200-\277]" \
	"|\340[\302-\337][\200-\277]" \
	"|[\341-\357][\200-\277][\200-\277]" \
	"|\360[\220-\277][\200-\277][\200-\277]" \
	"|[\361-\367][\200-\277][\200-\277][\200-\277]" \
	"|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \
	"|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \
	"|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \
	"|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]"

#define PATTERN \
	"^ +" \
	"|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
	"|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
	"| *\\(fwd\\) *$"

	if (gRebuf == NULL && atomic_add(&gLocker,1) == 0)
	{
		// the idea is to compile the regexp once to speed up testing

		// case-insensitive matching through an upper-casing translation table
		for (int i=0; i<256; ++i) gTranslation[i]=i;
		for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i);

		gRe.translate = gTranslation;
		gRe.regs_allocated = REGS_FIXED;
		re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;

		const char *pattern = PATTERN;
		// count subexpressions in PATTERN
		for (unsigned int i=0; pattern[i] != 0; ++i)
		{
			if (pattern[i] == '\\')
				++i;
			else if (pattern[i] == '(')
				++gNsub;
		}

		const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe);
		if (err == NULL)
			gRebuf = &gRe;
		else
			fprintf(stderr, "Failed to compile the regex: %s\n", err);
	}
	else
	{
		int32 tries = 200;
		while (gRebuf == NULL && tries-- > 0)
			snooze(10000);
	}

	if (gRebuf)
	{
		struct re_registers regs;
		// can't be static if this function is to be thread-safe

		regs.num_regs = gNsub;
		regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
		regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t));

		// without both register arrays the matcher has nowhere to store
		// the match bounds, so skip the stripping entirely
		if (regs.start != NULL && regs.end != NULL)
		{
			for (int start=0;
			     (start=re_search(gRebuf, string.String(), string.Length(),
			                      0, string.Length(), &regs)) >= 0;
			    )
			{
				//
				// we found something
				//

				// don't delete [bemaildaemon]...
				if (start == regs.start[1])
					start = regs.start[2];

				string.Remove(start,regs.end[0]-start);
				if (start) string.Insert(' ',1,start);
			}
		}

		free(regs.start);
		free(regs.end);
	}

	// Finally remove leading and trailing space.  Some software, like
	// tm-edit 1.8, appends a space to the subject, which would break
	// threading if we left it in.
	trim_white_space(string);
}
/* Program entry point.

   In order, this function:
   - reads the locale and sets io_utf8 when the locale name matches
     LOCALE_REGEX with its first group participating;
   - scans argv for "--" options, recording in skiplist[] the entries
     that must not be treated as file names later;
   - loads menu and key configuration (unless --noconfig was given),
     creates the first buffer and initialises the terminal;
   - loads standard input into a buffer when it is not a tty;
   - opens the files named on the command line, honouring "+N[,M]",
     "--binary" and the read-only options that precede each name;
   - runs the macro given with --macro, if any, and then enters the
     keyboard loop, which never returns. */

int main(int argc, char **argv) {

	char *locale = setlocale(LC_ALL, "");
	/* Upper-casing table used as the regex translate table below. */
	for(int i = 0; i < 256; i++) localised_up_case[i] = toupper(i);

	if (locale) {
		struct re_pattern_buffer re_pb;
		struct re_registers re_reg;
		memset(&re_pb, 0, sizeof re_pb);
		memset(&re_reg, 0, sizeof re_reg);

		re_pb.translate = localised_up_case;
		/* NOTE(review): the result of re_compile_pattern() is not checked
		   and re_pb is never released. */
		re_compile_pattern(LOCALE_REGEX, strlen(LOCALE_REGEX), &re_pb);
		if (re_search(&re_pb, locale, strlen(locale), 0, strlen(locale), &re_reg) >= 0) {
			/* A non-negative start means group 1 took part in the match. */
			if (re_reg.start[1] >= 0) io_utf8 = true;
		}
		free(re_reg.start);
		free(re_reg.end);
	}

	bool no_config = false;
	char *macro_name = NULL, *key_bindings_name = NULL, *menu_conf_name = NULL, *startup_prefs_name = DEF_PREFS_NAME;

	/* One flag per argv entry; a set flag marks an entry consumed as an
	   option (or as an option's argument). */
	char * const skiplist = calloc(argc, 1);
	if (!skiplist) exit(1); /* We need this many flags. */

	for(int i = 1; i < argc; i++) {

		if (argv[i][0] == '-' && (!strcmp(&argv[i][1], "h") || !strcmp(&argv[i][1], "-help" "\0" VERSION_STRING))) {
			puts(ARG_HELP);
			exit(0);
		}

		/* Special arguments start with two dashes. If we find one, we
		   cancel its entry in argv[], so that it will be skipped when opening
		   the specified files. The only exception is +N for skipping to the
		   N-th line. */

		if (argv[i][0] == '-' && argv[i][1] == '-') {
			if (!argv[i][2]) i++; /* You can use "--" to force the next token to be a filename */
			else if (!strcmp(&argv[i][2], "noconfig") || !strcmp(&argv[i][2], "no-config")) {
				no_config = true;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "noansi") || !strcmp(&argv[i][2], "no-ansi")) {
				ansi = false;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "no-syntax")) {
				do_syntax = false;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "prefs")) {
				if (i < argc-1) {
					startup_prefs_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
			else if (!strcmp(&argv[i][2], "ansi")) {
				ansi = true;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "utf8")) {
				io_utf8 = true;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "no-utf8")) {
				io_utf8 = false;
				skiplist[i] = 1; /* argv[i] = NULL; */
			}
			else if (!strcmp(&argv[i][2], "macro")) {
				if (i < argc-1) {
					macro_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
			else if (!strcmp(&argv[i][2], "keys")) {
				if (i < argc-1) {
					key_bindings_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
			else if (!strcmp(&argv[i][2], "menus")) {
				if (i < argc-1) {
					menu_conf_name = argv[i+1];
					skiplist[i] = skiplist[i+1] = 1; /* argv[i] = argv[i+1] = NULL; */
				}
			}
		}
	}

#ifdef NE_TEST
	/* Dump the builtin menu and key bindings to compare to
	   doc/default.menus and doc/default.keys. */
	int dump_config(void);
	dump_config();
#endif

	/* Unless --noconfig was specified, we try to configure the
	   menus and the keyboard. Note that these functions can exit() on error. */

	if (!no_config) {
		get_menu_configuration(menu_conf_name);
		get_key_bindings(key_bindings_name);
	}

	/* If we cannot even create a buffer, better go... */

	if (!new_buffer()) exit(1);

	/* Now that key_bindings are loaded, try to fix up the message for NOT_FOUND. */
	{
		char *repeat_last_keystroke, *new_not_found;
		if ((repeat_last_keystroke = find_key_strokes(REPEATLAST_A, 1))) {
			/* 39 = the two literal pieces below (28 + 10 characters) plus
			   the terminating NUL. */
			if ((new_not_found = malloc(39+strlen(repeat_last_keystroke)))) {
				strcat(strcat(strcpy(new_not_found, "Not Found. (RepeatLast with "), repeat_last_keystroke), " to wrap.)");
				error_msg[NOT_FOUND] = new_not_found;
			}
			free(repeat_last_keystroke);
		}
	}

	clear_buffer(cur_buffer);

	/* The INT_MAX clip always exists, and it is used by the Through command. */

	clip_desc * const cd = alloc_clip_desc(INT_MAX, 0);
	if (!cd) exit(1);

	add_head(&clips, &cd->cd_node);

	/* General terminfo and cursor motion initialization. From here
	   onwards, we cannot exit() lightly. */

	term_init();

	/* We will be always using the last line for the status bar. */

	set_terminal_window(ne_lines-1);

	/* We read in all the key capabilities. */

	read_key_capabilities();

	/* Some initializations of other modules... */

	re_set_syntax(
		RE_CONTEXT_INDEP_ANCHORS |
		RE_CONTEXT_INDEP_OPS |
		RE_HAT_LISTS_NOT_NEWLINE |
		RE_NEWLINE_ALT |
		RE_NO_BK_PARENS |
		RE_NO_BK_VBAR |
		RE_NO_EMPTY_RANGES
	);

	/* True until a first document has been loaded into the initial buffer. */
	bool first_file = true;

	load_virtual_extensions();
	load_auto_prefs(cur_buffer, startup_prefs_name);

	/* When standard input is not a terminal, its content is loaded into the
	   current buffer and stdin is reopened on /dev/tty. */
	buffer *stdin_buffer = NULL;
	if (!isatty(fileno(stdin))) {
		first_file = false;
		const int error = load_fd_in_buffer(cur_buffer, fileno(stdin));
		print_error(error);
		stdin_buffer = cur_buffer;

		if (!(freopen("/dev/tty", "r", stdin))) {
			fprintf(stderr, "Cannot reopen input tty\n");
			abort();
		}
	}

	/* The terminal is prepared for interactive I/O. */

	set_interactive_mode();

	clear_entire_screen();

	/* This function sets fatal_code() as signal interrupt handler
	   for all the dangerous signals (SIGILL, SIGSEGV etc.). */

	set_fatal_code();

	if (argc > 1) {

		/* The first file opened does not need a NEWDOC_A action. Note that
		   file loading can be interrupted (wildcarding can sometimes produce
		   unwanted results). */

		/* Per-file settings collected from the arguments that precede a file
		   name; they are reset after each file has been handled. */
		uint64_t first_line = 0, first_col = 0;
		bool binary = false, skip_plus = false, read_only = false;
		stop = false;

		for(int i = 1; i < argc && !stop; i++) {
			if (argv[i] && !skiplist[i]) {
				if (argv[i][0] == '+' && !skip_plus) { /* looking for "+", or "+N" or "+N,M" */
					uint64_t tmp_l = INT64_MAX, tmp_c = 0;
					char *d;
					errno = 0;
					if (argv[i][1]) {
						if (isdigit((unsigned char)argv[i][1])) {
							tmp_l = strtoll(argv[i]+1, &d, 10);
							if (!errno) {
								if (*d) { /* separator between N and M */
									if (isdigit((unsigned char)d[1])) {
										tmp_c = strtoll(d+1, &d, 10);
										if (*d) errno = ERANGE;
									}
									else errno = ERANGE;
								}
							}
						}
						else errno = ERANGE;
					}
					if (!errno) {
						first_line = tmp_l;
						first_col = tmp_c;
					}
					else {
						/* Not a valid "+N[,M]": process the same argument again,
						   this time as a file name. */
						skip_plus = true;
						i--;
					}
				}
				else if (!strcmp(argv[i], "--binary")) {
					binary = true;
				}
				else if (!strcmp(argv[i], "--read-only") || !strcmp(argv[i], "--readonly") || !strcmp(argv[i], "--ro")) {
					read_only = true;
				}
				else {
					if (!strcmp(argv[i], "-") && stdin_buffer) {
						/* "-" applies the pending settings to the buffer that was
						   loaded from standard input; this happens only once. */
						stdin_buffer->opt.binary = binary;
						if (read_only) stdin_buffer->opt.read_only = read_only;
						if (first_line) do_action(stdin_buffer, GOTOLINE_A, first_line, NULL);
						if (first_col) do_action(stdin_buffer, GOTOCOLUMN_A, first_col, NULL);
						stdin_buffer = NULL;
					}
					else {
						if (!strcmp(argv[i], "--")) i++;
						if (!first_file) do_action(cur_buffer, NEWDOC_A, -1, NULL);
						else first_file = false;
						cur_buffer->opt.binary = binary;
						/* i may have moved past the last argument after "--". */
						if (i < argc) do_action(cur_buffer, OPEN_A, 0, str_dup(argv[i]));
						if (first_line) do_action(cur_buffer, GOTOLINE_A, first_line, NULL);
						if (first_col) do_action(cur_buffer, GOTOCOLUMN_A, first_col, NULL);
						if (read_only) cur_buffer->opt.read_only = read_only;
					}
					first_line =
					first_col = 0;
					skip_plus =
					binary =
					read_only = false;
				}
			}
		}

		/* NOTE(review): skiplist is released only on this argc > 1 path. */
		free(skiplist);

		/* This call makes current the first specified file. It is called
		   only if more than one buffer exist. */

		if (get_nth_buffer(1)) do_action(cur_buffer, NEXTDOC_A, -1, NULL);

	}

	/* We delay updates. In this way the macro activity does not cause display activity. */

	reset_window();
	delay_update();

	if (macro_name) do_action(cur_buffer, MACRO_A, -1, str_dup(macro_name));
	else if (first_file) {
		/* If there is no file to load, and no macro to execute, we display
		   the "NO WARRANTY" message. */
		about();
	}

	/* Keyboard loop: redraw, read one key code and dispatch on its class. */
	while(true) {
		/* If we are displaying the "NO WARRANTY" info, we should not refresh the
		   window now */
		if (!displaying_info) {
			refresh_window(cur_buffer);
			if (cur_buffer->opt.automatch) automatch_bracket(cur_buffer, true);
		}

		draw_status_bar();
		move_cursor(cur_buffer->cur_y, cur_buffer->cur_x);

		int c = get_key_code();

		if (window_changed_size) {
			print_error(do_action(cur_buffer, REFRESH_A, 0, NULL));
			window_changed_size = displaying_info = false;
			cur_buffer->automatch.shown = 0;
		}

		if (c == INVALID_CHAR) continue; /* Window resizing. */
		const input_class ic = CHAR_CLASS(c);

		if (displaying_info) {
			refresh_window(cur_buffer);
			displaying_info = false;
		}

		if (cur_buffer->automatch.shown) automatch_bracket(cur_buffer, false);

		switch(ic) {
		case INVALID:
			print_error(INVALID_CHARACTER);
			break;

		case ALPHA:
			print_error(do_action(cur_buffer, INSERTCHAR_A, c, NULL));
			break;

		case TAB:
			print_error(do_action(cur_buffer, INSERTTAB_A, 1, NULL));
			break;

		case RETURN:
			print_error(do_action(cur_buffer, INSERTLINE_A, -1, NULL));
			break;

		case COMMAND:
			/* A negative code is mapped to the key_binding[] index -c - 1. */
			if (c < 0) c = -c - 1;
			if (key_binding[c]) print_error(execute_command_line(cur_buffer, key_binding[c]));
			break;

		default:
			break;
		}
	}
}
/**
 * \brief Lookup a specific path in the list
 *
 * \param list a #GPPortInfoList
 * \param path a path
 *
 * Looks for an entry in the list with the supplied path. If no exact match
 * can be found, the entries with an empty name are tried as regular
 * expressions against the path, in the hope some driver claimed ports like
 * "serial:*". On a regex match, a copy of that entry carrying the requested
 * path and the name "Generic Port" is appended to the list. The copied path
 * and name are truncated if needed and always NUL-terminated.
 *
 * \return The index of the entry or a gphoto2 error code
 **/
int
gp_port_info_list_lookup_path (GPPortInfoList *list, const char *path)
{
	int i, result, generic;
	regex_t pattern;
#ifdef HAVE_GNU_REGEX
	const char *rv;
#else
	regmatch_t match;
#endif

	CHECK_NULL (list && path);

	gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list", ngettext(
		"Looking for path '%s' (%i entry available)...",
		"Looking for path '%s' (%i entries available)...",
		list->count
	), path, list->count);

	/* Exact match? Entries with an empty name are not counted in the
	 * returned index. */
	for (generic = i = 0; i < list->count; i++)
		if (!strlen (list->info[i].name))
			generic++;
		else if (!strcmp (list->info[i].path, path))
			return (i - generic);

	/* Regex match? */
	gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
		_("Starting regex search for '%s'..."), path);
	for (i = 0; i < list->count; i++) {
		GPPortInfo newinfo;

		/* Only entries with an empty name carry a pattern */
		if (strlen (list->info[i].name))
			continue;

		gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
			_("Trying '%s'..."), list->info[i].path);

		/* Compile the pattern */
#ifdef HAVE_GNU_REGEX
		memset (&pattern, 0, sizeof (pattern));
		rv = re_compile_pattern (list->info[i].path,
					 strlen (list->info[i].path), &pattern);
		if (rv) {
			gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
				"%s", rv);
			continue;
		}
#else
		result = regcomp (&pattern, list->info[i].path, REG_ICASE);
		if (result) {
			char buf[1024];
			if (regerror (result, &pattern, buf, sizeof (buf)))
				gp_log (GP_LOG_ERROR, "gphoto2-port-info-list",
					"%s", buf);
			else
				gp_log (GP_LOG_ERROR, "gphoto2-port-info-list",
					_("regcomp failed"));
			return (GP_ERROR_UNKNOWN_PORT);
		}
#endif

		/* Try to match */
#ifdef HAVE_GNU_REGEX
		result = re_match (&pattern, path, strlen (path), 0, NULL);
		regfree (&pattern);
		if (result < 0) {
			gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
				_("re_match failed (%i)"), result);
			continue;
		}
#else
		result = regexec (&pattern, path, 1, &match, 0);
		regfree (&pattern);
		if (result) {
			gp_log (GP_LOG_DEBUG, "gphoto2-port-info-list",
				_("regexec failed"));
			continue;
		}
#endif
		memcpy (&newinfo, &list->info[i], sizeof(newinfo));
		/* strncpy does not terminate the destination when the source
		 * fills it, so copy one byte less and terminate explicitly. */
		strncpy (newinfo.path, path, sizeof (newinfo.path) - 1);
		newinfo.path[sizeof (newinfo.path) - 1] = '\0';
		strncpy (newinfo.name, _("Generic Port"), sizeof (newinfo.name) - 1);
		newinfo.name[sizeof (newinfo.name) - 1] = '\0';
		CR (result = gp_port_info_list_append (list, newinfo));
		return result;
	}

	return (GP_ERROR_UNKNOWN_PORT);
}
Regexp * make_regexp(char *s, size_t len, int ignorecase, int dfa) { Regexp *rp; const char *rerr; char *src = s; char *temp; char *end = s + len; register char *dest; register int c, c2; /* Handle escaped characters first. */ /* * Build a copy of the string (in dest) with the * escaped characters translated, and generate the regex * from that. */ emalloc(dest, char *, len + 2, "make_regexp"); temp = dest; while (src < end) { if (*src == '\\') { c = *++src; switch (c) { case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v': case 'x': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c2 = parse_escape(&src); if (c2 < 0) cant_happen(); /* * Unix awk treats octal (and hex?) chars * literally in re's, so escape regexp * metacharacters. */ if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x') && strchr("()|*+?.^$\\[]", c2) != NULL) *dest++ = '\\'; *dest++ = (char) c2; break; case '8': case '9': /* a\9b not valid */ *dest++ = c; src++; break; case 'y': /* normally \b */ /* gnu regex op */ if (! do_traditional) { *dest++ = '\\'; *dest++ = 'b'; src++; break; } /* else, fall through */ default: *dest++ = '\\'; *dest++ = (char) c; src++; break; } /* switch */ } else *dest++ = *src++; /* not '\\' */ } /* for */ *dest = '\0' ; /* Only necessary if we print dest ? */ emalloc(rp, Regexp *, sizeof(*rp), "make_regexp"); memset((char *) rp, 0, sizeof(*rp)); rp->pat.allocated = 0; /* regex will allocate the buffer */ emalloc(rp->pat.fastmap, char *, 256, "make_regexp"); if (ignorecase) rp->pat.translate = casetable; else rp->pat.translate = NULL; len = dest - temp; if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL) fatal("%s: /%s/", gettext(rerr), temp); /* gack. this must be done *after* re_compile_pattern */ rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */ if (dfa && ! ignorecase) { dfacomp(temp, len, &(rp->dfareg), TRUE); rp->dfa = TRUE; } else rp->dfa = FALSE; free(temp); return rp; }
int main (void) { struct re_pattern_buffer regbuf; const char *err; size_t i; int ret = 0; mtrace (); setlocale (LC_ALL, "de_DE.UTF-8"); for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) { int res, optimized; re_set_syntax (tests[i].syntax); memset (®buf, '\0', sizeof (regbuf)); err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern), ®buf); if (err != NULL) { printf ("re_compile_pattern failed: %s\n", err); ret = 1; continue; } /* Check if re_search will be done as multi-byte or single-byte. */ optimized = ((re_dfa_t *) regbuf.buffer)->mb_cur_max == 1; if (optimized != tests[i].optimize) { printf ("pattern %zd %soptimized while it should%s be\n", i, optimized ? "" : "not ", tests[i].optimize ? "" : " not"); ret = 1; } int str_len = strlen (tests[i].string); res = re_search (®buf, tests[i].string, str_len, 0, str_len, NULL); if (res != tests[i].res) { printf ("re_search %zd failed: %d\n", i, res); ret = 1; regfree (®buf); continue; } res = re_search (®buf, tests[i].string, str_len, str_len, -str_len, NULL); if (res != tests[i].res) { printf ("backward re_search %zd failed: %d\n", i, res); ret = 1; regfree (®buf); continue; } regfree (®buf); re_set_syntax (tests[i].syntax | RE_ICASE); memset (®buf, '\0', sizeof (regbuf)); err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern), ®buf); if (err != NULL) { printf ("re_compile_pattern failed: %s\n", err); ret = 1; continue; } /* Check if re_search will be done as multi-byte or single-byte. 
*/ optimized = ((re_dfa_t *) regbuf.buffer)->mb_cur_max == 1; if (optimized) { printf ("pattern %zd optimized while it should not be when case insensitive\n", i); ret = 1; } res = re_search (®buf, tests[i].string, str_len, 0, str_len, NULL); if (res != tests[i].res) { printf ("ICASE re_search %zd failed: %d\n", i, res); ret = 1; regfree (®buf); continue; } res = re_search (®buf, tests[i].string, str_len, str_len, -str_len, NULL); if (res != tests[i].res) { printf ("ICASE backward re_search %zd failed: %d\n", i, res); ret = 1; regfree (®buf); continue; } regfree (®buf); } return ret; }
static void compile_regex_1 (struct regex *new_regex, int needed_sub) { const char *error; int syntax = ((extended_regexp_flags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC); syntax &= ~RE_DOT_NOT_NULL; syntax |= RE_NO_POSIX_BACKTRACKING; switch (posixicity) { case POSIXLY_EXTENDED: syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD; break; case POSIXLY_CORRECT: syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD; break; case POSIXLY_BASIC: syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS; if (!(extended_regexp_flags & REG_EXTENDED)) syntax |= RE_LIMITED_OPS; break; } if (new_regex->flags & REG_ICASE) syntax |= RE_ICASE; else new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8)); syntax |= needed_sub ? 0 : RE_NO_SUB; /* If REG_NEWLINE is set, newlines are treated differently. */ if (new_regex->flags & REG_NEWLINE) { /* REG_NEWLINE implies neither . nor [^...] match newline. */ syntax &= ~RE_DOT_NEWLINE; syntax |= RE_HAT_LISTS_NOT_NEWLINE; } re_set_syntax (syntax); error = re_compile_pattern (new_regex->re, new_regex->sz, &new_regex->pattern); new_regex->pattern.newline_anchor = buffer_delimiter == '\n' && (new_regex->flags & REG_NEWLINE) != 0; new_regex->pattern.translate = NULL; #ifndef RE_ICASE if (new_regex->flags & REG_ICASE) { static char translate[1 << (sizeof (char) * 8)]; int i; for (i = 0; i < sizeof (translate) / sizeof (char); i++) translate[i] = tolower (i); new_regex->pattern.translate = translate; } #endif if (error) bad_prog (error); /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */ if (needed_sub && new_regex->pattern.re_nsub < needed_sub - 1 && posixicity == POSIXLY_EXTENDED) { char buf[200]; sprintf (buf, _("invalid reference \\%d on `s' command's RHS"), needed_sub - 1); bad_prog (buf); } int dfaopts = buffer_delimiter == '\n' ? 
0 : DFA_EOL_NUL; new_regex->dfa = dfaalloc (); dfasyntax (new_regex->dfa, &localeinfo, syntax, dfaopts); dfacomp (new_regex->re, new_regex->sz, new_regex->dfa, 1); /* The patterns which consist of only ^ or $ often appear in substitution, but regex and dfa are not good at them, as regex does not build fastmap, and as all in buffer must be scanned for $. So we mark them to handle manually. */ if (new_regex->sz == 1) { if (new_regex->re[0] == '^') new_regex->begline = true; if (new_regex->re[0] == '$') new_regex->endline = true; } }