/*
 * Backward search primitive.
 *
 * Tries to match `re` against `str` at the byte offset `startpos`, then at
 * each lower offset down to 0.  Returns the value built by
 * re_alloc_groups() for the first position where re_match() succeeds, or
 * Atom(0) when no position matches.  Raises Invalid_argument
 * "Str.search_backward" when `startpos` lies outside [0, length of str].
 */
CAMLprim value re_search_backward(value re, value str, value startpos)
{
  unsigned char * base = &Byte_u(str, 0);
  unsigned char * cur = &Byte_u(str, Long_val(startpos));
  unsigned char * limit = &Byte_u(str, caml_string_length(str));
  unsigned char * filter;

  if (cur < base || cur > limit)
    caml_invalid_argument("Str.search_backward");

  if (Startchars(re) == -1) {
    /* No start-character table: attempt a match at every position. */
    for (;;) {
      if (re_match(re, base, cur, limit, 0))
        return re_alloc_groups(re, str);
      cur--;
      if (cur < base)
        return Atom(0);
    }
  }

  /* A table indexed by byte value is available; positions whose byte has a
     zero entry are skipped without calling the matcher (presumably these
     are bytes that cannot begin a match -- confirm against the compiler). */
  filter = (unsigned char *) String_val(Field(Cpool(re), Startchars(re)));
  for (;;) {
    while (cur > base && filter[*cur] == 0)
      cur--;
    if (re_match(re, base, cur, limit, 0))
      return re_alloc_groups(re, str);
    cur--;
    if (cur < base)
      return Atom(0);
  }
}
static const char *re_maxmatch(const FrRegExElt *re, const char *candidate, char *&matchbuf, const char *matchbuf_end, char **groups, size_t num_groups) { char *match = matchbuf ; const char *match_end = 0 ; const char *end = candidate ; size_t max_reps = re->maxReps() ; size_t min_reps = re->minReps() ; size_t best_reps = 0 ; while (max_reps >= min_reps) { match = matchbuf ; const char *split = re_match(re,candidate,min_reps,max_reps, match,matchbuf_end,groups,num_groups) ; if (split) { const FrRegExElt *next = re->getNext() ; if (next) end = re_match(next,split,match,matchbuf_end,groups,num_groups) ; else if (*split == '\0') { best_reps = max_reps ; match_end = split ; break ; // found a complete match } else end = split ; if (end && end > match_end) { match_end = end ; best_reps = max_reps ; if (*end == '\0') break ; } } else return 0 ; // can't match! if (max_reps > 0) max_reps-- ; // try one less next time else break ; } if (best_reps > 0 && best_reps != max_reps) { // re-compute the best match if necessary match = matchbuf ; const char *split = re_match(re,candidate,best_reps,best_reps, match,matchbuf_end,groups,num_groups) ; const FrRegExElt *next = re->getNext() ; if (split && next) (void)re_match(next,split,match,matchbuf_end,groups,num_groups) ; } assertq(match >= matchbuf && match <= matchbuf_end) ; matchbuf = match ; return match_end ; }
static int rbsigar_ptql_re_impl(void *data, char *haystack, char *needle) { #ifdef RB_RUBY_19 /* XXX no more regex.h */ return 0; #else struct re_pattern_buffer *regex; int len = strlen(haystack); int retval; const char *err; regex = ALLOC(struct re_pattern_buffer); MEMZERO((char *)regex, struct re_pattern_buffer, 1); /* XXX cache */ if ((err = re_compile_pattern(needle, strlen(needle), regex))) { re_free_pattern(regex); rb_raise(RB_REGEX_ERROR, "%s", err); return 0; } retval = re_match(regex, haystack, len, 0, NULL); re_free_pattern(regex); return retval > 0; #endif }
/* **---------------------------------------------------------------------- ** .Klasse: TBRegexp ** ** .Methode: match ** ** .Beschreibung: Groesse eines PatternMatches bestimmen ** ** .Parameter: const char*, s , I, String in dem gesucht wird ** size_t , pos , I, Anfangsposition der Suche ** size_t , len , I, Range in dem gesucht wird ** ** .Rueckgabewert: int, Anzahl der Zeichen des Matches ** --------------------------------------------------------------------- ** ** .Methodenbeschreibung: **----------------------------------------------------------------- */ int TBRegexp::match( const char* s, size_t pos, size_t len ) { search_string_ = (char*) s; search_pos_ = pos; return re_match( pattern_buf_, s, len, pos, registers_ ); }
//----------------------------------------------------------------- int t_mep_data::find_symbol_from_all_variables(const char *s_find_what, bool use_regular) { int count_found; count_found = 0; if (data_type == MEP_DATA_STRING) { // string for (int col = 0; col < num_cols - 1; col++) for (int t = 0; t < num_data; t++) if (re_match(_data_string[t][col], s_find_what, use_regular)) { // this is a missing value count_found++; } } else if (data_type == MEP_DATA_DOUBLE) { // double // try to convert them to double char* pEnd; double d_find_what; d_find_what = strtod(s_find_what, &pEnd); for (int col = 0; col < num_cols - 1; col++) for (int t = 0; t < num_data; t++) if (fabs(_data_double[t][col] - d_find_what) < 1E-10) { // this is a missing value count_found++; } } return count_found; }
//----------------------------------------------------------------- int t_mep_data::replace_symbol_from_selected_col(const char *s_find_what, const char* s_replace_with, int col, bool use_regular) { int count_replaced = 0; if (data_type == MEP_DATA_STRING) { // string for (int t = 0; t < num_data; t++) if (re_match(_data_string[t][col], s_find_what, use_regular)) { // this is a missing value strcpy(_data_string[t][col], s_replace_with); count_replaced++; } } else if (data_type == MEP_DATA_DOUBLE) { // double // try to convert them to double char* pEnd; double d_find_what; d_find_what = strtod(s_find_what, &pEnd); double d_replace_with; d_replace_with = strtod(s_replace_with, &pEnd); for (int t = 0; t < num_data; t++) if (fabs(_data_double[t][col] - d_find_what) < 1E-10) { // this is a missing value _data_double[t][col] = d_replace_with; count_replaced++; } } _modified = true; return count_replaced; }
int evaluate_fast_regex( struct fast_regex * fre_t, char * str, size_t len ) { char * sub ; struct _fregex * fre = (struct _fregex *)( fre_t->data ) ; if( fre->kwset ) { struct kwsmatch kwsm ; sub = kwsexec( fre->kwset, (char *)str, len, &kwsm) ; if( sub == NULL ) return 0 ; if( kwsm.index < fre->num_exact_kws ) { return 1 ; } } if( HAS_DFA(fre_t->options) ) { int backref = 0 ; sub = dfaexec( &(fre->dfa), str, (str+len), 0, NULL, &backref) ; if( sub == NULL ) return 0 ; if ( !backref || (fre_t->options & FRE_NO_REGEX) ) return 1 ; } return re_match( &fre->regex , str, len, 0, NULL ) > 0 ; }
/* * Matches text against the regular expression re. * It will return non-zero if text matches the pattern re, otherwise it * returns 0. */ int rx_match (const char *text, const char *re) { struct mstruct m; m.flags = 0; return re_match (text, re, &m); }
/*
 * Matches `string` (length `size`) against `r` starting at offset `start`,
 * compiling the pattern first if r->re has not been set yet.
 * Returns -3 when that compilation fails (regexp_compile() returns -1);
 * otherwise returns the result of re_match(), which also fills `regs`.
 */
int regexp_match(struct regexp *r, const char *string, const int size,
                 const int start, struct re_registers *regs)
{
    /* Compile on first use. */
    if (r->re == NULL && regexp_compile(r) == -1)
        return -3;

    return re_match(r->re, string, size, start, regs);
}
// Match `word` against this regular expression and, if the whole word is
// matched, build the replacement text.
//
// Returns a newly allocated FrString holding the expanded replacement, or
// 0 when there is no compiled regex, `word` is null or empty, or the match
// does not extend to the end of `word`.
//
// In the replacement, FrRE_QUOTE followed by a digit inserts the text
// captured for that group; FrRE_QUOTE followed by any other character
// inserts that character literally.  The result is truncated to
// FrMAX_SYMBOLNAME_LEN characters.
FrObject *FrRegExp::match(const char *word) const
{
   if (!regex || !word || !*word)
      return 0 ;
   char *groups[10] ;
   for (size_t i = 0 ; i < lengthof(groups) ; i++)
      groups[i] = 0 ;
   FrObject *result = 0 ;
   char *matchbuf = 0 ;
   const char *end = re_match(regex,word,matchbuf,0,groups,lengthof(groups)) ;
   if (end != 0 && !*end)
      {
      char translation[FrMAX_SYMBOLNAME_LEN+1] ;
      char *trans_end = &translation[FrMAX_SYMBOLNAME_LEN] ;
      char *xlat = translation ;
      for (const char *repl = replacement ; *repl && xlat < trans_end ; repl++)
	 {
	 char c = *repl ;
	 if (c != FrRE_QUOTE)
	    {
	    *xlat++ = c ;
	    continue ;
	    }
	 // escape-char plus digit specifies a replacement taken from the
	 //   source match
	 c = *++repl ;
	 if (!c)
	    break ;			// dangling escape at end of string
	 if (Fr_isdigit(c))
	    {
	    const char *targ = groups[c-'0'] ;
	    if (targ)
	       {
	       // never copy more than what is left in the buffer
	       size_t len = strlen(targ) ;
	       size_t room = (size_t)(trans_end - xlat) ;
	       if (len > room)
		  len = room ;
	       memcpy(xlat,targ,len) ;
	       xlat += len ;
	       }
	    else
	       FrWarningVA("mismatch in r.e. replacement: %%%c",c) ;
	    }
	 else
	    {
	    // quoted ordinary character: emit it as-is (the previous code
	    // advanced once more here, dropping the quoted character and
	    // possibly stepping over the terminating NUL)
	    *xlat++ = c ;
	    }
	 }
      *xlat = '\0' ;
      result = new FrString(translation) ;
      }
   for (size_t j = 0 ; j < lengthof(groups) ; j++)
      {
      if (groups[j])
	 FrFree(groups[j]) ;
      }
   return result ;
}
re_err_t re_match(const char *text, const char *regex, const char *opts) { pcre *re; const char *error; int erroffset; int options; int rc; int ovector[OVECT_SIZE]; int offset; if (text == NULL || regex == NULL) return CEPARAM; if ((options = parse_opts(opts)) < 0) return CEOPTION; if ((re = pcre_compile(regex, options, &error, &erroffset, NULL)) == NULL) return CERECOMP; if ((rc = pcre_exec(re, NULL, text, strlen(text), 0, 0, ovector, NELEMS(ovector))) < 0) { pcre_free(re); return CEREEXEC; } else if (rc == 0) { /* it's just a waring (not an error) but may * loss captured substing for the small ovector */ fprintf(stderr, "%s: warning: too many brackets used\n", __FUNCTION__); match_info.nvect = 0; pcre_free(re); return CESUCCESS; /* not an error */ } /* the pattern is match and * at least one pair of offset is set */ save_match_info(text, rc, ovector); /* /g modifier */ if (strchr(opts, 'g') == NULL) { if (ovector[1] == ovector[0]) offset = ovector[1] + 1; else offset = ovector[1]; if (offset <= strlen(text)) re_match(text + offset, regex, opts); } pcre_free(re); return CESUCCESS; }
/*
 * Partial-match primitive.
 *
 * Runs re_match() on `str` at byte offset `pos` with the final argument
 * set to 1 (the other primitives in this file pass 0).  Returns the value
 * built by re_alloc_groups() on success and Atom(0) otherwise.  Raises
 * Invalid_argument "Str.string_partial_match" when `pos` lies outside
 * [0, length of str].
 */
CAMLprim value re_partial_match(value re, value str, value pos)
{
  unsigned char * base = &Byte_u(str, 0);
  unsigned char * cur = &Byte_u(str, Long_val(pos));
  unsigned char * limit = &Byte_u(str, caml_string_length(str));

  if (cur < base || cur > limit)
    caml_invalid_argument("Str.string_partial_match");

  if (!re_match(re, base, cur, limit, 1))
    return Atom(0);

  return re_alloc_groups(re, str);
}
/*
 * Handles the "zero position / zero encoders" command: resets the encoder
 * count of every motor selected in `which` and sends an acknowledgement.
 *
 * Returns the raw strcmp() result of mIncoming against "zero position ",
 * i.e. zero only when the command is exactly that text.
 * NOTE(review): callers receive non-zero for every other input; confirm
 * that this is the intended meaning of the return value.
 */
BOOL parse_zero_encoders( char* mIncoming )
{
	/* Must be an array: a plain `char` cannot hold the pattern text. */
	char expression[] = "zero (position|encoders) (v)?(w)?(x)?(y)?(z)?";
	int  match = re_match( 7, Captures, expression, mIncoming );
	BOOL is_speed_command = strcmp(mIncoming, "zero position ");

	/* NOTE(review): `which` is not declared in this function; the sibling
	   parsers obtain it from which_motors(mIncoming) -- confirm it is
	   meant to come from file scope here. */
	for (int b=0; b<NUM_MOTORS; b++)
	{
		if (is_in_set(which, b) )
			Encoders[b].Count=0;
	}
	form_response( "ACK zero positioned" );
	return is_speed_command;
}
/*
 * Handles the "home" command: drives every motor selected by
 * which_motors(mIncoming) at HOMING_SPEED and sends an acknowledgement.
 *
 * Returns the raw strcmp() result of mIncoming against "HOME:", i.e. zero
 * only when the command is exactly that text.
 * NOTE(review): callers receive non-zero for every other input; confirm
 * that this is the intended meaning of the return value.
 */
BOOL parse_home_command( char* mIncoming )
{
	/* Must be an array: a plain `char` cannot hold the pattern text. */
	char expression[] = "home ([vV])? ([wW])? ([xX])? ([yY])? ([zZ])?";
	int  match = re_match( 2, Captures, expression, mIncoming );
	BOOL is_speed_command = strcmp(mIncoming, "HOME:");
	byte which = which_motors(mIncoming);

	for (int b=0; b<NUM_MOTORS; b++)
	{
		if (is_in_set(which, b) )
			set_motor_duty( b, HOMING_SPEED );
	}
	form_response("ACK: homing...");
	return is_speed_command;
}
/*
 * Handles the "use encoder" / "use potentiometer" commands by setting
 * FiveMotorConfigData.use_encoder to 1 or 0 respectively and sending an
 * acknowledgement.  Any other input leaves the configuration untouched and
 * sends nothing.  Always returns FALSE.
 */
BOOL parse_use_encoder( char* mIncoming )
{
	char expression[] = "^use (encoder|potentiometer)";
	int  match = re_match( 2, Captures, expression, mIncoming );
	int  use_encoder       = strcmp(mIncoming, "use encoder");
	/* The literal was misspelled ("potientiometer"), so the command that
	   the pattern above accepts could never select this branch. */
	int  use_potentiometer = strcmp(mIncoming, "use potentiometer");

	if (use_encoder==0)
	{
		FiveMotorConfigData.use_encoder = 1;
		form_response("ACK: use encoder");
	}
	else if (use_potentiometer==0)
	{
		FiveMotorConfigData.use_encoder = 0;
		form_response("ACK using potentiometer");
	}
	return FALSE;
}
/*
 * Handles the "measure travel" command: drives every motor selected by
 * which_motors(mIncoming) at HOMING_SPEED and acknowledges immediately.
 *
 * Returns the raw strcmp() result of mIncoming against "measure travel",
 * i.e. zero only when the command is exactly that text.
 * NOTE(review): callers receive non-zero for every other input; confirm
 * that this is the intended meaning of the return value.
 */
BOOL parse_measure_travel( char* mIncoming )
{
	/* Must be an array: a plain `char` cannot hold the pattern text. */
	char expression[] = "measure travel";
	int  match = re_match( 2, Captures, expression, mIncoming );
	BOOL is_speed_command = strcmp(mIncoming, "measure travel");
	byte which = which_motors(mIncoming);

	for (int b=0; b<NUM_MOTORS; b++)
	{
		if (is_in_set(which, b) )
			set_motor_duty( b, HOMING_SPEED );
	}
	/* Don't wait until further limit switches triggered, just ack the cmd: */
	form_response( "ACK measuring travel..." );
	return is_speed_command;
}
int main(int argc, char *argv[]) { re_err_t ret; char buf[1024]; char *result; if (parse_args(argc, argv) != 0) { usage(); exit(1); } //fprintf(stderr, "------- Execute -------\n"); if (is_match) { if ((ret = re_match(text, regex, options)) != CESUCCESS) { fprintf(stderr, "Not match: %s\n", re_strerr(ret)); exit(1); } if (cap_idx >= 0) { if ((ret = re_capstr(cap_idx, buf, sizeof(buf))) != CESUCCESS) { fprintf(stderr, "re_capstr(): %s\n", re_strerr(ret)); exit(1); } printf("[%d]: %s\n", cap_idx, buf); } else { if ((ret = re_capstr(0, buf, sizeof(buf))) != CESUCCESS) { fprintf(stderr, "re_capstr(): %s\n", re_strerr(ret)); exit(1); } printf("%s\n", buf); } } else { if ((ret = re_subs(text, regex, replace, options, &result)) != CESUCCESS) { fprintf(stderr, "Fail to replace: %s\n", re_strerr(ret)); exit(1); } //printf("Replaced!\n"); printf("%s\n", result); free(result); } exit(0); }
/*
 * applies regular expression pattern to contents of the directory
 *
 * for entries that match, the fully qualified pathname is inserted into
 * the treeset
 *
 * dir - directory to scan (not recursed into)
 * reg - compiled pattern tested against each entry name with re_match()
 * ts  - tree set receiving a heap-allocated "dir/name" string per match;
 *       the string is freed here only if ts_add() rejects it
 *
 * returns 1 on success, 0 if the directory cannot be opened, a pathname
 * does not fit the local buffer, or an insertion fails
 */
static int applyRe(char *dir, RegExp *reg, TreeSet *ts) {
    DIR *dd;
    struct dirent *dent;
    int status = 1;

    /*
     * open the directory
     */
    if ((dd = opendir(dir)) == NULL) {
        fprintf(stderr, "Error opening directory `%s'\n", dir);
        return 0;
    }
    /*
     * for each entry in the directory
     */
    while (status && (dent = readdir(dd)) != NULL) {
        char b[4096], *sp;
        int n;

        if (strcmp(".", dent->d_name) == 0 || strcmp("..", dent->d_name) == 0)
            continue;
        /*
         * d_type holds one DT_* value, not a bit mask: testing it with `&`
         * also classified DT_BLK, DT_SOCK and DT_WHT entries as directories
         */
        if (dent->d_type == DT_DIR)
            continue;
        /*
         * see if filename matches regular expression
         */
        if (! re_match(reg, dent->d_name))
            continue;
        /*
         * build the fully qualified pathname, refusing to overflow b
         */
        n = snprintf(b, sizeof b, "%s/%s", dir, dent->d_name);
        if (n < 0 || (size_t)n >= sizeof b) {
            fprintf(stderr, "Error adding `%s/%s' to tree set\n",
                    dir, dent->d_name);
            status = 0;
            break;
        }
        /*
         * duplicate fully qualified pathname for insertion into treeset
         */
        if ((sp = strdup(b)) == NULL) {
            fprintf(stderr, "Error adding `%s' to tree set\n", b);
            status = 0;
            break;
        }
        if (!ts_add(ts, sp)) {
            fprintf(stderr, "Error adding `%s' to tree set\n", sp);
            free(sp);
            status = 0;
            break;
        }
    }
    (void) closedir(dd);
    return status;
}
/**
 * Returns a copy of `s` in which every match of this expression is
 * replaced by `replacement`.  If the expression is not valid, `s` is
 * returned unchanged.
 *
 * A match of length zero (or a failing re_match) inserts the replacement
 * and then copies one input character, so the scan always advances; the
 * previous loop never moved past such a position and did not terminate.
 */
vespalib::string
Regexp::replace(vespalib::stringref s, vespalib::stringref replacement) const
{
    if ( ! valid() ) { return s; }
    regex_t *preg = const_cast<regex_t *>(static_cast<const regex_t *>(_data));
    vespalib::string modified;
    const int size = static_cast<int>(s.size());
    int prev(0);   // start of the part of s not yet copied to `modified`

    while (prev <= size) {
        // pos is relative to prev
        int pos = re_search(preg, s.data()+prev, size-prev, 0, size-prev, NULL);
        if (pos < 0) {
            break;
        }
        modified += s.substr(prev, pos);
        modified += replacement;
        int count = re_match(preg, s.data()+prev, size-prev, pos, NULL);
        if (count > 0) {
            prev += pos + count;
        } else if (prev + pos >= size) {
            // empty match at the very end: nothing left to scan
            prev = size;
            break;
        } else {
            // empty match: keep the character at the match position and
            // continue behind it
            modified += s.substr(prev + pos, 1);
            prev += pos + 1;
        }
    }
    modified += s.substr(prev);
    return modified;
}
/*
 * Tries the patterns of `table` in order against `source` and stops at the
 * first one for which re_match() returns a positive group count.
 *
 * len     - length of source; (size_t)-1 means "use strlen(source)"
 * ngroup  - receives the group count of the winning pattern, or 0
 * ovector - offset vector handed to re_match(), ovsize entries long
 *
 * Returns the index of the matching pattern, or -1 if none matched.
 */
int patable_match(struct patable *table, const char *source, size_t len,
                  int *ngroup, int *ovector, size_t ovsize)
{
  int idx = 0;

  if (len == (size_t)-1)
    len = strlen(source);

  while (idx < table->cur) {
    int groups;

    assert(table->pat[idx].re != 0);
    groups = re_match(table->pat[idx].re, table->pat[idx].ext,
                      source, len, ovector, ovsize);
    if (groups > 0) {
      *ngroup = groups;
      return idx;
    }
    idx++;
  }

  *ngroup = 0;
  return -1;
}
BOOL parse_set_unit( char* mIncoming ) { char expression = "^set unit (inch|meter|mm|feet)"; int match = re_match( 2, Captures, expression, mIncoming ); int set_unit = strcmp(mIncoming, "set unit "); char* ptr = mIncoming+0; if ( (strcmp(mIncoming, "meters")==0) ) { FiveMotorConfigData.units = meters; // enum eMeasuringUnit form_response( "ACK unit=meters" ); } if ( (strcmp(mIncoming, "mm")==0) ) { FiveMotorConfigData.units = millimeters; // enum eMeasuringUnit form_response( "ACK unit=mm" ); } if ( (strcmp(mIncoming, "feet")==0) ) { FiveMotorConfigData.units = feet; form_response( "ACK unit=feet" ); } if ( (strcmp(mIncoming, "inches")==0) ) { FiveMotorConfigData.units = inches; form_response( "ACK unit=inches" ); } return FALSE; }
BOOL parse_read_position( char* mIncoming ) { char expression = "read (position|speed|frequency)"; int match = re_match( 2, Captures, expression, mIncoming ); BOOL send_pos = strcmp(mIncoming, "read position "); BOOL send_speed = strcmp(mIncoming, "read speed "); BOOL send_base_frequency = strcmp(mIncoming, "read frequency "); if (send_pos) { send_positions(); return TRUE; } if (send_speed) { send_speeds(); return TRUE; } if (send_base_frequency) { send_speeds(); return TRUE; } return FALSE; }
/* * Matches text against the regular expression re and extracts the position * of the matching text. * If the text matches the pattern re, the pointers pointed to by beg and end * will be set to point to the begining and end of the matching substring in * text. */ int rx_search (const char *text, const char *re, const char **beg, const char **end) { struct mstruct m; m.flags = GREEDY; m.start = 0; m.end = 0; if (re_match (text, re, &m)) { if (beg) *beg = m.start; if (end) *end = m.end; return 1; } if (beg) *beg = 0; if (end) *end = 0; return 0; }
/*
 * Regression test for re_match() called with a non-zero start offset.
 *
 * Compiles "[abc]*d" and matches it against "foacabdxy" starting at
 * offset 2.  The call must return a match length of 5 and report register
 * 0 as the range 2..7.  Returns 0 on success and 1 on any failure.
 */
int
main (void)
{
  struct re_pattern_buffer regex;
  struct re_registers regs;
  const char *s;
  int match;
  int result = 0;

  regs.num_regs = 1;
  memset (&regex, '\0', sizeof (regex));
  s = re_compile_pattern ("[abc]*d", 7, &regex);
  if (s != NULL)
    {
      puts ("re_compile_pattern return non-NULL value");
      result = 1;
    }
  else
    {
      match = re_match (&regex, "foacabdxy", 9, 2, &regs);
      if (match != 5)
	{
	  printf ("re_match returned %d, expected 5\n", match);
	  result = 1;
	}
      else if (regs.start[0] != 2 || regs.end[0] != 7)
	{
	  printf ("re_match returned %d..%d, expected 2..7\n",
		  regs.start[0], regs.end[0]);
	  result = 1;
	}
      /* Only claim success when both checks passed. */
      if (result == 0)
	puts (" -> OK");
    }

  return result;
}
/*
 * Search `string` (length `size`) for a match of the compiled pattern
 * `bufp`, trying start positions from `pos` across `range` further
 * positions.  A negative `range` searches backwards from `pos`.
 *
 * Returns the position at which re_match() first succeeds, -1 if no
 * position matches, and -2 if an error is pending after building the
 * fastmap or re_match() itself returns -2.  `regs` is passed through to
 * re_match() unchanged.
 */
int re_search(regexp_t bufp, unsigned char *string, int size, int pos,
              int range, regexp_registers_t regs)
{
	unsigned char *fastmap;
	unsigned char *translate;
	unsigned char *text;
	unsigned char *partstart;
	unsigned char *partend;
	int dir;
	int ret;
	unsigned char anchor;

	assert(size >= 0 && pos >= 0);
	assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */

	fastmap = bufp->fastmap;
	translate = bufp->translate;
	/* Bring the fastmap up to date before it is consulted. */
	if (fastmap && !bufp->fastmap_accurate) {
		re_compile_fastmap(bufp);
		if (PyErr_Occurred()) return -2;
	}

	anchor = bufp->anchor;
	/* The fastmap is not used for skipping when can_be_null is 1. */
	if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
		fastmap = NULL;

	/* From here on `range` is a non-negative count and `dir` the step. */
	if (range < 0)
	{
		dir = -1;
		range = -range;
	}
	else
		dir = 1;

	/* With anchor == 2 only position 0 is ever tried. */
	if (anchor == 2) {
		if (pos != 0)
			return -1;
		else
			range = 0;
	}

	for (; range >= 0; range--, pos += dir)
	{
		if (fastmap)
		{
			/* Skip positions whose (translated) byte has no fastmap
			   entry, adjusting pos and range by the distance skipped. */
			if (dir == 1)
			{ /* searching forwards */

				text = string + pos;
				partend = string + size;
				partstart = text;
				if (translate)
					while (text != partend &&
					       !fastmap[(unsigned char) translate[(unsigned char)*text]])
						text++;
				else
					while (text != partend &&
					       !fastmap[(unsigned char)*text])
						text++;
				pos += text - partstart;
				range -= text - partstart;
				/* Reached the end and the pattern cannot match there. */
				if (pos == size && bufp->can_be_null == 0)
					return -1;
			}
			else
			{ /* searching backwards */
				text = string + pos;
				partstart = string + pos - range;
				partend = text;
				if (translate)
					while (text != partstart &&
					       !fastmap[(unsigned char)
					               translate[(unsigned char)*text]])
						text--;
				else
					while (text != partstart &&
					       !fastmap[(unsigned char)*text])
						text--;
				pos -= partend - text;
				range -= partend - text;
			}
		}
		if (anchor == 1)
		{ /* anchored to begline */
			/* Only position 0 or a position right after '\n' qualifies. */
			if (pos > 0 && (string[pos - 1] != '\n'))
				continue;
		}
		assert(pos >= 0 && pos <= size);
		ret = re_match(bufp, string, size, pos, regs);
		if (ret >= 0)
			return pos;
		if (ret == -2)
			return -2;
	}
	return -1;
}
/*
 * Splits a string at every match of a regular expression.
 *
 * params[0] - id of the pattern string
 * params[1] - id of the string to split
 * params[2] - address of an int array receiving the ids of the pieces
 * params[3] - capacity of that array
 *
 * Returns the number of pieces stored.  Nothing is stored when the pattern
 * does not compile or when the capacity is not positive.  Both input
 * strings are discarded before returning.
 */
static int modregex_split (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * str = string_get(params[1]);
    int * result_array = (int *)params[2];
    int result_array_size = params[3];
    int count = 0;
    int pos, lastpos = 0;
    int str_len = strlen(str);

    struct re_pattern_buffer pb;
    struct re_registers re;
    int start[16];
    int end[16];

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    pb.allocated = 4096;
    pb.fastmap = malloc(256);
    pb.regs_allocated = 16;
    re.num_regs = 16;
    re.start = start;
    re.end = end;

    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

    /* Match the regex.  A capacity of zero or less used to let the loop
       write past the result array, because the counter was only compared
       with == 0 after the first store. */

    if (result_array_size > 0 && re_compile_pattern (reg, strlen(reg), &pb) == 0)
    {
        for (;;)
        {
            pos = re_search (&pb, str, str_len, lastpos, str_len, &re);
            /* -1 is "no match", -2 an internal error: neither is a position */
            if (pos < 0) break;
            *result_array = string_newa (str + lastpos, pos-lastpos);
            string_use(*result_array);
            result_array++;
            count++;
            result_array_size--;
            if (result_array_size == 0) break;
            lastpos = pos + re_match (&pb, str, str_len, pos, 0);
            if (lastpos < pos) break;
            /* empty match: step forward so the search makes progress */
            if (lastpos == pos) lastpos++;
        }
        if (result_array_size > 0)
        {
            /* stepping over an empty match at the end can leave lastpos
               one past the terminator */
            if (lastpos > str_len) lastpos = str_len;
            *result_array = string_new (str + lastpos);
            string_use (*result_array);
            count++;
        }
    }

    /* Free the resources */
    free (pb.buffer);
    free (pb.fastmap);
    string_discard(params[0]);
    string_discard(params[1]);

    return count;
}
int main (void) { int result = 0; static struct re_pattern_buffer regex; unsigned char folded_chars[UCHAR_MAX + 1]; int i; const char *s; struct re_registers regs; #if HAVE_DECL_ALARM /* Some builds of glibc go into an infinite loop on this test. */ int alarm_value = 2; signal (SIGALRM, SIG_DFL); alarm (alarm_value); #endif if (setlocale (LC_ALL, "en_US.UTF-8")) { { /* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html This test needs valgrind to catch the bug on Debian GNU/Linux 3.1 x86, but it might catch the bug better on other platforms and it shouldn't hurt to try the test here. */ static char const pat[] = "insert into"; static char const data[] = "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK"; re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | RE_ICASE); memset (®ex, 0, sizeof regex); s = re_compile_pattern (pat, sizeof pat - 1, ®ex); if (s) result |= 1; else if (re_search (®ex, data, sizeof data - 1, 0, sizeof data - 1, ®s) != -1) result |= 1; } /* Check whether it's really a UTF-8 locale. On mingw, the setlocale call succeeds but returns "English_United States.1252", with locale_charset() returning "CP1252". */ if (strcmp (locale_charset (), "UTF-8") == 0) { /* This test is from glibc bug 15078. The test case is from Andreas Schwab in <http://www.sourceware.org/ml/libc-alpha/2013-01/msg00967.html>. */ static char const pat[] = "[^x]x"; static char const data[] = /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */ "\xe1\x80\x80" "\xe1\x80\xbb" "\xe1\x80\xbd" "\xe1\x80\x94" "\xe1\x80\xba" "\xe1\x80\xaf" "\xe1\x80\x95" "\xe1\x80\xba" "x"; re_set_syntax (0); memset (®ex, 0, sizeof regex); s = re_compile_pattern (pat, sizeof pat - 1, ®ex); if (s) result |= 1; else { i = re_search (®ex, data, sizeof data - 1, 0, sizeof data - 1, 0); if (i != 0 && i != 21) result |= 1; } } if (! setlocale (LC_ALL, "C")) return 1; } /* This test is from glibc bug 3957, reported by Andrew Mackey. 
*/ re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE); memset (®ex, 0, sizeof regex); s = re_compile_pattern ("a[^x]b", 6, ®ex); if (s) result |= 2; /* This should fail, but succeeds for glibc-2.5. */ else if (re_search (®ex, "a\nb", 3, 0, 3, ®s) != -1) result |= 2; /* This regular expression is from Spencer ere test number 75 in grep-2.3. */ re_set_syntax (RE_SYNTAX_POSIX_EGREP); memset (®ex, 0, sizeof regex); for (i = 0; i <= UCHAR_MAX; i++) folded_chars[i] = i; regex.translate = folded_chars; s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, ®ex); /* This should fail with _Invalid character class name_ error. */ if (!s) result |= 4; /* Ensure that [b-a] is diagnosed as invalid, when using RE_NO_EMPTY_RANGES. */ re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES); memset (®ex, 0, sizeof regex); s = re_compile_pattern ("a[b-a]", 6, ®ex); if (s == 0) result |= 8; /* This should succeed, but does not for glibc-2.1.3. */ memset (®ex, 0, sizeof regex); s = re_compile_pattern ("{1", 2, ®ex); if (s) result |= 8; /* The following example is derived from a problem report against gawk from Jorge Stolfi <*****@*****.**>. */ memset (®ex, 0, sizeof regex); s = re_compile_pattern ("[an\371]*n", 7, ®ex); if (s) result |= 8; /* This should match, but does not for glibc-2.2.1. */ else if (re_match (®ex, "an", 2, 0, ®s) != 2) result |= 8; memset (®ex, 0, sizeof regex); s = re_compile_pattern ("x", 1, ®ex); if (s) result |= 8; /* glibc-2.2.93 does not work with a negative RANGE argument. */ else if (re_search (®ex, "wxy", 3, 2, -2, ®s) != 1) result |= 8; /* The version of regex.c in older versions of gnulib ignored RE_ICASE. Detect that problem too. 
*/ re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE); memset (®ex, 0, sizeof regex); s = re_compile_pattern ("x", 1, ®ex); if (s) result |= 16; else if (re_search (®ex, "WXY", 3, 0, 3, ®s) < 0) result |= 16; /* Catch a bug reported by Vin Shelton in http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html */ re_set_syntax (RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES); memset (®ex, 0, sizeof regex); s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, ®ex); if (s) result |= 32; /* REG_STARTEND was added to glibc on 2004-01-15. Reject older versions. */ if (! REG_STARTEND) result |= 64; #if 0 /* It would be nice to reject hosts whose regoff_t values are too narrow (including glibc on hosts with 64-bit ptrdiff_t and 32-bit int), but we should wait until glibc implements this feature. Otherwise, support for equivalence classes and multibyte collation symbols would always be broken except when compiling --without-included-regex. */ if (sizeof (regoff_t) < sizeof (ptrdiff_t) || sizeof (regoff_t) < sizeof (ssize_t)) result |= 64; #endif return result; }
/*
 * Search the buffer BUF of SIZE bytes for a line (or, when START_PTR is
 * non-null, an exact substring) matched by the compiled patterns.
 *
 * The search is staged: the keyword set (if any) and the DFA locate a
 * candidate line cheaply, and only candidates the DFA flags as needing
 * back-references are re-checked with re_search/re_match on each entry of
 * `patterns`.
 *
 * With START_PTR null the result describes the whole matching line; with
 * START_PTR set it describes the leftmost, then longest, match found from
 * START_PTR onwards.
 *
 * Returns the offset of the match within BUF and stores its length in
 * *MATCH_SIZE, or returns (size_t) -1 if nothing matches.  Offset and
 * length are passed through mb_case_map_apply() before being returned.
 */
size_t
EGexecute (char const *buf, size_t size, size_t *match_size,
           char const *start_ptr)
{
  char const *buflim, *beg, *end, *match, *best_match, *mb_start;
  char eol = eolbyte;
  int backref;
  regoff_t start;
  size_t len, best_len;
  struct kwsmatch kwsm;
  size_t i, ret_val;
  mb_len_map_t *map = NULL;

  if (MB_CUR_MAX > 1)
    {
      if (match_icase)
        {
          /* mbtolower adds a NUL byte at the end.  That will provide
             space for the sentinel byte dfaexec may add.  */
          char *case_buf = mbtolower (buf, &size, &map);
          /* Keep START_PTR at the same offset in the lower-cased copy.  */
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
          buf = case_buf;
        }
    }

  mb_start = buf;
  buflim = buf + size;

  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!start_ptr)
        {
          /* We don't care about an exact match.  */
          if (kwset)
            {
              /* Find a possible match using the KWset matcher. */
              size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                goto failure;
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA. */
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              /* Remember where the keyword itself was found before BEG
                 is moved back to the start of the line.  */
              match = beg;
              while (beg > buf && beg[-1] != eol)
                --beg;
              if (kwsm.index < kwset_exact_matches)
                {
                  if (!MBS_SUPPORT)
                    goto success;

                  if (mb_start < beg)
                    mb_start = beg;
                  if (MB_CUR_MAX == 1
                      || !is_mb_middle (&mb_start, match, buflim,
                                        kwsm.size[0]))
                    goto success;
                }
              if (dfaexec (dfa, beg, (char *) end, 0, NULL, &backref) == NULL)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA. */
              char const *next_beg = dfaexec (dfa, beg, (char *) buflim,
                                              0, NULL, &backref);
              /* If there's no match, or if we've matched the sentinel,
                 we're done.  */
              if (next_beg == NULL || next_beg == buflim)
                break;
              /* Narrow down to the line we've found. */
              beg = next_beg;
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }

          /* Successful, no backreferences encountered! */
          if (!backref)
            goto success;
        }
      else
        {
          /* We are looking for the leftmost (then longest) exact match.
             We will go through the outer loop only once.  */
          beg = start_ptr;
          end = buflim;
        }

      /* If the "line" is longer than the maximum regexp offset,
         die as if we've run out of memory.  */
      if (TYPE_MAXIMUM (regoff_t) < end - buf - 1)
        xalloc_die ();

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex. */
      best_match = end;
      best_len = 0;
      for (i = 0; i < pcount; i++)
        {
          patterns[i].regexbuf.not_eol = 0;
          start = re_search (&(patterns[i].regexbuf),
                             buf, end - buf - 1,
                             beg - buf, end - beg - 1,
                             &(patterns[i].regs));
          /* Results below -1 are treated as fatal.  */
          if (start < -1)
            xalloc_die ();
          else if (0 <= start)
            {
              len = patterns[i].regs.end[0] - start;
              match = buf + start;
              /* A match further right than the best so far cannot win.  */
              if (match > best_match)
                continue;
              if (start_ptr && !match_words)
                goto assess_pattern_match;
              if ((!match_lines && !match_words)
                  || (match_lines && len == end - beg - 1))
                {
                  /* Report the whole line.  */
                  match = beg;
                  len = end - beg;
                  goto assess_pattern_match;
                }
              /* If -w, check if the match aligns with word boundaries.
                 We do this iteratively because:
                 (a) the line may contain more than one occurrence of the
                 pattern, and
                 (b) Several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one to
                 find a word boundary.  */
              if (match_words)
                while (match <= best_match)
                  {
                    regoff_t shorter_len = 0;
                    if ((match == buf || !WCHAR ((unsigned char) match[-1]))
                        && (start + len == end - buf - 1
                            || !WCHAR ((unsigned char) match[len])))
                      goto assess_pattern_match;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place. */
                        --len;
                        patterns[i].regexbuf.not_eol = 1;
                        shorter_len = re_match (&(patterns[i].regexbuf),
                                                buf, match + len - beg,
                                                match - buf,
                                                &(patterns[i].regs));
                        if (shorter_len < -1)
                          xalloc_die ();
                      }
                    if (0 < shorter_len)
                      len = shorter_len;
                    else
                      {
                        /* Try looking further on. */
                        if (match == end - 1)
                          break;
                        match++;
                        patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(patterns[i].regexbuf),
                                           buf, end - buf - 1,
                                           match - buf, end - match - 1,
                                           &(patterns[i].regs));
                        if (start < 0)
                          {
                            if (start < -1)
                              xalloc_die ();
                            break;
                          }
                        len = patterns[i].regs.end[0] - start;
                        match = buf + start;
                      }
                  } /* while (match <= best_match) */
              continue;
            assess_pattern_match:
              if (!start_ptr)
                {
                  /* Good enough for a non-exact match.
                     No need to look at further patterns, if any.  */
                  goto success;
                }
              if (match < best_match || (match == best_match && len > best_len))
                {
                  /* Best exact match:  leftmost, then longest.  */
                  best_match = match;
                  best_len = len;
                }
            } /* if re_search >= 0 */
        } /* for Regex patterns.  */
        if (best_match < end)
          {
            /* We have found an exact match.  We were just
               waiting for the best one (leftmost then longest).  */
            beg = best_match;
            len = best_len;
            goto success_in_len;
          }
    } /* for (beg = end ..) */

 failure:
  ret_val = -1;
  goto out;

 success:
  /* Whole-line result: the length runs from BEG to END.  */
  len = end - beg;
 success_in_len:;
  size_t off = beg - buf;
  mb_case_map_apply (map, &off, &len);
  *match_size = len;
  ret_val = off;
 out:
  return ret_val;
}
/*
 * Search BUF (BUF_SIZE bytes) with the compiled pattern set
 * COMPILED_PATTERN.
 *
 * When EXACT is false the search is staged: the keyword set (if any) and
 * the DFA locate a candidate line, and only candidates the DFA flags as
 * needing back-references are re-checked with re_search/re_match.  The
 * function then returns the offset of the matching line within BUF and
 * stores the line length in *MATCH_SIZE.
 *
 * When EXACT is true the regex matcher is applied to the buffer directly
 * and the function returns the start of the first match found, storing
 * the match length in *MATCH_SIZE.
 *
 * Returns (size_t) -1 if nothing matches.  The multibyte property table
 * built for the keyword stage is released on every return path (the EXACT
 * return used to leak it).
 */
static size_t
EGexecute (const void *compiled_pattern,
           const char *buf, size_t buf_size,
           size_t *match_size, bool exact)
{
  struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
  register const char *buflim, *beg, *end;
  char eol = cregex->eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  size_t i;
#ifdef MBS_SUPPORT
  char *mb_properties = NULL;
#endif /* MBS_SUPPORT */

#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && cregex->ckwset.kwset)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  buflim = buf + buf_size;

  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!exact)
        {
          if (cregex->ckwset.kwset)
            {
              /* Find a possible match using the KWset matcher. */
              size_t offset =
                kwsexec (cregex->ckwset.kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                {
#ifdef MBS_SUPPORT
                  if (MB_CUR_MAX > 1)
                    free (mb_properties);
#endif
                  return (size_t)-1;
                }
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA. */
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
#ifdef MBS_SUPPORT
              /* Skip candidates whose property-table entry is 0.  */
              if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
                continue;
#endif
              while (beg > buf && beg[-1] != eol)
                --beg;
              if (kwsm.index < cregex->kwset_exact_matches)
                goto success;
              if (dfaexec (&cregex->dfa, beg, end - beg, &backref) == (size_t) -1)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA. */
              size_t offset = dfaexec (&cregex->dfa, beg, buflim - beg, &backref);
              if (offset == (size_t) -1)
                break;
              /* Narrow down to the line we've found. */
              beg += offset;
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }
          /* Successful, no backreferences encountered! */
          if (!backref)
            goto success;
        }
      else
        end = beg + buf_size;

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex. */
      for (i = 0; i < cregex->pcount; i++)
        {
          cregex->patterns[i].regexbuf.not_eol = 0;
          if (0 <= (start = re_search (&(cregex->patterns[i].regexbuf), beg,
                                       end - beg - 1, 0,
                                       end - beg - 1,
                                       &(cregex->patterns[i].regs))))
            {
              len = cregex->patterns[i].regs.end[0] - start;
              if (exact)
                {
                  /* Release the property table on this path as well.  */
#ifdef MBS_SUPPORT
                  if (MB_CUR_MAX > 1 && mb_properties)
                    free (mb_properties);
#endif /* MBS_SUPPORT */
                  *match_size = len;
                  return start;
                }
              if ((!cregex->match_lines && !cregex->match_words)
                  || (cregex->match_lines && len == end - beg - 1))
                goto success;
              /* If -w, check if the match aligns with word boundaries.
                 We do this iteratively because:
                 (a) the line may contain more than one occurrence of the
                 pattern, and
                 (b) Several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one to
                 find a word boundary.  */
              if (cregex->match_words)
                while (start >= 0)
                  {
                    if ((start == 0
                         || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1]))
                        && (len == end - beg - 1
                            || !IS_WORD_CONSTITUENT ((unsigned char) beg[start + len])))
                      goto success;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place. */
                        --len;
                        cregex->patterns[i].regexbuf.not_eol = 1;
                        len = re_match (&(cregex->patterns[i].regexbuf), beg,
                                        start + len, start,
                                        &(cregex->patterns[i].regs));
                      }
                    if (len <= 0)
                      {
                        /* Try looking further on. */
                        if (start == end - beg - 1)
                          break;
                        ++start;
                        cregex->patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(cregex->patterns[i].regexbuf), beg,
                                           end - beg - 1,
                                           start, end - beg - 1 - start,
                                           &(cregex->patterns[i].regs));
                        len = cregex->patterns[i].regs.end[0] - start;
                      }
                  }
            }
        } /* for Regex patterns.  */
    } /* for (beg = end ..) */
#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && mb_properties)
    free (mb_properties);
#endif /* MBS_SUPPORT */
  return (size_t) -1;

 success:
#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && mb_properties)
    free (mb_properties);
#endif /* MBS_SUPPORT */
  *match_size = end - beg;
  return beg - buf;
}
string *grep(char *regexp, char *line, int num_vars) { struct re_pattern_buffer *rc; struct re_registers *p; const_string ok; string *vars = NULL; string *lookup; int i; if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { fprintf(stderr, "Grep\n\t%s\n\tin\n\t%s\n", regexp, line); } if (test_file('z', line)) return NULL; /* This will retrieve the precompiled regexp or compile it and remember it. vars contains the strings matched, num_vars the number of these strings. */ #if 0 if ((lookup = hash_lookup(symtab, regexp))) rc = (struct re_pattern_buffer *)lookup[0]; else rc = NULL; if (rc == NULL) { #endif /* Compile the regexp and stores the result */ if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { fprintf(stderr, "\tCompiling the regexp\n"); } re_syntax_options = RE_SYNTAX_POSIX_EGREP; rc = (struct re_pattern_buffer *) calloc(1, sizeof(struct re_pattern_buffer)); rc->regs_allocated = REGS_UNALLOCATED; if ((ok = re_compile_pattern(regexp, strlen(regexp), rc)) != 0) FATAL1("Can't compile regex %s\n", regexp); #if 0 hash_remove_all(symtab, regexp); hash_insert(symtab, regexp, (char *)rc); } else if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { fprintf(stderr, "\tAlready compiled\n"); } #endif p = (struct re_registers *) calloc(1, sizeof(struct re_registers)); p->num_regs = num_vars; if ((re_match(rc, line, strlen(line), 0, p)) > 0) { vars = (char **) xmalloc ((num_vars+1) * sizeof(char *)); for (i = 0; i <= num_vars; i++) { vars[i] = malloc((p->end[i] - p->start[i] + 1)*sizeof(char)); strncpy(vars[i], line+p->start[i], p->end[i] - p->start[i]); vars[i][p->end[i] - p->start[i]] = '\0'; } } free (p); if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) { if (vars) for(i = 0; i <= num_vars; i++) fprintf(stderr, "String %d matches %s\n", i, vars[i]); } return vars; }