Example #1
0
/*
 * Backward search primitive: attempt a match of [re] in [str] at the byte
 * offset [startpos], then at each earlier offset down to 0.
 * Returns re_alloc_groups(re, str) for the first offset where re_match
 * succeeds, or Atom(0) when none does.  Calls
 * caml_invalid_argument("Str.search_backward") when [startpos] falls
 * outside [0, length of str].
 */
CAMLprim value re_search_backward(value re, value str, value startpos)
{
  unsigned char * const first = &Byte_u(str, 0);
  unsigned char * const last = &Byte_u(str, caml_string_length(str));
  unsigned char * cur = &Byte_u(str, Long_val(startpos));
  unsigned char * candidates;

  if (cur < first || cur > last)
    caml_invalid_argument("Str.search_backward");

  if (Startchars(re) != -1) {
    /* A start-character table is available: offsets whose byte has a zero
       entry are skipped without calling re_match (offset 0 is always
       tried). */
    candidates =
      (unsigned char *) String_val(Field(Cpool(re), Startchars(re)));
    for (;;) {
      while (cur > first && candidates[*cur] == 0) cur--;
      if (re_match(re, first, cur, last, 0))
        return re_alloc_groups(re, str);
      cur--;
      if (cur < first) break;
    }
  } else {
    /* No table: try every offset in turn. */
    for (;;) {
      if (re_match(re, first, cur, last, 0))
        return re_alloc_groups(re, str);
      cur--;
      if (cur < first) break;
    }
  }
  return Atom(0);
}
Example #2
0
// Try to match element `re` (and whatever follows it via getNext()) against
// `candidate`, starting with re->maxReps() repetitions and backing off one
// repetition at a time down to re->minReps().
//
// Returns the furthest end position reached by any attempt (match_end), or 0
// if the element itself cannot be matched for some repetition count or no
// attempt produced an end position.  On return `matchbuf` is advanced to the
// write position left by the last match performed.
static const char *re_maxmatch(const FrRegExElt *re, const char *candidate,
			       char *&matchbuf, const char *matchbuf_end,
			       char **groups, size_t num_groups)
{
   char *match = matchbuf ;
   const char *match_end = 0 ;
   const char *end = candidate ;
   size_t max_reps = re->maxReps() ;
   size_t min_reps = re->minReps() ;
   size_t best_reps = 0 ;
   // max_reps doubles as the current upper bound on repetitions; it shrinks
   // by one per iteration until it would drop below min_reps (or hits zero).
   while (max_reps >= min_reps)
      {
      match = matchbuf ;		// every attempt restarts at the buffer start
      const char *split = re_match(re,candidate,min_reps,max_reps,
				   match,matchbuf_end,groups,num_groups) ;
      if (split)
	 {
	 // `split` is where this element stopped; continue with the
	 // remainder of the expression, if there is one
	 const FrRegExElt *next = re->getNext() ;
	 if (next)
	    end = re_match(next,split,match,matchbuf_end,groups,num_groups) ;
	 else if (*split == '\0')
	    {
	    best_reps = max_reps ;
	    match_end = split ;
	    break ;			// found a complete match
	    }
	 else
	    end = split ;
	 // keep the attempt that got furthest; stop early once the whole
	 // candidate string has been consumed
	 if (end && end > match_end)
	    {
	    match_end = end ;
	    best_reps = max_reps ;
	    if (*end == '\0')
	       break ;
	    }
	 }
      else
	 return 0 ;			// can't match!
      if (max_reps > 0)
	 max_reps-- ;			// try one less next time
      else
	 break ;
      }
   // The buffers currently reflect the LAST attempt; if that is not the
   // best one, redo the best attempt so `match` and `groups` correspond to it.
   if (best_reps > 0 && best_reps != max_reps)
      {
      // re-compute the best match if necessary
      match = matchbuf ;
      const char *split = re_match(re,candidate,best_reps,best_reps,
				   match,matchbuf_end,groups,num_groups) ;
      const FrRegExElt *next = re->getNext() ;
      if (split && next)
	 (void)re_match(next,split,match,matchbuf_end,groups,num_groups) ;
      }
   assertq(match >= matchbuf && match <= matchbuf_end) ;
   matchbuf = match ;			// report the new write position to the caller
   return match_end ;
}
Example #3
0
/*
 * Regex callback: compiles `needle` as a pattern and matches it against
 * `haystack` starting at offset 0.
 * Returns 1 when re_match() reports a positive result, 0 otherwise.
 * A pattern that fails to compile raises RB_REGEX_ERROR with the compiler's
 * message.  When RB_RUBY_19 is defined the function always returns 0.
 * `data` is not used.
 */
static int rbsigar_ptql_re_impl(void *data,
                                char *haystack, char *needle)
{
#ifdef RB_RUBY_19
    /* XXX no more regex.h */
    return 0;
#else
    const char *compile_error;
    int matched;
    int haystack_len = strlen(haystack);
    struct re_pattern_buffer *pattern = ALLOC(struct re_pattern_buffer);

    MEMZERO((char *)pattern, struct re_pattern_buffer, 1);
    /* XXX cache */
    compile_error = re_compile_pattern(needle, strlen(needle), pattern);
    if (compile_error) {
        /* release the buffer before raising */
        re_free_pattern(pattern);
        rb_raise(RB_REGEX_ERROR, "%s", compile_error);
        return 0;
    }

    matched = re_match(pattern, haystack, haystack_len, 0, NULL);
    re_free_pattern(pattern);
    return matched > 0;
#endif
}
Example #4
0
/*
**----------------------------------------------------------------------
** .Class:        TBRegexp
**
** .Method:       match
**
** .Description:  Determine the size of a pattern match
**
** .Parameters: const char*, s      , I, string that is searched
**              size_t     , pos    , I, start position of the search
**              size_t     , len    , I, range that is searched
**
** .Return value:   int, number of characters of the match
** ---------------------------------------------------------------------
**
** .Method description:
**   Stores s and pos in search_string_ / search_pos_ and returns the
**   result of re_match() on the compiled pattern buffer.
**-----------------------------------------------------------------
*/
int TBRegexp::match( const char* s, size_t pos, size_t len )
{
  search_pos_ = pos;
  search_string_ = const_cast<char*>( s );

  const int matched_chars = re_match( pattern_buf_, s, len, pos, registers_ );
  return matched_chars;
}
Example #5
0
//-----------------------------------------------------------------
// Counts the cells that hold s_find_what.
// Every column except the last one is scanned, column by column.
//  - string data: a cell counts when re_match(cell, s_find_what, use_regular)
//    is true;
//  - double data: s_find_what is parsed with strtod and a cell counts when it
//    differs from that value by less than 1E-10.
// Any other data_type yields 0.
int t_mep_data::find_symbol_from_all_variables(const char *s_find_what, bool use_regular)
{
	int hits = 0;

	if (data_type == MEP_DATA_STRING) {
		for (int column = 0; column < num_cols - 1; column++) {
			for (int row = 0; row < num_data; row++) {
				if (re_match(_data_string[row][column], s_find_what, use_regular))
					hits++;
			}
		}
		return hits;
	}

	if (data_type == MEP_DATA_DOUBLE) {
		// the search text is interpreted as a number
		char *parse_end;
		const double wanted = strtod(s_find_what, &parse_end);

		for (int column = 0; column < num_cols - 1; column++) {
			for (int row = 0; row < num_data; row++) {
				if (fabs(_data_double[row][column] - wanted) < 1E-10)
					hits++;
			}
		}
	}

	return hits;
}
Example #6
0
//-----------------------------------------------------------------
// Replaces every occurrence of s_find_what in column `col` with
// s_replace_with and returns how many cells were changed.
//  - string data: a cell is replaced when
//    re_match(cell, s_find_what, use_regular) is true;
//  - double data: both texts are parsed with strtod and a cell is replaced
//    when it differs from the parsed search value by less than 1E-10.
// _modified is set to true on every call, even when nothing was replaced.
int t_mep_data::replace_symbol_from_selected_col(const char *s_find_what, const char* s_replace_with, int col, bool use_regular)
{
	int replaced = 0;

	if (data_type == MEP_DATA_STRING) {
		for (int row = 0; row < num_data; row++) {
			if (!re_match(_data_string[row][col], s_find_what, use_regular))
				continue;
			// NOTE(review): strcpy assumes the existing cell buffer can hold
			// s_replace_with -- the allocation is not visible here; confirm.
			strcpy(_data_string[row][col], s_replace_with);
			replaced++;
		}
	}
	else if (data_type == MEP_DATA_DOUBLE) {
		// both texts are interpreted as numbers
		char *parse_end;
		const double wanted = strtod(s_find_what, &parse_end);
		const double substitute = strtod(s_replace_with, &parse_end);

		for (int row = 0; row < num_data; row++) {
			if (fabs(_data_double[row][col] - wanted) < 1E-10) {
				_data_double[row][col] = substitute;
				replaced++;
			}
		}
	}

	_modified = true;
	return replaced;
}
Example #7
0
int evaluate_fast_regex( struct fast_regex * fre_t, char * str, size_t len )
{
	char * sub ;
	struct _fregex * fre = (struct _fregex *)( fre_t->data ) ;

	if( fre->kwset )
	{
		struct kwsmatch kwsm ;
		sub = kwsexec( fre->kwset, (char *)str, len, &kwsm) ;
		if( sub == NULL )
			return 0 ;
		if( kwsm.index < fre->num_exact_kws )
		{
			return 1 ;
		}
	}

	if( HAS_DFA(fre_t->options) )
	{
		int backref = 0 ;
		sub = dfaexec( &(fre->dfa), str, (str+len), 0, NULL, &backref) ;
		if( sub == NULL )
			return 0 ;
		if ( !backref || (fre_t->options & FRE_NO_REGEX) )
			return 1 ;
	}

	return re_match( &fre->regex , str, len, 0, NULL ) > 0 ;
}
Example #8
0
/*
 * Matches text against the regular expression re.
 * It will return non-zero if text matches the pattern re, otherwise it 
 * returns 0.
 */
int
rx_match (const char *text, const char *re)
{
  struct mstruct m;
  m.flags = 0;

  return re_match (text, re, &m);
}
Example #9
0
File: regexp.c Project: mchf/augeas
/*
 * Match the compiled form of R against STRING (SIZE bytes) starting at
 * START, filling REGS.  The pattern is compiled on first use; if that
 * compilation fails (regexp_compile returns -1) the function returns -3.
 * Otherwise the result of re_match() is returned unchanged.
 */
int regexp_match(struct regexp *r,
                 const char *string, const int size,
                 const int start, struct re_registers *regs) {
    /* lazy compilation: only when no compiled pattern is attached yet */
    if (r->re == NULL && regexp_compile(r) == -1)
        return -3;

    return re_match(r->re, string, size, start, regs);
}
Example #10
0
// Match `word` against the compiled expression and, when the whole word is
// consumed, build the replacement string.
//
// The replacement template is copied literally, except that
//   FrRE_QUOTE + digit  inserts the text captured for that group, and
//   FrRE_QUOTE + other  inserts that other character literally.
// The result is truncated to FrMAX_SYMBOLNAME_LEN characters.
//
// Returns a newly allocated FrString on success, 0 when there is no regex,
// the word is null/empty, or the word does not match completely.
FrObject *FrRegExp::match(const char *word) const
{
   if (!regex || !word || !*word)
      return 0 ;
   char *groups[10] ;
   for (size_t i = 0 ; i < lengthof(groups) ; i++)
      groups[i] = 0 ;
   FrObject *result = 0 ;
   char *matchbuf = 0 ;
   const char *end = re_match(regex,word,matchbuf,0,groups,lengthof(groups)) ;
   if (end != 0 && !*end)
      {
      char translation[FrMAX_SYMBOLNAME_LEN+1] ;
      char *trans_end = &translation[FrMAX_SYMBOLNAME_LEN] ;
      char *xlat = translation ;
      const char *repl ;
      // invariant: xlat <= trans_end, so the final terminator always fits
      for (repl = replacement ; *repl && xlat < trans_end ; repl++)
         {
         char c = *repl ;
         if (c != FrRE_QUOTE)
            {
            *xlat++ = c ;
            continue ;
            }
         // escape-char plus digit specifies a replacement taken from the
         //   source match
         c = *++repl ;
         if (!c)
            break ;			// dangling escape at end of template
         if (Fr_isdigit(c))
            {
            const char *targ = groups[c-'0'] ;
            if (targ)
               {
               // never copy more than the space left in `translation`;
               //   the unbounded copy could overrun the stack buffer
               size_t len = strlen(targ) ;
               size_t room = (size_t)(trans_end - xlat) ;
               if (len > room)
                  len = room ;
               memcpy(xlat,targ,len) ;
               xlat += len ;
               }
            else
               FrWarningVA("mismatch in r.e. replacement: %%%c",c) ;
            }
         else
            {
            // emit the escaped character itself; advancing `repl` a second
            //   time here would drop it and could step past the terminator
            *xlat++ = c ;
            }
         }
      *xlat = '\0' ;
      result = new FrString(translation) ;
      }
   for (size_t j = 0 ; j < lengthof(groups) ; j++)
      if (groups[j]) FrFree(groups[j]) ;
   return result ;
}
Example #11
0
/*
 * Compile `regex` with the PCRE options described by `opts` and execute it
 * against `text`.
 *
 * Returns:
 *   CEPARAM   - text or regex is NULL
 *   CEOPTION  - parse_opts() rejected opts
 *   CERECOMP  - the pattern failed to compile
 *   CEREEXEC  - pcre_exec() returned a negative value (includes "no match")
 *   CESUCCESS - the pattern matched; the match is recorded through
 *               save_match_info(), or match_info.nvect is set to 0 when the
 *               offset vector was too small.
 */
re_err_t re_match(const char *text, const char *regex, const char *opts)
{
    pcre *re;
    const char *error;
    int erroffset;
    int options;
    int rc;
    int ovector[OVECT_SIZE];
    int offset;
    size_t text_len;

    if (text == NULL || regex == NULL)
        return CEPARAM;

    if ((options = parse_opts(opts)) < 0)
        return CEOPTION;

    if ((re = pcre_compile(regex, options, &error, &erroffset, NULL)) == NULL)
        return CERECOMP;

    /* measured once; reused for the exec call and the bounds check below */
    text_len = strlen(text);

    if ((rc = pcre_exec(re, NULL, text, (int)text_len, 0, 0,
                    ovector, NELEMS(ovector))) < 0) {
        pcre_free(re);
        return CEREEXEC;
    } else if (rc == 0) {
        /* it's just a warning (not an error) but captured substrings
         * may be lost because the ovector is too small */
        fprintf(stderr, "%s: warning: too many brackets used\n", __FUNCTION__);
        match_info.nvect = 0;
        pcre_free(re);
        return CESUCCESS; /* not an error */
    }

    /* the pattern matched and
     * at least one pair of offsets is set */
    save_match_info(text, rc, ovector);

    /* /g modifier
     * NOTE(review): the continuation runs when 'g' is ABSENT, which looks
     * inverted for a "global" flag -- confirm against save_match_info()
     * before changing it.  A NULL opts is treated as "no 'g'" instead of
     * being handed to strchr(). */
    if (opts == NULL || strchr(opts, 'g') == NULL) {
        /* step over an empty match so the next attempt makes progress */
        if (ovector[1] == ovector[0])
            offset = ovector[1] + 1;
        else
            offset = ovector[1];

        if ((size_t)offset <= text_len)
            re_match(text + offset, regex, opts);
    }

    pcre_free(re);
    return CESUCCESS;
}
Example #12
0
/*
 * Partial-match primitive: run re_match on [str] at byte offset [pos] with
 * the final flag set to 1.  Returns re_alloc_groups(re, str) when re_match
 * succeeds and Atom(0) otherwise.  Calls
 * caml_invalid_argument("Str.string_partial_match") when [pos] falls
 * outside [0, length of str].
 */
CAMLprim value re_partial_match(value re, value str, value pos)
{
  unsigned char * const first = &Byte_u(str, 0);
  unsigned char * const last = &Byte_u(str, caml_string_length(str));
  unsigned char * const from = &Byte_u(str, Long_val(pos));

  if (from < first || from > last)
    caml_invalid_argument("Str.string_partial_match");

  if (!re_match(re, first, from, last, 1))
    return Atom(0);
  return re_alloc_groups(re, str);
}
// Handles the "zero position" / "zero encoders" command: resets the encoder
// count of every motor selected by which_motors(mIncoming) and sends an
// acknowledgement.  Returns the raw strcmp() result computed below.
BOOL parse_zero_encoders( char* mIncoming )
{
	// Must be an array: a single `char` cannot be initialised from a string
	// literal.
	char expression[] = "zero (position|encoders) (v)?(w)?(x)?(y)?(z)?";
	int match = re_match( 7, Captures, expression, mIncoming );

	// NOTE(review): strcmp() returns 0 on equality, so this flag is non-zero
	// whenever the input differs from "zero position " -- confirm the intent.
	BOOL is_speed_command = strcmp(mIncoming, "zero position ");
	// Motor selection derived from the command text, as the sibling parsers
	// do; `which` was previously used here without a local declaration.
	byte which = which_motors(mIncoming);

	for (int b=0; b<NUM_MOTORS; b++)
	{
		if (is_in_set(which, b) )
			Encoders[b].Count=0;
	}
	form_response( "ACK zero positioned" );
	return is_speed_command;
}
// Handles the "home" command: drives every motor selected by
// which_motors(mIncoming) at HOMING_SPEED and sends an acknowledgement.
// Returns the raw strcmp() result computed below.
BOOL parse_home_command( char* mIncoming )
{
	// Must be an array: a single `char` cannot be initialised from a string
	// literal.
	char expression[] = "home ([vV])? ([wW])? ([xX])? ([yY])? ([zZ])?";
	// NOTE(review): the pattern has five groups but 2 is passed as the first
	// argument -- confirm what re_match() expects there.
	int match = re_match( 2, Captures, expression, mIncoming );

	// NOTE(review): strcmp() returns 0 on equality, so this flag is non-zero
	// whenever the input differs from "HOME:" -- confirm the intent.
	BOOL is_speed_command = strcmp(mIncoming, "HOME:");
	byte which = which_motors(mIncoming);
	for (int b=0; b<NUM_MOTORS; b++)
	{
		if (is_in_set(which, b) )
			set_motor_duty( b, HOMING_SPEED );
	}
	form_response("ACK: homing...");
	return is_speed_command;
}
// Handles the "use encoder" / "use potentiometer" command: records the
// chosen feedback source in FiveMotorConfigData.use_encoder (1 = encoder,
// 0 = potentiometer) and sends an acknowledgement.  Any other text is
// ignored.  Always returns FALSE.
BOOL parse_use_encoder( char* mIncoming )
{
	char expression[] = "^use (encoder|potentiometer)";
	int match = re_match( 2, Captures, expression, mIncoming );

	if (strcmp(mIncoming, "use encoder") == 0) {
			FiveMotorConfigData.use_encoder = 1;
			form_response("ACK: use encoder");
	// The literal was misspelled ("potientiometer"), so it could never equal
	// a command accepted by the pattern above.
	} else if (strcmp(mIncoming, "use potentiometer") == 0) {
			FiveMotorConfigData.use_encoder = 0;
			form_response("ACK using potentiometer");
	}
	return FALSE;
}
// Handles the "measure travel" command: drives every motor selected by
// which_motors(mIncoming) at HOMING_SPEED and acknowledges immediately.
// Returns the raw strcmp() result computed below.
BOOL parse_measure_travel( char* mIncoming )
{
	// Must be an array: a single `char` cannot be initialised from a string
	// literal.
	char expression[] = "measure travel";
	int match = re_match( 2, Captures, expression, mIncoming );

	// NOTE(review): strcmp() returns 0 on equality, so this flag is non-zero
	// whenever the input differs from "measure travel" -- confirm the intent.
	BOOL is_speed_command = strcmp(mIncoming, "measure travel");
	byte which = which_motors(mIncoming);
	for (int b=0; b<NUM_MOTORS; b++)
	{
		if (is_in_set(which, b) )
			set_motor_duty( b, HOMING_SPEED );
	}
	// Don't wait until further limit switches triggered, just ack the cmd:
	form_response( "ACK measuring travel..." );
	return is_speed_command;
}
Example #17
0
int main(int argc, char *argv[])
{
    re_err_t ret;
    char buf[1024];
    char *result;

    if (parse_args(argc, argv) != 0) {
        usage();
        exit(1);
    }

    //fprintf(stderr, "-------  Execute  -------\n");

    if (is_match) {
        if ((ret = re_match(text, regex, options)) != CESUCCESS) {
            fprintf(stderr, "Not match: %s\n", re_strerr(ret));
            exit(1);
        }

        if (cap_idx >= 0) {
            if ((ret = re_capstr(cap_idx, buf, sizeof(buf))) != CESUCCESS) {
                fprintf(stderr, "re_capstr(): %s\n", re_strerr(ret));
                exit(1);
            }

            printf("[%d]: %s\n", cap_idx, buf);
        } else {
            if ((ret = re_capstr(0, buf, sizeof(buf))) != CESUCCESS) {
                fprintf(stderr, "re_capstr(): %s\n", re_strerr(ret));
                exit(1);
            }

            printf("%s\n", buf);
        }
    } else {
        if ((ret = re_subs(text, regex, replace, options, &result)) != CESUCCESS) {
            fprintf(stderr, "Fail to replace: %s\n", re_strerr(ret));
            exit(1);
        }

        //printf("Replaced!\n");
        printf("%s\n", result);
        free(result);
    }

    exit(0);
}
Example #18
0
/*
 * applies regular expression pattern to contents of the directory
 *
 * for non-directory entries whose name matches, the fully qualified
 * pathname ("dir/name") is duplicated and inserted into the treeset
 *
 * returns 1 on success, 0 if the directory cannot be opened, a pathname
 * does not fit the local buffer, memory runs out, or ts_add() fails
 */
static int applyRe(char *dir, RegExp *reg, TreeSet *ts) {
   DIR *dd;
   struct dirent *dent;
   int status = 1;

   /*
    * open the directory
    */
   if ((dd = opendir(dir)) == NULL) {
      fprintf(stderr, "Error opening directory `%s'\n", dir);
      return 0;
   }
   /*
    * for each entry in the directory
    */
   while (status && (dent = readdir(dd)) != NULL) {
      char b[4096], *sp;
      int n;

      if (strcmp(".", dent->d_name) == 0 || strcmp("..", dent->d_name) == 0)
         continue;
      /*
       * d_type is an enumeration, not a bit mask: testing it with
       * `& DT_DIR' also fires for other types whose value shares that bit
       * (e.g. DT_BLK, DT_SOCK on Linux), so compare for equality
       */
      if (dent->d_type == DT_DIR)
         continue;
      /*
       * see if filename matches regular expression
       */
      if (! re_match(reg, dent->d_name))
         continue;
      /*
       * build the pathname with a bounded write; a result that does not
       * fit is reported instead of overrunning the buffer
       */
      n = snprintf(b, sizeof b, "%s/%s", dir, dent->d_name);
      if (n < 0 || (size_t)n >= sizeof b) {
         fprintf(stderr, "Error adding `%s/%s' to tree set\n",
                 dir, dent->d_name);
         status = 0;
         break;
      }
      /*
       * duplicate fully qualified pathname for insertion into treeset
       */
      sp = strdup(b);
      if (sp == NULL) {
         fprintf(stderr, "Error adding `%s' to tree set\n", b);
         status = 0;
         break;
      }
      if (!ts_add(ts, sp)) {
         fprintf(stderr, "Error adding `%s' to tree set\n", sp);
         free(sp);
         status = 0;
         break;
      }
   }
   (void) closedir(dd);
   return status;
}
Example #19
0
/**
 * Returns a copy of @p s in which every match of this expression is
 * replaced by @p replacement.  If the expression is not valid, @p s is
 * returned unchanged.
 *
 * An empty match inserts the replacement and then copies one input
 * character through, so the scan always advances.  If the matcher reports
 * an error for a position the search just found, the remaining input is
 * appended unmodified.
 */
vespalib::string Regexp::replace(vespalib::stringref s, vespalib::stringref replacement) const
{
    if ( ! valid() ) { return s; }
    regex_t *preg = const_cast<regex_t *>(static_cast<const regex_t *>(_data));
    vespalib::string modified;
    int prev(0);   // offset in s of the first byte not yet copied to modified
    // pos is always relative to s.data()+prev
    for(int pos(re_search(preg, s.data(), s.size(), 0, s.size(), NULL));
        pos >=0;
        pos = re_search(preg, s.data()+prev, s.size()-prev, 0, s.size()-prev, NULL))
    {
        // length of the match that starts at pos
        const int count = re_match(preg, s.data()+prev, s.size()-prev, pos, NULL);
        if (count < 0) {
            // matcher failure: do not fold a negative length into prev
            break;
        }
        modified += s.substr(prev, pos);
        modified += replacement;
        prev += pos + count;
        if (count == 0) {
            // Empty match: without stepping forward the next search would
            // find the same position again and loop forever.
            if (static_cast<size_t>(prev) >= s.size()) {
                break;
            }
            modified += s.substr(prev, 1);
            ++prev;
        }
    }
    modified += s.substr(prev);
    return modified;
}
Example #20
0
/*
 * Tries the patterns of `table` in order against `source`.
 * A `len` of (size_t)-1 means "use strlen(source)".
 * Returns the index of the first pattern for which re_match() reports a
 * positive group count and stores that count in *ngroup; when no pattern
 * matches, stores 0 in *ngroup and returns -1.
 */
int
patable_match(struct patable *table, const char *source, size_t len,
              int *ngroup, int *ovector, size_t ovsize)
{
  int idx = 0;

  if (len == (size_t)-1)
    len = strlen(source);

  while (idx < table->cur) {
    int groups;

    assert(table->pat[idx].re != 0);
    groups = re_match(table->pat[idx].re, table->pat[idx].ext, source, len,
                      ovector, ovsize);
    if (groups > 0) {
      *ngroup = groups;
      return idx;
    }
    idx++;
  }

  *ngroup = 0;
  return -1;
}
// Handles the "set unit" command: stores the selected measuring unit in
// FiveMotorConfigData.units and sends an acknowledgement.  Text that equals
// none of the unit names is ignored.  Always returns FALSE.
BOOL parse_set_unit( char* mIncoming )
{
	// Must be an array: a single `char` cannot be initialised from a string
	// literal.
	char expression[] = "^set unit (inch|meter|mm|feet)";
	int match = re_match( 2, Captures, expression, mIncoming );

	// NOTE(review): the comparisons below test the WHOLE command against the
	// bare unit name, and use "meters"/"inches" where the pattern says
	// "meter"/"inch" -- presumably the text after "set unit " (or a capture)
	// was meant; confirm before relying on this.
	if ( (strcmp(mIncoming, "meters")==0) )	{
		FiveMotorConfigData.units  = meters;   		// enum eMeasuringUnit
		form_response( "ACK unit=meters" );
	} else if ( (strcmp(mIncoming, "mm")==0) )	{
		FiveMotorConfigData.units  = millimeters;   // enum eMeasuringUnit
		form_response( "ACK unit=mm" );
	} else if ( (strcmp(mIncoming, "feet")==0) )	 {
		FiveMotorConfigData.units  = feet;
		form_response( "ACK unit=feet" );
	} else if ( (strcmp(mIncoming, "inches")==0) )	{
		FiveMotorConfigData.units  = inches;
		form_response( "ACK unit=inches" );
	}
	return FALSE;
}
// Handles the "read position|speed|frequency" command by sending the
// requested values.  Returns TRUE when something was sent, FALSE otherwise.
BOOL parse_read_position( char* mIncoming )
{
	// Must be an array: a single `char` cannot be initialised from a string
	// literal.
	char expression[] = "read (position|speed|frequency)";
	int match = re_match( 2, Captures, expression, mIncoming );

	// NOTE(review): strcmp() returns 0 on equality, so each flag below is
	// non-zero when the input DIFFERS from the literal -- as written the
	// first branch fires for almost any input.  Confirm the intended test.
	BOOL send_pos   = strcmp(mIncoming, "read position ");
	BOOL send_speed = strcmp(mIncoming, "read speed ");
	BOOL send_base_frequency = strcmp(mIncoming, "read frequency ");
	if (send_pos)
	{
		send_positions();
		return TRUE;
	}
	if (send_speed)
	{
		send_speeds();
		return TRUE;
	}
	if (send_base_frequency)
	{
		// NOTE(review): the frequency request also calls send_speeds() --
		// confirm whether a dedicated sender was intended.
		send_speeds();
		return TRUE;
	}
	return FALSE;
}
Example #23
0
/*
 *	Matches text against the regular expression re (with the GREEDY flag)
 *	and reports where the match lies.
 *	On a match the function returns 1 and, for each of beg / end that is
 *	non-null, stores the start / end recorded by the matcher.
 *	Without a match it returns 0 and stores null pointers instead.
 */
int
rx_search (const char *text, const char *re, const char **beg,
           const char **end)
{
  struct mstruct m;
  const char *match_beg = 0;
  const char *match_end = 0;
  int found = 0;

  m.flags = GREEDY;
  m.start = 0;
  m.end = 0;

  if (re_match (text, re, &m))
    {
      found = 1;
      match_beg = m.start;
      match_end = m.end;
    }

  /* both out-parameters are optional */
  if (beg)
    *beg = match_beg;
  if (end)
    *end = match_end;
  return found;
}
Example #24
0
/* Regression check for re_match() with a non-zero start offset: the
   pattern "[abc]*d" is matched against "foacabdxy" starting at index 2.
   The expected result is a match length of 5 with register 0 spanning
   2..7.  Returns 0 on success and 1 on any failure.  */
int
main (void)
{
  struct re_pattern_buffer regex;
  struct re_registers regs;
  const char *s;
  int match;
  int result = 0;

  regs.num_regs = 1;
  memset (&regex, '\0', sizeof (regex));
  s = re_compile_pattern ("[abc]*d", 7, &regex);
  if (s != NULL)
    {
      puts ("re_compile_pattern return non-NULL value");
      result = 1;
    }
  else
    {
      match = re_match (&regex, "foacabdxy", 9, 2, &regs);
      if (match != 5)
	{
	  printf ("re_match returned %d, expected 5\n", match);
	  result = 1;
	}
      else if (regs.start[0] != 2 || regs.end[0] != 7)
	{
	  printf ("re_match returned %d..%d, expected 2..7\n",
		  regs.start[0], regs.end[0]);
	  result = 1;
	}
      else
	{
	  /* Report success only when both checks passed; printing it
	     unconditionally made failing runs end with " -> OK" too.  */
	  puts (" -> OK");
	}
    }

  return result;
}
Example #25
0
/*
 * Search `string` (of `size` bytes) for a position where the compiled
 * pattern `bufp` matches, starting at `pos` and trying up to |range|
 * further positions: forwards when range >= 0, backwards when range < 0.
 *
 * Returns the position of the first successful re_match(), -1 when no
 * position matches, and -2 when re_match() returns -2 or a Python error
 * is pending after compiling the fastmap.
 */
int re_search(regexp_t bufp, unsigned char *string, int size, int pos,
              int range, regexp_registers_t regs)
{
	unsigned char *fastmap;
	unsigned char *translate;
	unsigned char *text;
	unsigned char *partstart;
	unsigned char *partend;
	int dir;
	int ret;
	unsigned char anchor;
  
	assert(size >= 0 && pos >= 0);
	assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
  
	fastmap = bufp->fastmap;
	translate = bufp->translate;
	/* build the fastmap on demand if one is attached but stale */
	if (fastmap && !bufp->fastmap_accurate) {
                re_compile_fastmap(bufp);
	        if (PyErr_Occurred()) return -2;
	}
	
	anchor = bufp->anchor;
	if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
		fastmap = NULL;

	/* normalise to a non-negative range plus a direction */
	if (range < 0)
	{
		dir = -1;
		range = -range;
	}
	else
		dir = 1;

	/* anchor == 2: only position 0 is ever attempted */
	if (anchor == 2) {
		if (pos != 0)
			return -1;
		else
			range = 0;
	}

	for (; range >= 0; range--, pos += dir)
	{
		if (fastmap)
		{
			/* skip positions whose (translated) byte has a zero
			   fastmap entry, adjusting pos and range by the
			   distance skipped */
			if (dir == 1)
			{ /* searching forwards */

				text = string + pos;
				partend = string + size;
				partstart = text;
				if (translate)
					while (text != partend &&
					       !fastmap[(unsigned char) translate[(unsigned char)*text]])
						text++;
				else
					while (text != partend && !fastmap[(unsigned char)*text])
						text++;
				pos += text - partstart;
				range -= text - partstart;
				/* ran off the end and the pattern cannot match
				   the empty string: give up */
				if (pos == size && bufp->can_be_null == 0)
					return -1;
			}
			else
			{ /* searching backwards */
				text = string + pos;
				partstart = string + pos - range;
				partend = text;
				if (translate)
					while (text != partstart &&
					       !fastmap[(unsigned char)
						       translate[(unsigned char)*text]])
						text--;
				else
					while (text != partstart &&
					       !fastmap[(unsigned char)*text])
						text--;
				pos -= partend - text;
				range -= partend - text;
			}
		}
		if (anchor == 1)
		{ /* anchored to begline */
			/* only position 0 or a position right after '\n'
			   is tried */
			if (pos > 0 && (string[pos - 1] != '\n'))
				continue;
		}
		assert(pos >= 0 && pos <= size);
		ret = re_match(bufp, string, size, pos, regs);
		if (ret >= 0)
			return pos;
		/* -2 from re_match is passed straight to the caller */
		if (ret == -2)
			return -2;
	}
	return -1;
}
Example #26
0
/*
 * Split the string params[1] at every match of the regular expression
 * params[0].  The pieces are created as new strings whose ids are written
 * to the int array addressed by params[2]; at most params[3] entries are
 * written.  Returns the number of entries written.
 *
 * Nothing is written (and 0 is returned) when the array size is not
 * positive, the work buffers cannot be allocated, or the pattern does not
 * compile.  Both input strings are discarded before returning.
 */
static int modregex_split (INSTANCE * my, int * params)
{
    const char * reg = string_get(params[0]);
    const char * str = string_get(params[1]);
    int * result_array = (int *)params[2];
    int result_array_size = params[3];
    int count = 0;
    int pos, lastpos = 0;
    int len = (int) strlen(str);   /* measured once instead of per call */

    struct re_pattern_buffer pb;
    struct re_registers re;
    int start[16];
    int end[16];

    /* Alloc the pattern resources */

    memset (&pb, 0, sizeof(pb));
    memset (&re, 0, sizeof(re));
    pb.buffer = malloc(4096);
    pb.allocated = 4096;
    pb.fastmap = malloc(256);
    pb.regs_allocated = 16;
    re.num_regs = 16;
    re.start = start;
    re.end = end;

    re_syntax_options = RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

    /* Match the regex */

    /* A non-positive capacity must not write anything: the loop below
       stores an entry before it checks the remaining room. */
    if (result_array_size > 0 && pb.buffer != NULL && pb.fastmap != NULL &&
        re_compile_pattern (reg, strlen(reg), &pb) == 0)
    {
        for (;;)
        {
            pos = re_search (&pb, str, len, lastpos, len, &re);
            /* any negative result (no match or matcher failure) ends the
               split; only testing for -1 would use a failure code as a
               position */
            if (pos < 0) break;
            *result_array = string_newa (str + lastpos, pos-lastpos);
            string_use(*result_array);
            result_array++;
            count++;
            result_array_size--;
            if (result_array_size == 0) break;
            lastpos = pos + re_match (&pb, str, len, pos, 0);
            if (lastpos < pos) break;
            /* step over an empty match so the search makes progress */
            if (lastpos == pos) lastpos++;
            /* ...but never past the terminator, or the tail piece below
               would be read from outside the string */
            if (lastpos > len)
            {
                lastpos = len;
                break;
            }
        }
        if (result_array_size > 0)
        {
            *result_array = string_new (str + lastpos);
            string_use (*result_array);
            count++;
        }
    }

    /* Free the resources */
    free (pb.buffer);
    free (pb.fastmap);
    string_discard(params[0]);
    string_discard(params[1]);

    return count;
}
Example #27
0
/* Probe program for the regex implementation: runs a series of known
   regression cases and returns a bit mask describing the failures
   (0 means every probe passed).  Bits used below:
     1  - probes run under the en_US.UTF-8 locale
     2  - "a[^x]b" must not match across a newline
     4  - invalid character class name must be rejected
     8  - empty range, "{1", "[an\371]*n", negative RANGE probes
     16 - RE_ICASE must be honoured
     32 - "[[:alnum:]_-]\\\\+$" must compile
     64 - REG_STARTEND must be available
   A plain 1 is returned if the locale cannot be switched back to "C".  */
int
main (void)
{
  int result = 0;
  static struct re_pattern_buffer regex;
  unsigned char folded_chars[UCHAR_MAX + 1];
  int i;
  const char *s;
  struct re_registers regs;

#if HAVE_DECL_ALARM
  /* Some builds of glibc go into an infinite loop on this test.  */
  int alarm_value = 2;
  signal (SIGALRM, SIG_DFL);
  alarm (alarm_value);
#endif
  /* The locale-dependent probes only run when the UTF-8 locale exists.  */
  if (setlocale (LC_ALL, "en_US.UTF-8"))
    {
      {
        /* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
           This test needs valgrind to catch the bug on Debian
           GNU/Linux 3.1 x86, but it might catch the bug better
           on other platforms and it shouldn't hurt to try the
           test here.  */
        static char const pat[] = "insert into";
        static char const data[] =
          "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
        re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
                       | RE_ICASE);
        memset (&regex, 0, sizeof regex);
        s = re_compile_pattern (pat, sizeof pat - 1, &regex);
        if (s)
          result |= 1;
        else if (re_search (&regex, data, sizeof data - 1,
                            0, sizeof data - 1, &regs)
                 != -1)
          result |= 1;
      }

      /* Check whether it's really a UTF-8 locale.
         On mingw, the setlocale call succeeds but returns
         "English_United States.1252", with locale_charset() returning
         "CP1252".  */
      if (strcmp (locale_charset (), "UTF-8") == 0)
        {
          /* This test is from glibc bug 15078.
             The test case is from Andreas Schwab in
             <http://www.sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
          */
          static char const pat[] = "[^x]x";
          static char const data[] =
            /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
            "\xe1\x80\x80"
            "\xe1\x80\xbb"
            "\xe1\x80\xbd"
            "\xe1\x80\x94"
            "\xe1\x80\xba"
            "\xe1\x80\xaf"
            "\xe1\x80\x95"
            "\xe1\x80\xba"
            "x";
          re_set_syntax (0);
          memset (&regex, 0, sizeof regex);
          s = re_compile_pattern (pat, sizeof pat - 1, &regex);
          if (s)
            result |= 1;
          else
            {
              i = re_search (&regex, data, sizeof data - 1,
                             0, sizeof data - 1, 0);
              /* both 0 and 21 are accepted as the match position */
              if (i != 0 && i != 21)
                result |= 1;
            }
        }

      /* Return to the "C" locale for the remaining probes.  */
      if (! setlocale (LC_ALL, "C"))
        return 1;
    }

  /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
  re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("a[^x]b", 6, &regex);
  if (s)
    result |= 2;
  /* This should fail, but succeeds for glibc-2.5.  */
  else if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
    result |= 2;

  /* This regular expression is from Spencer ere test number 75
     in grep-2.3.  */
  re_set_syntax (RE_SYNTAX_POSIX_EGREP);
  memset (&regex, 0, sizeof regex);
  /* identity translation table */
  for (i = 0; i <= UCHAR_MAX; i++)
    folded_chars[i] = i;
  regex.translate = folded_chars;
  s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
  /* This should fail with _Invalid character class name_ error.  */
  if (!s)
    result |= 4;

  /* Ensure that [b-a] is diagnosed as invalid, when
     using RE_NO_EMPTY_RANGES. */
  re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("a[b-a]", 6, &regex);
  if (s == 0)
    result |= 8;

  /* This should succeed, but does not for glibc-2.1.3.  */
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("{1", 2, &regex);
  if (s)
    result |= 8;

  /* The following example is derived from a problem report
     against gawk from Jorge Stolfi <*****@*****.**>.  */
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("[an\371]*n", 7, &regex);
  if (s)
    result |= 8;
  /* This should match, but does not for glibc-2.2.1.  */
  else if (re_match (&regex, "an", 2, 0, &regs) != 2)
    result |= 8;

  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("x", 1, &regex);
  if (s)
    result |= 8;
  /* glibc-2.2.93 does not work with a negative RANGE argument.  */
  else if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
    result |= 8;

  /* The version of regex.c in older versions of gnulib
     ignored RE_ICASE.  Detect that problem too.  */
  re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("x", 1, &regex);
  if (s)
    result |= 16;
  else if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
    result |= 16;

  /* Catch a bug reported by Vin Shelton in
     http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html
     */
  re_set_syntax (RE_SYNTAX_POSIX_BASIC
                 & ~RE_CONTEXT_INVALID_DUP
                 & ~RE_NO_EMPTY_RANGES);
  memset (&regex, 0, sizeof regex);
  s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
  if (s)
    result |= 32;

  /* REG_STARTEND was added to glibc on 2004-01-15.
     Reject older versions.  */
  if (! REG_STARTEND)
    result |= 64;

#if 0
  /* It would be nice to reject hosts whose regoff_t values are too
     narrow (including glibc on hosts with 64-bit ptrdiff_t and
     32-bit int), but we should wait until glibc implements this
     feature.  Otherwise, support for equivalence classes and
     multibyte collation symbols would always be broken except
     when compiling --without-included-regex.   */
  if (sizeof (regoff_t) < sizeof (ptrdiff_t)
      || sizeof (regoff_t) < sizeof (ssize_t))
    result |= 64;
#endif

  return result;
}
Example #28
0
/* Search the SIZE bytes at BUF for text matching the compiled patterns.

   When START_PTR is null, look for any line containing a match and
   report that whole line.  When START_PTR is non-null, start at
   START_PTR and report the leftmost, then longest, exact match.

   On success, return the offset of the reported text from BUF and
   store its length in *MATCH_SIZE.  On failure, return (size_t) -1
   and leave *MATCH_SIZE untouched.

   Besides its parameters the function reads names not declared here
   (eolbyte, match_icase, match_lines, match_words, kwset,
   kwset_exact_matches, dfa, patterns, pcount); presumably these are
   file-scope state set up when the patterns were compiled -- confirm
   against the compile routine.  */
size_t
EGexecute (char const *buf, size_t size, size_t *match_size,
           char const *start_ptr)
{
  char const *buflim, *beg, *end, *match, *best_match, *mb_start;
  char eol = eolbyte;
  int backref;
  regoff_t start;
  size_t len, best_len;
  struct kwsmatch kwsm;
  size_t i, ret_val;
  mb_len_map_t *map = NULL;

  if (MB_CUR_MAX > 1)
    {
      if (match_icase)
        {
          /* mbtolower adds a NUL byte at the end.  That will provide
             space for the sentinel byte dfaexec may add.  */
          char *case_buf = mbtolower (buf, &size, &map);
          /* Keep START_PTR at the same offset within the replacement
             buffer.  */
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
          buf = case_buf;
        }
    }

  mb_start = buf;
  buflim = buf + size;

  /* Each iteration examines one candidate line [beg, end); the next
     iteration resumes at the end of that line.  */
  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!start_ptr)
        {
          /* We don't care about an exact match.  */
          if (kwset)
            {
              /* Find a possible match using the KWset matcher. */
              size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                goto failure;
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA. */
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              /* Remember where the keyword hit before BEG is moved
                 back to the start of its line.  */
              match = beg;
              while (beg > buf && beg[-1] != eol)
                --beg;
              /* NOTE(review): keywords with an index below
                 kwset_exact_matches appear to be accepted without
                 running the DFA or regex -- confirm how that count is
                 established.  */
              if (kwsm.index < kwset_exact_matches)
                {
                  if (!MBS_SUPPORT)
                    goto success;

                  if (mb_start < beg)
                    mb_start = beg;
                  if (MB_CUR_MAX == 1
                      || !is_mb_middle (&mb_start, match, buflim,
                                        kwsm.size[0]))
                    goto success;
                }
              if (dfaexec (dfa, beg, (char *) end, 0, NULL, &backref) == NULL)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA. */
              char const *next_beg = dfaexec (dfa, beg, (char *) buflim,
                                              0, NULL, &backref);
              /* If there's no match, or if we've matched the sentinel,
                 we're done.  */
              if (next_beg == NULL || next_beg == buflim)
                break;
              /* Narrow down to the line we've found. */
              beg = next_beg;
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }
          /* Successful, no backreferences encountered! */
          if (!backref)
            goto success;
        }
      else
        {
          /* We are looking for the leftmost (then longest) exact match.
             We will go through the outer loop only once.  */
          beg = start_ptr;
          end = buflim;
        }

      /* If the "line" is longer than the maximum regexp offset,
         die as if we've run out of memory.  */
      if (TYPE_MAXIMUM (regoff_t) < end - buf - 1)
        xalloc_die ();

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex. */
      /* BEST_MATCH == END means "nothing found yet"; see the test
         after the pattern loop.  */
      best_match = end;
      best_len = 0;
      for (i = 0; i < pcount; i++)
        {
          patterns[i].regexbuf.not_eol = 0;
          /* The searched text stops one byte short of END.  */
          start = re_search (&(patterns[i].regexbuf),
                             buf, end - buf - 1,
                             beg - buf, end - beg - 1,
                             &(patterns[i].regs));
          /* Results below -1 are treated as fatal; -1 itself simply
             falls through to the next pattern.  */
          if (start < -1)
            xalloc_die ();
          else if (0 <= start)
            {
              len = patterns[i].regs.end[0] - start;
              match = buf + start;
              /* A match to the right of the best one so far cannot
                 improve on it.  */
              if (match > best_match)
                continue;
              if (start_ptr && !match_words)
                goto assess_pattern_match;
              if ((!match_lines && !match_words)
                  || (match_lines && len == end - beg - 1))
                {
                  /* Report the whole line rather than the matched
                     span.  */
                  match = beg;
                  len = end - beg;
                  goto assess_pattern_match;
                }
              /* If -w, check if the match aligns with word boundaries.
                 We do this iteratively because:
                 (a) the line may contain more than one occurrence of the
                 pattern, and
                 (b) Several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one to
                 find a word boundary.  */
              if (match_words)
                while (match <= best_match)
                  {
                    regoff_t shorter_len = 0;
                    if ((match == buf || !WCHAR ((unsigned char) match[-1]))
                        && (start + len == end - buf - 1
                            || !WCHAR ((unsigned char) match[len])))
                      goto assess_pattern_match;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place. */
                        --len;
                        patterns[i].regexbuf.not_eol = 1;
                        shorter_len = re_match (&(patterns[i].regexbuf),
                                                buf, match + len - beg,
                                                match - buf,
                                                &(patterns[i].regs));
                        if (shorter_len < -1)
                          xalloc_die ();
                      }
                    if (0 < shorter_len)
                      len = shorter_len;
                    else
                      {
                        /* Try looking further on. */
                        if (match == end - 1)
                          break;
                        match++;
                        patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(patterns[i].regexbuf),
                                           buf, end - buf - 1,
                                           match - buf, end - match - 1,
                                           &(patterns[i].regs));
                        if (start < 0)
                          {
                            if (start < -1)
                              xalloc_die ();
                            break;
                          }
                        len = patterns[i].regs.end[0] - start;
                        match = buf + start;
                      }
                  } /* while (match <= best_match) */
              /* No acceptable match for this pattern; try the next.  */
              continue;
            assess_pattern_match:
              if (!start_ptr)
                {
                  /* Good enough for a non-exact match.
                     No need to look at further patterns, if any.  */
                  goto success;
                }
              if (match < best_match || (match == best_match && len > best_len))
                {
                  /* Best exact match:  leftmost, then longest.  */
                  best_match = match;
                  best_len = len;
                }
            } /* if re_search >= 0 */
        } /* for Regex patterns.  */
        if (best_match < end)
          {
            /* We have found an exact match.  We were just
               waiting for the best one (leftmost then longest).  */
            beg = best_match;
            len = best_len;
            goto success_in_len;
          }
    } /* for (beg = end ..) */

 failure:
  ret_val = -1;
  goto out;

  /* Non-exact success: the reported text is the whole line
     [beg, end).  */
 success:
  len = end - beg;
  /* Exact success arrives here with BEG and LEN already set.  */
 success_in_len:;
  size_t off = beg - buf;
  /* NOTE(review): presumably converts OFF and LEN from the lowercased
     copy back to the caller's buffer; MAP is still NULL when mbtolower
     was not called -- confirm mb_case_map_apply accepts that.  */
  mb_case_map_apply (map, &off, &len);
  *match_size = len;
  ret_val = off;
 out:
  return ret_val;
}
Example #29
0
/* Search the BUF_SIZE bytes at BUF for text matching COMPILED_PATTERN,
   which must point to a struct compiled_regex.

   If EXACT is false, look for a line containing a match: return the
   offset of that line from BUF and store the line's length (including
   its end-of-line byte, when present) in *MATCH_SIZE.

   If EXACT is true, run the regex patterns over the whole buffer and
   return the start offset reported by re_search for the first pattern
   that matches, storing the match length in *MATCH_SIZE.

   Return (size_t) -1 if nothing matches.  */
static size_t
EGexecute (const void *compiled_pattern,
           const char *buf, size_t buf_size,
           size_t *match_size, bool exact)
{
  struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern;
  register const char *buflim, *beg, *end;
  char eol = cregex->eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  size_t i;
#ifdef MBS_SUPPORT
  /* Per-byte table from check_multibyte_string; only built when a
     keyword set will be consulted in a multibyte locale.  Every exit
     path must free it.  */
  char *mb_properties = NULL;
#endif /* MBS_SUPPORT */

#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && cregex->ckwset.kwset)
    mb_properties = check_multibyte_string (buf, buf_size);
#endif /* MBS_SUPPORT */

  buflim = buf + buf_size;

  /* Each iteration examines one candidate line [beg, end) and the next
     one resumes right after it.  */
  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!exact)
        {
          if (cregex->ckwset.kwset)
            {
              /* Find a possible match using the KWset matcher. */
              size_t offset = kwsexec (cregex->ckwset.kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                {
#ifdef MBS_SUPPORT
                  if (MB_CUR_MAX > 1)
                    free (mb_properties);
#endif
                  return (size_t)-1;
                }
              beg += offset;
              /* Narrow down to the line containing the candidate, and
                 run it through DFA. */
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
#ifdef MBS_SUPPORT
              /* Skip keyword hits whose table entry is zero.
                 NOTE(review): presumably zero marks a byte that is not
                 the start of a character -- confirm against
                 check_multibyte_string.  */
              if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
                continue;
#endif
              while (beg > buf && beg[-1] != eol)
                --beg;
              if (kwsm.index < cregex->kwset_exact_matches)
                goto success;
              if (dfaexec (&cregex->dfa, beg, end - beg, &backref) == (size_t) -1)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA. */
              size_t offset = dfaexec (&cregex->dfa, beg, buflim - beg, &backref);
              if (offset == (size_t) -1)
                break;
              /* Narrow down to the line we've found. */
              beg += offset;
              end = memchr (beg, eol, buflim - beg);
              if (end != NULL)
                end++;
              else
                end = buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }
          /* Successful, no backreferences encountered! */
          if (!backref)
            goto success;
        }
      else
        /* Exact mode treats the whole buffer as a single "line", so the
           outer loop body runs only once.  */
        end = beg + buf_size;

      /* If we've made it to this point, this means DFA has seen
         a probable match, and we need to run it through Regex. */
      for (i = 0; i < cregex->pcount; i++)
        {
          cregex->patterns[i].regexbuf.not_eol = 0;
          if (0 <= (start = re_search (&(cregex->patterns[i].regexbuf), beg,
                                       end - beg - 1, 0,
                                       end - beg - 1, &(cregex->patterns[i].regs))))
            {
              len = cregex->patterns[i].regs.end[0] - start;
              if (exact)
                {
                  /* This early return bypasses the cleanup at the
                     bottom of the function, so release the multibyte
                     table here as well; it used to be leaked.
                     free (NULL) is a no-op when no table was built.  */
#ifdef MBS_SUPPORT
                  free (mb_properties);
#endif /* MBS_SUPPORT */
                  *match_size = len;
                  return start;
                }
              if ((!cregex->match_lines && !cregex->match_words)
                  || (cregex->match_lines && len == end - beg - 1))
                goto success;
              /* If -w, check if the match aligns with word boundaries.
                 We do this iteratively because:
                 (a) the line may contain more than one occurrence of the
                 pattern, and
                 (b) Several alternatives in the pattern might be valid at a
                 given point, and we may need to consider a shorter one to
                 find a word boundary.  */
              if (cregex->match_words)
                while (start >= 0)
                  {
                    if ((start == 0 || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1]))
                        && (len == end - beg - 1
                            || !IS_WORD_CONSTITUENT ((unsigned char) beg[start + len])))
                      goto success;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place. */
                        --len;
                        cregex->patterns[i].regexbuf.not_eol = 1;
                        len = re_match (&(cregex->patterns[i].regexbuf), beg,
                                        start + len, start,
                                        &(cregex->patterns[i].regs));
                      }
                    if (len <= 0)
                      {
                        /* Try looking further on. */
                        if (start == end - beg - 1)
                          break;
                        ++start;
                        cregex->patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(cregex->patterns[i].regexbuf), beg,
                                           end - beg - 1,
                                           start, end - beg - 1 - start,
                                           &(cregex->patterns[i].regs));
                        /* When the search failed, START is negative and
                           the loop condition ends the scan; LEN is not
                           used again in that case.  */
                        len = cregex->patterns[i].regs.end[0] - start;
                      }
                  }
            }
        } /* for Regex patterns.  */
    } /* for (beg = end ..) */
#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && mb_properties)
    free (mb_properties);
#endif /* MBS_SUPPORT */
  return (size_t) -1;

  /* Report the whole line [beg, end).  */
 success:
#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && mb_properties)
    free (mb_properties);
#endif /* MBS_SUPPORT */
  *match_size = end - beg;
  return beg - buf;
}
Example #30
0
string *grep(char *regexp, char *line, int num_vars) 
{
  struct re_pattern_buffer *rc;
  struct re_registers *p;
  const_string ok;
  string *vars = NULL;
  string *lookup;
  int i;

  if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
    fprintf(stderr, "Grep\n\t%s\n\tin\n\t%s\n", regexp, line);
  }

  if (test_file('z', line))
    return NULL;

  /* This will retrieve the precompiled regexp or compile it and
     remember it. vars contains the strings matched, num_vars the number
     of these strings. */
#if 0
  if ((lookup = hash_lookup(symtab, regexp)))
    rc = (struct re_pattern_buffer *)lookup[0];
  else
    rc = NULL;
  if (rc == NULL) {
#endif
    /* Compile the regexp and stores the result */

    if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
      fprintf(stderr, "\tCompiling the regexp\n");
    }

    re_syntax_options = RE_SYNTAX_POSIX_EGREP;
    rc = (struct re_pattern_buffer *) calloc(1, sizeof(struct re_pattern_buffer));
    rc->regs_allocated = REGS_UNALLOCATED;
    if ((ok = re_compile_pattern(regexp, strlen(regexp), rc)) != 0)
      FATAL1("Can't compile regex %s\n", regexp);
#if 0
    hash_remove_all(symtab, regexp);
    hash_insert(symtab, regexp, (char *)rc);
  }
  else   if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
    fprintf(stderr, "\tAlready compiled\n");
  }
#endif

  p = (struct re_registers *) calloc(1, sizeof(struct re_registers));
  p->num_regs = num_vars;
  if ((re_match(rc, line, strlen(line), 0, p)) > 0) {
    vars = (char **) xmalloc ((num_vars+1) * sizeof(char *));
    for (i = 0; i <= num_vars; i++) {
      vars[i] = malloc((p->end[i] - p->start[i] + 1)*sizeof(char));
      strncpy(vars[i], line+p->start[i], p->end[i] - p->start[i]);
      vars[i][p->end[i] - p->start[i]] = '\0';
    }
  }
  free (p);
  if (KPSE_DEBUG_P(MKTEX_FINE_DEBUG)) {
    if (vars)
      for(i = 0; i <= num_vars; i++)
	fprintf(stderr, "String %d matches %s\n", i, vars[i]);
  }
  return vars;
}