Exemple #1
0
/*
 - _XmRegexec - match a regexp against a string
 *
 * Returns 1 when the compiled program prog matches somewhere in string and
 * 0 when it does not.  A NULL argument, or a program whose first byte is
 * not MAGIC, also yields 0.  The start of string is stored in regbol
 * before any match attempt so that ^ can be recognised.
 *
 * The strchr()-based shortcuts work on single bytes, so they are only
 * taken when MB_CUR_MAX == 1; otherwise the subject is walked one
 * character at a time using CHARLEN().
 */
int
_XmRegexec(XmRegexpRec *prog, char *string)
{
	register char	*s;
	/*	extern char	*strchr();*/

	/* Be paranoid... */
	if (prog == NULL || string == NULL) {
		return(0);
	}

	/* Check validity of program. */
	if (UCHARAT(prog->program) != MAGIC) {
		return(0);
	}

	/* If there is a "must appear" string, look for it. */
	if (prog->regmust != NULL && MB_CUR_MAX == 1) {
		s = (char *)string;
		while ((s = strchr(s, prog->regmust[0])) != NULL) {
			if (strncmp(s, prog->regmust, prog->regmlen) == 0)
				break;	/* Found it. */
			s++;
		}
		if (s == NULL)	/* Not present. */
			return(0);
	}

	/* Mark beginning of line for ^ . */
	regbol = (char *)string;

	/* Simplest case:  anchored match need be tried only once. */
	if (prog->reganch)
		return(regtry(prog, string));

	/* Messy cases:  unanchored match. */
	s = (char *)string;
	if (prog->regstart != '\0' && MB_CUR_MAX == 1) {
		/* We know what char it must start with. */
		while ((s = strchr(s, prog->regstart)) != NULL) {
			if (regtry(prog, s))
				return(1);
			s++;
		}
	} else {
		/* We don't -- general case: try every character position. */
		for (;;) {
			int len;

			if (regtry(prog, s))
				return(1);
			len = CHARLEN(s);
			if (len == 0)
				break;		/* Nothing left to step over. */
			/*
			 * NOTE(review): CHARLEN is defined elsewhere; if it is
			 * mblen()-based it reports an invalid multibyte sequence
			 * as a negative length.  Step over one byte in that case
			 * instead of moving the cursor backwards.
			 */
			if (len < 0)
				len = 1;
			s += len;
		}
	}

	/* Failure. */
	return(0);
}
Exemple #2
0
/*
 - regexec - match a regexp against a string
 *
 * Returns 1 when the compiled program prog matches somewhere in string,
 * 0 otherwise.  NULL arguments and programs that do not start with MAGIC
 * are reported through regerror() and yield 0.  The start of string is
 * stored in regbol before matching begins.
 */
int
regexec(
    register regexp *prog,
    register const char *string )
{
    register char *scan;

    /* Refuse to work without both arguments. */
    if ( prog == NULL || string == NULL )
    {
        regerror("NULL parameter");
        return 0;
    }

    /* The compiled program must carry the magic marker. */
    if ( UCHARAT(prog->program) != MAGIC )
    {
        regerror("corrupted program");
        return 0;
    }

    /* Cheap rejection: the "must appear" literal has to occur somewhere. */
    if ( prog->regmust != NULL )
    {
        for ( scan = strchr( (char *)string, prog->regmust[ 0 ] );
              scan != NULL;
              scan = strchr( scan + 1, prog->regmust[ 0 ] ) )
        {
            if ( strncmp( scan, prog->regmust, prog->regmlen ) == 0 )
                break;  /* Literal is present. */
        }
        if ( scan == NULL )
            return 0;   /* Literal is absent, so no match is possible. */
    }

    /* Remember where the line starts so ^ can be checked. */
    regbol = (char *)string;

    /* An anchored pattern is tried exactly once, at the start. */
    if ( prog->reganch )
        return regtry( prog, string );

    /* Unanchored pattern: try successive starting positions. */
    scan = (char *)string;
    if ( prog->regstart != '\0' )
    {
        /* Only positions holding the known first character can match. */
        while ( ( scan = strchr( scan, prog->regstart ) ) != NULL )
        {
            if ( regtry( prog, scan ) )
                return 1;
            ++scan;
        }
    }
    else
    {
        /* No known first character: try every position, terminator included. */
        for ( ;; )
        {
            if ( regtry( prog, scan ) )
                return 1;
            if ( *scan++ == '\0' )
                break;
        }
    }

    /* Nothing matched. */
    return 0;
}
Exemple #3
0
/*
 - regexec - match a regexp against a string
 *
 * Returns 1 when the compiled program prog matches somewhere in string,
 * 0 otherwise.  NULL arguments and programs that do not start with MAGIC
 * are reported through FAIL and yield 0.  Per-call matcher state lives in
 * a local regexecState that is cleared before use; the start of string is
 * stored in its regbol field.
 */
int
pgpRegExec(regexp *prog, char const *string)
{
	char const *s;
	regexecState s_res;
	regexecState *res = &s_res;

	/* Be paranoid... */
	if (prog == NULL || string == NULL) {
		FAIL("NULL parameter");
		/*
		 * FAIL is a macro defined elsewhere.  Return explicitly, as the
		 * check below does, so that prog is never dereferenced when FAIL
		 * itself does not leave the function.
		 */
		return(0);
	}

	/* Check validity of program. */
	if (UCHARAT(prog->program) != MAGIC) {
		FAIL("corrupted program");
		return(0);
	}

	pgpClearMemory( &s_res, sizeof(s_res) );

	/* If there is a "must appear" string, look for it. */
	if (prog->regmust != NULL) {
		s = string;
		while ((s = strchr(s, prog->regmust[0])) != NULL) {
			if (strncmp(s, prog->regmust, prog->regmlen) == 0)
				break;	/* Found it. */
			s++;
		}
		if (s == NULL)	/* Not present. */
			return(0);
	}

	/* Mark beginning of line for ^ . */
	res->regbol = string;

	/* Simplest case:  anchored match need be tried only once. */
	if (prog->reganch)
		return(regtry(res, prog, string));

	/* Messy cases:  unanchored match. */
	s = string;
	if (prog->regstart != '\0') {
		/* We know what char it must start with. */
		while ((s = strchr(s, prog->regstart)) != NULL) {
			if (regtry(res, prog, s))
				return(1);
			s++;
		}
	} else {
		/* We don't -- general case: every position, terminator included. */
		do {
			if (regtry(res, prog, s))
				return(1);
		} while (*s++ != '\0');
	}

	/* Failure. */
	return(0);
}
Exemple #4
0
/*
   - regsub - perform substitutions after a regexp match
   *
   * Copies source to dest, replacing '&' with the text of the whole match
   * (group 0) and "\0".."\9" with the text of the corresponding group, as
   * recorded in prog->startp[] / prog->endp[].  "\\" and "\&" produce a
   * literal backslash or ampersand.  n is the number of bytes available
   * in dest, terminator included.
   *
   * Returns a pointer to the terminating NUL written into dest, or NULL
   * after reporting a problem through regerror().
 */
char *regsub (regexp * prog, char * source, char * dest, int n)
{
    register char *in;
    register char *out;
    register char ch;
    register int group;
    register int span;

    if (prog == (regexp *) NULL ||
            source == (char *) NULL || dest == (char *) NULL) {
        regerror("NULL parm to regsub\n");
        return NULL;
    }
    if (UCHARAT(prog->program) != MAGIC) {
        regerror("damaged regexp fed to regsub\n");
        return NULL;
    }

    in = source;
    out = dest;
    for (ch = *in++; ch != '\0'; ch = *in++) {
        /* Work out which group, if any, this character refers to. */
        group = -1;
        if (ch == '&') {
            group = 0;
        } else if (ch == '\\' && *in >= '0' && *in <= '9') {
            group = *in - '0';
            in++;
        }

        if (group >= 0) {
            /* Group reference: unset groups contribute nothing. */
            if (prog->startp[group] == (char *) NULL ||
                    prog->endp[group] == (char *) NULL)
                continue;

            span = prog->endp[group] - prog->startp[group];
            n -= span;
            if (n < 0) {
                regerror("line too long\n");
                return NULL;
            }
            strncpy(out, prog->startp[group], span);
            out += span;
            /* strncpy pads with NULs once the source string ends early. */
            if (span != 0 && out[-1] == '\0') {
                regerror("damaged match string\n");
                return NULL;
            }
            continue;
        }

        /* Ordinary character, possibly an escaped backslash or ampersand. */
        if (ch == '\\' && (*in == '\\' || *in == '&'))
            ch = *in++;
        if (--n < 0) {
            regerror("line too long\n");
            return NULL;
        }
        *out++ = ch;
    }

    /* Account for the terminator as well. */
    if (--n < 0) {
        regerror("line too long\n");
        return NULL;
    }
    *out = '\0';
    return out;
}
Exemple #5
0
/*
 - regsub - perform substitutions after a regexp match
 *
 * Copies source to dest, replacing '&' with the text of the whole match
 * (group 0) and "\0".."\9" with the text of the corresponding group, as
 * recorded in prog->startp[] / prog->endp[].
 *
 * dlen is the size of dest in bytes, terminator included.  When the
 * result does not fit, copying stops, *trunc is set to 1 and the number
 * of bytes written so far is returned; dest is NUL-terminated at that
 * point whenever dlen > 0.  trunc may be NULL if the caller does not care.
 *
 * Returns the number of bytes stored in dest (terminator excluded), or
 * -1 after reporting a problem through regerror().
 */
int regsub(regexp *prog, char *source, char *dest, int dlen, int *trunc)
{
	register char *src;
	register char *dst;
	register char c;
	register int no;

	if (prog == NULL || source == NULL || dest == NULL) {
		regerror("NULL parm to regsub");
		return -1;
	}
	if (UCHARAT(prog->program) != MAGIC) {
		regerror("damaged regexp fed to regsub");
		return -1;
	}

	if (trunc != NULL)
		*trunc = 0;
	src = source;
	dst = dest;

	/* Without room for even the terminator nothing can be written. */
	if (dlen <= 0)
		goto truncated;

	while ((c = *src++) != '\0') {
		if (c == '&')
			no = 0;
		else if (c == '\\' && '0' <= *src && *src <= '9')
			no = *src++ - '0';
		else
			no = -1;

		/* Always keep one byte in reserve for the terminator. */
		if (dst - dest + 1 >= dlen)
			goto truncated;

		if (no < 0) {	/* Ordinary character. */
			*dst++ = c;
		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
			register int len = prog->endp[no] - prog->startp[no];

			/* An end before its start would become a huge copy count. */
			if (len < 0) {
				regerror("damaged match string");
				return -1;
			}
			if ((dst - dest) + len >= dlen)
				goto truncated;
			(void) strncpy(dst, prog->startp[no], len);
			dst += len;
			/*
			 * strncpy pads with NULs once the source string ends early.
			 * Only look back when something was copied; otherwise
			 * dst[-1] may lie before the start of dest.
			 */
			if (len != 0 && *(dst-1) == '\0') {
				regerror("damaged match string");
				return -1;
			}
		}
	}
	*dst = '\0';
	return (int)(dst - dest);

truncated:
	if (trunc != NULL)
		*trunc = 1;
	/* dst never advances past dest[dlen-1], so the terminator fits. */
	if (dlen > 0)
		*dst = '\0';
	return (int)(dst - dest);
}
Exemple #6
0
// Searches string for the compiled expression held by this object.
// Returns true when a match is found and false otherwise; a null string
// or a missing/corrupted compiled program also yields false. The subject
// pointer is remembered in searchstring, and regbol is set to its start
// before any match attempt.
bool ossimRegExp::find (const char* string) {
    if (!string) return false;
    this->searchstring = string;

    // The compiled program must exist and start with the magic marker.
    if (!this->program || UCHARAT(this->program) != MAGIC) {
        //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
        printf ("ossimRegExp::find(): Compiled regular expression corrupted.\n");
        return false;
    }

    // Cheap rejection: the "must appear" literal has to occur somewhere.
    if (this->regmust != NULL) {
        const char* hit = strchr(string, this->regmust[0]);
        while (hit != NULL &&
               strncmp(hit, this->regmust, this->regmlen) != 0) {
            hit = strchr(hit + 1, this->regmust[0]);
        }
        if (hit == NULL)
            return false;		// Literal absent, no match possible.
    }

    // Remember where the line starts so ^ can be checked.
    regbol = string;

    // An anchored pattern is tried exactly once, at the start.
    if (this->reganch)
        return (regtry(string, this->startp, this->endp, this->program));

    // Unanchored pattern: try successive starting positions.
    const char* cursor = string;
    if (this->regstart != '\0') {
        // Only positions holding the known first character can match.
        for (cursor = strchr(cursor, this->regstart);
             cursor != NULL;
             cursor = strchr(cursor + 1, this->regstart)) {
            if (regtry(cursor, this->startp, this->endp, this->program))
                return true;
        }
    }
    else {
        // No known first character: try every position, terminator included.
        for (;;) {
            if (regtry(cursor, this->startp, this->endp, this->program))
                return true;
            if (*cursor++ == '\0')
                break;
        }
    }

    // Nothing matched.
    return false;
}
/*
 - regsub - perform substitutions after a regexp match
 *
 * Copies source to dest, replacing '&' with the text of the whole match
 * (group 0) and "\0".."\9" with the text of the corresponding group, as
 * recorded in prog->startp[] / prog->endp[].  "\\" and "\&" produce a
 * literal backslash or ampersand.  dest is NUL-terminated on success.
 * No size is passed for dest, so the caller must supply a buffer large
 * enough for the expanded text.  Problems are reported via regerror().
 */
void
regsub (const regexp * prog, const char *source, char *dest)
{
  const char *in;
  char *out;
  char ch;
  int group;
  int span;

  if (prog == NULL || source == NULL || dest == NULL)
    {
      regerror ("NULL parm to regsub");
      return;
    }
  if (UCHARAT (prog->program) != MAGIC)
    {
      regerror ("damaged regexp fed to regsub");
      return;
    }

  in = source;
  out = dest;
  for (ch = *in++; ch != '\0'; ch = *in++)
    {
      /* Work out which group, if any, this character refers to. */
      group = -1;
      if (ch == '&')
        group = 0;
      else if (ch == '\\' && *in >= '0' && *in <= '9')
        group = *in++ - '0';

      if (group < 0)
        {
          /* Ordinary character, possibly an escaped '\' or '&'. */
          if (ch == '\\' && (*in == '\\' || *in == '&'))
            ch = *in++;
          *out++ = ch;
          continue;
        }

      /* Group reference: unset groups contribute nothing. */
      if (prog->startp[group] == NULL || prog->endp[group] == NULL)
        continue;

      span = (int) (prog->endp[group] - prog->startp[group]);
      (void) strncpy (out, prog->startp[group], span);
      out += span;
      /* strncpy pads with NULs once the source string ends early. */
      if (span != 0 && out[-1] == '\0')
        {
          regerror ("damaged match string");
          return;
        }
    }
  *out = '\0';
}
Exemple #8
0
/*
 * SREsize - report the size recorded in an SRE
 *
 * Returns prog->regsize.  A NULL prog, or a program whose first byte is
 * not SRE_MAGIC, is reported through SREerror() and yields -1.
 */
int
SREsize(SRE *prog)
{
	char *problem = NULL;

	/* Decide whether the argument is usable before touching regsize. */
	if (prog == (SRE *) NULL)
		problem = "NULL parameter";
	else if (UCHARAT(prog->program) != SRE_MAGIC)
		problem = "corrupted program";

	if (problem != NULL) {
		SREerror(problem);
		return -1;
	}

	return prog->regsize;
}
Exemple #9
0
/*
 * SREfree - release an SRE
 *
 * Passes prog to xfree().  A NULL prog, or a program whose first byte is
 * not SRE_MAGIC, is reported through SREerror() and nothing is freed.
 */
void
SREfree(SRE *prog)
{
	char *problem = NULL;

	/* Only hand a recognisable SRE to xfree(). */
	if (prog == (SRE *) NULL)
		problem = "NULL parameter";
	else if (UCHARAT(prog->program) != SRE_MAGIC)
		problem = "corrupted program";

	if (problem != NULL) {
		SREerror(problem);
		return;
	}

	xfree(prog);
}
Exemple #10
0
/*
 - SREsub - perform substitutions after a regexp match
 *
 * Copies source to dest, replacing '&' with the text of the whole match
 * (group 0) and "\0".."\9" with the text of the corresponding group, as
 * recorded in prog->startp[] / prog->endp[].  Every other character,
 * backslash included, is copied unchanged.  dest is NUL-terminated on
 * success.  No size is passed for dest, so the caller must supply a
 * buffer large enough for the expanded text.  Problems are reported
 * through SREerror().
 */
void
SREsub(SRE *prog, char *source, char *dest)
{
	register char *in;
	register char *out;
	register char ch;
	register int group;
	register int span;
	extern char *strncpy();

	if (prog == NULL || source == NULL || dest == NULL) {
		SREerror("NULL parm to SREsub");
		return;
	}
	if (UCHARAT(prog->program) != SRE_MAGIC) {
		SREerror("damaged SRE fed to SREsub");
		return;
	}

	in = source;
	out = dest;
	for (ch = *in++; ch != '\0'; ch = *in++) {
		/* Work out which group, if any, this character refers to. */
		group = -1;
		if (ch == '&')
			group = 0;
		else if (ch == '\\' && *in >= '0' && *in <= '9')
			group = *in++ - '0';

		if (group < 0) {
			/* Ordinary character. */
			*out++ = ch;
			continue;
		}

		/* Group reference: unset groups contribute nothing. */
		if (prog->startp[group] == NULL || prog->endp[group] == NULL)
			continue;

		span = prog->endp[group] - prog->startp[group];
		(void) strncpy(out, prog->startp[group], span);
		out += span;
		/* strncpy pads with NULs once the source string ends early. */
		if (span > 0 && out[-1] == '\0') {
			SREerror("damaged match string");
			return;
		}
	}
	*out = '\0';
}
Exemple #11
0
/*
 - regatom - the lowest level
 *
 * Parses one atom starting at the global parse pointer regparse, emits
 * its node through regnode()/regc(), and returns the node pointer that
 * regnode() produced (or whatever reg() returned for a parenthesised
 * group).  *flagp starts as WORST and has HASWIDTH, SIMPLE and SPSTART
 * or-ed in as the atom warrants.
 *
 * NOTE(review): FAIL is a macro defined elsewhere; the code below is
 * written as if it leaves the function -- confirm against its definition.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom( int *flagp )
{
	register char *ret;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	/* Dispatch on the first character of the atom, consuming it. */
	switch (*regparse++) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
		ret = regnode(BOL);
		break;
	case '$':
		ret = regnode(EOL);
		break;
	case '.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Character class: emit ANYOF/ANYBUT followed by the members. */
			register int classr;
			register int classend;

			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				regparse++;
			} else
				ret = regnode(ANYOF);
			/* A leading ']' or '-' is taken literally. */
			if (*regparse == ']' || *regparse == '-')
				regc(*regparse++);
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					regparse++;
					/* A '-' right before the end of the class is literal. */
					if (*regparse == ']' || *regparse == '\0')
						regc('-');
					else {
						/*
						 * Range: the character before the '-' has
						 * already been emitted, so start one past it.
						 */
						classr = UCHARAT(regparse-2)+1;
						classend = UCHARAT(regparse);
						if (classr > classend+1)
							FAIL("invalid [] range");
						for (; classr <= classend; classr++)
							regc(classr);
						regparse++;
					}
				} else
					regc(*regparse++);
			}
			regc('\0');	/* Terminate the member list. */
			if (*regparse != ']')
				FAIL("unmatched []");
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		/* Parenthesised group: let reg() parse it. */
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case '\n':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
		FAIL("?+* follows nothing");
		break;
	case '\\':
		/* Escape: \< and \> are word anchors, anything else is literal. */
		switch (*regparse++) {
		case '\0':
			FAIL("trailing \\");
			break;
		case '<':
			ret = regnode(WORDA);
			break;
		case '>':
			ret = regnode(WORDZ);
			break;
		/* FIXME: Someday handle \1, \2, ... */
		default:
			/* Handle general quoted chars in exact-match routine */
			goto de_fault;
		}
		break;
	de_fault:
	default:
		/*
		 * Encode a string of characters to be matched exactly.
		 *
		 * This is a bit tricky due to quoted chars and due to
		 * '*', '+', and '?' taking the SINGLE char previous
		 * as their operand.
		 *
		 * On entry, the char at regparse[-1] is going to go
		 * into the string, no matter what it is.  (It could be
		 * following a \ if we are entered from the '\' case.)
		 * 
		 * Basic idea is to pick up a good char in  ch  and
		 * examine the next char.  If it's *+? then we twiddle.
		 * If it's \ then we frozzle.  If it's other magic char
		 * we push  ch  and terminate the string.  If none of the
		 * above, we push  ch  on the string and go around again.
		 *
		 *  regprev  is used to remember where "the current char"
		 * starts in the string, if due to a *+? we need to back
		 * up and put the current char in a separate, 1-char, string.
		 * When  regprev  is NULL,  ch  is the only char in the
		 * string; this is used in *+? handling, and in setting
		 * flags |= SIMPLE at the end.
		 */
		{
			char *regprev;
			register char ch;

			regparse--;			/* Look at cur char */
			ret = regnode(EXACTLY);
			for ( regprev = 0 ; ; ) {
				ch = *regparse++;	/* Get current char */
				switch (*regparse) {	/* look at next one */

				default:
					regc(ch);	/* Add cur to string */
					break;

				case '.': case '[': case '(':
				case ')': case '|': case '\n':
				case '$': case '^':
				case '\0':
				/* FIXME, $ and ^ should not always be magic */
				magic:
					regc(ch);	/* dump cur char */
					goto done;	/* and we are done */

				case '?': case '+': case '*':
					if (!regprev) 	/* If just ch in str, */
						goto magic;	/* use it */
					/* End mult-char string one early */
					regparse = regprev; /* Back up parse */
					goto done;

				case '\\':
					regc(ch);	/* Cur char OK */
					switch (regparse[1]){ /* Look after \ */
					case '\0':
					case '<':
					case '>':
					/* FIXME: Someday handle \1, \2, ... */
						goto done; /* Not quoted */
					default:
						/* Backup point is \, scan point is after it. */
						regprev = regparse;
						regparse++; 
						continue;	/* NOT break; */
					}
				}
				regprev = regparse;	/* Set backup point */
			}
		done:
			regc('\0');	/* Terminate the literal string. */
			*flagp |= HASWIDTH;
			if (!regprev)		/* One char? */
				*flagp |= SIMPLE;
		}
		break;
	}

	return(ret);
}
Exemple #12
0
/*
 * REexec - match a RE_EXP against a string
 *
 * Returns TRUE when the compiled program prog matches somewhere in
 * string, FALSE otherwise.  NULL arguments and programs that do not start
 * with MAGIC are reported through _error() and yield FALSE.  The start of
 * string is stored in regbol before matching begins.
 *
 * The subject is stepped through with CMnext() and searched with
 * STchr()/STncmp(); these are defined elsewhere (presumably
 * character-set-aware counterparts of ++, strchr and strncmp -- confirm).
 */
bool
REexec( RE_EXP *prog, char *string)
{
    register char *s;

    /* Be paranoid... */
    if (prog == NULL || string == NULL) {
        _error("NULL parameter");
        return( FALSE );
    }

    /* Check validity of program. */
    if (UCHARAT(prog->program) != MAGIC) {
        _error("corrupted program");
        return( FALSE );
    }

    /* If there is a "must appear" string, look for it. */
    if (prog->regmust != NULL) {
        s = string;
        while ((s = STchr(s, *prog->regmust)) != NULL) {
            if (STncmp( s, prog->regmust, prog->regmlen ) == 0)
                break;	/* Found it. */
            CMnext( s );
        }
        if (s == NULL)	/* Not present. */
            return( FALSE );
    }

    /* Mark beginning of line for ^ . */
    regbol = string;

    /* Simplest case:  anchored match need be tried only once. */
    if (prog->reganch)
        return(regtry(prog, string));

    /* Messy cases:  unanchored match. */
    s = string;
    if (prog->regstart != '\0') {
        /* We know what char it must start with. */
        while ((s = STchr(s, prog->regstart)) != NULL) {
            if (regtry(prog, s))
                return( TRUE );
            CMnext( s );
        }
    } else {
        /* We don't -- general case: every position, terminator included. */
        while( TRUE ) {
            if (regtry(prog, s))
                return( TRUE );
            if( *s == '\0' )
                break;
            CMnext( s );
        }
    }

    /*
     * No further stepping of s happens here: on this path s is either
     * NULL (first loop) or already at the terminator (second loop), and
     * it is not used again.
     */

    /* Failure. */
    return( FALSE );
}
Exemple #13
0
/*
 * regatom - the lowest level
 *
 * Parses one atom starting at the global parse pointer regparse, emits
 * its node through regnode()/regc(), and returns the node pointer that
 * regnode() produced (or whatever reg() returned for a parenthesised
 * group).  *flagp starts as WORST and has HASWIDTH, SIMPLE and SPSTART
 * or-ed in as the atom warrants.  The pattern is stepped through with
 * CMnext() and regc() is handed a pointer to the character to emit.
 *
 * NOTE(review): _FAIL is a macro defined elsewhere; the code below is
 * written as if it leaves the function -- confirm against its definition.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom(i4 *flagp)
{
    register char *ret;
    i4 flags;
    char null_byte = '\0';

    *flagp = WORST;		/* Tentatively. */

    switch (*regparse) {
    case '^':
        CMnext( regparse );
        ret = regnode(BOL);
        break;
    case '$':
        CMnext( regparse );
        ret = regnode(EOL);
        break;
    case '.':
        CMnext( regparse );
        ret = regnode(ANY);
        *flagp |= HASWIDTH|SIMPLE;
        break;
    case '[': {
        /*
         * range_start points at the most recent ordinary class member.
         * double_start and first_u1 describe it; both get defined values
         * up front because a leading literal ']' or '-' does not set them.
         */
        char *range_start = NULL;
        bool double_start = FALSE;
        u_char first_u1 = 0, last_u1;

        CMnext( regparse );
        if (*regparse == '^') {	/* Complement of range. */
            ret = regnode(ANYBUT);
            CMnext( regparse );
        } else
            ret = regnode(ANYOF);
        /* A leading ']' or '-' is taken literally. */
        if (*regparse == ']' || *regparse == '-') {
            regc( regparse );
            CMnext( regparse );
        }
        while (*regparse != '\0' && *regparse != ']') {
            if (*regparse == '-') {
                char range_op = '-';

                CMnext( regparse );
                if( *regparse == ']' ||
                        *regparse == '\0'
                  )
                    regc( &range_op );	/* Trailing '-' is literal. */
                else {
                    bool invalid = FALSE;
                    bool double_end;
                    unsigned int member;

                    /* A range needs a start character. */
                    if( range_start == NULL )
                        invalid = TRUE;

                    double_end =
                        CMdbl1st( regparse );

                    /* Both ends must be of the same width. */
                    if( !invalid &&
                            double_end
                            && !double_start
                      )
                        invalid = TRUE;

                    if( !invalid &&
                            double_start
                            && !double_end
                      )
                        invalid = TRUE;

                    /* The start must not sort after the end. */
                    if( !invalid &&
                            CMcmpcase( range_start,
                                       regparse ) > 0
                      )
                        invalid = TRUE;

                    if( double_start )
                        _FAIL("don't know how to support character classes containing double-byte ranges");

                    if( invalid )
                        _FAIL("invalid [] range");
                    /* no double-byte ranges! */
                    /*
                    ** Initialize the value for the end of the range.
                    */
                    last_u1 = UCHARAT(regparse);
                    /*
                    ** Count in a type wider than u_char: a u_char
                    ** counter can never exceed an end value of 255,
                    ** so the loop would not terminate.
                    */
                    for (member = first_u1; member <= last_u1;
                            member++
                        )
                    {
                        char member_byte = (char) member;

                        regc( &member_byte );
                    }
                    /* Leave first_u1 one past the end, as before. */
                    first_u1 = (u_char) member;

                    CMnext( regparse );
                }
            } else {
                /* Ordinary member: remember it as a possible range start. */
                range_start = regparse;
                if( CMdbl1st( range_start ) )
                {
                    double_start = TRUE;
                }
                else
                {
                    double_start = FALSE;
                    first_u1 = UCHARAT(range_start);
                }
                regc( regparse );
                CMnext( regparse );
            }
        }
        regc( &null_byte );	/* Terminate the member list. */
        if (*regparse != ']')
            _FAIL("unmatched []");
        CMnext( regparse );
        *flagp |= HASWIDTH|SIMPLE;
    }
    break;
    case '(':
        CMnext( regparse );
        ret = reg(1, &flags);
        if (ret == NULL)
            return(NULL);
        *flagp |= flags&(HASWIDTH|SPSTART);
        break;
    case '\0':
    case '|':
    case ')':
        CMnext( regparse );
        _FAIL("internal urp");	/* Supposed to be caught earlier. */
        break;
    case '?':
    case '+':
    case '*':
        CMnext( regparse );
        _FAIL("?+* follows nothing");
        break;
    case '\\':
        /* Escaped character: emit it as a one-character literal. */
        CMnext( regparse );
        if (*regparse == '\0')
            _FAIL("trailing \\");
        ret = regnode(EXACTLY);
        regc( regparse );
        CMnext( regparse );
        regc( &null_byte );
        *flagp |= HASWIDTH|SIMPLE;
        break;
    default: {
        /* Run of ordinary characters up to the next META character. */
        register i4  len;
        register char ender;

        len = my_strcspn(regparse, META);
        if (len <= 0)
            _FAIL("internal disaster");
        ender = *(regparse+len);
        if (len > 1 && ISMULT(ender))
            len--;	/* Back off clear of ?+* operand. */
        *flagp |= HASWIDTH;
        if (len == 1)
            *flagp |= SIMPLE;
        ret = regnode(EXACTLY);
        while (len > 0) {
            regc( regparse );
            CMbytedec( len, regparse );
            CMnext( regparse );
        }
        regc( &null_byte );
    }
    break;
    }

    return(ret);
}
Exemple #14
0
/*
 - regatom - the lowest level
 *
 * Parses one atom starting at the global parse pointer regparse, emits
 * its node through regnode()/regc(), and returns the node pointer that
 * regnode() produced (or whatever reg() returned for a parenthesised
 * group).  *flagp starts as WORST and has HASWIDTH, SIMPLE and SPSTART
 * or-ed in as the atom warrants.
 *
 * NOTE(review): FAIL is a macro defined elsewhere; the NOTREACHED
 * comments below indicate it leaves the function -- confirm against its
 * definition.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom(int *flagp)
{
	register char *ret;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	/* Dispatch on the first character of the atom, consuming it. */
	switch (*regparse++) {
	case '^':
		ret = regnode(BOL);
		break;
	case '$':
		ret = regnode(EOL);
		break;
	case '.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Character class: emit ANYOF/ANYBUT followed by the members. */
			register int clss;
			register int classend;

			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				regparse++;
			} else
				ret = regnode(ANYOF);
			/* A leading ']' or '-' is taken literally. */
			if (*regparse == ']' || *regparse == '-')
				regc(*regparse++);
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					regparse++;
					/* A '-' right before the end of the class is literal. */
					if (*regparse == ']' || *regparse == '\0')
						regc('-');
					else {
						/*
						 * Range: the character before the '-' has
						 * already been emitted, so start one past it.
						 */
						clss = UCHARAT(regparse-2)+1;
						classend = UCHARAT(regparse);
						if (clss > classend+1)
							FAIL("invalid [] range");
						for (; clss <= classend; clss++)
							regc(clss);
						regparse++;
					}
				} else
					regc(*regparse++);
			}
			regc('\0');	/* Terminate the member list. */
			if (*regparse != ']')
				FAIL("unmatched []");
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		/* Parenthesised group: let reg() parse it. */
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		/* NOTREACHED */
		break;
	case '?':
	case '+':
	case '*':
		FAIL("?+* follows nothing");
		/* NOTREACHED */
		break;
	case '\\':
		/* Escaped character: emit it as a one-character literal. */
		if (*regparse == '\0')
			FAIL("trailing \\");
		ret = regnode(EXACTLY);
		regc(*regparse++);
		regc('\0');
		*flagp |= HASWIDTH|SIMPLE;
		break;
	default: {
			/* Run of ordinary characters up to the next META character. */
			register int len;
			register char ender;

			regparse--;	/* Un-consume the character the switch took. */
			len = (int) strcspn(regparse, META);
			if (len <= 0)
				FAIL("internal disaster");
			ender = *(regparse+len);
			if (len > 1 && ISMULT(ender))
				len--;		/* Back off clear of ?+* operand. */
			*flagp |= HASWIDTH;
			if (len == 1)
				*flagp |= SIMPLE;
			ret = regnode(EXACTLY);
			while (len > 0) {
				regc(*regparse++);
				len--;
			}
			regc('\0');	/* Terminate the literal string. */
		}
		break;
	}

	return(ret);
}
Exemple #15
0
/*
 - regatom - the lowest level
 *
 * Parses one atom starting at the parse pointer regparse, emits its node
 * through regnode()/regc(), and returns the node pointer that regnode()
 * produced (or whatever reg() returned for a parenthesised group).
 * *flagp starts as WORST and has HASWIDTH, SIMPLE and SPSTART or-ed in as
 * the atom warrants.  On a malformed pattern a message is printed with
 * printf() and 0 is returned.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
char* ossimRegExp::regatom (int *flagp) {
    char* ret;
    int   flags;

    *flagp = WORST;		// Tentatively.

    // Dispatch on the first character of the atom, consuming it.
    switch (*regparse++) {
    case '^':
        ret = regnode(BOL);
        break;
    case '$':
        ret = regnode(EOL);
        break;
    case '.':
        ret = regnode(ANY);
        *flagp |= HASWIDTH | SIMPLE;
        break;
    case '[': {
        // Character class: emit ANYOF/ANYBUT followed by the members.
        int    rxpclass;
        int    rxpclassend;

        if (*regparse == '^') {	// Complement of range.
            ret = regnode(ANYBUT);
            regparse++;
        }
        else
            ret = regnode(ANYOF);
        // A leading ']' or '-' is taken literally.
        if (*regparse == ']' || *regparse == '-')
            regc(*regparse++);
        while (*regparse != '\0' && *regparse != ']') {
            if (*regparse == '-') {
                regparse++;
                // A '-' right before the end of the class is literal.
                if (*regparse == ']' || *regparse == '\0')
                    regc('-');
                else {
                    // Range: the character before the '-' has already
                    // been emitted, so start one past it.
                    rxpclass = UCHARAT(regparse - 2) + 1;
                    rxpclassend = UCHARAT(regparse);
                    if (rxpclass > rxpclassend + 1) {
                        //RAISE Error, SYM(ossimRegExp), SYM(Invalid_Range),
                        printf ("ossimRegExp::compile(): Invalid range in [].\n");
                        return 0;
                    }
                    for (; rxpclass <= rxpclassend; rxpclass++)
                        regc(rxpclass);
                    regparse++;
                }
            }
            else
                regc(*regparse++);
        }
        regc('\0');	// Terminate the member list.
        if (*regparse != ']') {
            //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Bracket),
            printf ("ossimRegExp::compile(): Unmatched [].\n");
            return 0;
        }
        regparse++;
        *flagp |= HASWIDTH | SIMPLE;
    }
    break;
    case '(':
        // Parenthesised group: let reg() parse it.
        ret = reg(1, &flags);
        if (ret == NULL)
            return (NULL);
        *flagp |= flags & (HASWIDTH | SPSTART);
        break;
    case '\0':
    case '|':
    case ')':
        //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
        printf ("ossimRegExp::compile(): Internal error.\n"); // Never here
        return 0;
    case '?':
    case '+':
    case '*':
        //RAISE Error, SYM(ossimRegExp), SYM(No_Operand),
        printf ("ossimRegExp::compile(): ?+* follows nothing.\n");
        return 0;
    case '\\':
        // Escaped character: emit it as a one-character literal.
        if (*regparse == '\0') {
            //RAISE Error, SYM(ossimRegExp), SYM(Trailing_Backslash),
            printf ("ossimRegExp::compile(): Trailing backslash.\n");
            return 0;
        }
        ret = regnode(EXACTLY);
        regc(*regparse++);
        regc('\0');
        *flagp |= HASWIDTH | SIMPLE;
        break;
    default: {
        // Run of ordinary characters up to the next META character.
        int    len;
        char   ender;

        regparse--;	// Un-consume the character the switch took.
        len = (int)strcspn(regparse, META);
        if (len <= 0) {
            //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
            printf ("ossimRegExp::compile(): Internal error.\n");
            return 0;
        }
        ender = *(regparse + len);
        if (len > 1 && ISMULT(ender))
            len--;	// Back off clear of ?+* operand.
        *flagp |= HASWIDTH;
        if (len == 1)
            *flagp |= SIMPLE;
        ret = regnode(EXACTLY);
        while (len > 0) {
            regc(*regparse++);
            len--;
        }
        regc('\0');	// Terminate the literal string.
    }
    break;
    }
    return (ret);
}
Exemple #16
0
/*
 - regexec - match a regexp against a string
 *
 * Returns a newly allocated cst_regstate when the compiled program prog
 * matches somewhere in string, or NULL when it does not.  NULL arguments
 * and programs that do not start with CST_REGMAGIC are reported through
 * FAIL and yield NULL.  The state is obtained from cst_alloc() and is
 * released with cst_free() before returning when nothing matched.
 */
cst_regstate *
hs_regexec(const cst_regex *prog, const char *string)
{
	cst_regstate *state;
	char *cursor;
	int matched = 0;

	/* Refuse to work without both arguments. */
	if (prog == NULL || string == NULL) {
		FAIL("NULL parameter");
		return(0);
	}

	/* The compiled program must carry the magic marker. */
	if (UCHARAT(prog->program) != CST_REGMAGIC) {
		FAIL("corrupted program");
		return(0);
	}

	/* Cheap rejection: the "must appear" literal has to occur somewhere. */
	if (prog->regmust != NULL) {
		for (cursor = strchr((char *)string, prog->regmust[0]);
		     cursor != NULL;
		     cursor = strchr(cursor + 1, prog->regmust[0])) {
			if (strncmp(cursor, prog->regmust, prog->regmlen) == 0)
				break;	/* Literal is present. */
		}
		if (cursor == NULL)	/* Literal is absent. */
			return(0);
	}

	state = cst_alloc(cst_regstate, 1);
	/* Remember where the line starts so ^ can be checked. */
	state->bol = string;

	if (prog->reganch) {
		/* An anchored pattern is tried exactly once, at the start. */
		if (regtry(state, string, prog->program+1))
			matched = 1;
	} else if (prog->regstart != '\0') {
		/* Only positions holding the known first character can match. */
		for (cursor = strchr((char *)string, prog->regstart);
		     cursor != NULL;
		     cursor = strchr(cursor + 1, prog->regstart)) {
			if (regtry(state, cursor, prog->program+1)) {
				matched = 1;
				break;
			}
		}
	} else {
		/* No known first character: every position, terminator included. */
		cursor = (char *)string;
		for (;;) {
			if (regtry(state, cursor, prog->program+1)) {
				matched = 1;
				break;
			}
			if (*cursor++ == '\0')
				break;
		}
	}

	if (matched)
		return state;

	/* Nothing matched: the state is of no use to the caller. */
	cst_free(state);
	return NULL;
}
Exemple #17
0
/*
 - regatom - the lowest level
 *
 * Parses one atom starting at the global parse pointer regparse, emits
 * its node through regnode()/regc(), and returns the node pointer that
 * regnode() produced (or whatever reg() returned for a parenthesised
 * group).  *flagp starts as WORST and has HASWIDTH, SIMPLE and SPSTART
 * or-ed in as the atom warrants.
 *
 * Besides the basic atoms this variant understands backslash escapes
 * both inside [] (\n \t \] \- \\) and outside it (\< \> \d \D \n \p \P
 * \s \S \t \w \W), and rejects a leading '{' like a leading ?+*.
 *
 * NOTE(review): FAIL is a macro defined elsewhere; the code below is
 * written as if it leaves the function -- confirm against its definition.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *
regatom(int *flagp)
{
	register char *ret;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	/* Dispatch on the first character of the atom, consuming it. */
	switch (*regparse++) {
	case '^':
		ret = regnode(BOL);
		break;
	case '$':
		ret = regnode(EOL);
		break;
	case '.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Character class: emit ANYOF/ANYBUT followed by the members. */
			register int chclass;
			register int chclassend;

			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				regparse++;
			} else {
				ret = regnode(ANYOF);
			}
			/* A leading ']' or '-' is taken literally. */
			if (*regparse == ']' || *regparse == '-') {
				regc(*regparse++);
			}
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					regparse++;
					/* A '-' right before the end of the class is literal. */
					if (*regparse == ']' || *regparse == '\0') {
						regc('-');
					} else {
						/*
						 * Range: start one past the byte that
						 * precedes the '-' in the pattern text.
						 */
						chclass = UCHARAT(regparse-2)+1;
						chclassend = UCHARAT(regparse);
						if (chclass > chclassend+1) {
							FAIL("invalid [] range");
						}
						for (; chclass <= chclassend; chclass++) {
							regc(chclass);
						}
						regparse++;
					}
				} else if (*regparse == '\\') {
					/* Escape inside the class: look at the char after \ . */
					switch(*++regparse) {
					case 'n' :
						regc('\n');
						regparse++;
						break;
					case 't' :
						regc('\t');
						regparse++;
						break;
					case ']' :
						regc(']');
						regparse++;
						break;
					case '-' :
						regc('-');
						regparse++;
						break;
					case '\\' :
						regc('\\');
						regparse++;
						break;
					default :
						/*
						 * Unknown escape: step back and emit the
						 * backslash itself; the following char is
						 * handled by the next loop iteration.
						 */
						regparse--;
						regc(*regparse++);
					}
				} else {
					regc(*regparse++);
				}
			}
			regc('\0');	/* Terminate the member list. */
			if (*regparse != ']') {
				FAIL("unmatched []");
			}
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		/* Parenthesised group: let reg() parse it. */
		ret = reg(1, &flags);
		if (ret == NULL) {
			return(NULL);
		}
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
	case '{':
		FAIL("?+*{ follows nothing");
		break;
	case '\\':
		if (*regparse == '\0') {
			FAIL("trailing \\");
		}
		/*
		 * Escape outside a class.  regparse still points at the
		 * escaped character here; it is stepped over once, after
		 * the switch.
		 */
		switch(*regparse) {
		case '<':
			ret = regnode(BEGWORD);
			break;
		case '>':
			ret = regnode(ENDWORD);
			break;
		case 'd':
			ret = regnode(DIGIT);
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'D':
			ret = regnode(NDIGIT);
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'n' :
			ret = regnode(EXACTLY);
			regc('\n');
			regc('\0');
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'p':
			ret = regnode(PRINT);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'P':
			ret = regnode(NPRINT);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 's':
			ret = regnode(WHITESP);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'S':
			ret = regnode(NWHITESP);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 't' :
			ret = regnode(EXACTLY);
			regc('\t');
			regc('\0');
			*flagp |= (HASWIDTH|SIMPLE);
			break;
		case 'w':
			ret = regnode(ALNUM);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'W':
			ret = regnode(NALNUM);
			*flagp |= HASWIDTH|SIMPLE;
			break;
		default :
			/* Any other escaped character is a one-character literal. */
			ret = regnode(EXACTLY);
			regc(*regparse);
			regc('\0');
			*flagp |= HASWIDTH|SIMPLE;
		}
		regparse++;
		break;
	default: {
			/* Run of ordinary characters up to the next META character. */
			register int len;
			register char ender;

			regparse--;	/* Un-consume the character the switch took. */
			len = strcspn(regparse, META);
			if (len <= 0) {
				FAIL("internal disaster");
			}
			ender = *(regparse+len);
			if (len > 1 && ISMULT(ender)) {
				len--;		/* Back off clear of ?+* operand. */
			}
			*flagp |= HASWIDTH;
			if (len == 1) {
				*flagp |= SIMPLE;
			}
			ret = regnode(EXACTLY);
			while (len > 0) {
				regc(*regparse++);
				len--;
			}
			regc('\0');	/* Terminate the literal string. */
		}
		break;
	}

	return(ret);
}
Exemple #18
0
/*
 - SREexec - match a SRE against a string
 *
 * Returns 1 when the compiled program prog matches somewhere in string,
 * 0 otherwise.  NULL arguments and programs that do not start with
 * SRE_MAGIC are reported through SREerror() and yield 0.
 *
 * regbol starts at the beginning of string and is moved to just after a
 * newline whenever the scan steps over one, so ^ can be checked against
 * the current line.
 */
int
SREexec(SRE *prog, char *string)
{
	register char *s;
	extern char *strchr();

	/* Be paranoid... */
	if (prog == (SRE *) NULL || string == (char *) NULL) {
		SREerror("NULL parameter");
		return(0);
	}

	/* Check validity of program. */
	if (UCHARAT(prog->program) != SRE_MAGIC) {
		SREerror("corrupted program");
		return(0);
	}

	/* If there is a "must appear" string, look for it. */
	if (prog->regmust != NULL) {
		s = string;
		while ((s = strchr(s, prog->regmust[0])) != NULL) {
			if (strncmp(s, prog->regmust, prog->regmlen) == 0) {
				break;	/* Found it. */
			}
			s++;
		}
		if (s == NULL)	/* Not present. */
			return(0);
	}

	/* Mark beginning of line for ^ . */
	regbol = string;

	/* Simplest case:  anchored match need be tried only once. */
	if (prog->reganch) {
		return(regtry(prog, string));
	}

	/* Messy cases:  unanchored match. */
	s = string;
	if (prog->regstart != '\0') {
		/* We know what char it must start with. */
		while ((s = strchr(s, prog->regstart)) != NULL) {
			if (regtry(prog, s)) {
				return(1);
			}
			if (*s++ == '\n') {
				regbol = s;
			}
		}
	} else {
		/*
		 * We don't -- general case.  Try every position, the
		 * terminator included, and stop there: stepping past the
		 * terminator before testing *s would read beyond the end of
		 * the string (immediately so for an empty string).
		 */
		for (;;) {
			if (regtry(prog, s)) {
				return(1);
			}
			if (*s == '\0') {
				break;
			}
			if (*s++ == '\n') {
				regbol = s;
			}
		}
	}
	/* Failure. */
	return(0);
}