/*
 - _XmRegexec - match a regexp against a string
 *
 * Returns 0 when either argument is NULL, when the program does not begin
 * with MAGIC, when the "must appear" string is missing, or when regtry()
 * succeeds at no starting position.  Otherwise returns regtry()'s result
 * (anchored pattern) or 1.
 *
 * The strchr()-based shortcuts are taken only when MB_CUR_MAX == 1; in
 * every other case the scan advances CHARLEN() bytes at a time.
 */
int
_XmRegexec(XmRegexpRec *prog, char *string)
{
    char *cursor;
    int step;

    /* Reject missing arguments and programs without the magic marker. */
    if (prog == NULL || string == NULL)
        return 0;
    if (UCHARAT(prog->program) != MAGIC)
        return 0;

    /* Cheap rejection: the "must appear" literal has to be present. */
    if (prog->regmust != NULL && MB_CUR_MAX == 1) {
        for (cursor = strchr(string, prog->regmust[0]);
             cursor != NULL;
             cursor = strchr(cursor + 1, prog->regmust[0])) {
            if (strncmp(cursor, prog->regmust, prog->regmlen) == 0)
                break;              /* literal found */
        }
        if (cursor == NULL)
            return 0;               /* literal absent */
    }

    /* Remember where the line starts, for ^ . */
    regbol = string;

    /* An anchored pattern is tried exactly once, at the start. */
    if (prog->reganch)
        return regtry(prog, string);

    /* Unanchored with a known first character: jump between candidates. */
    if (prog->regstart != '\0' && MB_CUR_MAX == 1) {
        for (cursor = strchr(string, prog->regstart);
             cursor != NULL;
             cursor = strchr(cursor + 1, prog->regstart)) {
            if (regtry(prog, cursor))
                return 1;
        }
        return 0;
    }

    /* Unanchored, general case: try every character position in turn. */
    cursor = string;
    for (;;) {
        if (regtry(prog, cursor))
            return 1;
        step = CHARLEN(cursor);
        if (step == 0)
            break;                  /* CHARLEN() reported 0: stop scanning */
        cursor += step;
    }

    return 0;
}
/* - regexec - match a regexp against a string */ int regexec( register regexp *prog, register const char *string ) { register char *s; /* Be paranoid... */ if (prog == NULL || string == NULL) { regerror("NULL parameter"); return(0); } /* Check validity of program. */ if (UCHARAT(prog->program) != MAGIC) { regerror("corrupted program"); return(0); } /* If there is a "must appear" string, look for it. */ if ( prog->regmust != NULL ) { s = (char *)string; while ( ( s = strchr( s, prog->regmust[ 0 ] ) ) != NULL ) { if ( !strncmp( s, prog->regmust, prog->regmlen ) ) break; /* Found it. */ ++s; } if ( s == NULL ) /* Not present. */ return 0; } /* Mark beginning of line for ^ . */ regbol = (char *)string; /* Simplest case: anchored match need be tried only once. */ if ( prog->reganch ) return regtry( prog, string ); /* Messy cases: unanchored match. */ s = (char *)string; if (prog->regstart != '\0') /* We know what char it must start with. */ while ((s = strchr(s, prog->regstart)) != NULL) { if (regtry(prog, s)) return(1); s++; } else /* We do not -- general case. */ do { if ( regtry( prog, s ) ) return( 1 ); } while ( *s++ != '\0' ); /* Failure. */ return 0; }
/*
 - regexec - match a regexp against a string
 *
 * prog   - compiled program; its first byte must be MAGIC.
 * string - NUL-terminated subject text.
 *
 * Returns 0 for a NULL argument, a program without the MAGIC marker, a
 * missing "must appear" string, or no match at any position.  Returns
 * regtry()'s result for an anchored pattern and 1 for any other match.
 * Per-call matcher state lives in a local regexecState that is cleared
 * with pgpClearMemory() before use.
 */
int
pgpRegExec(regexp *prog, char const *string)
{
	char const *s;
	regexecState s_res;
	regexecState *res = &s_res;

	/* Be paranoid... */
	if (prog == NULL || string == NULL) {
		FAIL("NULL parameter");
		/*
		 * Return explicitly, exactly as the corrupted-program check
		 * below does, so that prog is never dereferenced when FAIL()
		 * itself does not leave the function.
		 */
		return(0);
	}

	/* Check validity of program. */
	if (UCHARAT(prog->program) != MAGIC) {
		FAIL("corrupted program");
		return(0);
	}

	pgpClearMemory( &s_res, sizeof(s_res) );

	/* If there is a "must appear" string, look for it. */
	if (prog->regmust != NULL) {
		s = string;
		while ((s = strchr(s, prog->regmust[0])) != NULL) {
			if (strncmp(s, prog->regmust, prog->regmlen) == 0)
				break;	/* Found it. */
			s++;
		}
		if (s == NULL)	/* Not present. */
			return(0);
	}

	/* Mark beginning of line for ^ . */
	res->regbol = string;

	/* Simplest case:  anchored match need be tried only once. */
	if (prog->reganch)
		return(regtry(res, prog, string));

	/* Messy cases:  unanchored match. */
	s = string;
	if (prog->regstart != '\0') {
		/* We know what char it must start with. */
		while ((s = strchr(s, prog->regstart)) != NULL) {
			if (regtry(res, prog, s))
				return(1);
			s++;
		}
	} else {
		/* We don't -- general case; the NUL position is tried too. */
		do {
			if (regtry(res, prog, s))
				return(1);
		} while (*s++ != '\0');
	}

	/* Failure. */
	return(0);
}
/*
 - regsub - perform substitutions after a regexp match
 *
 * Copies source to dest, replacing '&' with submatch 0 and "\N" (N a
 * digit) with submatch N; "\\" and "\&" yield a literal '\' or '&'.
 * n is the number of bytes available in dest, terminator included.
 *
 * Returns a pointer to the terminating NUL written into dest, or NULL
 * (after regerror()) on bad arguments, a program without MAGIC, running
 * out of room, or a submatch that contains a NUL.
 */
char *regsub (regexp * prog, char * source, char * dest, int n)
{
    char *in;
    char *out;
    char ch;
    int group;
    int span;

    if (prog == (regexp *) NULL || source == (char *) NULL
        || dest == (char *) NULL) {
        regerror("NULL parm to regsub\n");
        return NULL;
    }
    if (UCHARAT(prog->program) != MAGIC) {
        regerror("damaged regexp fed to regsub\n");
        return NULL;
    }

    in = source;
    out = dest;
    for (;;) {
        ch = *in++;
        if (ch == '\0')
            break;

        /* Work out which submatch, if any, this character refers to. */
        group = -1;
        if (ch == '&')
            group = 0;
        else if (ch == '\\' && *in >= '0' && *in <= '9')
            group = *in++ - '0';

        if (group < 0) {
            /* Plain character; "\\" and "\&" drop the backslash. */
            if (ch == '\\' && (*in == '\\' || *in == '&'))
                ch = *in++;
            if (--n < 0) {          /* amylaar */
                regerror("line too long\n");
                return NULL;
            }
            *out++ = ch;
            continue;
        }

        /* A submatch that did not take part contributes nothing. */
        if (prog->startp[group] == (char *) NULL
            || prog->endp[group] == (char *) NULL)
            continue;

        span = prog->endp[group] - prog->startp[group];
        n -= span;
        if (n < 0) {                /* amylaar */
            regerror("line too long\n");
            return NULL;
        }
        strncpy(out, prog->startp[group], span);
        out += span;
        /* strncpy pads with NULs once the source ends early. */
        if (span != 0 && out[-1] == '\0') {
            regerror("damaged match string\n");
            return NULL;
        }
    }

    /* Room for the terminator is accounted for separately. */
    if (--n < 0) {                  /* amylaar */
        regerror("line too long\n");
        return NULL;
    }
    *out = '\0';
    return out;
}
/*
 - regsub - perform substitutions after a regexp match
 *
 * Copies source to dest, replacing '&' with submatch 0 and "\N" (N a
 * digit) with submatch N.
 *
 * prog   - program whose startp[]/endp[] describe the last match.
 * source - NUL-terminated substitution template.
 * dest   - output buffer.
 * dlen   - size of dest in bytes.
 * trunc  - set to 0 on entry and to 1 when output had to stop because
 *          dest is full; may be NULL if the caller does not care.
 *
 * Returns the number of bytes stored in dest (terminator excluded), or
 * -1 (after regerror()) on NULL prog/source/dest, a program without MAGIC,
 * or a submatch that contains a NUL.  On truncation the bytes copied so
 * far are NOT NUL-terminated; only the returned length delimits them.
 */
int
regsub(regexp *prog, char *source, char *dest, int dlen, int *trunc)
{
	register char *src;
	register char *dst;
	register char c;
	register int no;

	if (prog == NULL || source == NULL || dest == NULL) {
		regerror("NULL parm to regsub");
		return -1;
	}
	if (UCHARAT(prog->program) != MAGIC) {
		regerror("damaged regexp fed to regsub");
		return -1;
	}

	/* trunc is an optional out-parameter: never write through NULL. */
	if (trunc != NULL)
		*trunc = 0;

	src = source;
	dst = dest;
	while ((c = *src++) != '\0') {
		if (c == '&')
			no = 0;
		else if (c == '\\' && '0' <= *src && *src <= '9')
			no = *src++ - '0';
		else
			no = -1;

		/* Always keep one byte in reserve for the terminator. */
		if (dst - dest + 1 >= dlen) {
			if (trunc != NULL)
				*trunc = 1;
			return (int)(dst - dest);
		}

		if (no < 0) {
			/* Ordinary character. */
			*dst++ = c;
		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
			register int len = prog->endp[no] - prog->startp[no];

			if ((dst - dest) + len >= dlen) {
				if (trunc != NULL)
					*trunc = 1;
				return (int)(dst - dest);
			}
			(void) strncpy(dst, prog->startp[no], len);
			dst += len;
			/*
			 * Only inspect the last copied byte when something
			 * was copied: for an empty submatch dst[-1] belongs
			 * to earlier output, or lies before dest entirely
			 * when nothing has been written yet.
			 */
			if (len != 0 && *(dst - 1) == '\0') {
				/* strncpy hit NUL. */
				regerror("damaged match string");
				return -1;
			}
		}
	}
	*dst++ = '\0';

	return (int)(dst - dest - 1);
}
bool ossimRegExp::find (const char* string) { const char* s = 0; if(!string) return false; this->searchstring = string; // Check validity of program. if (!this->program || UCHARAT(this->program) != MAGIC) { //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::find(): Compiled regular expression corrupted.\n"); return 0; } // If there is a "must appear" string, look for it. if (this->regmust != NULL) { s = string; while ((s = strchr(s, this->regmust[0])) != NULL) { if (strncmp(s, this->regmust, this->regmlen) == 0) break; // Found it. s++; } if (s == NULL) // Not present. return (0); } // Mark beginning of line for ^ . regbol = string; // Simplest case: anchored match need be tried only once. if (this->reganch) return (regtry(string, this->startp, this->endp, this->program)); // Messy cases: unanchored match. s = string; if (this->regstart != '\0') // We know what char it must start with. while ((s = strchr(s, this->regstart)) != NULL) { if (regtry(s, this->startp, this->endp, this->program)) return (1); s++; } else // We don't -- general case. do { if (regtry(s, this->startp, this->endp, this->program)) return (1); } while (*s++ != '\0'); // Failure. return (0); }
/* - regsub - perform substitutions after a regexp match */ void regsub (const regexp * prog, const char *source, char *dest) { register char *src; register char *dst; register char c; register int no; register int len; if (prog == NULL || source == NULL || dest == NULL) { regerror ("NULL parm to regsub"); return; } if (UCHARAT (prog->program) != MAGIC) { regerror ("damaged regexp fed to regsub"); return; } src = (char *) source; dst = dest; while ((c = *src++) != '\0') { if (c == '&') no = 0; else if (c == '\\' && '0' <= *src && *src <= '9') no = *src++ - '0'; else no = -1; if (no < 0) { /* Ordinary character. */ if (c == '\\' && (*src == '\\' || *src == '&')) c = *src++; *dst++ = c; } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { len = (int) (prog->endp[no] - prog->startp[no]); (void) strncpy (dst, prog->startp[no], len); dst += len; if (len != 0 && *(dst - 1) == '\0') { /* strncpy hit NUL. */ regerror ("damaged match string"); return; } } } *dst++ = '\0'; }
/*
 * SREsize - report the size recorded in an SRE (prog->regsize).
 *
 * Returns -1, after calling SREerror(), when prog is NULL or when its
 * program does not start with SRE_MAGIC.
 */
int
SREsize(SRE *prog)
{
	if (prog == (SRE *) NULL) {
		SREerror("NULL parameter");
		return -1;
	}

	/* A valid program yields its stored size straight away. */
	if (UCHARAT(prog->program) == SRE_MAGIC)
		return prog->regsize;

	SREerror("corrupted program");
	return -1;
}
/*
 * SREfree - release an SRE with xfree().
 *
 * Nothing is freed, and SREerror() is called instead, when prog is NULL
 * or when its program does not start with SRE_MAGIC.
 */
void
SREfree(SRE *prog)
{
	if (prog == (SRE *) NULL) {
		SREerror("NULL parameter");
		return;
	}

	/* Only hand back memory that still looks like a compiled SRE. */
	if (UCHARAT(prog->program) == SRE_MAGIC)
		xfree(prog);
	else
		SREerror("corrupted program");
}
/*
 - SREsub - perform substitutions after a regexp match
 *
 * Expands source into dest: '&' becomes submatch 0 and "\N" (N a digit)
 * becomes submatch N; every other character is copied unchanged.  dest is
 * NUL-terminated on success and is not bounds-checked here.
 *
 * On a NULL argument, a program without SRE_MAGIC, or a submatch that
 * contains a NUL, SREerror() is called and the function returns early.
 */
void
SREsub(SRE *prog, char *source, char *dest)
{
	char *from;
	char *to;
	char ch;
	int idx;
	int width;
	extern char *strncpy();

	if (prog == NULL || source == NULL || dest == NULL) {
		SREerror("NULL parm to SREsub");
		return;
	}
	if (UCHARAT(prog->program) != SRE_MAGIC) {
		SREerror("damaged SRE fed to SREsub");
		return;
	}

	from = source;
	to = dest;
	for (;;) {
		ch = *from++;
		if (ch == '\0')
			break;

		/* idx >= 0 selects a submatch, -1 means "copy literally". */
		idx = -1;
		if (ch == '&')
			idx = 0;
		else if (ch == '\\' && *from >= '0' && *from <= '9')
			idx = *from++ - '0';

		if (idx < 0) {
			*to++ = ch;
			continue;
		}

		/* Unset submatches expand to the empty string. */
		if (prog->startp[idx] == NULL || prog->endp[idx] == NULL)
			continue;

		width = prog->endp[idx] - prog->startp[idx];
		(void) strncpy(to, prog->startp[idx], width);
		to += width;
		/* strncpy pads with NULs if the match text ended early. */
		if (width > 0 && to[-1] == '\0') {
			SREerror("damaged match string");
			return;
		}
	}
	*to = '\0';
}
/*
 - regatom - the lowest level
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * Parses one atom starting at the file-level cursor regparse and emits it
 * through regnode()/regc().
 *
 * flagp - out: set to WORST first, then HASWIDTH, SIMPLE and (for a
 *         parenthesized group) the SPSTART bit reported by reg() are
 *         OR-ed in as appropriate.
 *
 * Returns the node produced by regnode() or reg(); returns NULL when
 * reg() fails.  The error paths go through FAIL(), which is defined
 * elsewhere -- presumably it reports the message and returns; confirm
 * against the macro.
 */
static char *
regatom( int *flagp )
{
	register char *ret;
	int flags;

	*flagp = WORST;		/* Tentatively. */

	/* Dispatch on the current character; regparse then points past it. */
	switch (*regparse++) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
		ret = regnode(BOL);
		break;
	case '$':
		ret = regnode(EOL);
		break;
	case '.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			register int classr;
			register int classend;

			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				regparse++;
			} else
				ret = regnode(ANYOF);
			/* A leading ']' or '-' is taken literally. */
			if (*regparse == ']' || *regparse == '-')
				regc(*regparse++);
			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					regparse++;
					/* A '-' right before ']' or the end is literal. */
					if (*regparse == ']' || *regparse == '\0')
						regc('-');
					else {
						/*
						 * Range: the low end (the char before
						 * the '-') was already emitted, so
						 * start one above it.
						 */
						classr = UCHARAT(regparse-2)+1;
						classend = UCHARAT(regparse);
						if (classr > classend+1)
							FAIL("invalid [] range");
						for (; classr <= classend; classr++)
							regc(classr);
						regparse++;
					}
				} else
					regc(*regparse++);
			}
			regc('\0');	/* terminate the emitted class members */
			if (*regparse != ']')
				FAIL("unmatched []");
			regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case '\n':
	case ')':
		FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
		FAIL("?+* follows nothing");
		break;
	case '\\':
		/* Look at, and step past, the character after the backslash. */
		switch (*regparse++) {
		case '\0':
			FAIL("trailing \\");
			break;
		case '<':
			ret = regnode(WORDA);
			break;
		case '>':
			ret = regnode(WORDZ);
			break;
		/* FIXME: Someday handle \1, \2, ... */
		default:
			/* Handle general quoted chars in exact-match routine */
			goto de_fault;
		}
		break;
	de_fault:
	default:
		/*
		 * Encode a string of characters to be matched exactly.
		 *
		 * This is a bit tricky due to quoted chars and due to
		 * '*', '+', and '?' taking the SINGLE char previous
		 * as their operand.
		 *
		 * On entry, the char at regparse[-1] is going to go
		 * into the string, no matter what it is.  (It could be
		 * following a \ if we are entered from the '\' case.)
		 *
		 * Basic idea is to pick up a good char in ch and
		 * examine the next char.  If it's *+? then we twiddle.
		 * If it's \ then we frozzle.  If it's other magic char
		 * we push ch and terminate the string.  If none of the
		 * above, we push ch on the string and go around again.
		 *
		 * regprev is used to remember where "the current char"
		 * starts in the string, if due to a *+? we need to back
		 * up and put the current char in a separate, 1-char, string.
		 * When regprev is NULL, ch is the only char in the
		 * string; this is used in *+? handling, and in setting
		 * flags |= SIMPLE at the end.
		 */
		{
			char *regprev;
			register char ch;

			regparse--;			/* Look at cur char */
			ret = regnode(EXACTLY);
			for ( regprev = 0 ; ; ) {
				ch = *regparse++;	/* Get current char */
				switch (*regparse) {	/* look at next one */

				default:
					regc(ch);	/* Add cur to string */
					break;

				case '.': case '[': case '(':
				case ')': case '|': case '\n':
				case '$': case '^':
				case '\0':
				/* FIXME, $ and ^ should not always be magic */
				magic:
					regc(ch);	/* dump cur char */
					goto done;	/* and we are done */

				case '?': case '+': case '*':
					if (!regprev) 	/* If just ch in str, */
						goto magic;	/* use it */
					/* End mult-char string one early */
					regparse = regprev; /* Back up parse */
					goto done;

				case '\\':
					regc(ch);	/* Cur char OK */
					switch (regparse[1]){ /* Look after \ */
					case '\0':
					case '<':
					case '>':
					/* FIXME: Someday handle \1, \2, ... */
						goto done; /* Not quoted */
					default:
						/* Backup point is \, scan
						 * point is after it. */
						regprev = regparse;
						regparse++;
						continue;	/* NOT break; */
					}
				}
				regprev = regparse;	/* Set backup point */
			}
		done:
			regc('\0');	/* terminate the literal's characters */
			*flagp |= HASWIDTH;
			if (!regprev)		/* One char? */
				*flagp |= SIMPLE;
		}
		break;
	}

	return(ret);
}
/*
 * REexec - match a RE_EXP against a string
 *
 * prog   - compiled expression; its first byte must be MAGIC.
 * string - NUL-terminated subject text.
 *
 * Returns FALSE (after _error()) for a NULL argument or a program without
 * MAGIC, FALSE when the "must appear" string is absent or nothing matches,
 * regtry()'s result for an anchored expression, and TRUE for any other
 * match.  The scan position is advanced with CMnext() throughout.
 */
bool
REexec( RE_EXP *prog, char *string)
{
	register char *s;

	/* Be paranoid... */
	if (prog == NULL || string == NULL)
	{
		_error("NULL parameter");
		return( FALSE );
	}

	/* Check validity of program. */
	if (UCHARAT(prog->program) != MAGIC)
	{
		_error("corrupted program");
		return( FALSE );
	}

	/* If there is a "must appear" string, look for it. */
	if (prog->regmust != NULL)
	{
		s = string;
		while ((s = STchr(s, *prog->regmust)) != NULL)
		{
			if (STncmp( s, prog->regmust, prog->regmlen ) == 0 )
				break;	/* Found it. */
			CMnext( s );
		}
		if (s == NULL)	/* Not present. */
			return( FALSE );
	}

	/* Mark beginning of line for ^ . */
	regbol = string;

	/* Simplest case:  anchored match need be tried only once. */
	if (prog->reganch)
		return(regtry(prog, string));

	/* Messy cases:  unanchored match. */
	s = string;
	if (prog->regstart != '\0')
	{
		/* We know what char it must start with. */
		while ((s = STchr(s, prog->regstart)) != NULL)
		{
			if (regtry(prog, s))
				return( TRUE );
			CMnext( s );
		}
	}
	else
	{
		/* We don't -- general case. */
		while( TRUE )
		{
			if (regtry(prog, s))
				return( TRUE );
			if( *s == '\0' )
				break;
			CMnext( s );
		}
	}

	/*
	 * No further CMnext( s ) here: s is never read again, and on the
	 * known-start-char path it is NULL at this point, so advancing it
	 * would be undefined behavior for no benefit.
	 */

	/* Failure. */
	return( FALSE );
}
/*
 * regatom - the lowest level
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * Parses one atom at the file-level cursor regparse (advanced with
 * CMnext()) and emits it through regnode()/regc().  regc() is handed a
 * pointer to the character to emit.
 *
 * flagp - out: set to WORST first, then HASWIDTH, SIMPLE and (for a
 *         parenthesized group) the SPSTART bit reported by reg() are
 *         OR-ed in as appropriate.
 *
 * Returns the node produced by regnode() or reg(); returns NULL when
 * reg() fails.  Errors go through _FAIL(), defined elsewhere --
 * presumably it reports the message and returns; confirm against the
 * macro.
 */
static char *
regatom(i4 *flagp)
{
	register char *ret;
	i4 flags;
	char null_byte = '\0';

	*flagp = WORST;		/* Tentatively. */

	switch (*regparse)
	{
	case '^':
		CMnext( regparse );
		ret = regnode(BOL);
		break;

	case '$':
		CMnext( regparse );
		ret = regnode(EOL);
		break;

	case '.':
		CMnext( regparse );
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;

	case '[':
		{
			/*
			 * range_start stays NULL until an ordinary class
			 * member has been seen.  double_start and first_u1
			 * are initialized so that a '-' with no preceding
			 * member (e.g. "[--a]") is reported as an invalid
			 * range instead of reading indeterminate values.
			 */
			char *range_start = NULL;
			bool double_start = FALSE;
			u_char first_u1 = 0;
			u_char last_u1;

			CMnext( regparse );
			if (*regparse == '^')
			{
				/* Complement of range. */
				ret = regnode(ANYBUT);
				CMnext( regparse );
			}
			else
				ret = regnode(ANYOF);

			/* A leading ']' or '-' is taken literally. */
			if (*regparse == ']' || *regparse == '-')
			{
				regc( regparse );
				CMnext( regparse );
			}

			while (*regparse != '\0' && *regparse != ']')
			{
				if (*regparse == '-')
				{
					char range_op = '-';

					CMnext( regparse );
					if( *regparse == ']' || *regparse == '\0' )
						regc( &range_op );
					else
					{
						bool invalid = FALSE;
						bool double_end;

						if( range_start == NULL )
							invalid = TRUE;
						double_end = CMdbl1st( regparse );

						/* Both ends must be the same width. */
						if( !invalid && double_end &&
							!double_start )
							invalid = TRUE;
						if( !invalid && double_start &&
							!double_end )
							invalid = TRUE;
						if( !invalid &&
							CMcmpcase( range_start,
							regparse ) > 0 )
							invalid = TRUE;

						if( double_start )
						{
							_FAIL("don't know how to support character classes containing double-byte ranges");
						}
						if( invalid )
						{
							_FAIL("invalid [] range");
						}

						/* no double-byte ranges! */

						/*
						** Emit every byte from first_u1
						** up to the end of the range.
						** The loop leaves before the
						** increment once the last byte
						** has been emitted, so a range
						** ending at 0xFF cannot wrap
						** first_u1 back to 0 and spin
						** forever.
						*/
						last_u1 = UCHARAT(regparse);
						while( first_u1 <= last_u1 )
						{
							regc( (char *) &first_u1 );
							if( first_u1 == last_u1 )
								break;
							first_u1++;
						}
						CMnext( regparse );
					}
				}
				else
				{
					/* Ordinary member: remember it as a
					** possible range start. */
					range_start = regparse;
					if( CMdbl1st( range_start ) )
						double_start = TRUE;
					else
					{
						double_start = FALSE;
						first_u1 = UCHARAT(range_start);
					}
					regc( regparse );
					CMnext( regparse );
				}
			}
			regc( &null_byte );
			if (*regparse != ']')
			{
				_FAIL("unmatched []");
			}
			CMnext( regparse );
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;

	case '(':
		CMnext( regparse );
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;

	case '\0':
	case '|':
	case ')':
		CMnext( regparse );
		_FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;

	case '?':
	case '+':
	case '*':
		CMnext( regparse );
		_FAIL("?+* follows nothing");
		break;

	case '\\':
		/* A backslashed character becomes its own EXACTLY node. */
		CMnext( regparse );
		if (*regparse == '\0')
		{
			_FAIL("trailing \\");
		}
		ret = regnode(EXACTLY);
		regc( regparse );
		CMnext( regparse );
		regc( &null_byte );
		*flagp |= HASWIDTH|SIMPLE;
		break;

	default:
		{
			register i4 len;
			register char ender;

			/* Length of the run before the next META character. */
			len = my_strcspn(regparse, META);
			if (len <= 0)
			{
				_FAIL("internal disaster");
			}
			ender = *(regparse+len);
			if (len > 1 && ISMULT(ender))
				len--;	/* Back off clear of ?+* operand. */
			*flagp |= HASWIDTH;
			if (len == 1)
				*flagp |= SIMPLE;
			ret = regnode(EXACTLY);
			while (len > 0)
			{
				regc( regparse );
				CMbytedec( len, regparse );
				CMnext( regparse );
			}
			regc( &null_byte );
		}
		break;
	}

	return(ret);
}
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ static char * regatom(int *flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int clss; register int classend; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc('-'); else { clss = UCHARAT(regparse-2)+1; classend = UCHARAT(regparse); if (clss > classend+1) FAIL("invalid [] range"); for (; clss <= classend; clss++) regc(clss); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != ']') FAIL("unmatched []"); regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return(NULL); *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. */ /* NOTREACHED */ break; case '?': case '+': case '*': FAIL("?+* follows nothing"); /* NOTREACHED */ break; case '\\': if (*regparse == '\0') FAIL("trailing \\"); ret = regnode(EXACTLY); regc(*regparse++); regc('\0'); *flagp |= HASWIDTH|SIMPLE; break; default: { register int len; register char ender; regparse--; len = (int) strcspn(regparse, META); if (len <= 0) FAIL("internal disaster"); ender = *(regparse+len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. 
*/ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return(ret); }
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ char* ossimRegExp::regatom (int *flagp) { char* ret; int flags; *flagp = WORST; // Tentatively. switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH | SIMPLE; break; case '[': { int rxpclass; int rxpclassend; if (*regparse == '^') { // Complement of range. ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc('-'); else { rxpclass = UCHARAT(regparse - 2) + 1; rxpclassend = UCHARAT(regparse); if (rxpclass > rxpclassend + 1) { //RAISE Error, SYM(ossimRegExp), SYM(Invalid_Range), printf ("ossimRegExp::compile(): Invalid range in [].\n"); return 0; } for (; rxpclass <= rxpclassend; rxpclass++) regc(rxpclass); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != ']') { //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Bracket), printf ("ossimRegExp::compile(): Unmatched [].\n"); return 0; } regparse++; *flagp |= HASWIDTH | SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return (NULL); *flagp |= flags & (HASWIDTH | SPSTART); break; case '\0': case '|': case ')': //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::compile(): Internal error.\n"); // Never here return 0; case '?': case '+': case '*': //RAISE Error, SYM(ossimRegExp), SYM(No_Operand), printf ("ossimRegExp::compile(): ?+* follows nothing.\n"); return 0; case '\\': if (*regparse == '\0') { //RAISE Error, SYM(ossimRegExp), SYM(Trailing_Backslash), 
printf ("ossimRegExp::compile(): Trailing backslash.\n"); return 0; } ret = regnode(EXACTLY); regc(*regparse++); regc('\0'); *flagp |= HASWIDTH | SIMPLE; break; default: { int len; char ender; regparse--; len = (int)strcspn(regparse, META); if (len <= 0) { //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::compile(): Internal error.\n"); return 0; } ender = *(regparse + len); if (len > 1 && ISMULT(ender)) len--; // Back off clear of ?+* operand. *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return (ret); }
/* - regexec - match a regexp against a string */ cst_regstate * hs_regexec(const cst_regex *prog, const char *string) { cst_regstate *state; char *s; /* Be paranoid... */ if (prog == NULL || string == NULL) { FAIL("NULL parameter"); return(0); } /* Check validity of program. */ if (UCHARAT(prog->program) != CST_REGMAGIC) { FAIL("corrupted program"); return(0); } /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL) { s = (char *)string; while ((s = strchr(s, prog->regmust[0])) != NULL) { if (strncmp(s, prog->regmust, prog->regmlen) == 0) break; /* Found it. */ s++; } if (s == NULL) /* Not present. */ return(0); } state = cst_alloc(cst_regstate, 1); /* Mark beginning of line for ^ . */ state->bol = string; /* Simplest case: anchored match need be tried only once. */ if (prog->reganch) { if (regtry(state, string, prog->program+1)) return state; else { cst_free(state); return NULL; } } /* Messy cases: unanchored match. */ s = (char *)string; if (prog->regstart != '\0') /* We know what char it must start with. */ while ((s = strchr(s, prog->regstart)) != NULL) { if (regtry(state, s, prog->program+1)) return state; s++; } else /* We don't -- general case. */ do { if (regtry(state, s, prog->program+1)) return state; } while (*s++ != '\0'); cst_free(state); return NULL; }
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ static char * regatom(int *flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int chclass; register int chclassend; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else { ret = regnode(ANYOF); } if (*regparse == ']' || *regparse == '-') { regc(*regparse++); } while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') { regc('-'); } else { chclass = UCHARAT(regparse-2)+1; chclassend = UCHARAT(regparse); if (chclass > chclassend+1) { FAIL("invalid [] range"); } for (; chclass <= chclassend; chclass++) { regc(chclass); } regparse++; } } else if (*regparse == '\\') { switch(*++regparse) { case 'n' : regc('\n'); regparse++; break; case 't' : regc('\t'); regparse++; break; case ']' : regc(']'); regparse++; break; case '-' : regc('-'); regparse++; break; case '\\' : regc('\\'); regparse++; break; default : regparse--; regc(*regparse++); } } else { regc(*regparse++); } } regc('\0'); if (*regparse != ']') { FAIL("unmatched []"); } regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) { return(NULL); } *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. 
*/ break; case '?': case '+': case '*': case '{': FAIL("?+*{ follows nothing"); break; case '\\': if (*regparse == '\0') { FAIL("trailing \\"); } switch(*regparse) { case '<': ret = regnode(BEGWORD); break; case '>': ret = regnode(ENDWORD); break; case 'd': ret = regnode(DIGIT); *flagp |= (HASWIDTH|SIMPLE); break; case 'D': ret = regnode(NDIGIT); *flagp |= (HASWIDTH|SIMPLE); break; case 'n' : ret = regnode(EXACTLY); regc('\n'); regc('\0'); *flagp |= (HASWIDTH|SIMPLE); break; case 'p': ret = regnode(PRINT); *flagp |= HASWIDTH|SIMPLE; break; case 'P': ret = regnode(NPRINT); *flagp |= HASWIDTH|SIMPLE; break; case 's': ret = regnode(WHITESP); *flagp |= HASWIDTH|SIMPLE; break; case 'S': ret = regnode(NWHITESP); *flagp |= HASWIDTH|SIMPLE; break; case 't' : ret = regnode(EXACTLY); regc('\t'); regc('\0'); *flagp |= (HASWIDTH|SIMPLE); break; case 'w': ret = regnode(ALNUM); *flagp |= HASWIDTH|SIMPLE; break; case 'W': ret = regnode(NALNUM); *flagp |= HASWIDTH|SIMPLE; break; default : ret = regnode(EXACTLY); regc(*regparse); regc('\0'); *flagp |= HASWIDTH|SIMPLE; } regparse++; break; default: { register int len; register char ender; regparse--; len = strcspn(regparse, META); if (len <= 0) { FAIL("internal disaster"); } ender = *(regparse+len); if (len > 1 && ISMULT(ender)) { len--; /* Back off clear of ?+* operand. */ } *flagp |= HASWIDTH; if (len == 1) { *flagp |= SIMPLE; } ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return(ret); }
/*
 - SREexec - match a SRE against a string
 *
 * prog   - compiled expression; its first byte must be SRE_MAGIC.
 * string - NUL-terminated subject text.
 *
 * Returns 0 (after SREerror()) for a NULL argument or a program without
 * SRE_MAGIC, 0 when the "must appear" string is absent or nothing
 * matches, regtry()'s result for an anchored expression, and 1 for any
 * other match.
 *
 * While scanning, regbol is moved to just past each '\n' that the scan
 * steps over one character at a time.
 */
int
SREexec(SRE *prog, char *string)
{
	register char *s;
	extern char *strchr();

	/* Be paranoid... */
	if (prog == (SRE *) NULL || string == (char *) NULL) {
		SREerror("NULL parameter");
		return(0);
	}

	/* Check validity of program. */
	if (UCHARAT(prog->program) != SRE_MAGIC) {
		SREerror("corrupted program");
		return(0);
	}

	/* If there is a "must appear" string, look for it. */
	if (prog->regmust != NULL) {
		s = string;
		while ((s = strchr(s, prog->regmust[0])) != NULL) {
			if (strncmp(s, prog->regmust, prog->regmlen) == 0) {
				break;	/* Found it. */
			}
			s++;
		}
		if (s == NULL)	/* Not present. */
			return(0);
	}

	/* Mark beginning of line for ^ . */
	regbol = string;

	/* Simplest case:  anchored match need be tried only once. */
	if (prog->reganch) {
		return(regtry(prog, string));
	}

	/* Messy cases:  unanchored match. */
	s = string;
	if (prog->regstart != '\0') {
		/* We know what char it must start with. */
		while ((s = strchr(s, prog->regstart)) != NULL) {
			if (regtry(prog, s)) {
				return(1);
			}
			if (*s++ == '\n') {
				regbol = s;
			}
		}
	} else {
		/*
		 * We don't -- general case.  Try every position, the
		 * terminating NUL included (so an expression that can match
		 * the empty string at the very end is still attempted), and
		 * test for the terminator BEFORE advancing so that s never
		 * moves past it, not even for an empty subject string.
		 */
		for (;;) {
			if (regtry(prog, s)) {
				return(1);
			}
			if (*s == '\0') {
				break;
			}
			if (*s++ == '\n') {
				regbol = s;
			}
		}
	}

	/* Failure. */
	return(0);
}