/**
 * Add all characters in the inclusive range between lower and upper.
 *
 * The range is emitted as two items: its length, then its first character.
 *
 * Handles a swapped range (upper < lower) by exchanging the bounds first,
 * so exactly one (length, start) pair is emitted and the length is always
 * positive.  Previously the swapped case recursed and then fell through,
 * emitting a second pair whose length was zero or negative.
 */
static void reg_addrange(regex_t *preg, int lower, int upper)
{
	if (lower > upper) {
		int tmp = lower;

		lower = upper;
		upper = tmp;
	}
	/* Add a range as length, start */
	regc(preg, upper - lower + 1);
	regc(preg, lower);
}
/*
 * regatom - the lowest level
 *
 * Parses one atom at regparse and emits the node(s) for it: an anchor
 * (^ $), the any-character node (.), a bracketed character class, a
 * parenthesized sub-expression, a backslash-quoted character, or a run of
 * ordinary characters.
 *
 * Optimization: gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * flagp receives WORST plus whichever of HASWIDTH, SIMPLE and SPSTART
 * apply to the atom.  Returns the node produced by regnode()/reg(), or
 * NULL when reg() fails.
 *
 * NOTE(review): _FAIL is assumed to report its message and return NULL
 * from this function; its definition is not visible here.
 */
static char *
regatom(i4 *flagp)
{
	register char *ret;
	i4 flags;
	char null_byte = '\0';

	*flagp = WORST;		/* Tentatively. */

	switch (*regparse) {
	case '^':
		CMnext( regparse );
		ret = regnode(BOL);
		break;
	case '$':
		CMnext( regparse );
		ret = regnode(EOL);
		break;
	case '.':
		CMnext( regparse );
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
		char *range_start = NULL;	/* most recent literal in the set */
		/*
		 * Both start values are initialized so that a '-' seen before
		 * any range start (e.g. after a leading ']') is rejected
		 * without reading an indeterminate value.
		 */
		bool double_start = FALSE;
		u_char first_u1 = 0;
		u_char last_u1;

		CMnext( regparse );
		if (*regparse == '^') {	/* Complement of range. */
			ret = regnode(ANYBUT);
			CMnext( regparse );
		} else
			ret = regnode(ANYOF);
		/* A leading ']' or '-' is an ordinary member of the set. */
		if (*regparse == ']' || *regparse == '-') {
			regc( regparse );
			CMnext( regparse );
		}
		while (*regparse != '\0' && *regparse != ']') {
			if (*regparse == '-') {
				char range_op = '-';

				CMnext( regparse );
				if( *regparse == ']' || *regparse == '\0' )
					regc( &range_op );	/* trailing '-' is literal */
				else {
					bool invalid = FALSE;
					bool double_end;
					i4 ch;

					if( range_start == NULL )
						invalid = TRUE;
					double_end = CMdbl1st( regparse );
					/* Both ends must have the same width. */
					if( !invalid && double_end && !double_start )
						invalid = TRUE;
					if( !invalid && double_start && !double_end )
						invalid = TRUE;
					if( !invalid && CMcmpcase( range_start, regparse ) > 0 )
						invalid = TRUE;
					if( double_start )
						_FAIL("don't know how to support character classes containing double-byte ranges");
					if( invalid )
						_FAIL("invalid [] range");

					/* no double-byte ranges! */

					/*
					** Initialize the value for the end of the range.
					*/
					last_u1 = UCHARAT(regparse);
					/*
					 * Count in a wider type: a u_char counter can
					 * never exceed an end byte of 0xFF, so a loop on
					 * first_u1 itself would not terminate there.
					 */
					for (ch = first_u1; ch <= (i4) last_u1; ch++) {
						u_char member = (u_char) ch;

						regc( (char *) &member );
					}
					/*
					 * Leave first_u1 one past the range (as the
					 * original loop did) unless that would wrap.
					 */
					first_u1 = (ch > 0xFF) ? last_u1 : (u_char) ch;
					CMnext( regparse );
				}
			} else {
				/* Ordinary member; remember it as a possible range start. */
				range_start = regparse;
				if( CMdbl1st( range_start ) ) {
					double_start = TRUE;
				} else {
					double_start = FALSE;
					first_u1 = UCHARAT(range_start);
				}
				regc( regparse );
				CMnext( regparse );
			}
		}
		regc( &null_byte );
		if (*regparse != ']')
			_FAIL("unmatched []");
		CMnext( regparse );
		*flagp |= HASWIDTH|SIMPLE;
	}
	break;
	case '(':
		CMnext( regparse );
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		CMnext( regparse );
		_FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
		CMnext( regparse );
		_FAIL("?+* follows nothing");
		break;
	case '\\':
		CMnext( regparse );
		if (*regparse == '\0')
			_FAIL("trailing \\");
		/* The quoted character becomes its own one-character EXACTLY node. */
		ret = regnode(EXACTLY);
		regc( regparse );
		CMnext( regparse );
		regc( &null_byte );
		*flagp |= HASWIDTH|SIMPLE;
		break;
	default: {
		register i4 len;
		register char ender;

		len = my_strcspn(regparse, META);
		if (len <= 0)
			_FAIL("internal disaster");
		ender = *(regparse+len);
		if (len > 1 && ISMULT(ender))
			len--;		/* Back off clear of ?+* operand. */
		*flagp |= HASWIDTH;
		if (len == 1)
			*flagp |= SIMPLE;
		ret = regnode(EXACTLY);
		while (len > 0) {
			regc( regparse );
			CMbytedec( len, regparse );
			CMnext( regparse );
		}
		regc( &null_byte );
	}
	break;
	}

	return(ret);
}
/*
 * REcompile - compile a regular expression into internal code
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because free() must be able to free it all.)
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled RE_EXP.
 *
 * exp      - the pattern text; must not be NULL.
 * re_exp   - receives the compiled expression on success; must not be NULL.
 * mem_tag  - passed through to MEreqmem() for the allocation.
 *
 * Returns OK on success.  Returns FAIL (after calling _error() for the
 * conditions detected here) on a NULL argument, a pattern rejected by
 * reg(), an oversized program, or allocation failure; *re_exp is left
 * untouched in those cases.
 */
STATUS
REcompile( char *exp, RE_EXP **re_exp, i4 mem_tag )
{
	register RE_EXP *r;
	register char *scan;
	register char *longest;
	register i4 len;
	i4 flags;
	u_char magic = MAGIC;

	if (exp == NULL || re_exp == NULL)
	{
		_error("NULL argument");
		return (FAIL);
	}

	/* First pass: determine size, legality. */
	regparse = exp;
	regnpar = 1;
	regsize = 0L;
	regcode = &regdummy;	/* size-counting mode: nothing is stored */
	regc( (char *) &magic );
	if (reg(0, &flags) == NULL)
		return( FAIL );

	/* Small enough for pointer-storage convention? */
	if (regsize >= 32767L)		/* Probably could be 65535L. */
	{
		_error("regular expression too big");
		return (FAIL);
	}

	/* Allocate space. */
	r = (RE_EXP *) MEreqmem( mem_tag, sizeof(RE_EXP) + (unsigned) regsize,
		FALSE, NULL);
	if (r == NULL)
	{
		_error("out of space");
		return (FAIL);
	}

	/* Second pass: emit code. */
	regparse = exp;
	regnpar = 1;
	regcode = r->program;
	regc( (char *) &magic );
	if (reg(0, &flags) == NULL)
	{
		/*
		 * NOTE(review): r is not released on this path; the routine
		 * that frees MEreqmem() memory is not visible here.
		 */
		return( FAIL );
	}

	/* Dig out information for optimizations. */
	r->regstart = '\0';	/* Worst-case defaults. */
	r->reganch = 0;
	r->regmust = NULL;
	r->regmlen = 0;
	scan = r->program+1;			/* First BRANCH. */
	if (OP(regnext(scan)) == END)
	{
		/* Only one top-level choice. */
		scan = OPERAND(scan);

		/* Starting-point info. */
		if (OP(scan) == EXACTLY)
			r->regstart = *OPERAND(scan);
		else if (OP(scan) == BOL)
			r->reganch++;

		/*
		 * If there's something expensive in the r.e., find the
		 * longest literal string that must appear and make it the
		 * regmust.  Resolve ties in favor of later strings, since
		 * the regstart check works with the beginning of the r.e.
		 * and avoiding duplication strengthens checking.  Not a
		 * strong reason, but sufficient in the absence of others.
		 */
		if (flags&SPSTART)
		{
			longest = NULL;
			len = 0;
			for (; scan != NULL; scan = regnext(scan))
			{
				i4 oplen;

				if (OP(scan) != EXACTLY)
					continue;
				/* Measure each literal once; ">=" keeps the later of equals. */
				oplen = STlength(OPERAND(scan));
				if (oplen >= len)
				{
					longest = OPERAND(scan);
					len = oplen;
				}
			}
			r->regmust = longest;
			r->regmlen = len;
		}
	}

	*re_exp = r;
	return( OK );
}
/*
 * regatom - the lowest level
 *
 * Consumes one character at regparse and dispatches on it, emitting the
 * node for that atom via regnode()/regc() and reporting HASWIDTH/SIMPLE
 * through *flagp (which starts out as WORST).
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * NOTE(review): this definition is cut off after the '[' case in the
 * text visible here -- the remaining cases, the end of the switch and the
 * function's closing brace are missing.
 */
static char *regatom( int *flagp )
{
    char *ret;
    int flags;

    *flagp = WORST; /* Tentatively. */

    switch( *regparse++ ) {
    case '~':
        /* Emits a CASEI node; rejected when nothing follows the '~'. */
        if( *regparse == 0 ) {
            FAIL( ERR_RE_INVALID_CASETOGGLE );
        }
        ret = regnode( CASEI );
        break;
    case '@':
        /* Emits a NOCASEI node; rejected when nothing follows the '@'. */
        if( *regparse == 0 ) {
            FAIL( ERR_RE_INVALID_CASETOGGLE );
        }
        ret = regnode( NOCASEI );
        break;
    case '^':
        ret = regnode( BOL );
        break;
    case '$':
        ret = regnode( EOL );
        break;
    case '.':
        ret = regnode( ANY );
        *flagp |= HASWIDTH | SIMPLE;
        break;
    case '[':
        {
            if( *regparse == '^' ) { /* Complement of range. */
                ret = regnode( ANYBUT );
                regparse++;
            } else {
                ret = regnode( ANYOF );
            }
            /* A leading ']' or '-' is taken as a literal member of the set. */
            if( *regparse == ']' || *regparse == '-' ) {
                regc( *regparse++ );
            }
            while( *regparse != '\0' && *regparse != ']' ) {
                if( *regparse == '-' ) {
                    regparse++;
                    if( *regparse == ']' || *regparse == '\0' ) {
                        /* A '-' just before the end of the set is literal. */
                        regc( '-' );
                    } else {
                        int class;
                        int classend;

                        /*
                         * The character before the '-' was already added,
                         * so the range starts one past it.
                         */
                        class = UCHARAT( regparse - 2 ) + 1;
                        classend = UCHARAT( regparse );
                        if( class > classend + 1 ) {
                            FAIL( ERR_RE_INVALID_SB_RANGE );
                        }
                        for( ; class <= classend; class++ ) {
                            regc( (char)class );
                        }
                        regparse++;
                    }
                } else {
                    /* Backslash-t adds a tab to the set when REALTABS is true. */
                    if( *regparse == '\\' && *( regparse + 1 ) == 't' && REALTABS ) {
                        regparse += 2;
                        regc( '\t' );
                    } else {
                        regc( *regparse++ );
                    }
                }
            }
            /* The member list is terminated with a NUL. */
            regc( '\0' );
            if( *regparse != ']' ) {
                FAIL( ERR_RE_UNMATCHED_SQUARE_BRACKET );
            }
            regparse++;
            *flagp |= HASWIDTH | SIMPLE;
        }
/* - RegComp - compile a regular expression into internal code * * We can't allocate space until we know how big the compiled form will be, * but we can't compile it (and thus know how big it is) until we've got a * place to put the code. So we cheat: we compile it twice, once with code * generation turned off and size counting turned on, and once "for real". * This also means that we don't allocate space until we are sure that the * thing really will compile successfully, and we never have to move the * code and thus invalidate pointers into it. (Note that it has to be in * one piece because free() must be able to free it all.) * * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ regexp *RegComp( const char *instr ) { regexp *r; char *scan; char *longest; const char *exp; char buff[MAX_STR*2]; int flags, ignmag = FALSE; unsigned j; size_t i, k, len; #ifdef WANT_EXCLAMATION if( instr[0] == '!' ) { instr++; ignmag = TRUE; } #endif /* * flip roles of magic chars */ if( !ignmag && ( !MAGICFLAG && MAGICSTR != NULL ) ) { j = 0; k = strlen( instr ); for( i = 0; i < k; i++ ) { if( instr[i] == '\\' ) { if( strchr( MAGICSTR, instr[i + 1] ) == NULL ) { buff[j++] = '\\'; } i++; } else { if( strchr( MAGICSTR, instr[i] ) != NULL ) { buff[j++] = '\\'; } } buff[j++] = instr[i]; } buff[j] = 0; exp = buff; } else { exp = instr; } regError( ERR_NO_ERR ); if( exp == NULL ) { FAIL( ERR_RE_NULL_ARGUMENT ); } /* First pass: determine size, legality. */ regparse = exp; regnpar = 1; regsize = 0L; regcode = ®dummy; regc( MAGIC ); if( reg( 0, &flags ) == NULL ) { return( NULL ); } /* Allocate space. */ r = ALLOC( sizeof( regexp ) + ( unsigned ) regsize ); /* Second pass: emit code. */ regparse = exp; regnpar = 1; regcode = r->program; regc( MAGIC ); if( reg( 0, &flags ) == NULL ) { return( NULL ); } /* Dig out information for optimizations. */ r->regstart = '\0'; /* Worst-case defaults. 
*/ r->reganch = 0; r->regmust = NULL; r->regmlen = 0; scan = r->program + 1; /* First BRANCH. */ if( OP( regnext( scan ) ) == END ) { /* Only one top-level choice. */ scan = OPERAND( scan ); /* Starting-point info. */ if( OP( scan ) == EXACTLY ) { r->regstart = *OPERAND( scan ); } else if( OP( scan ) == BOL ) { r->reganch++; } /* * If there's something expensive in the r.e., find the * longest literal string that must appear and make it the * regmust. Resolve ties in favor of later strings, since * the regstart check works with the beginning of the r.e. * and avoiding duplication strengthens checking. Not a * strong reason, but sufficient in the absence of others. */ if( flags & SPSTART ) { longest = NULL; len = 0; for( ; scan != NULL; scan = regnext( scan ) ) { if( OP( scan ) == EXACTLY && strlen( OPERAND( scan ) ) >= len ) { longest = OPERAND( scan ); len = strlen( OPERAND( scan ) ); } } r->regmust = longest; r->regmlen = (short)len; } } return( r ); }
// regatom - the lowest level of the parser.
//
// Consumes one atom at regparse and emits its node(s): an anchor, the
// any-character node, a bracketed set, a parenthesized group, a
// backslash-quoted character, or a run of ordinary characters packed into a
// single EXACTLY node.  *flagp starts as WORST and gains HASWIDTH, SIMPLE
// and SPSTART as they apply.  Returns the node, or NULL after tracing a
// diagnostic when the pattern is malformed.
TCHAR *CRegExp::regatom(int *flagp)
{
	TCHAR *ret;
	int flags;

	*flagp = WORST;	// Tentatively.

	switch (*regparse++) {
	case _T('^'):
		ret = regnode(BOL);
		break;

	case _T('$'):
		ret = regnode(EOL);
		break;

	case _T('.'):
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;

	case _T('['): {
		int ch;

		// A leading '^' selects the complemented set.
		if (*regparse != _T('^')) {
			ret = regnode(ANYOF);
		} else {
			ret = regnode(ANYBUT);
			++regparse;
		}

		// A leading ']' or '-' is an ordinary member.
		ch = *regparse;
		if (ch == _T(']') || ch == _T('-')) {
			regc(ch);
			++regparse;
		}

		for (;;) {
			ch = *regparse++;
			if (ch == _T('\0') || ch == _T(']'))
				break;

			if (ch != _T('-')) {
				regc(ch);
				continue;
			}

			// '-' directly before the end of the set is a literal.
			ch = *regparse;
			if (ch == _T(']') || ch == _T('\0')) {
				regc(_T('-'));
				continue;
			}

			// A real range: the low end was already emitted, so
			// start one past it.
			int low = (unsigned) (TCHAR)*(regparse-2);
			int high = (unsigned) (TCHAR)ch;
			if (low > high) {
				TRACE0("invalid [] range\n");
				return NULL;
			}
			while (++low <= high)
				regc(low);
			++regparse;
		}

		regc(_T('\0'));
		if (ch != _T(']')) {
			TRACE0("unmatched []\n");
			return NULL;
		}
		*flagp |= HASWIDTH|SIMPLE;
		break;
	}

	case _T('('):
		if ((ret = reg(1, &flags)) == NULL)
			return NULL;
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;

	case _T('\0'):
	case _T('|'):
	case _T(')'):
		// supposed to be caught earlier
		TRACE0("internal error: \\0|) unexpected\n");
		return NULL;

	case _T('?'):
	case _T('+'):
	case _T('*'):
		TRACE0("?+* follows nothing\n");
		return NULL;

	case _T('\\'):
		if (*regparse == _T('\0')) {
			TRACE0("trailing \\\n");
			return NULL;
		}
		// The quoted character becomes its own EXACTLY node.
		ret = regnode(EXACTLY);
		regc(*regparse++);
		regc(_T('\0'));
		*flagp |= HASWIDTH|SIMPLE;
		break;

	default: {
		// Un-read the character and measure the ordinary run.
		regparse--;
		size_t len = _tcscspn(regparse, META);
		if (len == 0) {
			TRACE0("internal error: strcspn 0\n");
			return NULL;
		}

		TCHAR ender = *(regparse+len);
		if (len > 1 && ISREPN(ender))
			len--;	// Back off clear of ?+* operand.

		*flagp |= HASWIDTH;
		if (len == 1)
			*flagp |= SIMPLE;

		ret = regnode(EXACTLY);
		while (len > 0) {
			regc(*regparse++);
			len--;
		}
		regc(_T('\0'));
		break;
	}
	}

	return(ret);
}
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ static char * regatom(int *flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int clss; register int classend; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc('-'); else { clss = UCHARAT(regparse-2)+1; classend = UCHARAT(regparse); if (clss > classend+1) FAIL("invalid [] range"); for (; clss <= classend; clss++) regc(clss); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != ']') FAIL("unmatched []"); regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return(NULL); *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. */ /* NOTREACHED */ break; case '?': case '+': case '*': FAIL("?+* follows nothing"); /* NOTREACHED */ break; case '\\': if (*regparse == '\0') FAIL("trailing \\"); ret = regnode(EXACTLY); regc(*regparse++); regc('\0'); *flagp |= HASWIDTH|SIMPLE; break; default: { register int len; register char ender; regparse--; len = (int) strcspn(regparse, META); if (len <= 0) FAIL("internal disaster"); ender = *(regparse+len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. 
*/ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return(ret); }
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ static char * regatom(int *flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int chclass; register int chclassend; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else { ret = regnode(ANYOF); } if (*regparse == ']' || *regparse == '-') { regc(*regparse++); } while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') { regc('-'); } else { chclass = UCHARAT(regparse-2)+1; chclassend = UCHARAT(regparse); if (chclass > chclassend+1) { FAIL("invalid [] range"); } for (; chclass <= chclassend; chclass++) { regc(chclass); } regparse++; } } else if (*regparse == '\\') { switch(*++regparse) { case 'n' : regc('\n'); regparse++; break; case 't' : regc('\t'); regparse++; break; case ']' : regc(']'); regparse++; break; case '-' : regc('-'); regparse++; break; case '\\' : regc('\\'); regparse++; break; default : regparse--; regc(*regparse++); } } else { regc(*regparse++); } } regc('\0'); if (*regparse != ']') { FAIL("unmatched []"); } regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) { return(NULL); } *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. 
*/ break; case '?': case '+': case '*': case '{': FAIL("?+*{ follows nothing"); break; case '\\': if (*regparse == '\0') { FAIL("trailing \\"); } switch(*regparse) { case '<': ret = regnode(BEGWORD); break; case '>': ret = regnode(ENDWORD); break; case 'd': ret = regnode(DIGIT); *flagp |= (HASWIDTH|SIMPLE); break; case 'D': ret = regnode(NDIGIT); *flagp |= (HASWIDTH|SIMPLE); break; case 'n' : ret = regnode(EXACTLY); regc('\n'); regc('\0'); *flagp |= (HASWIDTH|SIMPLE); break; case 'p': ret = regnode(PRINT); *flagp |= HASWIDTH|SIMPLE; break; case 'P': ret = regnode(NPRINT); *flagp |= HASWIDTH|SIMPLE; break; case 's': ret = regnode(WHITESP); *flagp |= HASWIDTH|SIMPLE; break; case 'S': ret = regnode(NWHITESP); *flagp |= HASWIDTH|SIMPLE; break; case 't' : ret = regnode(EXACTLY); regc('\t'); regc('\0'); *flagp |= (HASWIDTH|SIMPLE); break; case 'w': ret = regnode(ALNUM); *flagp |= HASWIDTH|SIMPLE; break; case 'W': ret = regnode(NALNUM); *flagp |= HASWIDTH|SIMPLE; break; default : ret = regnode(EXACTLY); regc(*regparse); regc('\0'); *flagp |= HASWIDTH|SIMPLE; } regparse++; break; default: { register int len; register char ender; regparse--; len = strcspn(regparse, META); if (len <= 0) { FAIL("internal disaster"); } ender = *(regparse+len); if (len > 1 && ISMULT(ender)) { len--; /* Back off clear of ?+* operand. */ } *flagp |= HASWIDTH; if (len == 1) { *flagp |= SIMPLE; } ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return(ret); }
void ossimRegExp::compile (const char* exp) { const char* scan; const char* longest; unsigned long len; int flags; if (exp == NULL) { //RAISE Error, SYM(ossimRegExp), SYM(No_Expr), printf ("ossimRegExp::compile(): No expression supplied.\n"); return; } // First pass: determine size, legality. regparse = exp; regnpar = 1; regsize = 0L; regcode = ®dummy; regc(MAGIC); if(!reg(0, &flags)) { printf ("ossimRegExp::compile(): Error in compile.\n"); return; } this->startp[0] = this->endp[0] = this->searchstring = NULL; // Small enough for pointer-storage convention? if (regsize >= 32767L) { // Probably could be 65535L. //RAISE Error, SYM(ossimRegExp), SYM(Expr_Too_Big), printf ("ossimRegExp::compile(): Expression too big.\n"); return; } // Allocate space. //#ifndef WIN32 if (this->program != NULL) delete [] this->program; //#endif this->program = new char[regsize]; this->progsize = (int) regsize; if (this->program == NULL) { //RAISE Error, SYM(ossimRegExp), SYM(Out_Of_Memory), printf ("ossimRegExp::compile(): Out of memory.\n"); return; } // Second pass: emit code. regparse = exp; regnpar = 1; regcode = this->program; regc(MAGIC); reg(0, &flags); // Dig out information for optimizations. this->regstart = '\0'; // Worst-case defaults. this->reganch = 0; this->regmust = NULL; this->regmlen = 0; scan = this->program + 1; // First BRANCH. if (OP(regnext(scan)) == END) { // Only one top-level choice. scan = OPERAND(scan); // Starting-point info. if (OP(scan) == EXACTLY) this->regstart = *OPERAND(scan); else if (OP(scan) == BOL) this->reganch++; // // If there's something expensive in the r.e., find the longest // literal string that must appear and make it the regmust. Resolve // ties in favor of later strings, since the regstart check works // with the beginning of the r.e. and avoiding duplication // strengthens checking. Not a strong reason, but sufficient in the // absence of others. 
// if (flags & SPSTART) { longest = NULL; len = 0; for (; scan != NULL; scan = regnext(scan)) if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { longest = OPERAND(scan); len = (unsigned long)strlen(OPERAND(scan)); } this->regmust = longest; this->regmlen = len; } } }
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. */ static char *regatom (int * flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case CARET: ret = regnode(BOL); break; case DOLLAR: ret = regnode(EOL); break; case DOT: ret = regnode(ANY); *flagp |= HASWIDTH | SIMPLE; break; case LSHBRAC: ret = regnode(WORDSTART); break; case RSHBRAC: ret = regnode(WORDEND); break; case LSQBRAC:{ register int classs; register int classend; if (*regparse == CARET) { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == RSQBRAC || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != RSQBRAC) { if (*regparse == '-') { regparse++; if (*regparse == RSQBRAC || *regparse == '\0') regc('-'); else { classs = (CHARBITS & *(regparse - 2)) + 1; classend = (CHARBITS & *(regparse)); if (classs > classend + 1) FAIL("invalid [] range\n"); for (; classs <= classend; classs++) regc(classs); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != RSQBRAC) FAIL("unmatched []\n"); regparse++; *flagp |= HASWIDTH | SIMPLE; } break; case LBRAC: ret = reg(1, &flags); if (ret == (char *) NULL) return ((char *) NULL); *flagp |= flags & (HASWIDTH | SPSTART); break; case '\0': case OR_OP: case RBRAC: FAIL("internal urp\n"); /* Supposed to be caught earlier. */ break; case ASTERIX: FAIL("* follows nothing\n"); break; case PLUSS: FAIL("+ follows nothing\n"); break; case QMARK: FAIL("? follows nothing\n"); break; default:{ register int len; register short ender; regparse--; for (len = 0; regparse[len] && !(regparse[len] & SPECIAL) && regparse[len] != RSQBRAC; len++); if (len <= 0) { FAIL("unexpected ]\n"); } ender = *(regparse + len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. 
*/ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return (ret); }
/* - regcomp - compile a regular expression into internal code * * We can't allocate space until we know how big the compiled form will be, * but we can't compile it (and thus know how big it is) until we've got a * place to put the code. So we cheat: we compile it twice, once with code * generation turned off and size counting turned on, and once "for real". * This also means that we don't allocate space until we are sure that the * thing really will compile successfully, and we never have to move the * code and thus invalidate pointers into it. (Note that it has to be in * one piece because FREE() must be able to free it all.) * * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ regexp *regcomp (unsigned char * exp, int excompat) /* \( \) operators like in unix ex */ { register regexp *r; register unsigned char *scan; register char *longest; register int len; int flags; short *exp2, *dest, c; if (!exp) FAIL("NULL argument\n"); exp2 = (short *) DXALLOC((strlen((char *)exp) + 1) * (sizeof(short[8]) / sizeof(char[8])), TAG_TEMPORARY, "regcomp: 1"); for (scan = exp, dest = exp2; (c = *scan++);) { switch (c) { case '(': case ')': *dest++ = excompat ? c : c | SPECIAL; break; case '.': case '*': case '+': case '?': case '|': case '$': case '^': case '[': case ']': *dest++ = c | SPECIAL; break; case '\\': switch (c = *scan++) { case 0: FREE(exp2); FAIL("Regular expression cannot end with '\\'. Use \"\\\\\".\n"); break; case '(': case ')': *dest++ = excompat ? c | SPECIAL : c; break; case '<': case '>': *dest++ = c | SPECIAL; break; case '{': case '}': FREE(exp2); FAIL("sorry, unimplemented operator\n"); case 'b': *dest++ = '\b'; break; case 't': *dest++ = '\t'; break; case 'r': *dest++ = '\r'; break; default: *dest++ = c; } break; default: *dest++ = c; } } *dest = 0; /* First pass: determine size, legality. 
*/ regparse = exp2; regnpar = 1; regsize = 0L; regcode = ®dummy; regc((char) MAGIC); if (reg(0, &flags) == (char *) NULL) { FREE(exp2); return ((regexp *) NULL); } /* Small enough for pointer-storage convention? */ if (regsize >= 32767L) /* Probably could be 65535L. */ { FREE(exp2); FAIL("regexp too big\n"); } /* Allocate space. */ r = (regexp *) DXALLOC(sizeof(regexp) + (unsigned) regsize, TAG_TEMPORARY, "regcomp: 2"); if (r == (regexp *) NULL) { FREE(exp2); FAIL("out of space\n"); } /* Second pass: emit code. */ regparse = exp2; regnpar = 1; regcode = (char *)(r->program); regc((char) MAGIC); if (reg(0, &flags) == NULL) { FREE(exp2); FREE(r); return ((regexp *) NULL); } /* Dig out information for optimizations. */ r->regstart = '\0'; /* Worst-case defaults. */ r->reganch = 0; r->regmust = NULL; r->regmlen = 0; scan = (unsigned char *)(r->program + 1); /* First BRANCH. */ if (OP(regnext((char *)scan)) == END) { /* Only one top-level choice. */ scan = OPERAND(scan); /* Starting-point info. */ if (OP(scan) == EXACTLY) r->regstart = *OPERAND(scan); else if (OP(scan) == BOL) r->reganch++; /* * If there's something expensive in the r.e., find the longest * literal string that must appear and make it the regmust. Resolve * ties in favor of later strings, since the regstart check works * with the beginning of the r.e. and avoiding duplication * strengthens checking. Not a strong reason, but sufficient in the * absence of others. */ if (flags & SPSTART) { longest = NULL; len = 0; for (; scan != NULL; scan = (unsigned char *)regnext((char *)scan)) { char *tmp = (char *)OPERAND(scan); int tlen; if (OP(scan) == EXACTLY && (tlen = strlen(tmp)) >= len) { longest = tmp; len = tlen; } } r->regmust = longest; r->regmlen = len; } } FREE((char *) exp2); return (r); }
/*
 - regatom - the lowest level
 *
 * Decodes one character at preg->regparse with reg_utf8_tounicode_case(),
 * advances past it, and dispatches on it to emit the node(s) for one atom.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * *flagp is set to WORST and then gains HASWIDTH, SIMPLE and SPSTART as
 * they apply to the atom.
 *
 * Returns the value produced by regnode() or reg() for the atom.  On the
 * error paths visible here it stores a REG_ERR_* code in preg->err and
 * returns 0.
 */
static int regatom(regex_t *preg, int *flagp)
{
	int ret;
	int flags;
	int nocase = (preg->cflags & REG_ICASE);

	int ch;
	/* n is the byte length of the decoded character; the default case uses it to back up. */
	int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);

	*flagp = WORST;		/* Tentatively. */

	preg->regparse += n;
	switch (ch) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
		ret = regnode(preg, BOL);
		break;
	case '$':
		ret = regnode(preg, EOL);
		break;
	case '.':
		ret = regnode(preg, ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Work on a local cursor; preg->regparse is updated once the set is parsed. */
			const char *pattern = preg->regparse;

			if (*pattern == '^') {	/* Complement of range. */
				ret = regnode(preg, ANYBUT);
				pattern++;
			} else
				ret = regnode(preg, ANYOF);

			/* Special case. If the first char is ']' or '-', it is part of the set */
			if (*pattern == ']' || *pattern == '-') {
				reg_addrange(preg, *pattern, *pattern);
				pattern++;
			}

			while (*pattern && *pattern != ']') {
				/* Is this a range? a-z */
				int start;
				int end;

				pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
				if (start == '\\') {
					/* An escape that decodes to 0 is rejected. */
					pattern += reg_decode_escape(pattern, &start);
					if (start == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}
				/* A '-' followed by something other than ']' or the end makes a range. */
				if (pattern[0] == '-' && pattern[1] && pattern[1] != ']') {
					/* skip '-' */
					pattern += utf8_tounicode(pattern, &end);
					pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
					if (end == '\\') {
						pattern += reg_decode_escape(pattern, &end);
						if (end == 0) {
							preg->err = REG_ERR_NULL_CHAR;
							return 0;
						}
					}

					reg_addrange(preg, start, end);
					continue;
				}
				/*
				 * Named classes.  The 8-character comparison covers the
				 * text after the '[' that was just consumed.  With
				 * REG_ICASE set the lowercase range is not added.
				 */
				if (start == '[') {
					if (strncmp(pattern, ":alpha:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":alnum:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						reg_addrange(preg, '0', '9');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":space:]", 8) == 0) {
						reg_addrange_str(preg, " \t\r\n\f\v");
						pattern += 8;
						continue;
					}
				}
				/* Not a range, so just add the char */
				reg_addrange(preg, start, start);
			}
			/* Terminate the list of ranges. */
			regc(preg, '\0');

			/*
			 * Step over the closing ']' when present.  Reaching the end
			 * of the pattern without one is not reported as an error
			 * here.
			 */
			if (*pattern) {
				pattern++;
			}
			preg->regparse = pattern;

			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		ret = reg(preg, 1, &flags);
		if (ret == 0)
			return 0;
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		preg->err = REG_ERR_INTERNAL;
		return 0;	/* Supposed to be caught earlier. */
	case '?':
	case '+':
	case '*':
	case '{':
		preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
		return 0;
	case '\\':
		/* Dispatch on, and consume, the character after the backslash. */
		switch (*preg->regparse++) {
		case '\0':
			preg->err = REG_ERR_TRAILING_BACKSLASH;
			return 0;
		case '<':
		case 'm':
			ret = regnode(preg, WORDA);
			break;
		case '>':
		case 'M':
			ret = regnode(preg, WORDZ);
			break;
		case 'd':
			/* \d is emitted as an ANYOF set holding '0'..'9'. */
			ret = regnode(preg, ANYOF);
			reg_addrange(preg, '0', '9');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'w':
			/* \w is emitted as an ANYOF set: letters, digits and '_'. */
			ret = regnode(preg, ANYOF);
			if ((preg->cflags & REG_ICASE) == 0) {
				reg_addrange(preg, 'a', 'z');
			}
			reg_addrange(preg, 'A', 'Z');
			reg_addrange(preg, '0', '9');
			reg_addrange(preg, '_', '_');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 's':
			/* \s is emitted as an ANYOF set of the listed whitespace characters. */
			ret = regnode(preg, ANYOF);
			reg_addrange_str(preg," \t\r\n\f\v");
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		/* FIXME: Someday handle \1, \2, ... */
		default:
			/* Handle general quoted chars in exact-match routine */
			/* Back up to include the backslash */
			preg->regparse--;
			goto de_fault;
		}
		break;
	de_fault:
	default: {
			/*
			 * Encode a string of characters to be matched exactly.
			 */
			int added = 0;

			/* Back up to pick up the first char of interest */
			preg->regparse -= n;

			ret = regnode(preg, EXACTLY);

			/* Note that a META operator such as ? or * consumes the
			 * preceding char.
			 * Thus we must be careful to look ahead by 2 and add the
			 * last char as it's own EXACTLY if necessary
			 */

			/* Until end of string or a META char is reached */
			while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
				n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
				if (ch == '\\' && preg->regparse[n]) {
					/* Non-trailing backslash.
					 * Is this a special escape, or a regular escape?
					 */
					if (strchr("<>mMwds", preg->regparse[n])) {
						/* A special escape. All done with EXACTLY */
						break;
					}
					/* Decode it. Note that we add the length for the escape
					 * sequence to the length for the backlash so we can skip
					 * the entire sequence, or not as required.
					 */
					n += reg_decode_escape(preg->regparse + n, &ch);
					if (ch == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}

				/* Now we have one char 'ch' of length 'n'.
				 * Check to see if the following char is a MULT
				 */

				if (ISMULT(preg->regparse[n])) {
					/* Yes. But do we already have some EXACTLY chars? */
					if (added) {
						/* Yes, so return what we have and pick up the current char next time around */
						break;
					}
					/* No, so add this single char and finish */
					regc(preg, ch);
					added++;
					preg->regparse += n;
					break;
				}

				/* No, so just add this char normally */
				regc(preg, ch);
				added++;
				preg->regparse += n;
			}
			/* Terminate the literal. */
			regc(preg, '\0');

			*flagp |= HASWIDTH;
			/* Only a one-character literal is flagged SIMPLE. */
			if (added == 1)
				*flagp |= SIMPLE;
			break;
		}
		break;
	}

	return(ret);
}
/*
 - regcomp - compile a regular expression into internal code
 *
 * Unlike the classic two-pass scheme, this version compiles in a single
 * pass: the program buffer is allocated up front, sized from the length of
 * the pattern, and reg() then emits code into it.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 *
 * preg   - receives the compiled expression; it is zeroed first.
 * exp    - the pattern text; NULL yields REG_ERR_NULL_ARGUMENT.
 * cflags - stored in preg->cflags for the parser.
 *
 * Returns 0 on success, otherwise an error code that is also left in
 * preg->err.
 * NOTE(review): FAIL(preg, code) is assumed to store the code in preg->err
 * and return it; its definition is not visible here.  preg->program is not
 * released on the failure paths here -- presumably the caller's cleanup
 * routine does that; confirm.
 */
int regcomp(regex_t *preg, const char *exp, int cflags)
{
	int scan;
	int longest;
	unsigned len;
	int flags;

	memset(preg, 0, sizeof(*preg));

	if (exp == NULL)
		FAIL(preg, REG_ERR_NULL_ARGUMENT);

#ifdef DEBUG
	/* Traced only after the NULL check so "%s" never sees a null pointer. */
	fprintf(stderr, "Compiling: '%s'\n", exp);
#endif

	preg->cflags = cflags;
	preg->regparse = exp;

	/* Allocate space: five program items per pattern byte, terminator included. */
	preg->proglen = (strlen(exp) + 1) * 5;
	preg->program = malloc(preg->proglen * sizeof(int));
	if (preg->program == NULL)
		FAIL(preg, REG_ERR_NOMEM);

	/* Note that since we store a magic value as the first item in the program,
	 * program offsets will never be 0
	 */
	regc(preg, REG_MAGIC);
	if (reg(preg, 0, &flags) == 0) {
		return preg->err;
	}

	/* Too many parenthesized sub-expressions? */
	if (preg->re_nsub >= REG_MAX_PAREN)
		FAIL(preg,REG_ERR_TOO_BIG);

	/* Dig out information for optimizations. */
	preg->regstart = 0;	/* Worst-case defaults. */
	preg->reganch = 0;
	preg->regmust = 0;
	preg->regmlen = 0;
	scan = 1;			/* First BRANCH. */
	if (OP(preg, regnext(preg, scan)) == END) {		/* Only one top-level choice. */
		scan = OPERAND(scan);

		/* Starting-point info. */
		if (OP(preg, scan) == EXACTLY) {
			preg->regstart = preg->program[OPERAND(scan)];
		}
		else if (OP(preg, scan) == BOL)
			preg->reganch++;

		/*
		 * If there's something expensive in the r.e., find the
		 * longest literal string that must appear and make it the
		 * regmust.  Resolve ties in favor of later strings, since
		 * the regstart check works with the beginning of the r.e.
		 * and avoiding duplication strengthens checking.  Not a
		 * strong reason, but sufficient in the absence of others.
		 */
		if (flags&SPSTART) {
			longest = 0;
			len = 0;
			for (; scan != 0; scan = regnext(preg, scan)) {
				if (OP(preg, scan) == EXACTLY) {
					int plen = str_int_len(preg->program + OPERAND(scan));
					if (plen >= len) {
						longest = OPERAND(scan);
						len = plen;
					}
				}
			}
			preg->regmust = longest;
			preg->regmlen = len;
		}
	}

#ifdef DEBUG
	regdump(preg);
#endif

	return 0;
}
/* - regcomp - compile a regular expression into internal code * * We can't allocate space until we know how big the compiled form will be, * but we can't compile it (and thus know how big it is) until we've got a * place to put the code. So we cheat: we compile it twice, once with code * generation turned off and size counting turned on, and once "for real". * This also means that we don't allocate space until we are sure that the * thing really will compile successfully, and we never have to move the * code and thus invalidate pointers into it. (Note that it has to be in * one piece because free() must be able to free it all.) * * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ regexp * regcomp( const char *exp ) { register regexp *r; register char *scan; register char *longest; register unsigned len; int flags; if (exp == NULL) FAIL("NULL argument"); /* First pass: determine size, legality. */ #ifdef notdef if (exp[0] == '.' && exp[1] == '*') exp += 2; /* aid grep */ #endif regparse = (char *)exp; regnpar = 1; regsize = 0L; regcode = ®dummy; regc(MAGIC); if (reg(0, &flags) == NULL) return(NULL); /* Small enough for pointer-storage convention? */ if (regsize >= 32767L) /* Probably could be 65535L. */ FAIL("regexp too big"); /* Allocate space. */ r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); if (r == NULL) FAIL("out of space"); /* Second pass: emit code. */ regparse = (char *)exp; regnpar = 1; regcode = r->program; regc(MAGIC); if (reg(0, &flags) == NULL) return(NULL); /* Dig out information for optimizations. */ r->regstart = '\0'; /* Worst-case defaults. */ r->reganch = 0; r->regmust = NULL; r->regmlen = 0; scan = r->program+1; /* First BRANCH. */ if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ scan = OPERAND(scan); /* Starting-point info. 
*/ if (OP(scan) == EXACTLY) r->regstart = *OPERAND(scan); else if (OP(scan) == BOL) r->reganch++; /* * If there's something expensive in the r.e., find the * longest literal string that must appear and make it the * regmust. Resolve ties in favor of later strings, since * the regstart check works with the beginning of the r.e. * and avoiding duplication strengthens checking. Not a * strong reason, but sufficient in the absence of others. */ if (flags&SPSTART) { longest = NULL; len = 0; for (; scan != NULL; scan = regnext(scan)) if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { longest = OPERAND(scan); len = strlen(OPERAND(scan)); } r->regmust = longest; r->regmlen = len; } } return(r); }
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ char* ossimRegExp::regatom (int *flagp) { char* ret; int flags; *flagp = WORST; // Tentatively. switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH | SIMPLE; break; case '[': { int rxpclass; int rxpclassend; if (*regparse == '^') { // Complement of range. ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc('-'); else { rxpclass = UCHARAT(regparse - 2) + 1; rxpclassend = UCHARAT(regparse); if (rxpclass > rxpclassend + 1) { //RAISE Error, SYM(ossimRegExp), SYM(Invalid_Range), printf ("ossimRegExp::compile(): Invalid range in [].\n"); return 0; } for (; rxpclass <= rxpclassend; rxpclass++) regc(rxpclass); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != ']') { //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Bracket), printf ("ossimRegExp::compile(): Unmatched [].\n"); return 0; } regparse++; *flagp |= HASWIDTH | SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return (NULL); *flagp |= flags & (HASWIDTH | SPSTART); break; case '\0': case '|': case ')': //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::compile(): Internal error.\n"); // Never here return 0; case '?': case '+': case '*': //RAISE Error, SYM(ossimRegExp), SYM(No_Operand), printf ("ossimRegExp::compile(): ?+* follows nothing.\n"); return 0; case '\\': if (*regparse == '\0') { //RAISE Error, SYM(ossimRegExp), SYM(Trailing_Backslash), 
printf ("ossimRegExp::compile(): Trailing backslash.\n"); return 0; } ret = regnode(EXACTLY); regc(*regparse++); regc('\0'); *flagp |= HASWIDTH | SIMPLE; break; default: { int len; char ender; regparse--; len = (int)strcspn(regparse, META); if (len <= 0) { //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::compile(): Internal error.\n"); return 0; } ender = *(regparse + len); if (len > 1 && ISMULT(ender)) len--; // Back off clear of ?+* operand. *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return (ret); }
/*
 - regatom - the lowest level
 *
 * Parses one atom starting at regparse and emits the node for it.
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Quoted (backslashed) characters other than \< and \>
 * are folded into that same exact-match string by the de_fault code below.
 *
 * flagp - out: reset to WORST, then HASWIDTH / SIMPLE / SPSTART are OR'ed
 *         in according to the atom that was parsed.
 *
 * Returns the node produced for the atom.  A NULL from reg() is passed
 * straight back.
 * NOTE(review): FAIL() is defined outside this block; it presumably reports
 * the message and returns NULL -- confirm.
 */
static char *
regatom( int *flagp )
{
    register char *ret;
    int flags;

    *flagp = WORST;     /* Tentatively. */

    switch (*regparse++) {
    /* FIXME: these chars only have meaning at beg/end of pat? */
    case '^':
        ret = regnode(BOL);
        break;
    case '$':
        ret = regnode(EOL);
        break;
    case '.':
        ret = regnode(ANY);
        *flagp |= HASWIDTH|SIMPLE;
        break;
    case '[': {
            register int classr;
            register int classend;

            if (*regparse == '^') { /* Complement of range. */
                ret = regnode(ANYBUT);
                regparse++;
            } else
                ret = regnode(ANYOF);
            /* A ']' or '-' in first position is an ordinary member. */
            if (*regparse == ']' || *regparse == '-')
                regc(*regparse++);
            while (*regparse != '\0' && *regparse != ']') {
                if (*regparse == '-') {
                    regparse++;
                    /* A '-' just before the end of the set is literal. */
                    if (*regparse == ']' || *regparse == '\0')
                        regc('-');
                    else {
                        /*
                         * Range.  The low end (at regparse-2) was already
                         * emitted as an ordinary member, so start one
                         * past it and emit up to the high end inclusive.
                         */
                        classr = UCHARAT(regparse-2)+1;
                        classend = UCHARAT(regparse);
                        if (classr > classend+1)
                            FAIL("invalid [] range");
                        for (; classr <= classend; classr++)
                            regc(classr);
                        regparse++;
                    }
                } else
                    regc(*regparse++);
            }
            regc('\0');     /* Terminate the member list. */
            if (*regparse != ']')
                FAIL("unmatched []");
            regparse++;
            *flagp |= HASWIDTH|SIMPLE;
        }
        break;
    case '(':
        /* Parenthesised group: delegate to reg() and pass NULL back. */
        ret = reg(1, &flags);
        if (ret == NULL)
            return(NULL);
        *flagp |= flags&(HASWIDTH|SPSTART);
        break;
    case '\0':
    case '|':
    case '\n':
    case ')':
        FAIL("internal urp");   /* Supposed to be caught earlier. */
        break;
    case '?':
    case '+':
    case '*':
        FAIL("?+* follows nothing");
        break;
    case '\\':
        switch (*regparse++) {
        case '\0':
            FAIL("trailing \\");
            break;
        case '<':
            ret = regnode(WORDA);
            break;
        case '>':
            ret = regnode(WORDZ);
            break;
        /* FIXME: Someday handle \1, \2, ... */
        default:
            /* Handle general quoted chars in exact-match routine */
            goto de_fault;
        }
        break;
    de_fault:
    default:
        /*
         * Encode a string of characters to be matched exactly.
         *
         * This is a bit tricky due to quoted chars and due to
         * '*', '+', and '?' taking the SINGLE char previous
         * as their operand.
         *
         * On entry, the char at regparse[-1] is going to go
         * into the string, no matter what it is.  (It could be
         * following a \ if we are entered from the '\' case.)
         *
         * Basic idea is to pick up a good char in ch and
         * examine the next char.  If it's *+? then we twiddle.
         * If it's \ then we frozzle.  If it's other magic char
         * we push ch and terminate the string.  If none of the
         * above, we push ch on the string and go around again.
         *
         * regprev is used to remember where "the current char"
         * starts in the string, if due to a *+? we need to back
         * up and put the current char in a separate, 1-char, string.
         * When regprev is NULL, ch is the only char in the
         * string; this is used in *+? handling, and in setting
         * flags |= SIMPLE at the end.
         */
        {
            char *regprev;
            register char ch;

            regparse--;         /* Look at cur char */
            ret = regnode(EXACTLY);
            for ( regprev = 0 ; ; ) {
                ch = *regparse++;   /* Get current char */
                switch (*regparse) {    /* look at next one */

                default:
                    regc(ch);   /* Add cur to string */
                    break;

                case '.': case '[': case '(':
                case ')': case '|': case '\n':
                case '$': case '^':
                case '\0':
                /* FIXME, $ and ^ should not always be magic */
                magic:
                    regc(ch);   /* dump cur char */
                    goto done;  /* and we are done */

                case '?': case '+': case '*':
                    if (!regprev)   /* If just ch in str, */
                        goto magic; /* use it */
                    /* End mult-char string one early */
                    regparse = regprev; /* Back up parse */
                    goto done;

                case '\\':
                    regc(ch);   /* Cur char OK */
                    switch (regparse[1]){   /* Look after \ */
                    case '\0':
                    case '<':
                    case '>':
                    /* FIXME: Someday handle \1, \2, ... */
                        goto done;  /* Not quoted */
                    default:
                        /*
                         * Backup point is the backslash; scanning
                         * resumes at the character after it.
                         */
                        regprev = regparse;
                        regparse++;
                        continue;   /* NOT break; */
                    }
                }
                regprev = regparse; /* Set backup point */
            }
        done:
            regc('\0');         /* Terminate the literal string. */
            *flagp |= HASWIDTH;
            if (!regprev)       /* One char? */
                *flagp |= SIMPLE;
        }
        break;
    }

    return(ret);
}
/*
 - regcomp - compile a regular expression into internal code
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because free() must be able to free it all.)
 *
 * context - passed to pgpContextMemAlloc() for the program allocation.
 * exp     - the pattern text.
 * pregexp - out: set to NULL on entry and to the compiled program on
 *           success.
 *
 * Returns kPGPError_NoErr on success; kPGPError_OutOfMemory when either
 * reg() pass returns NULL or the allocation fails; kPGPError_BadParams
 * when the sized program reaches 32767 bytes.
 * NOTE(review): the PGPValidate*() macros are defined outside this block;
 * they presumably return an error for an invalid argument -- confirm.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 */
PGPError
pgpRegComp(PGPContextRef context, char const *exp, regexp **pregexp)
{
    regexp *r;
    char const *scan;
    char const *longest;
    int len;
    int flags;
    regcompState s_rcs;
    regcompState *rcs = &s_rcs;

    PGPValidateContext( context );
    PGPValidatePtr( exp );
    PGPValidatePtr( pregexp );
    *pregexp = NULL;

    pgpClearMemory( &s_rcs, sizeof(s_rcs) );

    /* First pass: determine size, legality. */
    rcs->regparse = exp;
    rcs->regnpar = 1;
    rcs->regsize = 0L;
    /* Code pointer is aimed at the dummy object for the sizing pass. */
    rcs->regcode = &regdummy;
    regc(rcs, MAGIC);
    if (reg(rcs, 0, &flags) == NULL)
        return(kPGPError_OutOfMemory);

    /* Small enough for pointer-storage convention? */
    if (rcs->regsize >= 32767L)     /* Probably could be 65535L. */
        return(kPGPError_BadParams);

    /* Allocate space. */
    r = (regexp *)pgpContextMemAlloc(context,
            sizeof(regexp) + (unsigned)rcs->regsize, 0);
    if (r == NULL)
        return kPGPError_OutOfMemory;

    /* Second pass: emit code. */
    rcs->regparse = exp;
    rcs->regnpar = 1;
    rcs->regcode = r->program;
    regc(rcs, MAGIC);
    /*
     * NOTE(review): r is not released on this path.  The deallocator that
     * pairs with pgpContextMemAlloc() is not visible from this block, so
     * the return is left as it was -- confirm and free r here.
     */
    if (reg(rcs, 0, &flags) == NULL)
        return(kPGPError_OutOfMemory);

    /* Dig out information for optimizations. */
    r->regstart = '\0';     /* Worst-case defaults. */
    r->reganch = 0;
    r->regmust = NULL;
    r->regmlen = 0;
    scan = r->program+1;    /* First BRANCH. */
    if (OP(regnext(scan)) == END) {     /* Only one top-level choice. */
        scan = OPERAND(scan);

        /* Starting-point info. */
        if (OP(scan) == EXACTLY)
            r->regstart = *OPERAND(scan);
        else if (OP(scan) == BOL)
            r->reganch++;

        /*
         * If there's something expensive in the r.e., find the
         * longest literal string that must appear and make it the
         * regmust.  Resolve ties in favor of later strings, since
         * the regstart check works with the beginning of the r.e.
         * and avoiding duplication strengthens checking.  Not a
         * strong reason, but sufficient in the absence of others.
         */
        if (flags&SPSTART) {
            longest = NULL;
            len = 0;
            for (; scan != NULL; scan = regnext(scan)) {
                if (OP(scan) == EXACTLY) {
                    /* Measure each literal once instead of twice. */
                    size_t operand_len = strlen(OPERAND(scan));

                    if (operand_len >= (unsigned)len) {
                        longest = OPERAND(scan);
                        len = (int)operand_len;
                    }
                }
            }
            r->regmust = longest;
            r->regmlen = len;
        }
    }

    *pregexp = r;
    return(kPGPError_NoErr);
}