/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ char* ossimRegExp::regpiece (int *flagp) { char* ret; char op; char* next; int flags; ret = regatom(&flags); if (ret == NULL) return (NULL); op = *regparse; if (!ISMULT(op)) { *flagp = flags; return (ret); } if (!(flags & HASWIDTH) && op != '?') { //RAISE Error, SYM(ossimRegExp), SYM(Empty_Operand), printf ("ossimRegExp::compile() : *+ operand could be empty.\n"); return 0; } *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH); if (op == '*' && (flags & SIMPLE)) reginsert(STAR, ret); else if (op == '*') { // Emit x* as (x&|), where & means "self". reginsert(BRANCH, ret); // Either x regoptail(ret, regnode(BACK)); // and loop regoptail(ret, ret); // back regtail(ret, regnode(BRANCH)); // or regtail(ret, regnode(NOTHING)); // null. } else if (op == '+' && (flags & SIMPLE)) reginsert(PLUS, ret); else if (op == '+') { // Emit x+ as x(&|), where & means "self". next = regnode(BRANCH); // Either regtail(ret, next); regtail(regnode(BACK), ret); // loop back regtail(next, regnode(BRANCH)); // or regtail(ret, regnode(NOTHING)); // null. } else if (op == '?') { // Emit x? as (x|) reginsert(BRANCH, ret); // Either x regtail(ret, regnode(BRANCH)); // or next = regnode(NOTHING);// null. regtail(ret, next); regoptail(ret, next); } regparse++; if (ISMULT(*regparse)) { //RAISE Error, SYM(ossimRegExp), SYM(Nested_Operand), printf ("ossimRegExp::compile(): Nested *?+.\n"); return 0; } return (ret); }
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char * regpiece(int *flagp) { register char *ret; register char *op; register char *next; int flags; int len = 0; ret = regatom(&flags); if (ret == NULL) return(NULL); op = regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } len = CHARLEN(op); if (!(flags & HASWIDTH) && ((len != 1) || (*op != '?')) ) FAIL("*+ operand could be empty"); *flagp = ((len != 1) || (*op != '+')) ? (WORST | SPSTART) : (WORST | HASWIDTH); if ((len == 1) && (*op == '*') && (flags & SIMPLE)) reginsert(STAR, ret); else if ((len == 1) && (*op == '*')) { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if ((len == 1) && (*op == '+') && (flags & SIMPLE)) reginsert(PLUS, ret); else if ((len == 1) && (*op == '+')) { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH); /* Either */ regtail(ret, next); regtail(regnode(BACK), ret); /* loop back */ regtail(next, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if ((len == 1) && (*op == '?')) { /* Emit x? as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ next = regnode(NOTHING); /* null. */ regtail(ret, next); regoptail(ret, next); } regparse += INCRLEN(len); if (ISMULT(regparse)) FAIL("nested *?+"); return(ret); }
/*
 * regpiece - one atom, optionally followed by a single '*', '+' or '?'
 *
 * The encodings used for '?' and for the general (non-SIMPLE) forms of '*'
 * and '+' reuse one NOTHING node twice: it terminates the branch list and it
 * is also the body of the last branch.  The node looks removable, but its
 * role as end marker is still required.
 *
 * flagp receives the flags describing the piece.  Returns the first node of
 * the piece, or NULL when regatom() fails; the error paths go through _FAIL().
 */
static char *
regpiece(i4 *flagp)
{
	register char *piece;
	register char mult;
	register char *node;
	i4 aflags;

	piece = regatom(&aflags);
	if (piece == NULL)
		return(NULL);

	mult = *regparse;
	if (!ISMULT(mult))
	{
		/* Plain atom with no repetition operator behind it. */
		*flagp = aflags;
		return(piece);
	}

	/* Only '?' may be applied to an operand that can match nothing. */
	if (mult != '?' && !(aflags & HASWIDTH))
	{
		_FAIL("*+ operand could be empty");
	}

	if (mult == '+')
		*flagp = (WORST|HASWIDTH);
	else
		*flagp = (WORST|SPSTART);

	if (mult == '*')
	{
		if (aflags & SIMPLE)
		{
			reginsert(STAR, piece);
		}
		else
		{
			/* x* is emitted as (x&|), with & meaning "self". */
			reginsert(BRANCH, piece);	/* either x */
			node = regnode(BACK);		/* and loop */
			regoptail(piece, node);
			regoptail(piece, piece);	/* back */
			node = regnode(BRANCH);		/* or */
			regtail(piece, node);
			node = regnode(NOTHING);	/* null */
			regtail(piece, node);
		}
	}
	else if (mult == '+')
	{
		if (aflags & SIMPLE)
		{
			reginsert(PLUS, piece);
		}
		else
		{
			/* x+ is emitted as x(&|), with & meaning "self". */
			node = regnode(BRANCH);			/* either */
			regtail(piece, node);
			regtail(regnode(BACK), piece);		/* loop back */
			regtail(node, regnode(BRANCH));		/* or */
			regtail(piece, regnode(NOTHING));	/* null */
		}
	}
	else if (mult == '?')
	{
		/* x? is emitted as (x|). */
		reginsert(BRANCH, piece);		/* either x */
		regtail(piece, regnode(BRANCH));	/* or */
		node = regnode(NOTHING);		/* null */
		regtail(piece, node);
		regoptail(piece, node);
	}

	/* Step over the operator; another operator right behind it is an error. */
	CMnext( regparse );
	if (ISMULT(*regparse))
	{
		_FAIL("nested *?+");
	}

	return(piece);
}
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char *regpiece( int *flagp ) { char *ret, op, *next; int flags; ret = regatom( &flags ); if( ret == NULL ) { return( NULL ); } op = *regparse; if( !ISMULT( op ) ) { *flagp = flags; return( ret ); } if( !( flags & HASWIDTH ) && op != '?' ) { FAIL( ERR_RE_EMPTY_OPERAND ); } *flagp = ( op != '+' ) ? ( WORST | SPSTART ) : ( WORST | HASWIDTH ); if( op == '*' && ( flags & SIMPLE ) ) { reginsert( STAR, ret ); } else if( op == '*' ) { /* Emit x* as (x&|), where & means "self". */ reginsert( BRANCH, ret ); /* Either x */ regoptail( ret, regnode( BACK ) ); /* and loop */ regoptail( ret, ret ); /* back */ regtail( ret, regnode( BRANCH ) ); /* or */ regtail( ret, regnode( NOTHING ) ); /* null. */ } else if( op == '+' && ( flags & SIMPLE ) ) { reginsert( PLUS, ret ); } else if( op == '+' ) { /* Emit x+ as x(&|), where & means "self". */ next = regnode( BRANCH ); /* Either */ regtail( ret, next ); regtail( regnode( BACK ), ret ); /* loop back */ regtail( next, regnode( BRANCH ) ); /* or */ regtail( ret, regnode( NOTHING ) ); /* null. */ } else if( op == '?' ) { /* Emit x? as (x|) */ reginsert( BRANCH, ret ); /* Either x */ regtail( ret, regnode( BRANCH ) ); /* or */ next = regnode( NOTHING ); /* null. */ regtail( ret, next ); regoptail( ret, next ); } regparse++; if( ISMULT( *regparse ) ) { FAIL( ERR_RE_NESTED_OPERAND ); } return( ret ); }
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequence used for ? and the general cases of * * and + are somewhat optimized: they use the same NOTHING node as both the * endmarker for their branch list and the body of the last branch. It might * seem that this node could be dispensed with entirely, but the endmarker * role is not redundant. */ static char *regpiece (int * flagp) { register char *ret; register short op; register char *nxt; int flags; ret = regatom(&flags); if (ret == (char *) NULL) return ((char *) NULL); op = *regparse; if (!ISMULT(op)) { *flagp = flags; return (ret); } if (!(flags & HASWIDTH) && op != QMARK) FAIL("*+ operand could be empty\n"); *flagp = (op != PLUSS) ? (WORST | SPSTART) : (WORST | HASWIDTH); if (op == ASTERIX && (flags & SIMPLE)) reginsert(STAR, ret); else if (op == ASTERIX) { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == PLUSS && (flags & SIMPLE)) reginsert(PLUS, ret); else if (op == PLUSS) { /* Emit x+ as x(&|), where & means "self". */ nxt = regnode(BRANCH); /* Either */ regtail(ret, nxt); regtail(regnode(BACK), ret); /* loop back */ regtail(nxt, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == QMARK) { /* Emit x? as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ nxt = regnode(NOTHING); /* null. */ regtail(ret, nxt); regoptail(ret, nxt); } regparse++; if (ISMULT(*regparse)) FAIL("nested *?+\n"); return (ret); }
/* - reg - regular expression, i.e. main body or parenthesized thing * * Caller must absorb opening parenthesis. * * Combining parenthesis handling with the base level of regular expression * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ static char * reg( int paren, /* Parenthesized? */ int *flagp ) { register char *ret; register char *br; register char *ender; register int parno; int flags; *flagp = HASWIDTH; /* Tentatively. */ /* Make an OPEN node, if parenthesized. */ if (paren) { if (regnpar >= NSUBEXP) FAIL("too many ()"); parno = regnpar; regnpar++; ret = regnode(OPEN+parno); } else ret = NULL; /* Pick up the branches, linking them together. */ br = regbranch(&flags); if (br == NULL) return(NULL); if (ret != NULL) regtail(ret, br); /* OPEN -> first. */ else ret = br; if (!(flags&HASWIDTH)) *flagp &= ~HASWIDTH; *flagp |= flags&SPSTART; while (*regparse == '|' || *regparse == '\n') { regparse++; br = regbranch(&flags); if (br == NULL) return(NULL); regtail(ret, br); /* BRANCH -> BRANCH. */ if (!(flags&HASWIDTH)) *flagp &= ~HASWIDTH; *flagp |= flags&SPSTART; } /* Make a closing node, and hook it on the end. */ ender = regnode((paren) ? CLOSE+parno : END); regtail(ret, ender); /* Hook the tails of the branches to the closing node. */ for (br = ret; br != NULL; br = regnext(br)) regoptail(br, ender); /* Check for proper termination. */ if (paren && *regparse++ != ')') { FAIL("unmatched ()"); } else if (!paren && *regparse != '\0') { if (*regparse == ')') { FAIL("unmatched ()"); } else FAIL("junk on end"); /* "Can't happen". */ /* NOTREACHED */ } return(ret); }
/* * reg - regular expression, i.e. main body or parenthesized thing * * Caller must absorb opening parenthesis. * * Combining parenthesis handling with the base level of regular expression * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ static char *reg( int paren, int *flagp ) { char *ret, *br, *ender; int flags; char parno = 0; *flagp = HASWIDTH; /* Tentatively. */ /* Make an OPEN node, if parenthesized. */ if( paren ) { if( regnpar >= NSUBEXP ) { FAIL( ERR_RE_TOO_MANY_ROUND_BRACKETS ); } parno = regnpar; regnpar++; ret = regnode( OPEN + parno ); } else { ret = NULL; } /* Pick up the branches, linking them together. */ br = regbranch( &flags ); if( br == NULL ) { return( NULL ); } if( ret != NULL ) { regtail( ret, br ); /* OPEN -> first. */ } else { ret = br; } if( !( flags & HASWIDTH ) ) { *flagp &= ~HASWIDTH; } *flagp |= flags & SPSTART; while( *regparse == '|' ) { regparse++; br = regbranch( &flags ); if( br == NULL ) { return( NULL ); } regtail( ret, br ); /* BRANCH -> BRANCH. */ if( !( flags & HASWIDTH ) ) { *flagp &= ~HASWIDTH; } *flagp |= flags & SPSTART; } /* Make a closing node, and hook it on the end. */ ender = regnode( ( paren ) ? CLOSE + parno : END ); regtail( ret, ender ); /* Hook the tails of the branches to the closing node. */ for( br = ret; br != NULL; br = regnext( br ) ) { regoptail( br, ender ); } /* Check for proper termination. */ if( paren && *regparse++ != ')' ) { FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS ); } else if( !paren && *regparse != '\0' ) { if( *regparse == ')' ) { FAIL( ERR_RE_UNMATCHED_ROUND_BRACKETS ); } else { FAIL( ERR_RE_INTERNAL_FOULUP ); /* "Can't happen". */ } } return( ret ); }
// regpiece - one atom, optionally followed by a single repetition operator.
//
// The encodings used for '?' and for the general (non-SIMPLE) forms of '*'
// and '+' reuse one NOTHING node twice: it terminates the branch list and it
// is also the body of the last branch.
//
// flagp receives the flags describing the piece.  Returns the first node of
// the piece, or NULL when regatom() fails or the operator is invalid (a
// TRACE0 message is emitted in the latter case).
TCHAR *CRegExp::regpiece(int *flagp)
{
	int atomFlags;
	TCHAR *piece = regatom(&atomFlags);
	if (piece == NULL)
		return(NULL);

	const TCHAR repn = *regparse;
	if (!ISREPN(repn))
	{
		// Plain atom with no repetition operator behind it.
		*flagp = atomFlags;
		return(piece);
	}

	// Only '?' may be applied to an operand that can match nothing.
	if (repn != _T('?') && !(atomFlags&HASWIDTH))
	{
		TRACE0("*+ operand could be empty\n");
		return NULL;
	}

	const bool simpleOperand = (atomFlags&SIMPLE) != 0;

	switch (repn)
	{
		case _T('*'):
			*flagp = WORST|SPSTART;
			if (simpleOperand)
			{
				reginsert(STAR, piece);
			}
			else
			{
				// x* is emitted as (x&|), with & meaning "self".
				reginsert(BRANCH, piece);		// either x
				TCHAR *loopBack = regnode(BACK);	// and loop
				regoptail(piece, loopBack);
				regoptail(piece, piece);		// back
				TCHAR *alternative = regnode(BRANCH);	// or
				regtail(piece, alternative);
				TCHAR *empty = regnode(NOTHING);	// null
				regtail(piece, empty);
			}
			break;

		case _T('+'):
			*flagp = WORST|SPSTART|HASWIDTH;
			if (simpleOperand)
			{
				reginsert(PLUS, piece);
			}
			else
			{
				// x+ is emitted as x(&|), with & meaning "self".
				TCHAR *firstBranch = regnode(BRANCH);	// either
				regtail(piece, firstBranch);
				TCHAR *loopBack = regnode(BACK);
				regtail(loopBack, piece);		// loop back
				TCHAR *alternative = regnode(BRANCH);	// or
				regtail(firstBranch, alternative);
				TCHAR *empty = regnode(NOTHING);	// null
				regtail(piece, empty);
			}
			break;

		case _T('?'):
		{
			*flagp = WORST;
			// x? is emitted as (x|).
			reginsert(BRANCH, piece);			// either x
			TCHAR *alternative = regnode(BRANCH);		// or
			regtail(piece, alternative);
			TCHAR *empty = regnode(NOTHING);		// null
			regtail(piece, empty);
			regoptail(piece, empty);
			break;
		}
	}

	// Step over the operator; another operator right behind it is an error.
	regparse++;
	if (ISREPN(*regparse))
	{
		TRACE0("nested *?+\n");
		return NULL;
	}

	return(piece);
}
// reg - compile a whole expression or the inside of a parenthesized group.
//
// The caller has already consumed the opening parenthesis.
//
// paren is non-zero for a parenthesized group; flagp receives the combined
// flags of all alternatives.  Returns the first node of the compiled
// sequence, or NULL on failure (a TRACE message is emitted for the errors
// detected here).
//
// The node pointers are TCHAR *, matching this function's return type and
// the TCHAR * locals in CRegExp::regpiece, so the routine also compiles when
// TCHAR is a wide character type.
TCHAR *CRegExp::reg(int paren, int *flagp)
{
	TCHAR *ret = NULL;
	TCHAR *br;
	TCHAR *ender;
	int parno = 0;
	int flags;

	*flagp = HASWIDTH;	// Tentatively.

	if (paren)
	{
		// Make an OPEN node.
		if (regnpar >= NSUBEXP)
		{
			TRACE1("Too many (). NSUBEXP is set to %d\n", NSUBEXP );
			return NULL;
		}
		parno = regnpar;
		regnpar++;
		ret = regnode(OPEN+parno);
	}

	// Pick up the branches, linking them together.
	br = regbranch(&flags);
	if (br == NULL)
		return(NULL);
	if (paren)
		regtail(ret, br);	// OPEN -> first.
	else
		ret = br;
	*flagp &= ~(~flags&HASWIDTH);	// Clear HASWIDTH if the branch lacks it.
	*flagp |= flags&SPSTART;
	while (*regparse == _T('|'))
	{
		regparse++;
		br = regbranch(&flags);
		if (br == NULL)
			return(NULL);
		regtail(ret, br);	// BRANCH -> BRANCH.
		*flagp &= ~(~flags&HASWIDTH);
		*flagp |= flags&SPSTART;
	}

	// Make a closing node, and hook it on the end.
	ender = regnode((paren) ? CLOSE+parno : END);
	regtail(ret, ender);

	// Hook the tails of the branches to the closing node.
	for (br = ret; br != NULL; br = regnext(br))
		regoptail(br, ender);

	// Check for proper termination.  The closing ')' is consumed only when
	// compiling a parenthesized group.
	if (paren && *regparse++ != _T(')'))
	{
		TRACE0("unterminated ()\n");
		return NULL;
	}
	else if (!paren && *regparse != _T('\0'))
	{
		if (*regparse == _T(')'))
		{
			TRACE0("unmatched ()\n");
			return NULL;
		}
		else
		{
			TRACE0("internal error: junk on end\n");
			return NULL;
		}
		// NOTREACHED
	}

	return(ret);
}
/* - reg - regular expression, i.e. main body or parenthesized thing * * Caller must absorb opening parenthesis. * * Combining parenthesis handling with the base level of regular expression * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ char* ossimRegExp::reg (int paren, int *flagp) { char* ret; char* br; char* ender; int parno =0; int flags; *flagp = HASWIDTH; // Tentatively. // Make an OPEN node, if parenthesized. if (paren) { if (regnpar >= NSUBEXP) { //RAISE Error, SYM(ossimRegExp), SYM(Too_Many_Parens), printf ("ossimRegExp::compile(): Too many parentheses.\n"); return 0; } parno = regnpar; regnpar++; ret = regnode(OPEN + parno); } else ret = NULL; // Pick up the branches, linking them together. br = regbranch(&flags); if (br == NULL) return (NULL); if (ret != NULL) regtail(ret, br); // OPEN -> first. else ret = br; if (!(flags & HASWIDTH)) *flagp &= ~HASWIDTH; *flagp |= flags & SPSTART; while (*regparse == '|') { regparse++; br = regbranch(&flags); if (br == NULL) return (NULL); regtail(ret, br); // BRANCH -> BRANCH. if (!(flags & HASWIDTH)) *flagp &= ~HASWIDTH; *flagp |= flags & SPSTART; } // Make a closing node, and hook it on the end. ender = regnode((paren) ? CLOSE + parno : END); regtail(ret, ender); // Hook the tails of the branches to the closing node. for (br = ret; br != NULL; br = regnext(br)) regoptail(br, ender); // Check for proper termination. if (paren && *regparse++ != ')') { //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens), printf ("ossimRegExp::compile(): Unmatched parentheses.\n"); return 0; } else if (!paren && *regparse != '\0') { if (*regparse == ')') { //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens), printf ("ossimRegExp::compile(): Unmatched parentheses.\n"); return 0; } else { //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::compile(): Internal error.\n"); return 0; } // NOTREACHED } return (ret); }
/*
 - reg - compile a whole expression or the inside of a parenthesized group
 *
 * The caller has already consumed the opening parenthesis.
 *
 * Handling parentheses and the top level in one routine is a little awkward,
 * but every branch tail has to be tied to whatever follows the group, which
 * makes the combination hard to avoid.
 *
 * Nodes are identified by integer values here, with 0 meaning "no node".
 * A group that begins with "?:" is non-capturing: it is emitted with a
 * parenthesis number of -1 and does not increment preg->re_nsub.
 *
 * Returns the first node, or 0 on failure; for the errors detected here
 * preg->err is set before returning.
 */
static int reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp )
{
	int head = 0;
	int branch;
	int closer;
	int closeop = END;	/* opcode of the terminating node */
	int bflags;

	*flagp = HASWIDTH;	/* Tentatively. */

	/* A group starts with an OPEN node and ends with the matching CLOSE. */
	if (paren) {
		int parno;

		if (preg->regparse[0] == '?' && preg->regparse[1] == ':') {
			/* non-capturing paren */
			preg->regparse += 2;
			parno = -1;
		}
		else {
			parno = ++preg->re_nsub;
		}
		head = regnode(preg, OPEN+parno);
		closeop = CLOSE+parno;
	}

	/* Collect the alternatives and chain them together. */
	for (;;) {
		branch = regbranch(preg, &bflags);
		if (branch == 0) {
			return 0;
		}
		if (head == 0) {
			head = branch;	/* top level: first branch is the head */
		}
		else {
			regtail(preg, head, branch);	/* OPEN -> first, BRANCH -> BRANCH. */
		}
		if ((bflags&HASWIDTH) == 0) {
			*flagp &= ~HASWIDTH;
		}
		*flagp |= bflags&SPSTART;

		if (*preg->regparse != '|') {
			break;
		}
		preg->regparse++;
	}

	/* Append the closing node. */
	closer = regnode(preg, closeop);
	regtail(preg, head, closer);

	/* Point the tail of every branch at the closing node. */
	branch = head;
	while (branch != 0) {
		regoptail(preg, branch, closer);
		branch = regnext(preg, branch);
	}

	/* Make sure the expression ended where it should. */
	if (paren) {
		if (*preg->regparse++ != ')') {
			preg->err = REG_ERR_UNMATCHED_PAREN;
			return 0;
		}
	}
	else if (*preg->regparse != '\0') {
		if (*preg->regparse == ')') {
			preg->err = REG_ERR_UNMATCHED_PAREN;
		}
		else {
			preg->err = REG_ERR_JUNK_ON_END;
		}
		return 0;
	}

	return(head);
}
/* - regpiece - something followed by possible [*+?{] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char * regpiece(int *flagp) { register char *next; register char *ret; register char op; unsigned char max; unsigned char min; int flags; ret = regatom(&flags); if (ret == NULL) { return(NULL); } op = *regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } if (!(flags&HASWIDTH) && op != '?') { FAIL("*+{ operand could be empty"); } *flagp = (op != '+' && op != '{') ? (WORST|SPSTART) : (WORST|HASWIDTH); if (op == '*' && (flags&SIMPLE)) { reginsert(STAR, ret); } else if (op == '*') { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '+' && (flags&SIMPLE)) { reginsert(PLUS, ret); } else if (op == '+') { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH); /* Either */ regtail(ret, next); regtail(regnode(BACK), ret); /* loop back */ regtail(next, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '{') { for (min = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) { min = min * 10 + (*regparse - '0'); } for (max = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) { max = max * 10 + (*regparse - '0'); } reginsert(max, ret); next = OPERAND(ret); reginsert(min, ret); next = OPERAND(next); reginsert(MINMAX, ret); regtail(ret, OPERAND(next)); /* MINMAX->next = x */ } else if (op == '?') { /* Emit x? 
as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ next = regnode(NOTHING); /* null. */ regtail(ret, next); regoptail(ret, next); } regparse++; if (ISMULT(*regparse)) { FAIL("nested *?+{"); } return(ret); }