/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ char* ossimRegExp::regpiece (int *flagp) { char* ret; char op; char* next; int flags; ret = regatom(&flags); if (ret == NULL) return (NULL); op = *regparse; if (!ISMULT(op)) { *flagp = flags; return (ret); } if (!(flags & HASWIDTH) && op != '?') { //RAISE Error, SYM(ossimRegExp), SYM(Empty_Operand), printf ("ossimRegExp::compile() : *+ operand could be empty.\n"); return 0; } *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH); if (op == '*' && (flags & SIMPLE)) reginsert(STAR, ret); else if (op == '*') { // Emit x* as (x&|), where & means "self". reginsert(BRANCH, ret); // Either x regoptail(ret, regnode(BACK)); // and loop regoptail(ret, ret); // back regtail(ret, regnode(BRANCH)); // or regtail(ret, regnode(NOTHING)); // null. } else if (op == '+' && (flags & SIMPLE)) reginsert(PLUS, ret); else if (op == '+') { // Emit x+ as x(&|), where & means "self". next = regnode(BRANCH); // Either regtail(ret, next); regtail(regnode(BACK), ret); // loop back regtail(next, regnode(BRANCH)); // or regtail(ret, regnode(NOTHING)); // null. } else if (op == '?') { // Emit x? as (x|) reginsert(BRANCH, ret); // Either x regtail(ret, regnode(BRANCH)); // or next = regnode(NOTHING);// null. regtail(ret, next); regoptail(ret, next); } regparse++; if (ISMULT(*regparse)) { //RAISE Error, SYM(ossimRegExp), SYM(Nested_Operand), printf ("ossimRegExp::compile(): Nested *?+.\n"); return 0; } return (ret); }
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char * regpiece(int *flagp) { register char *ret; register char *op; register char *next; int flags; int len = 0; ret = regatom(&flags); if (ret == NULL) return(NULL); op = regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } len = CHARLEN(op); if (!(flags & HASWIDTH) && ((len != 1) || (*op != '?')) ) FAIL("*+ operand could be empty"); *flagp = ((len != 1) || (*op != '+')) ? (WORST | SPSTART) : (WORST | HASWIDTH); if ((len == 1) && (*op == '*') && (flags & SIMPLE)) reginsert(STAR, ret); else if ((len == 1) && (*op == '*')) { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if ((len == 1) && (*op == '+') && (flags & SIMPLE)) reginsert(PLUS, ret); else if ((len == 1) && (*op == '+')) { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH); /* Either */ regtail(ret, next); regtail(regnode(BACK), ret); /* loop back */ regtail(next, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if ((len == 1) && (*op == '?')) { /* Emit x? as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ next = regnode(NOTHING); /* null. */ regtail(ret, next); regoptail(ret, next); } regparse += INCRLEN(len); if (ISMULT(regparse)) FAIL("nested *?+"); return(ret); }
/*
 * regpiece - parse one atom plus an optional trailing '*', '+' or '?'
 *
 * When no repetition operator follows the atom, *flagp receives the atom's
 * own flags.  Otherwise *flagp receives WORST|HASWIDTH for '+' and
 * WORST|SPSTART for '*' and '?'.
 *
 * Returns the pointer produced by regatom(), or NULL if regatom() fails.
 * The two error conditions are reported through the _FAIL macro, which is
 * defined outside this block (presumably it reports and returns).
 *
 * The parse position is advanced past the operator with CMnext().
 *
 * The general forms of '*' and '+', and the '?' form, end in a single
 * NOTHING node that serves both as the end marker of the branch list and as
 * the body of the last branch.
 */
static char *
regpiece(i4 *flagp)
{
	i4	atom_flags;
	char	*atom;
	char	*tail;
	char	rep;

	atom = regatom(&atom_flags);
	if (atom == NULL)
	{
		return NULL;
	}

	rep = *regparse;
	if (!ISMULT(rep))
	{
		/* Bare atom: pass its flags through unchanged. */
		*flagp = atom_flags;
		return atom;
	}

	/* Only '?' may be applied to an operand that can match nothing. */
	if (rep != '?' && !(atom_flags & HASWIDTH))
	{
		_FAIL("*+ operand could be empty");
	}

	if (rep == '+')
	{
		*flagp = WORST|HASWIDTH;
	}
	else
	{
		*flagp = WORST|SPSTART;
	}

	if (rep == '*')
	{
		if (atom_flags & SIMPLE)
		{
			reginsert(STAR, atom);
		}
		else
		{
			/* Emit x* as (x&|), where & means "self". */
			reginsert(BRANCH, atom);	/* Either x */
			tail = regnode(BACK);		/* and loop */
			regoptail(atom, tail);
			regoptail(atom, atom);		/* back */
			tail = regnode(BRANCH);		/* or */
			regtail(atom, tail);
			tail = regnode(NOTHING);	/* null. */
			regtail(atom, tail);
		}
	}
	else if (rep == '+')
	{
		if (atom_flags & SIMPLE)
		{
			reginsert(PLUS, atom);
		}
		else
		{
			char	*loop_branch;

			/* Emit x+ as x(&|), where & means "self". */
			loop_branch = regnode(BRANCH);	/* Either */
			regtail(atom, loop_branch);
			tail = regnode(BACK);		/* loop back */
			regtail(tail, atom);
			tail = regnode(BRANCH);		/* or */
			regtail(loop_branch, tail);
			tail = regnode(NOTHING);	/* null. */
			regtail(atom, tail);
		}
	}
	else if (rep == '?')
	{
		/* Emit x? as (x|) */
		reginsert(BRANCH, atom);		/* Either x */
		tail = regnode(BRANCH);			/* or */
		regtail(atom, tail);
		tail = regnode(NOTHING);		/* null. */
		regtail(atom, tail);
		regoptail(atom, tail);
	}

	/* Step over the operator and refuse a second one right behind it. */
	CMnext( regparse );
	if (ISMULT(*regparse))
	{
		_FAIL("nested *?+");
	}

	return atom;
}
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char *regpiece( int *flagp ) { char *ret, op, *next; int flags; ret = regatom( &flags ); if( ret == NULL ) { return( NULL ); } op = *regparse; if( !ISMULT( op ) ) { *flagp = flags; return( ret ); } if( !( flags & HASWIDTH ) && op != '?' ) { FAIL( ERR_RE_EMPTY_OPERAND ); } *flagp = ( op != '+' ) ? ( WORST | SPSTART ) : ( WORST | HASWIDTH ); if( op == '*' && ( flags & SIMPLE ) ) { reginsert( STAR, ret ); } else if( op == '*' ) { /* Emit x* as (x&|), where & means "self". */ reginsert( BRANCH, ret ); /* Either x */ regoptail( ret, regnode( BACK ) ); /* and loop */ regoptail( ret, ret ); /* back */ regtail( ret, regnode( BRANCH ) ); /* or */ regtail( ret, regnode( NOTHING ) ); /* null. */ } else if( op == '+' && ( flags & SIMPLE ) ) { reginsert( PLUS, ret ); } else if( op == '+' ) { /* Emit x+ as x(&|), where & means "self". */ next = regnode( BRANCH ); /* Either */ regtail( ret, next ); regtail( regnode( BACK ), ret ); /* loop back */ regtail( next, regnode( BRANCH ) ); /* or */ regtail( ret, regnode( NOTHING ) ); /* null. */ } else if( op == '?' ) { /* Emit x? as (x|) */ reginsert( BRANCH, ret ); /* Either x */ regtail( ret, regnode( BRANCH ) ); /* or */ next = regnode( NOTHING ); /* null. */ regtail( ret, next ); regoptail( ret, next ); } regparse++; if( ISMULT( *regparse ) ) { FAIL( ERR_RE_NESTED_OPERAND ); } return( ret ); }
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequence used for ? and the general cases of * * and + are somewhat optimized: they use the same NOTHING node as both the * endmarker for their branch list and the body of the last branch. It might * seem that this node could be dispensed with entirely, but the endmarker * role is not redundant. */ static char *regpiece (int * flagp) { register char *ret; register short op; register char *nxt; int flags; ret = regatom(&flags); if (ret == (char *) NULL) return ((char *) NULL); op = *regparse; if (!ISMULT(op)) { *flagp = flags; return (ret); } if (!(flags & HASWIDTH) && op != QMARK) FAIL("*+ operand could be empty\n"); *flagp = (op != PLUSS) ? (WORST | SPSTART) : (WORST | HASWIDTH); if (op == ASTERIX && (flags & SIMPLE)) reginsert(STAR, ret); else if (op == ASTERIX) { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == PLUSS && (flags & SIMPLE)) reginsert(PLUS, ret); else if (op == PLUSS) { /* Emit x+ as x(&|), where & means "self". */ nxt = regnode(BRANCH); /* Either */ regtail(ret, nxt); regtail(regnode(BACK), ret); /* loop back */ regtail(nxt, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == QMARK) { /* Emit x? as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ nxt = regnode(NOTHING); /* null. */ regtail(ret, nxt); regoptail(ret, nxt); } regparse++; if (ISMULT(*regparse)) FAIL("nested *?+\n"); return (ret); }
// regpiece - parse one atom plus an optional trailing '*', '+' or '?'
//
// When no repetition operator follows the atom (per ISREPN), *flagp receives
// the atom's own flags.  Otherwise *flagp receives:
//   '*'  ->  WORST|SPSTART
//   '+'  ->  WORST|SPSTART|HASWIDTH
//   '?'  ->  WORST
//
// Returns the pointer produced by regatom().  NULL is returned when
// regatom() fails, when '*' or '+' is applied to an operand that lacks
// HASWIDTH, or when a second repetition operator directly follows the
// first; the latter two cases are also traced with TRACE0.
//
// The general forms of '*' and '+', and the '?' form, end in a single
// NOTHING node that serves both as the end marker of the branch list and as
// the body of the last branch.
TCHAR *CRegExp::regpiece(int *flagp)
{
	int atomFlags;
	TCHAR *atom = regatom(&atomFlags);
	if (atom == NULL)
	{
		return NULL;
	}

	const TCHAR repn = *regparse;
	if (!ISREPN(repn))
	{
		// Bare atom: pass its flags through unchanged.
		*flagp = atomFlags;
		return atom;
	}

	// Only '?' may be applied to an operand that can match nothing.
	if (repn != _T('?') && !(atomFlags & HASWIDTH))
	{
		TRACE0("*+ operand could be empty\n");
		return NULL;
	}

	const bool simpleOperand = (atomFlags & SIMPLE) != 0;

	// Each case stores the result flags first and then emits the program
	// nodes, in the same order as the separate steps they replace.
	switch (repn)
	{
	case _T('*'):
		*flagp = WORST|SPSTART;
		if (simpleOperand)
		{
			reginsert(STAR, atom);
		}
		else
		{
			// Emit x* as (x&|), where & means "self".
			reginsert(BRANCH, atom);               // Either x
			TCHAR *loopBack = regnode(BACK);       // and loop
			regoptail(atom, loopBack);
			regoptail(atom, atom);                 // back
			TCHAR *emptyBranch = regnode(BRANCH);  // or
			regtail(atom, emptyBranch);
			TCHAR *nothing = regnode(NOTHING);     // null.
			regtail(atom, nothing);
		}
		break;

	case _T('+'):
		*flagp = WORST|SPSTART|HASWIDTH;
		if (simpleOperand)
		{
			reginsert(PLUS, atom);
		}
		else
		{
			// Emit x+ as x(&|), where & means "self".
			TCHAR *loopBranch = regnode(BRANCH);   // Either
			regtail(atom, loopBranch);
			TCHAR *loopBack = regnode(BACK);       // loop back
			regtail(loopBack, atom);
			TCHAR *emptyBranch = regnode(BRANCH);  // or
			regtail(loopBranch, emptyBranch);
			TCHAR *nothing = regnode(NOTHING);     // null.
			regtail(atom, nothing);
		}
		break;

	case _T('?'):
		*flagp = WORST;
		{
			// Emit x? as (x|)
			reginsert(BRANCH, atom);               // Either x
			TCHAR *emptyBranch = regnode(BRANCH);  // or
			regtail(atom, emptyBranch);
			TCHAR *nothing = regnode(NOTHING);     // null.
			regtail(atom, nothing);
			regoptail(atom, nothing);
		}
		break;
	}

	// Step over the operator and refuse a second one right behind it.
	regparse++;
	if (ISREPN(*regparse))
	{
		TRACE0("nested *?+\n");
		return NULL;
	}

	return atom;
}
/*
 - regpiece - something followed by possible [*+?] or a {min,max} count
 *
 * Parses one atom via regatom() and, if a repetition operator follows,
 * wraps the atom in a REP/REPX node (REPMIN/REPXMIN when the operator is
 * itself followed by '?').
 *
 * preg   compile state; regparse is advanced past the operator, program[]
 *        receives the repetition parameters, and err is set on failure.
 * flagp  receives the atom's own flags when no operator follows; otherwise
 *        WORST|HASWIDTH when the minimum count is non-zero and
 *        WORST|SPSTART when it is zero.
 *
 * Returns the value produced by regatom(), or 0 on failure.  On failure
 * preg->err holds one of REG_ERR_OPERAND_COULD_BE_EMPTY, REG_ERR_BAD_COUNT,
 * REG_ERR_UNMATCHED_BRACES or REG_ERR_NESTED_COUNT, unless the failure came
 * from regatom() itself.
 *
 * Brace syntax accepted here: "{n}", "{n,}" and "{n,m}" with n and m below
 * 100.  The separator character between the two counts is skipped without
 * being checked.
 */
static int regpiece(regex_t *preg, int *flagp)
{
	int ret;
	char op;
	int next;
	int flags;
	int min;
	int max;

	ret = regatom(preg, &flags);
	if (ret == 0)
		return 0;

	op = *preg->regparse;
	if (!ISMULT(op)) {
		/* No repetition operator: pass the atom's flags through. */
		*flagp = flags;
		return(ret);
	}

	/* Only '?' may be applied to an operand that can match nothing. */
	if (!(flags&HASWIDTH) && op != '?') {
		preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
		return 0;
	}

	/* Handle braces (counted repetition) by expansion */
	if (op == '{') {
		char *end;

		min = strtoul(preg->regparse + 1, &end, 10);
		if (end == preg->regparse + 1) {
			/* No digits after '{'. */
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (*end == '}') {
			/* "{n}": exactly n repetitions. */
			max = min;
		}
		else if (*end == '\0') {
			/*
			 * The pattern ends right after the minimum count.  Without
			 * this check the branch below would call strtoul() on
			 * end + 1, i.e. one byte past the terminating NUL.
			 */
			preg->err = REG_ERR_UNMATCHED_BRACES;
			return 0;
		}
		else {
			/* Skip the separator and read the (optional) maximum. */
			preg->regparse = end;
			max = strtoul(preg->regparse + 1, &end, 10);
			if (*end != '}') {
				preg->err = REG_ERR_UNMATCHED_BRACES;
				return 0;
			}
		}
		if (end == preg->regparse + 1) {
			/* "{n,}": no maximum given, so it is unbounded. */
			max = MAX_REP_COUNT;
		}
		else if (max < min || max >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (min >= 100) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}

		/* Leave regparse on the closing brace; it is stepped over below. */
		preg->regparse = strchr(preg->regparse, '}');
	}
	else {
		/* '*' -> {0,inf}, '+' -> {1,inf}, '?' -> {0,1} */
		min = (op == '+');
		max = (op == '?' ? 1 : MAX_REP_COUNT);
	}

	/* A '?' directly after the operator selects the REPMIN/REPXMIN opcodes. */
	if (preg->regparse[1] == '?') {
		preg->regparse++;
		next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret);
	}
	else {
		next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret);
	}

	/* Fill in the slots of the node just inserted in front of the atom. */
	preg->program[ret + 2] = max;
	preg->program[ret + 3] = min;
	preg->program[ret + 4] = 0;

	*flagp = (min) ? (WORST|HASWIDTH) : (WORST|SPSTART);

	if (!(flags & SIMPLE)) {
		/* Non-simple operand: add a BACK node linking back to the start. */
		int back = regnode(preg, BACK);
		regtail(preg, back, ret);
		regtail(preg, next, back);
	}

	/* Step over the operator and refuse a second one right behind it. */
	preg->regparse++;
	if (ISMULT(*preg->regparse)) {
		preg->err = REG_ERR_NESTED_COUNT;
		return 0;
	}

	return ret;
}
/* - regpiece - something followed by possible [*+?{] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char * regpiece(int *flagp) { register char *next; register char *ret; register char op; unsigned char max; unsigned char min; int flags; ret = regatom(&flags); if (ret == NULL) { return(NULL); } op = *regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } if (!(flags&HASWIDTH) && op != '?') { FAIL("*+{ operand could be empty"); } *flagp = (op != '+' && op != '{') ? (WORST|SPSTART) : (WORST|HASWIDTH); if (op == '*' && (flags&SIMPLE)) { reginsert(STAR, ret); } else if (op == '*') { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '+' && (flags&SIMPLE)) { reginsert(PLUS, ret); } else if (op == '+') { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH); /* Either */ regtail(ret, next); regtail(regnode(BACK), ret); /* loop back */ regtail(next, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '{') { for (min = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) { min = min * 10 + (*regparse - '0'); } for (max = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) { max = max * 10 + (*regparse - '0'); } reginsert(max, ret); next = OPERAND(ret); reginsert(min, ret); next = OPERAND(next); reginsert(MINMAX, ret); regtail(ret, OPERAND(next)); /* MINMAX->next = x */ } else if (op == '?') { /* Emit x? 
as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ next = regnode(NOTHING); /* null. */ regtail(ret, next); regoptail(ret, next); } regparse++; if (ISMULT(*regparse)) { FAIL("nested *?+{"); } return(ret); }