/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ char* ossimRegExp::regpiece (int *flagp) { char* ret; char op; char* next; int flags; ret = regatom(&flags); if (ret == NULL) return (NULL); op = *regparse; if (!ISMULT(op)) { *flagp = flags; return (ret); } if (!(flags & HASWIDTH) && op != '?') { //RAISE Error, SYM(ossimRegExp), SYM(Empty_Operand), printf ("ossimRegExp::compile() : *+ operand could be empty.\n"); return 0; } *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH); if (op == '*' && (flags & SIMPLE)) reginsert(STAR, ret); else if (op == '*') { // Emit x* as (x&|), where & means "self". reginsert(BRANCH, ret); // Either x regoptail(ret, regnode(BACK)); // and loop regoptail(ret, ret); // back regtail(ret, regnode(BRANCH)); // or regtail(ret, regnode(NOTHING)); // null. } else if (op == '+' && (flags & SIMPLE)) reginsert(PLUS, ret); else if (op == '+') { // Emit x+ as x(&|), where & means "self". next = regnode(BRANCH); // Either regtail(ret, next); regtail(regnode(BACK), ret); // loop back regtail(next, regnode(BRANCH)); // or regtail(ret, regnode(NOTHING)); // null. } else if (op == '?') { // Emit x? as (x|) reginsert(BRANCH, ret); // Either x regtail(ret, regnode(BRANCH)); // or next = regnode(NOTHING);// null. regtail(ret, next); regoptail(ret, next); } regparse++; if (ISMULT(*regparse)) { //RAISE Error, SYM(ossimRegExp), SYM(Nested_Operand), printf ("ossimRegExp::compile(): Nested *?+.\n"); return 0; } return (ret); }
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char * regpiece(int *flagp) { register char *ret; register char *op; register char *next; int flags; int len = 0; ret = regatom(&flags); if (ret == NULL) return(NULL); op = regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } len = CHARLEN(op); if (!(flags & HASWIDTH) && ((len != 1) || (*op != '?')) ) FAIL("*+ operand could be empty"); *flagp = ((len != 1) || (*op != '+')) ? (WORST | SPSTART) : (WORST | HASWIDTH); if ((len == 1) && (*op == '*') && (flags & SIMPLE)) reginsert(STAR, ret); else if ((len == 1) && (*op == '*')) { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if ((len == 1) && (*op == '+') && (flags & SIMPLE)) reginsert(PLUS, ret); else if ((len == 1) && (*op == '+')) { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH); /* Either */ regtail(ret, next); regtail(regnode(BACK), ret); /* loop back */ regtail(next, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if ((len == 1) && (*op == '?')) { /* Emit x? as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ next = regnode(NOTHING); /* null. */ regtail(ret, next); regoptail(ret, next); } regparse += INCRLEN(len); if (ISMULT(regparse)) FAIL("nested *?+"); return(ret); }
/*
 * regpiece - one atom, optionally followed by a single [*+?] operator
 *
 * The general (non-SIMPLE) encodings of ?, * and + share one NOTHING node
 * that is both the end marker of the branch list and the body of the last
 * branch.  The node looks removable, but its end-marker role is required.
 */
static char *
regpiece(i4 *flagp)
{
	char *atom;
	char *node;
	char quant;
	i4 atom_flags;

	atom = regatom(&atom_flags);
	if (atom == NULL)
		return(NULL);

	quant = *regparse;
	if (!ISMULT(quant)) {
		/* No repetition operator: the piece is just the atom. */
		*flagp = atom_flags;
		return(atom);
	}

	/* Only '?' may be applied to an operand that can match nothing. */
	if (quant != '?' && !(atom_flags&HASWIDTH)) {
		_FAIL("*+ operand could be empty");
	}

	if (quant == '+')
		*flagp = (WORST|HASWIDTH);
	else
		*flagp = (WORST|SPSTART);

	switch (quant) {
	case '*':
		if (atom_flags&SIMPLE) {
			reginsert(STAR, atom);
		} else {
			/* Emit x* as (x&|), where & means "self". */
			reginsert(BRANCH, atom);		/* Either x */
			regoptail(atom, regnode(BACK));		/* and loop */
			regoptail(atom, atom);			/* back */
			regtail(atom, regnode(BRANCH));		/* or */
			regtail(atom, regnode(NOTHING));	/* null. */
		}
		break;
	case '+':
		if (atom_flags&SIMPLE) {
			reginsert(PLUS, atom);
		} else {
			/* Emit x+ as x(&|), where & means "self". */
			node = regnode(BRANCH);			/* Either */
			regtail(atom, node);
			regtail(regnode(BACK), atom);		/* loop back */
			regtail(node, regnode(BRANCH));		/* or */
			regtail(atom, regnode(NOTHING));	/* null. */
		}
		break;
	case '?':
		/* Emit x? as (x|) */
		reginsert(BRANCH, atom);			/* Either x */
		regtail(atom, regnode(BRANCH));			/* or */
		node = regnode(NOTHING);			/* null. */
		regtail(atom, node);
		regoptail(atom, node);
		break;
	default:
		break;
	}

	/* Step over the operator; another one right behind it is an error. */
	CMnext( regparse );
	if (ISMULT(*regparse)) {
		_FAIL("nested *?+");
	}

	return(atom);
}
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char *regpiece( int *flagp ) { char *ret, op, *next; int flags; ret = regatom( &flags ); if( ret == NULL ) { return( NULL ); } op = *regparse; if( !ISMULT( op ) ) { *flagp = flags; return( ret ); } if( !( flags & HASWIDTH ) && op != '?' ) { FAIL( ERR_RE_EMPTY_OPERAND ); } *flagp = ( op != '+' ) ? ( WORST | SPSTART ) : ( WORST | HASWIDTH ); if( op == '*' && ( flags & SIMPLE ) ) { reginsert( STAR, ret ); } else if( op == '*' ) { /* Emit x* as (x&|), where & means "self". */ reginsert( BRANCH, ret ); /* Either x */ regoptail( ret, regnode( BACK ) ); /* and loop */ regoptail( ret, ret ); /* back */ regtail( ret, regnode( BRANCH ) ); /* or */ regtail( ret, regnode( NOTHING ) ); /* null. */ } else if( op == '+' && ( flags & SIMPLE ) ) { reginsert( PLUS, ret ); } else if( op == '+' ) { /* Emit x+ as x(&|), where & means "self". */ next = regnode( BRANCH ); /* Either */ regtail( ret, next ); regtail( regnode( BACK ), ret ); /* loop back */ regtail( next, regnode( BRANCH ) ); /* or */ regtail( ret, regnode( NOTHING ) ); /* null. */ } else if( op == '?' ) { /* Emit x? as (x|) */ reginsert( BRANCH, ret ); /* Either x */ regtail( ret, regnode( BRANCH ) ); /* or */ next = regnode( NOTHING ); /* null. */ regtail( ret, next ); regoptail( ret, next ); } regparse++; if( ISMULT( *regparse ) ) { FAIL( ERR_RE_NESTED_OPERAND ); } return( ret ); }
/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequence used for ? and the general cases of * * and + are somewhat optimized: they use the same NOTHING node as both the * endmarker for their branch list and the body of the last branch. It might * seem that this node could be dispensed with entirely, but the endmarker * role is not redundant. */ static char *regpiece (int * flagp) { register char *ret; register short op; register char *nxt; int flags; ret = regatom(&flags); if (ret == (char *) NULL) return ((char *) NULL); op = *regparse; if (!ISMULT(op)) { *flagp = flags; return (ret); } if (!(flags & HASWIDTH) && op != QMARK) FAIL("*+ operand could be empty\n"); *flagp = (op != PLUSS) ? (WORST | SPSTART) : (WORST | HASWIDTH); if (op == ASTERIX && (flags & SIMPLE)) reginsert(STAR, ret); else if (op == ASTERIX) { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == PLUSS && (flags & SIMPLE)) reginsert(PLUS, ret); else if (op == PLUSS) { /* Emit x+ as x(&|), where & means "self". */ nxt = regnode(BRANCH); /* Either */ regtail(ret, nxt); regtail(regnode(BACK), ret); /* loop back */ regtail(nxt, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == QMARK) { /* Emit x? as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ nxt = regnode(NOTHING); /* null. */ regtail(ret, nxt); regoptail(ret, nxt); } regparse++; if (ISMULT(*regparse)) FAIL("nested *?+\n"); return (ret); }
/*
 * regatom - the lowest level
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * flagp receives WORST plus whichever of HASWIDTH / SIMPLE / SPSTART the
 * parsed atom justifies.  The node produced by regnode() (or by reg() for
 * a parenthesised group) is returned; errors are reported through _FAIL().
 *
 * Inside a [] class only single-byte ranges are supported: a range whose
 * start is a double-byte character is rejected explicitly, and a range
 * that mixes single- and double-byte end points is treated as invalid.
 */
static char *
regatom(i4 *flagp)
{
	register char *ret;
	i4 flags;
	char null_byte = '\0';

	*flagp = WORST;		/* Tentatively. */

	switch (*regparse) {
	case '^':
		CMnext( regparse );
		ret = regnode(BOL);
		break;
	case '$':
		CMnext( regparse );
		ret = regnode(EOL);
		break;
	case '.':
		CMnext( regparse );
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			char *range_start = NULL;
			/*
			 * Initialised so that a '-' seen before any range
			 * start (e.g. "[--a]") does not read an
			 * indeterminate value below.
			 */
			bool double_start = FALSE;
			/*
			 * Next code point to emit for a range.  Kept as an
			 * int so that a range ending at 0xFF terminates; an
			 * unsigned char counter would wrap to 0 and loop
			 * forever.
			 */
			int first_code = 0;
			int last_code;

			CMnext( regparse );
			if (*regparse == '^') {	/* Complement of range. */
				ret = regnode(ANYBUT);
				CMnext( regparse );
			} else
				ret = regnode(ANYOF);

			/* A leading ']' or '-' is an ordinary set member. */
			if (*regparse == ']' || *regparse == '-') {
				regc( regparse );
				CMnext( regparse );
			}

			while (*regparse != '\0' && *regparse != ']') {
				if (*regparse == '-') {
					char range_op = '-';

					CMnext( regparse );
					if( *regparse == ']' || *regparse == '\0' )
						/* Trailing '-' is a literal. */
						regc( &range_op );
					else {
						bool invalid = FALSE;
						bool double_end;

						if( range_start == NULL )
							invalid = TRUE;

						double_end = CMdbl1st( regparse );
						/* Both end points must have the same width. */
						if( !invalid && double_end && !double_start )
							invalid = TRUE;
						if( !invalid && double_start && !double_end )
							invalid = TRUE;
						if( !invalid && CMcmpcase( range_start, regparse ) > 0 )
							invalid = TRUE;
						if( double_start )
							_FAIL("don't know how to support character classes containing double-byte ranges");
						if( invalid )
							_FAIL("invalid [] range");

						/* no double-byte ranges! */

						/*
						 * Emit every code from the pending
						 * start through the end of the range.
						 * The start character was already
						 * emitted when it was first seen, so
						 * it appears twice; that is harmless
						 * for a membership set.
						 */
						last_code = UCHARAT(regparse);
						for (; first_code <= last_code; first_code++ ) {
							u_char member = (u_char) first_code;

							regc( (char *) &member );
						}

						CMnext( regparse );
					}
				} else {
					/* Remember this character as a possible range start. */
					range_start = regparse;
					if( CMdbl1st( range_start ) )
						double_start = TRUE;
					else {
						double_start = FALSE;
						first_code = UCHARAT(range_start);
					}

					regc( regparse );
					CMnext( regparse );
				}
			}
			regc( &null_byte );
			if (*regparse != ']')
				_FAIL("unmatched []");
			CMnext( regparse );
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		CMnext( regparse );
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		CMnext( regparse );
		_FAIL("internal urp");	/* Supposed to be caught earlier. */
		break;
	case '?':
	case '+':
	case '*':
		CMnext( regparse );
		_FAIL("?+* follows nothing");
		break;
	case '\\':
		CMnext( regparse );
		if (*regparse == '\0')
			_FAIL("trailing \\");
		/* The escaped character becomes its own EXACTLY node. */
		ret = regnode(EXACTLY);
		regc( regparse );
		CMnext( regparse );
		regc( &null_byte );
		*flagp |= HASWIDTH|SIMPLE;
		break;
	default: {
			register i4 len;
			register char ender;

			len = my_strcspn(regparse, META);
			if (len <= 0)
				_FAIL("internal disaster");
			ender = *(regparse+len);
			if (len > 1 && ISMULT(ender))
				len--;		/* Back off clear of ?+* operand. */
			*flagp |= HASWIDTH;
			if (len == 1)
				*flagp |= SIMPLE;
			ret = regnode(EXACTLY);
			while (len > 0) {
				regc( regparse );
				CMbytedec( len, regparse );
				CMnext( regparse );
			}
			regc( &null_byte );
		}
		break;
	}

	return(ret);
}
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ static char * regatom(int *flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int clss; register int classend; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc('-'); else { clss = UCHARAT(regparse-2)+1; classend = UCHARAT(regparse); if (clss > classend+1) FAIL("invalid [] range"); for (; clss <= classend; clss++) regc(clss); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != ']') FAIL("unmatched []"); regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return(NULL); *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. */ /* NOTREACHED */ break; case '?': case '+': case '*': FAIL("?+* follows nothing"); /* NOTREACHED */ break; case '\\': if (*regparse == '\0') FAIL("trailing \\"); ret = regnode(EXACTLY); regc(*regparse++); regc('\0'); *flagp |= HASWIDTH|SIMPLE; break; default: { register int len; register char ender; regparse--; len = (int) strcspn(regparse, META); if (len <= 0) FAIL("internal disaster"); ender = *(regparse+len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. 
*/ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return(ret); }
/* * add_pack: * Pick up an object and add it to the pack. If the argument is non-null * use it as the linked_list pointer instead of gettting it off the ground. */ void add_pack(struct linked_list *item, int silent) { struct linked_list *ip, *lp; struct object *obj, *op; int exact, from_floor; if (item == NULL) { from_floor = TRUE; if ((item = find_obj(hero.y, hero.x)) == NULL) return; } else from_floor = FALSE; obj = (struct object *) ldata(item); /* * Link it into the pack. Search the pack for a object of similar type * if there isn't one, stuff it at the beginning, if there is, look for one * that is exactly the same and just increment the count if there is. * it that. Food is always put at the beginning for ease of access, but * is not ordered so that you can't tell good food from bad. First check * to see if there is something in thr same group and if there is then * increment the count. */ if (obj->o_group) { for (ip = pack; ip != NULL; ip = next(ip)) { op = (struct object *) ldata(ip); if (op->o_group == obj->o_group) { /* * Put it in the pack and notify the user */ op->o_count++; if (from_floor) { detach(lvl_obj, item); mvaddch(hero.y, hero.x, (roomin(&hero) == NULL ? PASSAGE : FLOOR)); } discard(item); item = ip; goto picked_up; } } } /* * Check if there is room */ if (inpack == MAXPACK-1) { msg("You can't carry anything else."); return; } /* * Check for and deal with scare monster scrolls */ if (obj->o_type == SCROLL && obj->o_which == S_SCARE) if (obj->o_flags & ISFOUND) { msg("The scroll turns to dust as you pick it up."); detach(lvl_obj, item); mvaddch(hero.y, hero.x, FLOOR); return; } else obj->o_flags |= ISFOUND; inpack++; if (from_floor) { detach(lvl_obj, item); mvaddch(hero.y, hero.x, (roomin(&hero) == NULL ? 
PASSAGE : FLOOR)); } /* * Search for an object of the same type */ exact = FALSE; for (ip = pack; ip != NULL; ip = next(ip)) { op = (struct object *) ldata(ip); if (obj->o_type == op->o_type) break; } if (ip == NULL) { /* * Put it at the end of the pack since it is a new type */ for (ip = pack; ip != NULL; ip = next(ip)) { op = (struct object *) ldata(ip); if (op->o_type != FOOD) break; lp = ip; } } else { /* * Search for an object which is exactly the same */ while (ip != NULL && op->o_type == obj->o_type) { if (op->o_which == obj->o_which) { exact = TRUE; break; } lp = ip; if ((ip = next(ip)) == NULL) break; op = (struct object *) ldata(ip); } } if (ip == NULL) { /* * Didn't find an exact match, just stick it here */ if (pack == NULL) pack = item; else { lp->l_next = item; item->l_prev = lp; item->l_next = NULL; } } else { /* * If we found an exact match. If it is a potion, food, or a * scroll, increase the count, otherwise put it with its clones. */ if (exact && ISMULT(obj->o_type)) { op->o_count++; discard(item); item = ip; goto picked_up; } if ((item->l_prev = prev(ip)) != NULL) item->l_prev->l_next = item; else pack = item; item->l_next = ip; ip->l_prev = item; } picked_up: /* * Notify the user */ obj = (struct object *) ldata(item); if (notify && !silent) { if (!terse) addmsg("You now have "); msg("%s (%c)", inv_name(obj, !terse), pack_char(obj)); } if (obj->o_type == AMULET) amulet = TRUE; }
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ char* ossimRegExp::regatom (int *flagp) { char* ret; int flags; *flagp = WORST; // Tentatively. switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH | SIMPLE; break; case '[': { int rxpclass; int rxpclassend; if (*regparse == '^') { // Complement of range. ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc('-'); else { rxpclass = UCHARAT(regparse - 2) + 1; rxpclassend = UCHARAT(regparse); if (rxpclass > rxpclassend + 1) { //RAISE Error, SYM(ossimRegExp), SYM(Invalid_Range), printf ("ossimRegExp::compile(): Invalid range in [].\n"); return 0; } for (; rxpclass <= rxpclassend; rxpclass++) regc(rxpclass); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != ']') { //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Bracket), printf ("ossimRegExp::compile(): Unmatched [].\n"); return 0; } regparse++; *flagp |= HASWIDTH | SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return (NULL); *flagp |= flags & (HASWIDTH | SPSTART); break; case '\0': case '|': case ')': //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::compile(): Internal error.\n"); // Never here return 0; case '?': case '+': case '*': //RAISE Error, SYM(ossimRegExp), SYM(No_Operand), printf ("ossimRegExp::compile(): ?+* follows nothing.\n"); return 0; case '\\': if (*regparse == '\0') { //RAISE Error, SYM(ossimRegExp), SYM(Trailing_Backslash), 
printf ("ossimRegExp::compile(): Trailing backslash.\n"); return 0; } ret = regnode(EXACTLY); regc(*regparse++); regc('\0'); *flagp |= HASWIDTH | SIMPLE; break; default: { int len; char ender; regparse--; len = (int)strcspn(regparse, META); if (len <= 0) { //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::compile(): Internal error.\n"); return 0; } ender = *(regparse + len); if (len > 1 && ISMULT(ender)) len--; // Back off clear of ?+* operand. *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return (ret); }
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. */ static char *regatom (int * flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case CARET: ret = regnode(BOL); break; case DOLLAR: ret = regnode(EOL); break; case DOT: ret = regnode(ANY); *flagp |= HASWIDTH | SIMPLE; break; case LSHBRAC: ret = regnode(WORDSTART); break; case RSHBRAC: ret = regnode(WORDEND); break; case LSQBRAC:{ register int classs; register int classend; if (*regparse == CARET) { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == RSQBRAC || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != RSQBRAC) { if (*regparse == '-') { regparse++; if (*regparse == RSQBRAC || *regparse == '\0') regc('-'); else { classs = (CHARBITS & *(regparse - 2)) + 1; classend = (CHARBITS & *(regparse)); if (classs > classend + 1) FAIL("invalid [] range\n"); for (; classs <= classend; classs++) regc(classs); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != RSQBRAC) FAIL("unmatched []\n"); regparse++; *flagp |= HASWIDTH | SIMPLE; } break; case LBRAC: ret = reg(1, &flags); if (ret == (char *) NULL) return ((char *) NULL); *flagp |= flags & (HASWIDTH | SPSTART); break; case '\0': case OR_OP: case RBRAC: FAIL("internal urp\n"); /* Supposed to be caught earlier. */ break; case ASTERIX: FAIL("* follows nothing\n"); break; case PLUSS: FAIL("+ follows nothing\n"); break; case QMARK: FAIL("? follows nothing\n"); break; default:{ register int len; register short ender; regparse--; for (len = 0; regparse[len] && !(regparse[len] & SPECIAL) && regparse[len] != RSQBRAC; len++); if (len <= 0) { FAIL("unexpected ]\n"); } ender = *(regparse + len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. 
*/ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return (ret); }
/*
 - regatom - the lowest level
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * Parses one atom starting at preg->regparse and advances preg->regparse
 * past it.  *flagp is set to WORST plus whichever of HASWIDTH, SIMPLE and
 * SPSTART apply.  Returns the value produced by regnode() (or reg() for a
 * parenthesised group); on error stores a REG_ERR_* code in preg->err and
 * returns 0.
 */
static int regatom(regex_t *preg, int *flagp)
{
	int ret;
	int flags;
	int nocase = (preg->cflags & REG_ICASE);

	/* Decode the first character; n is the number of bytes it occupied */
	int ch;
	int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);

	*flagp = WORST;		/* Tentatively. */

	preg->regparse += n;
	switch (ch) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
		ret = regnode(preg, BOL);
		break;
	case '$':
		ret = regnode(preg, EOL);
		break;
	case '.':
		ret = regnode(preg, ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			/* Work on a local cursor; preg->regparse is updated once the set is parsed */
			const char *pattern = preg->regparse;

			if (*pattern == '^') {	/* Complement of range. */
				ret = regnode(preg, ANYBUT);
				pattern++;
			} else
				ret = regnode(preg, ANYOF);

			/* Special case. If the first char is ']' or '-', it is part of the set */
			if (*pattern == ']' || *pattern == '-') {
				reg_addrange(preg, *pattern, *pattern);
				pattern++;
			}

			while (*pattern && *pattern != ']') {
				/* Is this a range? a-z */
				int start;
				int end;

				pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
				if (start == '\\') {
					pattern += reg_decode_escape(pattern, &start);
					/* An escape that decodes to 0 is rejected */
					if (start == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}
				/* A '-' followed by anything other than ']' or end of pattern makes a range */
				if (pattern[0] == '-' && pattern[1] && pattern[1] != ']') {
					/* skip '-' */
					pattern += utf8_tounicode(pattern, &end);
					pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
					if (end == '\\') {
						pattern += reg_decode_escape(pattern, &end);
						if (end == 0) {
							preg->err = REG_ERR_NULL_CHAR;
							return 0;
						}
					}

					reg_addrange(preg, start, end);
					continue;
				}
				/*
				 * Named classes [:alpha:], [:alnum:] and [:space:].
				 * With REG_ICASE only the upper-case letter range is added.
				 * NOTE(review): presumably because case-insensitive decoding
				 * folds to upper case - confirm against reg_utf8_tounicode_case().
				 */
				if (start == '[') {
					if (strncmp(pattern, ":alpha:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":alnum:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						reg_addrange(preg, '0', '9');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":space:]", 8) == 0) {
						reg_addrange_str(preg, " \t\r\n\f\v");
						pattern += 8;
						continue;
					}
				}
				/* Not a range, so just add the char */
				reg_addrange(preg, start, start);
			}
			regc(preg, '\0');

			/*
			 * Step over the closing ']' if there is one.  Reaching the
			 * end of the pattern without a ']' is not reported as an
			 * error here.
			 */
			if (*pattern) {
				pattern++;
			}
			preg->regparse = pattern;

			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		ret = reg(preg, 1, &flags);
		if (ret == 0)
			return 0;
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		preg->err = REG_ERR_INTERNAL;
		return 0;	/* Supposed to be caught earlier. */
	case '?':
	case '+':
	case '*':
	case '{':
		preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
		return 0;
	case '\\':
		/* Look at (and consume) the byte following the backslash */
		switch (*preg->regparse++) {
		case '\0':
			preg->err = REG_ERR_TRAILING_BACKSLASH;
			return 0;
		case '<':
		case 'm':
			ret = regnode(preg, WORDA);
			break;
		case '>':
		case 'M':
			ret = regnode(preg, WORDZ);
			break;
		case 'd':
			/* \d is emitted as the set 0-9 */
			ret = regnode(preg, ANYOF);
			reg_addrange(preg, '0', '9');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'w':
			/* \w is emitted as letters, digits and '_' */
			ret = regnode(preg, ANYOF);
			if ((preg->cflags & REG_ICASE) == 0) {
				reg_addrange(preg, 'a', 'z');
			}
			reg_addrange(preg, 'A', 'Z');
			reg_addrange(preg, '0', '9');
			reg_addrange(preg, '_', '_');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 's':
			/* \s is emitted as the same set used for [:space:] */
			ret = regnode(preg, ANYOF);
			reg_addrange_str(preg," \t\r\n\f\v");
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		/* FIXME: Someday handle \1, \2, ... */
		default:
			/* Handle general quoted chars in exact-match routine */
			/* Back up to include the backslash */
			/*
			 * This undoes the ++ in the switch expression; the
			 * "regparse -= n" at de_fault then backs up over the
			 * backslash itself.
			 */
			preg->regparse--;
			goto de_fault;
		}
		break;
	de_fault:
	default: {
			/*
			 * Encode a string of characters to be matched exactly.
			 */
			int added = 0;	/* number of characters emitted into this EXACTLY node */

			/* Back up to pick up the first char of interest */
			preg->regparse -= n;

			ret = regnode(preg, EXACTLY);

			/* Note that a META operator such as ? or * consumes the
			 * preceding char.
			 * Thus we must be careful to look ahead by 2 and add the
			 * last char as it's own EXACTLY if necessary
			 */

			/* Until end of string or a META char is reached */
			while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
				n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
				if (ch == '\\' && preg->regparse[n]) {
					/* Non-trailing backslash.
					 * Is this a special escape, or a regular escape?
					 */
					if (strchr("<>mMwds", preg->regparse[n])) {
						/* A special escape. All done with EXACTLY */
						break;
					}
					/* Decode it. Note that we add the length for the escape
					 * sequence to the length for the backlash so we can skip
					 * the entire sequence, or not as required.
					 */
					n += reg_decode_escape(preg->regparse + n, &ch);
					if (ch == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}

				/* Now we have one char 'ch' of length 'n'.
				 * Check to see if the following char is a MULT
				 */

				if (ISMULT(preg->regparse[n])) {
					/* Yes. But do we already have some EXACTLY chars? */
					if (added) {
						/* Yes, so return what we have and pick up the current char next time around */
						break;
					}
					/* No, so add this single char and finish */
					regc(preg, ch);
					added++;
					preg->regparse += n;
					break;
				}

				/* No, so just add this char normally */
				regc(preg, ch);
				added++;
				preg->regparse += n;
			}
			regc(preg, '\0');

			*flagp |= HASWIDTH;
			/* Only a single-character literal is flagged SIMPLE */
			if (added == 1)
				*flagp |= SIMPLE;
			break;
		}
		break;
	}

	return(ret);
}
/*
 - regpiece - something followed by possible [*+?] or a {min,max} count
 *
 * Parses one atom with regatom() and, if a repetition operator follows,
 * wraps it in a REP / REPX node (REPMIN / REPXMIN when the operator is
 * itself followed by '?').  The three program slots after the inserted
 * node receive the maximum count, the minimum count and 0.
 *
 * *flagp is set from the atom's flags when there is no operator, and
 * otherwise to WORST|HASWIDTH when min is non-zero or WORST|SPSTART when
 * it is zero.
 *
 * Returns the atom's node, or 0 with preg->err set to a REG_ERR_* code.
 */
static int regpiece(regex_t *preg, int *flagp)
{
	/* Explicit counts must be strictly below this value. */
	const int count_limit = 100;
	int ret;
	char op;
	int next;
	int flags;
	int min;
	int max;

	ret = regatom(preg, &flags);
	if (ret == 0)
		return 0;

	op = *preg->regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?') {
		preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
		return 0;
	}

	/* Handle braces (counted repetition) by expansion */
	if (op == '{') {
		char *end;
		unsigned long count;

		count = strtoul(preg->regparse + 1, &end, 10);
		if (end == preg->regparse + 1) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		/*
		 * Clamp before narrowing to int so that a huge or negative
		 * count cannot wrap into the accepted range; the clamped
		 * value is rejected by the limit checks below.
		 */
		min = (count >= (unsigned long)count_limit) ? count_limit : (int)count;

		if (*end == '}') {
			max = min;
		}
		else if (*end == '\0') {
			/*
			 * The pattern ends right after the minimum.  Parsing a
			 * maximum from end + 1 would read past the terminator.
			 */
			preg->err = REG_ERR_UNMATCHED_BRACES;
			return 0;
		}
		else {
			/* Skip the separator and parse the optional maximum */
			preg->regparse = end;
			count = strtoul(preg->regparse + 1, &end, 10);
			if (*end != '}') {
				preg->err = REG_ERR_UNMATCHED_BRACES;
				return 0;
			}
			max = (count >= (unsigned long)count_limit) ? count_limit : (int)count;
		}
		if (end == preg->regparse + 1) {
			/* No digits for the maximum, i.e. "{min,}" */
			max = MAX_REP_COUNT;
		}
		else if (max < min || max >= count_limit) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}
		if (min >= count_limit) {
			preg->err = REG_ERR_BAD_COUNT;
			return 0;
		}

		/* A '}' is known to exist here; leave regparse on it */
		preg->regparse = strchr(preg->regparse, '}');
	}
	else {
		min = (op == '+');
		max = (op == '?' ? 1 : MAX_REP_COUNT);
	}

	/* A '?' directly after the operator selects the REPMIN / REPXMIN form */
	if (preg->regparse[1] == '?') {
		preg->regparse++;
		next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret);
	}
	else {
		next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret);
	}
	preg->program[ret + 2] = max;
	preg->program[ret + 3] = min;
	preg->program[ret + 4] = 0;

	*flagp = (min) ? (WORST|HASWIDTH) : (WORST|SPSTART);

	if (!(flags & SIMPLE)) {
		/* Non-simple operand: add a BACK node linking to the repeat node */
		int back = regnode(preg, BACK);
		regtail(preg, back, ret);
		regtail(preg, next, back);
	}

	/* Step over the operator; another one right behind it is an error */
	preg->regparse++;
	if (ISMULT(*preg->regparse)) {
		preg->err = REG_ERR_NESTED_COUNT;
		return 0;
	}

	return ret;
}
/*
 * add_pack:
 *	Add an object to the hero's pack.  When obj is NULL the object is
 *	looked up at the hero's position with find_obj() and treated as
 *	coming from the floor; otherwise obj itself is added.  Unless
 *	silent is set, the player is told what was picked up.
 *
 *	The pack is kept grouped: the new object is merged into an existing
 *	entry when it stacks with one (ISMULT types, or a matching non-zero
 *	o_group), and is otherwise linked in after the last entry of the
 *	same type/kind that the scan found.
 */
void
add_pack(THING *obj, bool silent)
{
    THING *op, *lp;
    bool from_floor;

    from_floor = FALSE;
    if (obj == NULL)
    {
        if ((obj = find_obj(hero.y, hero.x)) == NULL)
            return;
        from_floor = TRUE;
    }

    /*
     * Check for and deal with scare monster scrolls: one that already
     * carries ISFOUND is removed from the level and destroyed instead
     * of being picked up.
     */
    if (obj->o_type == SCROLL && obj->o_which == S_SCARE)
        if (obj->o_flags & ISFOUND)
        {
            detach(lvl_obj, obj);
            mvaddch(hero.y, hero.x, floor_ch());
            chat(hero.y, hero.x) = (proom->r_flags & ISGONE) ? PASSAGE : FLOOR;
            discard(obj);
            msg("the scroll turns to dust as you pick it up");
            return;
        }

    if (pack == NULL)
    {
        /*
         * Empty pack: the object becomes the whole pack.  Every other
         * insertion path goes through pack_room(from_floor, obj); this
         * one does not, so an object taken from the floor has to be
         * unlinked from the level list and the floor repainted here,
         * otherwise it would stay on lvl_obj while also being the pack.
         */
        if (from_floor)
        {
            detach(lvl_obj, obj);
            mvaddch(hero.y, hero.x, floor_ch());
            chat(hero.y, hero.x) = (proom->r_flags & ISGONE) ? PASSAGE : FLOOR;
        }
        pack = obj;
        obj->o_packch = pack_char();
        inpack++;
    }
    else
    {
        /* lp tracks the entry after which obj will be linked in. */
        lp = NULL;
        for (op = pack; op != NULL; op = next(op))
        {
            if (op->o_type != obj->o_type)
                lp = op;
            else
            {
                /* Same type: advance to an entry of the same kind, if any. */
                while (op->o_type == obj->o_type && op->o_which != obj->o_which)
                {
                    lp = op;
                    if (next(op) == NULL)
                        break;
                    else
                        op = next(op);
                }
                if (op->o_type == obj->o_type && op->o_which == obj->o_which)
                {
                    if (ISMULT(op->o_type))
                    {
                        /* Stackable type: bump the count on the existing entry. */
                        if (!pack_room(from_floor, obj))
                            return;
                        op->o_count++;
dump_it:
                        /*
                         * Shared merge tail (also entered from the group
                         * case below): drop the duplicate and continue
                         * with the pack entry.  lp is cleared so nothing
                         * gets linked in afterwards.
                         */
                        discard(obj);
                        obj = op;
                        lp = NULL;
                        goto out;
                    }
                    else if (obj->o_group)
                    {
                        /* Grouped object: look for an entry of the same group. */
                        lp = op;
                        while (op->o_type == obj->o_type
                            && op->o_which == obj->o_which
                            && op->o_group != obj->o_group)
                        {
                            lp = op;
                            if (next(op) == NULL)
                                break;
                            else
                                op = next(op);
                        }
                        if (op->o_type == obj->o_type
                            && op->o_which == obj->o_which
                            && op->o_group == obj->o_group)
                        {
                            op->o_count += obj->o_count;
                            /*
                             * NOTE(review): presumably pack_room() counts
                             * the object again, so inpack is lowered first
                             * to leave the total unchanged - confirm.
                             */
                            inpack--;
                            if (!pack_room(from_floor, obj))
                                return;
                            goto dump_it;
                        }
                    }
                    else
                        lp = op;
                }
out:
                /* The first run of matching type settles the position. */
                break;
            }
        }

        if (lp != NULL)
        {
            if (!pack_room(from_floor, obj))
                return;
            else
            {
                /* Link obj into the list directly after lp. */
                obj->o_packch = pack_char();
                next(obj) = next(lp);
                prev(obj) = lp;
                if (next(lp) != NULL)
                    prev(next(lp)) = obj;
                next(lp) = obj;
            }
        }
    }

    obj->o_flags |= ISFOUND;

    /*
     * If this was the object of something's desire, that monster will
     * get mad and run at the hero.
     */
    for (op = mlist; op != NULL; op = next(op))
        if (op->t_dest == &obj->o_pos)
            op->t_dest = &hero;

    if (obj->o_type == AMULET)
        amulet = TRUE;
    /*
     * Notify the user
     */
    if (!silent)
    {
        if (!terse)
            addmsg("you now have ");
        msg("%s (%c)", inv_name(obj, !terse), obj->o_packch);
    }
}
/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ static char * regatom(int *flagp) { register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int chclass; register int chclassend; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else { ret = regnode(ANYOF); } if (*regparse == ']' || *regparse == '-') { regc(*regparse++); } while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') { regc('-'); } else { chclass = UCHARAT(regparse-2)+1; chclassend = UCHARAT(regparse); if (chclass > chclassend+1) { FAIL("invalid [] range"); } for (; chclass <= chclassend; chclass++) { regc(chclass); } regparse++; } } else if (*regparse == '\\') { switch(*++regparse) { case 'n' : regc('\n'); regparse++; break; case 't' : regc('\t'); regparse++; break; case ']' : regc(']'); regparse++; break; case '-' : regc('-'); regparse++; break; case '\\' : regc('\\'); regparse++; break; default : regparse--; regc(*regparse++); } } else { regc(*regparse++); } } regc('\0'); if (*regparse != ']') { FAIL("unmatched []"); } regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) { return(NULL); } *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. 
*/ break; case '?': case '+': case '*': case '{': FAIL("?+*{ follows nothing"); break; case '\\': if (*regparse == '\0') { FAIL("trailing \\"); } switch(*regparse) { case '<': ret = regnode(BEGWORD); break; case '>': ret = regnode(ENDWORD); break; case 'd': ret = regnode(DIGIT); *flagp |= (HASWIDTH|SIMPLE); break; case 'D': ret = regnode(NDIGIT); *flagp |= (HASWIDTH|SIMPLE); break; case 'n' : ret = regnode(EXACTLY); regc('\n'); regc('\0'); *flagp |= (HASWIDTH|SIMPLE); break; case 'p': ret = regnode(PRINT); *flagp |= HASWIDTH|SIMPLE; break; case 'P': ret = regnode(NPRINT); *flagp |= HASWIDTH|SIMPLE; break; case 's': ret = regnode(WHITESP); *flagp |= HASWIDTH|SIMPLE; break; case 'S': ret = regnode(NWHITESP); *flagp |= HASWIDTH|SIMPLE; break; case 't' : ret = regnode(EXACTLY); regc('\t'); regc('\0'); *flagp |= (HASWIDTH|SIMPLE); break; case 'w': ret = regnode(ALNUM); *flagp |= HASWIDTH|SIMPLE; break; case 'W': ret = regnode(NALNUM); *flagp |= HASWIDTH|SIMPLE; break; default : ret = regnode(EXACTLY); regc(*regparse); regc('\0'); *flagp |= HASWIDTH|SIMPLE; } regparse++; break; default: { register int len; register char ender; regparse--; len = strcspn(regparse, META); if (len <= 0) { FAIL("internal disaster"); } ender = *(regparse+len); if (len > 1 && ISMULT(ender)) { len--; /* Back off clear of ?+* operand. */ } *flagp |= HASWIDTH; if (len == 1) { *flagp |= SIMPLE; } ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return(ret); }
/* - regpiece - something followed by possible [*+?{] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char * regpiece(int *flagp) { register char *next; register char *ret; register char op; unsigned char max; unsigned char min; int flags; ret = regatom(&flags); if (ret == NULL) { return(NULL); } op = *regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } if (!(flags&HASWIDTH) && op != '?') { FAIL("*+{ operand could be empty"); } *flagp = (op != '+' && op != '{') ? (WORST|SPSTART) : (WORST|HASWIDTH); if (op == '*' && (flags&SIMPLE)) { reginsert(STAR, ret); } else if (op == '*') { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '+' && (flags&SIMPLE)) { reginsert(PLUS, ret); } else if (op == '+') { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH); /* Either */ regtail(ret, next); regtail(regnode(BACK), ret); /* loop back */ regtail(next, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '{') { for (min = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) { min = min * 10 + (*regparse - '0'); } for (max = 0, regparse++ ; *regparse && isdigit(*regparse) ; regparse++) { max = max * 10 + (*regparse - '0'); } reginsert(max, ret); next = OPERAND(ret); reginsert(min, ret); next = OPERAND(next); reginsert(MINMAX, ret); regtail(ret, OPERAND(next)); /* MINMAX->next = x */ } else if (op == '?') { /* Emit x? 
as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ next = regnode(NOTHING); /* null. */ regtail(ret, next); regoptail(ret, next); } regparse++; if (ISMULT(*regparse)) { FAIL("nested *?+{"); } return(ret); }