/*
 * Scan the next token from the lexer state in J and return its code.
 *
 * Single-character punctuators and operators are returned as their own
 * character value, multi-character operators as TK_* codes, identifiers
 * as whatever jsY_findkeyword() reports, and 0 when the current character
 * is 0 (end of input).  J->newline is cleared on entry and set to 1 when
 * a '\n' is consumed before the token; J->lexline records the line on
 * which the returned token starts.
 */
static int jsY_lexx(js_State *J)
{
	J->newline = 0;

	for (;;) {
		/* remember the line on which this token starts */
		J->lexline = J->line;

		while (jsY_iswhite(J->lexchar))
			jsY_next(J);

		if (jsY_accept(J, '\n')) {
			J->newline = 1;
			/* a line break after certain tokens acts as a ';' */
			if (isnlthcontext(J->lasttoken))
				return ';';
			continue;
		}

		/* '/' starts a comment, a regexp literal, or a division operator */
		if (jsY_accept(J, '/')) {
			if (jsY_accept(J, '/')) {
				lexlinecomment(J);
				continue;
			}
			if (jsY_accept(J, '*')) {
				if (lexcomment(J))
					jsY_error(J, "multi-line comment not terminated");
				continue;
			}
			if (isregexpcontext(J->lasttoken))
				return lexregexp(J);
			if (jsY_accept(J, '='))
				return TK_DIV_ASS;
			return '/';
		}

		switch (J->lexchar) {
		/* numeric literals: a leading digit or a leading '.' */
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		case '.':
			return lexnumber(J);

		case '\'':
		case '"':
			return lexstring(J);

		/* punctuators that are always exactly one character long */
		case '(': case ')': case ',': case ':': case ';': case '?':
		case '[': case ']': case '{': case '}': case '~':
			{
				int punct = J->lexchar;
				jsY_next(J);
				return punct;
			}

		case '<':
			jsY_next(J);
			if (jsY_accept(J, '<')) {
				if (jsY_accept(J, '='))
					return TK_SHL_ASS;
				return TK_SHL;
			}
			if (jsY_accept(J, '='))
				return TK_LE;
			return '<';

		case '>':
			jsY_next(J);
			if (jsY_accept(J, '>')) {
				if (jsY_accept(J, '>')) {
					if (jsY_accept(J, '='))
						return TK_USHR_ASS;
					return TK_USHR;
				}
				if (jsY_accept(J, '='))
					return TK_SHR_ASS;
				return TK_SHR;
			}
			if (jsY_accept(J, '='))
				return TK_GE;
			return '>';

		case '=':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTEQ;
				return TK_EQ;
			}
			return '=';

		case '!':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTNE;
				return TK_NE;
			}
			return '!';

		case '+':
			jsY_next(J);
			if (jsY_accept(J, '+'))
				return TK_INC;
			if (jsY_accept(J, '='))
				return TK_ADD_ASS;
			return '+';

		case '-':
			jsY_next(J);
			if (jsY_accept(J, '-'))
				return TK_DEC;
			if (jsY_accept(J, '='))
				return TK_SUB_ASS;
			return '-';

		case '*':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MUL_ASS;
			return '*';

		case '%':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MOD_ASS;
			return '%';

		case '&':
			jsY_next(J);
			if (jsY_accept(J, '&'))
				return TK_AND;
			if (jsY_accept(J, '='))
				return TK_AND_ASS;
			return '&';

		case '|':
			jsY_next(J);
			if (jsY_accept(J, '|'))
				return TK_OR;
			if (jsY_accept(J, '='))
				return TK_OR_ASS;
			return '|';

		case '^':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_XOR_ASS;
			return '^';

		case 0:
			return 0; /* EOF */
		}

		/* Handle \uXXXX escapes in identifiers */
		jsY_unescape(J);
		if (jsY_isidentifierstart(J->lexchar)) {
			textinit(J);
			/* the first character is already known to be valid */
			do {
				textpush(J, J->lexchar);
				jsY_next(J);
				jsY_unescape(J);
			} while (jsY_isidentifierpart(J->lexchar));
			textend(J);

			return jsY_findkeyword(J, J->lexbuf.text);
		}

		/* nothing matched: report printable ASCII literally, the rest as \uXXXX */
		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
/*
 * The lexical analyzer.
 *
 * Produces one token per call and returns it as an int (presumably to a
 * yacc-generated parser, going by the name -- confirm against the caller).
 *
 * A call takes one of three paths:
 *   1. A token deferred by an earlier call (savetoken) is picked up.
 *   2. If redelim is non-zero, a regular expression is lexed with
 *	lexregexp(); the delimiter is queued in savetoken and the regexp
 *	token is returned immediately, skipping all post-processing.
 *   3. Otherwise characters are read with lexgetc() until a token is
 *	formed, or until lexgetc() yields WEOF, in which case c == WEOF is
 *	carried through the post-processing below.
 *
 * Tokens from paths 1 and 3 then go through a second switch that inserts
 * implicit CONCAT and ';' tokens, updates lexlast, and finally maps raw
 * punctuation characters to their symbolic token values via ctosym[].
 *
 * catterm, inprint, lexlast and redelim are not declared here; they are
 * state shared with code outside this function.
 */
int
yylex()
{
	wint_t c, c1;
	int i;
	/* Token to hand out on the next call before reading more input. */
	static int savetoken = 0;
	/* Set to 2 when a PARM/CONSTANT follows '$'; counted down per token. */
	static int wasfield;
	/* Set when DEFFUNC is seen; cleared by the next VAR token. */
	static int isfuncdef;
	/* Current nesting depth of {}, () and [] respectively. */
	static int nbrace, nparen, nbracket;
	/* Raw character -> symbolic token table, terminated by { 0, 0 }. */
	static struct ctosymstruct {
		wint_t c, sym;
	} ctosym[] = {
		{ '|', BAR }, { '^', CARAT },
		{ '~', TILDE }, { '<', LANGLE },
		{ '>', RANGLE }, { '+', PLUSC },
		{ '-', HYPHEN }, { '*', STAR },
		{ '/', SLASH }, { '%', PERCENT },
		{ '!', EXCLAMATION }, { '$', DOLLAR },
		{ '[', LSQUARE }, { ']', RSQUARE },
		{ '(', LPAREN }, { ')', RPAREN },
		{ ';', SEMI }, { '{', LBRACE },
		{ '}', RBRACE }, { 0, 0 }
	};

	if (savetoken) {
		/* A token was queued by a previous call: use it now. */
		c = savetoken;
		savetoken = 0;
	} else if (redelim != '\0') {
		/*
		 * A regexp delimiter is pending.  Lex the regexp, queue the
		 * delimiter character as the next token, and return the
		 * regexp token directly (no CONCAT/ctosym processing).
		 */
		c = redelim;
		redelim = 0;
		catterm = 0;
		savetoken = c;
		c = lexlast = lexregexp(c);
		goto out;
	} else while ((c = lexgetc()) != WEOF) {
		/*
		 * Each iteration either produces a token and reaches the
		 * "break" at the bottom of the loop body, or discards input
		 * and uses "continue" to read the next character.
		 */
		if (iswalpha(c) || c == '_') {
			c = lexid(c);
		} else if (iswdigit(c) || c == '.') {
			c = lexnumber(c);
		} else if (isWblank(c)) {
			continue;
		} else switch (c) {
#if DOS || OS2
		case 032:		/* ^Z */
			continue;
#endif

		case '"':
			c = lexstring(c);
			break;

		case '#':
			/*
			 * Comment: discard up to the newline (or WEOF), then
			 * push that terminator back so it is lexed normally.
			 */
			while ((c = lexgetc()) != '\n' && c != WEOF)
				;
			lexungetc(c);
			continue;

		case '+':
			/* "++" -> INC, "+=" -> AADD, otherwise plain '+'. */
			if ((c1 = lexgetc()) == '+')
				c = INC;
			else if (c1 == '=')
				c = AADD;
			else
				lexungetc(c1);
			break;

		case '-':
			/* "--" -> DEC, "-=" -> ASUB, otherwise plain '-'. */
			if ((c1 = lexgetc()) == '-')
				c = DEC;
			else if (c1 == '=')
				c = ASUB;
			else
				lexungetc(c1);
			break;

		case '*':
			/*
			 * "*=" -> AMUL, "**=" -> AEXP, "**" -> EXP,
			 * otherwise plain '*'.
			 */
			if ((c1 = lexgetc()) == '=')
				c = AMUL;
			else if (c1 == '*') {
				if ((c1 = lexgetc()) == '=')
					c = AEXP;
				else {
					c = EXP;
					lexungetc(c1);
				}
			} else
				lexungetc(c1);
			break;

		case '^':
			/* "^=" -> AEXP, a lone '^' -> EXP. */
			if ((c1 = lexgetc()) == '=') {
				c = AEXP;
			} else {
				c = EXP;
				lexungetc(c1);
			}
			break;

		case '/':
			/*
			 * "/=" is ADIV unless the previous token is one of
			 * RE, NRE, ';', '\n', ',' or '('; in those cases the
			 * '=' is pushed back and a bare '/' is returned.
			 * NOTE(review): presumably those are positions where
			 * a regexp beginning with '=' may start -- confirm.
			 */
			if ((c1 = lexgetc()) == '=' &&
			    lexlast != RE && lexlast != NRE &&
			    lexlast != ';' && lexlast != '\n' &&
			    lexlast != ',' && lexlast != '(')
				c = ADIV;
			else
				lexungetc(c1);
			break;

		case '%':
			/* "%=" -> AREM, otherwise plain '%'. */
			if ((c1 = lexgetc()) == '=')
				c = AREM;
			else
				lexungetc(c1);
			break;

		case '&':
			/* "&&" -> AND, otherwise plain '&'. */
			if ((c1 = lexgetc()) == '&')
				c = AND;
			else
				lexungetc(c1);
			break;

		case '|':
			/*
			 * "||" -> OR.  A single '|' becomes PIPE only while
			 * inprint is set; otherwise it stays '|'.
			 */
			if ((c1 = lexgetc()) == '|')
				c = OR;
			else {
				lexungetc(c1);
				if (inprint)
					c = PIPE;
			}
			break;

		case '>':
			/*
			 * ">=" -> GE, ">>" -> APPEND.  A single '>' becomes
			 * WRITE only while inprint is set and no parenthesis
			 * is open; otherwise it stays '>'.
			 */
			if ((c1 = lexgetc()) == '=')
				c = GE;
			else if (c1 == '>')
				c = APPEND;
			else {
				lexungetc(c1);
				if (nparen == 0 && inprint)
					c = WRITE;
			}
			break;

		case '<':
			/* "<=" -> LE, otherwise plain '<'. */
			if ((c1 = lexgetc()) == '=')
				c = LE;
			else
				lexungetc(c1);
			break;

		case '!':
			/* "!=" -> NE, "!~" -> NRE, otherwise plain '!'. */
			if ((c1 = lexgetc()) == '=')
				c = NE;
			else if (c1 == '~')
				c = NRE;
			else
				lexungetc(c1);
			break;

		case '=':
			/* "==" -> EQ, a lone '=' -> ASG. */
			if ((c1 = lexgetc()) == '=')
				c = EQ;
			else {
				lexungetc(c1);
				c = ASG;
			}
			break;

		case '\n':
			/*
			 * A newline is either swallowed ("continue") or
			 * turned into ';', depending on the previous token.
			 */
			switch (lexlast) {
			case ')':
				/* After ')': ';' only if catterm or inprint. */
				if (catterm || inprint) {
					c = ';';
					break;
				}
			/* FALLTHROUGH */
			case AND:
			case OR:
			case COMMA:
			case '{':
			case ELSE:
			case ';':
			case DO:
				continue;

			case '}':
				/* After '}': swallowed while inside braces. */
				if (nbrace != 0)
					continue;
			/* FALLTHROUGH */

			default:
				c = ';';
				break;
			}
			break;

		case ELSE:
			/*
			 * NOTE(review): c in this switch comes straight from
			 * lexgetc(); whether it can ever equal the ELSE token
			 * value is not visible from this function -- confirm
			 * that this arm is reachable.
			 */
			if (lexlast != ';') {
				savetoken = ELSE;
				c = ';';
			}
			break;

		case '(':
			++nparen;
			break;

		case ')':
			if (--nparen < 0)
				awkerr(unbal, "()");
			break;

		case '{':
			nbrace++;
			break;

		case '}':
			if (--nbrace < 0) {
				char brk[3];

				brk[0] = '{';
				brk[1] = '}';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			/*
			 * Make sure a ';' precedes the '}': return ';' now
			 * and queue the '}' for the next call.
			 */
			if (lexlast != ';') {
				savetoken = c;
				c = ';';
			}
			break;

		case '[':
			++nbracket;
			break;

		case ']':
			if (--nbracket < 0) {
				char brk[3];

				brk[0] = '[';
				brk[1] = ']';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			break;

		case '\\':
			/*
			 * Backslash-newline is swallowed; any other
			 * backslash is returned as '\\' with the following
			 * character pushed back.
			 */
			if ((c1 = lexgetc()) == '\n')
				continue;
			lexungetc(c1);
			break;

		case ',':
			c = COMMA;
			break;

		case '?':
			c = QUEST;
			break;

		case ':':
			c = COLON;
			break;

		default:
			/* Any other printable character is returned as is. */
			if (!iswprint(c))
				awkerr(
				    gettext("invalid character \"%s\""), toprint(c));
			break;
		}
		/* A token has been formed: leave the read loop. */
		break;
	}

	/*
	 * Post-processing.  While catterm is non-zero, a token from the
	 * groups below is held back in savetoken and CONCAT is returned in
	 * its place; the held token is then delivered by the next call.
	 */
	switch (c) {
	case ']':
		++catterm;
		break;

	case VAR:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else if (!isfuncdef) {
			/* Peek: catterm is not raised if '(' follows. */
			if ((c1 = lexgetc()) != '(')
				++catterm;
			lexungetc(c1);
		}
		isfuncdef = 0;
		break;

	case PARM:
	case CONSTANT:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else {
			/*
			 * 2 rather than 1: the decrement at the bottom of
			 * this call leaves it non-zero for the next token.
			 */
			if (lexlast == '$')
				wasfield = 2;
			++catterm;
		}
		break;

	case INC:
	case DEC:
		/*
		 * CONCAT is inserted before INC/DEC only when catterm is
		 * set, the previous token was CONSTANT, and wasfield is 0.
		 */
		if (!catterm || lexlast != CONSTANT || wasfield)
			break;

	/* FALLTHROUGH */
	case UFUNC:
	case FUNC:
	case GETLINE:
	case '!':
	case '$':
	case '(':
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		}
		break;

	case '}':
		/* At the outermost brace level, queue a ';' after '}'. */
		if (nbrace == 0)
			savetoken = ';';
	/* FALLTHROUGH */
	case ';':
		inprint = 0;
	/* FALLTHROUGH */
	default:
		if (c == DEFFUNC)
			isfuncdef = 1;
		catterm = 0;
	}
	/* lexlast keeps the token as it is before the ctosym[] mapping. */
	lexlast = c;
	if (wasfield)
		wasfield--;
	/*
	 * Map character constants to symbolic names.
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
out:
#ifdef DEBUG
	if (dflag)
		(void) printf("%d\n", (int)c);
#endif
	return ((int)c);
}