/*
 * Scan the next token of a JSON text.
 *
 * Leading blanks and line breaks are skipped. The result is one of:
 *   - the punctuation character itself for , : [ ] { }
 *   - TK_FALSE, TK_NULL or TK_TRUE for the three literal words
 *   - whatever lexnumber() returns for input starting with a digit or '.'
 *   - whatever lexstring() returns for input starting with '"'
 *   - 0 at end of input
 * Any other character is reported through jsY_error().
 */
int jsY_lexjson(js_State *J)
{
	while (1) {
		while (jsY_iswhite(J->lexchar) || J->lexchar == '\n')
			jsY_next(J);

		/*
		 * Save the location of the beginning of the token only after the
		 * leading whitespace is gone. Line breaks are consumed by the loop
		 * above, so saving it earlier would leave lexline on the line of
		 * the previous token (jsY_lexx avoids this by restarting its loop
		 * after every newline).
		 * NOTE(review): assumes jsY_next() keeps J->line up to date while
		 * stepping over line breaks -- confirm against its definition.
		 */
		J->lexline = J->line;

		if (J->lexchar >= '0' && J->lexchar <= '9')
			return lexnumber(J);

		switch (J->lexchar) {
		case ',': jsY_next(J); return ',';
		case ':': jsY_next(J); return ':';
		case '[': jsY_next(J); return '[';
		case ']': jsY_next(J); return ']';
		case '{': jsY_next(J); return '{';
		case '}': jsY_next(J); return '}';

		case '"':
			return lexstring(J);

		case '.':
			return lexnumber(J);

		/* Literal words: the first letter selects which spelling is required. */
		case 'f':
			jsY_next(J);
			jsY_expect(J, 'a');
			jsY_expect(J, 'l');
			jsY_expect(J, 's');
			jsY_expect(J, 'e');
			return TK_FALSE;

		case 'n':
			jsY_next(J);
			jsY_expect(J, 'u');
			jsY_expect(J, 'l');
			jsY_expect(J, 'l');
			return TK_NULL;

		case 't':
			jsY_next(J);
			jsY_expect(J, 'r');
			jsY_expect(J, 'u');
			jsY_expect(J, 'e');
			return TK_TRUE;

		case 0:
			return 0; /* EOF */
		}

		/* Printable ASCII is quoted verbatim, everything else as \uXXXX. */
		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
/*
 * Core scanner for script source: return the next token.
 *
 * J->newline is cleared on entry and set to 1 once a line break has been
 * crossed before the token. A line break yields ';' when
 * isnlthcontext(J->lasttoken) says so; otherwise it is skipped. Comments
 * are skipped. A '/' that is not a comment is a regular expression when
 * isregexpcontext(J->lasttoken) holds, else a division operator.
 * Returns 0 at end of input; unknown characters go to jsY_error().
 */
static int jsY_lexx(js_State *J)
{
	J->newline = 0;

	for (;;) {
		J->lexline = J->line; /* save location of beginning of token */

		while (jsY_iswhite(J->lexchar))
			jsY_next(J);

		if (jsY_accept(J, '\n')) {
			J->newline = 1;
			if (isnlthcontext(J->lasttoken))
				return ';';
			continue;
		}

		/* The '/' has been consumed once we are inside this branch. */
		if (jsY_accept(J, '/')) {
			if (jsY_accept(J, '/')) {
				lexlinecomment(J);
				continue;
			}
			if (jsY_accept(J, '*')) {
				if (lexcomment(J))
					jsY_error(J, "multi-line comment not terminated");
				continue;
			}
			if (isregexpcontext(J->lasttoken))
				return lexregexp(J);
			if (jsY_accept(J, '='))
				return TK_DIV_ASS;
			return '/';
		}

		if (J->lexchar >= '0' && J->lexchar <= '9')
			return lexnumber(J);

		switch (J->lexchar) {
		/* Punctuators that are always exactly one character long. */
		case '(': case ')': case ',': case ':': case ';': case '?':
		case '[': case ']': case '{': case '}': case '~':
		{
			int punct = J->lexchar;
			jsY_next(J);
			return punct;
		}

		case '\'':
		case '"':
			return lexstring(J);

		case '.':
			return lexnumber(J);

		case '<':
			jsY_next(J);
			if (jsY_accept(J, '<'))
				return jsY_accept(J, '=') ? TK_SHL_ASS : TK_SHL;
			return jsY_accept(J, '=') ? TK_LE : '<';

		case '>':
			jsY_next(J);
			if (!jsY_accept(J, '>'))
				return jsY_accept(J, '=') ? TK_GE : '>';
			if (jsY_accept(J, '>'))
				return jsY_accept(J, '=') ? TK_USHR_ASS : TK_USHR;
			return jsY_accept(J, '=') ? TK_SHR_ASS : TK_SHR;

		case '=':
			jsY_next(J);
			if (!jsY_accept(J, '='))
				return '=';
			return jsY_accept(J, '=') ? TK_STRICTEQ : TK_EQ;

		case '!':
			jsY_next(J);
			if (!jsY_accept(J, '='))
				return '!';
			return jsY_accept(J, '=') ? TK_STRICTNE : TK_NE;

		case '+':
			jsY_next(J);
			if (jsY_accept(J, '+'))
				return TK_INC;
			return jsY_accept(J, '=') ? TK_ADD_ASS : '+';

		case '-':
			jsY_next(J);
			if (jsY_accept(J, '-'))
				return TK_DEC;
			return jsY_accept(J, '=') ? TK_SUB_ASS : '-';

		case '*':
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_MUL_ASS : '*';

		case '%':
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_MOD_ASS : '%';

		case '&':
			jsY_next(J);
			if (jsY_accept(J, '&'))
				return TK_AND;
			return jsY_accept(J, '=') ? TK_AND_ASS : '&';

		case '|':
			jsY_next(J);
			if (jsY_accept(J, '|'))
				return TK_OR;
			return jsY_accept(J, '=') ? TK_OR_ASS : '|';

		case '^':
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_XOR_ASS : '^';

		case 0:
			return 0; /* EOF */
		}

		/* Handle \uXXXX escapes in identifiers */
		jsY_unescape(J);
		if (jsY_isidentifierstart(J->lexchar)) {
			textinit(J);
			/* The start character is always stored; later ones only while
			 * they qualify as identifier parts. */
			do {
				textpush(J, J->lexchar);
				jsY_next(J);
				jsY_unescape(J);
			} while (jsY_isidentifierpart(J->lexchar));
			textend(J);

			return jsY_findkeyword(J, J->lexbuf.text);
		}

		/* Printable ASCII is quoted verbatim, everything else as \uXXXX. */
		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
/*
 * The lexical analyzer.
 *
 * Returns the code of the next token as an int.  A call goes through up
 * to four stages:
 *
 *   1. Pick a raw token: the token parked in `savetoken' by the previous
 *      call; or, when `redelim' is set, the result of lexregexp() (this
 *      path jumps straight to `out' and skips stages 2-4); or the result
 *      of reading characters with lexgetc().
 *   2. Post-process the token in the second switch.  Depending on
 *      `catterm' this may return CONCAT now and park the real token in
 *      `savetoken' for the next call.
 *   3. Record the token in `lexlast' and count `wasfield' down.
 *   4. Replace single characters listed in ctosym[] by their symbolic
 *      token names.
 *
 * `redelim', `catterm', `lexlast', `inprint', `unbal' and `dflag' are
 * defined outside this function.
 */
int
yylex()
{
	wint_t c, c1;
	int i;
	/* Token held back by the previous call; delivered first when non-zero. */
	static int savetoken = 0;
	/*
	 * Set to 2 when a PARM or CONSTANT directly follows '$' (and catterm
	 * is clear); decremented at the end of every call that reaches stage 3.
	 */
	static int wasfield;
	/* Set when the token is DEFFUNC, cleared again at the next VAR. */
	static int isfuncdef;
	/* Open-minus-closed counts for "{}", "()" and "[]". */
	static int nbrace, nparen, nbracket;
	/*
	 * Single-character tokens and the symbolic names they are returned
	 * as.  The table ends with a zero entry.
	 */
	static struct ctosymstruct {
		wint_t c, sym;
	} ctosym[] = {
		{ '|', BAR }, { '^', CARAT },
		{ '~', TILDE }, { '<', LANGLE },
		{ '>', RANGLE }, { '+', PLUSC },
		{ '-', HYPHEN }, { '*', STAR },
		{ '/', SLASH }, { '%', PERCENT },
		{ '!', EXCLAMATION }, { '$', DOLLAR },
		{ '[', LSQUARE }, { ']', RSQUARE },
		{ '(', LPAREN }, { ')', RPAREN },
		{ ';', SEMI }, { '{', LBRACE },
		{ '}', RBRACE }, { 0, 0 }
	};

	if (savetoken) {
		/* Deliver the token parked by the previous call. */
		c = savetoken;
		savetoken = 0;
	} else if (redelim != '\0') {
		/*
		 * redelim is pending (presumably the delimiter of a regular
		 * expression -- confirm where it is set): hand it to
		 * lexregexp(), park it in savetoken for the next call and
		 * return immediately, bypassing the post-processing below.
		 */
		c = redelim;
		redelim = 0;
		catterm = 0;
		savetoken = c;
		c = lexlast = lexregexp(c);
		goto out;
	} else while ((c = lexgetc()) != WEOF) {
		/*
		 * Inside this loop `continue' discards the current character
		 * and reads another one; reaching the `break' at the bottom
		 * of the loop body means c now holds a token.
		 */
		if (iswalpha(c) || c == '_') {
			c = lexid(c);
		} else if (iswdigit(c) || c == '.') {
			c = lexnumber(c);
		} else if (isWblank(c)) {
			continue;
		} else switch (c) {
#if DOS || OS2
		case 032:		/* ^Z */
			continue;
#endif

		case '"':
			c = lexstring(c);
			break;

		case '#':
			/*
			 * Comment: discard up to, but not including, the end
			 * of the line.  The terminating character is pushed
			 * back so the '\n' case below still sees it.
			 */
			while ((c = lexgetc()) != '\n' && c != WEOF)
				;
			lexungetc(c);
			continue;

		/*
		 * Operators that may be two (or three) characters long: read
		 * one character ahead and push it back when it does not
		 * extend the operator.
		 */
		case '+':
			if ((c1 = lexgetc()) == '+')
				c = INC;
			else if (c1 == '=')
				c = AADD;
			else
				lexungetc(c1);
			break;

		case '-':
			if ((c1 = lexgetc()) == '-')
				c = DEC;
			else if (c1 == '=')
				c = ASUB;
			else
				lexungetc(c1);
			break;

		case '*':
			/* "*=" is AMUL, "**" is EXP and "**=" is AEXP. */
			if ((c1 = lexgetc()) == '=')
				c = AMUL;
			else if (c1 == '*') {
				if ((c1 = lexgetc()) == '=')
					c = AEXP;
				else {
					c = EXP;
					lexungetc(c1);
				}
			} else
				lexungetc(c1);
			break;

		case '^':
			/* '^' alone is EXP as well; "^=" is AEXP. */
			if ((c1 = lexgetc()) == '=') {
				c = AEXP;
			} else {
				c = EXP;
				lexungetc(c1);
			}
			break;

		case '/':
			/*
			 * "/=" is only taken as ADIV when the previous token
			 * is none of RE, NRE, ';', '\n', ',' or '('
			 * (presumably because a '/' after those starts a
			 * regular expression -- confirm with the grammar).
			 */
			if ((c1 = lexgetc()) == '=' &&
			    lexlast != RE && lexlast != NRE &&
			    lexlast != ';' && lexlast != '\n' &&
			    lexlast != ',' && lexlast != '(')
				c = ADIV;
			else
				lexungetc(c1);
			break;

		case '%':
			if ((c1 = lexgetc()) == '=')
				c = AREM;
			else
				lexungetc(c1);
			break;

		case '&':
			if ((c1 = lexgetc()) == '&')
				c = AND;
			else
				lexungetc(c1);
			break;

		case '|':
			/* A single '|' becomes PIPE while inprint is set. */
			if ((c1 = lexgetc()) == '|')
				c = OR;
			else {
				lexungetc(c1);
				if (inprint)
					c = PIPE;
			}
			break;

		case '>':
			/*
			 * A single '>' becomes WRITE while inprint is set and
			 * no parenthesis is open.
			 */
			if ((c1 = lexgetc()) == '=')
				c = GE;
			else if (c1 == '>')
				c = APPEND;
			else {
				lexungetc(c1);
				if (nparen == 0 && inprint)
					c = WRITE;
			}
			break;

		case '<':
			if ((c1 = lexgetc()) == '=')
				c = LE;
			else
				lexungetc(c1);
			break;

		case '!':
			if ((c1 = lexgetc()) == '=')
				c = NE;
			else if (c1 == '~')
				c = NRE;
			else
				lexungetc(c1);
			break;

		case '=':
			if ((c1 = lexgetc()) == '=')
				c = EQ;
			else {
				lexungetc(c1);
				c = ASG;
			}
			break;

		case '\n':
			/*
			 * A newline is either dropped (`continue' restarts the
			 * read loop) or returned as ';', depending on the
			 * previous token.
			 */
			switch (lexlast) {
			case ')':
				/* After ')' it is a ';' only if catterm or inprint is set. */
				if (catterm || inprint) {
					c = ';';
					break;
				}
			/* FALLTHROUGH */
			case AND:
			case OR:
			case COMMA:
			case '{':
			case ELSE:
			case ';':
			case DO:
				continue;

			case '}':
				/* Dropped while any brace is still open. */
				if (nbrace != 0)
					continue;
			/* FALLTHROUGH */
			default:
				c = ';';
				break;
			}
			break;

		/*
		 * NOTE(review): this switch is only entered for characters
		 * that the if/else chain above did not hand to lexid() or
		 * lexnumber(), so this label compares a raw input character
		 * with the value of ELSE -- confirm whether it can ever match.
		 */
		case ELSE:
			if (lexlast != ';') {
				savetoken = ELSE;
				c = ';';
			}
			break;

		case '(':
			++nparen;
			break;

		case ')':
			if (--nparen < 0)
				awkerr(unbal, "()");
			break;

		case '{':
			nbrace++;
			break;

		case '}':
			if (--nbrace < 0) {
				char brk[3];

				brk[0] = '{';
				brk[1] = '}';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			/*
			 * Unless the previous token was ';', return ';' now
			 * and deliver the '}' on the next call.
			 */
			if (lexlast != ';') {
				savetoken = c;
				c = ';';
			}
			break;

		case '[':
			++nbracket;
			break;

		case ']':
			if (--nbracket < 0) {
				char brk[3];

				brk[0] = '[';
				brk[1] = ']';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			break;

		case '\\':
			/* Backslash followed by newline: drop both. */
			if ((c1 = lexgetc()) == '\n')
				continue;
			lexungetc(c1);
			break;

		case ',':
			c = COMMA;
			break;

		case '?':
			c = QUEST;
			break;

		case ':':
			c = COLON;
			break;

		default:
			if (!iswprint(c))
				awkerr(
				    gettext("invalid character \"%s\""),
				    toprint(c));
			break;
		}
		break;
	}

	/*
	 * Post-processing.  Several cases share one pattern: when catterm
	 * is set, return CONCAT now, park the real token in savetoken and
	 * clear catterm.  Otherwise catterm is incremented after tokens
	 * handled by the ']', VAR, PARM and CONSTANT cases and cleared by
	 * the default case.
	 */
	switch (c) {
	case ']':
		++catterm;
		break;

	case VAR:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else if (!isfuncdef) {
			/*
			 * Look one character ahead: catterm is not bumped
			 * when the name is directly followed by '('.
			 */
			if ((c1 = lexgetc()) != '(')
				++catterm;
			lexungetc(c1);
		}
		isfuncdef = 0;
		break;

	case PARM:
	case CONSTANT:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else {
			if (lexlast == '$')
				wasfield = 2;
			++catterm;
		}
		break;

	case INC:
	case DEC:
		/*
		 * Only fall into the CONCAT check below when catterm is set,
		 * the previous token was a CONSTANT and wasfield is zero.
		 */
		if (!catterm || lexlast != CONSTANT || wasfield)
			break;

	/* FALLTHROUGH */
	case UFUNC:
	case FUNC:
	case GETLINE:
	case '!':
	case '$':
	case '(':
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		}
		break;

	case '}':
		/* Closing the outermost brace queues a ';' for the next call. */
		if (nbrace == 0)
			savetoken = ';';
	/* FALLTHROUGH */
	case ';':
		inprint = 0;
	/* FALLTHROUGH */
	default:
		if (c == DEFFUNC)
			isfuncdef = 1;
		catterm = 0;
	}
	lexlast = c;
	if (wasfield)
		wasfield--;
	/*
	 * Map character constants to symbolic names.
	 * lexlast above keeps the unmapped value.
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
out:
#ifdef DEBUG
	/* Trace the numeric token code when dflag is set. */
	if (dflag)
		(void) printf("%d\n", (int)c);
#endif
	return ((int)c);
}
/*
 * Read one token from stream f.
 *
 * On success *tok receives the token kind and fz_okay is returned.  For
 * names and keywords the text is left in buf (capacity n) and *sl holds
 * its strlen(); for strings and numbers *sl holds the value returned by
 * the matching lex helper.  White space and '%' comments are skipped.
 *
 * A stray ')' or a '>' that is not followed by a second '>' sets *tok to
 * PDF_TERROR and returns fz_throw("lexical error").
 */
fz_error
pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
	int ch;

	for (;;)
	{
		ch = fz_readbyte(f);

		switch (ch)
		{
		case EOF:
			*tok = PDF_TEOF;
			return fz_okay;

		/* Nothing to report yet: skip and read the next byte. */
		case ISWHITE:
			lexwhite(f);
			continue;

		case '%':
			lexcomment(f);
			continue;

		case '/':
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			return fz_okay;

		case '(':
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			return fz_okay;

		case ')':
			*tok = PDF_TERROR;
			return fz_throw("lexical error");

		case '<':
			/* "<<" opens a dictionary, a single '<' a hex string. */
			if (fz_readbyte(f) == '<')
			{
				*tok = PDF_TODICT;
				return fz_okay;
			}
			fz_unreadbyte(f);
			*sl = lexhexstring(f, buf, n);
			*tok = PDF_TSTRING;
			return fz_okay;

		case '>':
			/* Only ">>" is valid here. */
			if (fz_readbyte(f) != '>')
			{
				*tok = PDF_TERROR;
				return fz_throw("lexical error");
			}
			*tok = PDF_TCDICT;
			return fz_okay;

		case '[':
			*tok = PDF_TOARRAY;
			return fz_okay;

		case ']':
			*tok = PDF_TCARRAY;
			return fz_okay;

		case '{':
			*tok = PDF_TOBRACE;
			return fz_okay;

		case '}':
			*tok = PDF_TCBRACE;
			return fz_okay;

		case ISNUMBER:
			/* Give the first digit back; lexnumber() also sets *tok. */
			fz_unreadbyte(f);
			*sl = lexnumber(f, buf, n, tok);
			return fz_okay;

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unreadbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			return fz_okay;
		}
	}
}
/*
 * Read the next token from parser p.
 *
 * White space, ';' line comments, "#!" lines, "#|" block comments and
 * "#;" datum comments are skipped.  Every token that is returned is also
 * stored in p->token, with one exception: when nothing matches, TOK_ERR
 * is returned and p->token is left as it was.
 *
 * Line comments stop at '\n' or at EOF, so a comment on the last line of
 * input without a trailing newline terminates.  Skipped material restarts
 * the loop instead of recursing, so long runs of comments do not deepen
 * the call stack.
 */
token_t lexnext(secd_parser_t *p) {
    for (;;) {
        /* skip spaces */
        while (isspace(p->lc))
            nextchar(p);

        switch (p->lc) {
          case EOF:
            return (p->token = TOK_EOF);

          case ';':
            /* consume comment up to end of line or end of input */
            do nextchar(p); while (p->lc != '\n' && p->lc != EOF);
            continue;

          case '(': case ')':
            /* one-char tokens */
            p->token = p->lc;
            nextchar(p);
            return p->token;

          case '#':
            /* '#' alone is the token unless a known suffix follows */
            p->token = p->lc;
            nextchar(p);
            switch (p->lc) {
              case 'f': case 't':
                p->symtok[0] = '#';
                p->symtok[1] = p->lc;
                p->symtok[2] = '\0';
                nextchar(p);
                return (p->token = TOK_SYM);

              case ';':
                /* datum comment: read the next datum and discard it */
                nextchar(p);
                free_cell(p->secd, read_token(p->secd, p));
                continue;

              case '|':
                lex_mltln_comment(p);
                continue;

              case '!':
                /* "#!" line: skip to end of line or end of input */
                do {
                    nextchar(p);
                } while (p->lc != '\n' && p->lc != EOF);
                continue;

              /* chars */
              case '\\':
                nextchar(p);
                return lexchar(p);

              /* numbers */
              case 'x': case 'X':
                nextchar(p);
                return lexnumber(p, 16);
              case 'o': case 'O':
                nextchar(p);
                return lexnumber(p, 8);
              case 'b': case 'B':
                nextchar(p);
                return lexnumber(p, 2);
              case 'd': case 'D':
                nextchar(p);
                return lexnumber(p, 10);
            }
            return p->token;

          case '\'':
            nextchar(p);
            return (p->token = TOK_QUOTE);

          case '`':
            nextchar(p);
            return (p->token = TOK_QQ);

          case ',':
            /* may be ',' or ',@' */
            nextchar(p);
            if (p->lc == '@') {
                nextchar(p);
                return (p->token = TOK_UQSPL);
            }
            return (p->token = TOK_UQ);

          case '"':
            return lexstring(p);
        }

        if (isdigit(p->lc))
            return lexnumber(p, 10);

        if (p->issymbc[(unsigned char)p->lc])
            return lexsymbol(p);

        return TOK_ERR; /* nothing fits */
    }
}
/*
 * Read one token from stream f and return its kind (a PDF_T* value).
 *
 * buf (capacity n) receives the token text where there is any.  *sl is
 * set for names, keywords and numbers (length of the text in buf) and
 * for strings (value returned by lexstring() / lexhexstring()); other
 * tokens leave it alone.  White space and '%' comments are skipped.
 *
 * PDF_TERROR is returned for a '>' that is not followed by a second '>'
 * and for any byte none of the tests accept.  In the latter case the
 * byte has only been looked at through fz_peekbyte().
 */
int
pdf_lex(fz_stream *f, unsigned char *buf, int n, int *sl)
{
	/*
	 * strlen() and strchr() take plain char pointers; passing the
	 * unsigned buffer directly is a pointer-signedness mismatch, so
	 * go through an explicitly converted alias.
	 */
	const char *text = (const char *)buf;
	int c;

	while (1)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
			return PDF_TEOF;

		else if (iswhite(c))
			lexwhite(f);

		else if (c == '%')
			lexcomment(f);

		else if (c == '/')
		{
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = (int)strlen(text);
			return PDF_TNAME;
		}

		else if (c == '(')
		{
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			return PDF_TSTRING;
		}

		else if (c == '<')
		{
			/* "<<" opens a dictionary, a single '<' a hex string. */
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<')
			{
				fz_readbyte(f);
				return PDF_TODICT;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				return PDF_TSTRING;
			}
		}

		else if (c == '>')
		{
			/* Only ">>" is valid; the second byte is consumed either way. */
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>')
				return PDF_TCDICT;
			return PDF_TERROR;
		}

		else if (c == '[')
		{
			fz_readbyte(f);
			return PDF_TOARRAY;
		}

		else if (c == ']')
		{
			fz_readbyte(f);
			return PDF_TCARRAY;
		}

		else if (c == '{')
		{
			fz_readbyte(f);
			return PDF_TOBRACE;
		}

		else if (c == '}')
		{
			fz_readbyte(f);
			return PDF_TCBRACE;
		}

		else if (isnumber(c))
		{
			/* A '.' anywhere in the text makes the number a real. */
			lexnumber(f, buf, n);
			*sl = (int)strlen(text);
			if (strchr(text, '.'))
				return PDF_TREAL;
			return PDF_TINT;
		}

		else if (isregular(c))
		{
			lexname(f, buf, n);
			*sl = (int)strlen(text);
			return tokenfromkeyword(buf);
		}

		else
			return PDF_TERROR;
	}
}
/*
 * Read one token from stream f into *tok.
 *
 * buf (capacity n) receives the token text where there is any.  *sl is
 * set for names, keywords and numbers (strlen() of buf) and for strings
 * (value returned by lexstring() / lexhexstring()).  White space and '%'
 * comments are skipped.
 *
 * After scanning, fz_readerror(f) is consulted first: if it reports an
 * error, *tok becomes PDF_TERROR and the error is rethrown as "cannot
 * read token".  Otherwise a malformed token yields PDF_TERROR together
 * with fz_throw("lexical error"), and anything else returns fz_okay.
 */
fz_error
pdf_lex(pdf_token_e *tok, fz_stream *f, char *buf, int n, int *sl)
{
	fz_error error;
	int lexfailed = 0;
	int c;

	for (;;)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
		{
			*tok = PDF_TEOF;
			break;
		}

		if (iswhite(c))
		{
			lexwhite(f);
			continue;
		}

		switch (c)
		{
		case '%':
			lexcomment(f);
			continue;

		case '/':
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			break;

		case '(':
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			break;

		case '<':
			/* "<<" opens a dictionary, a single '<' a hex string. */
			fz_readbyte(f);
			if (fz_peekbyte(f) == '<')
			{
				fz_readbyte(f);
				*tok = PDF_TODICT;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				*tok = PDF_TSTRING;
			}
			break;

		case '>':
			/* Only ">>" is valid; the second byte is consumed either way. */
			fz_readbyte(f);
			if (fz_readbyte(f) == '>')
			{
				*tok = PDF_TCDICT;
			}
			else
			{
				*tok = PDF_TERROR;
				lexfailed = 1;
			}
			break;

		case '[':
			fz_readbyte(f);
			*tok = PDF_TOARRAY;
			break;

		case ']':
			fz_readbyte(f);
			*tok = PDF_TCARRAY;
			break;

		case '{':
			fz_readbyte(f);
			*tok = PDF_TOBRACE;
			break;

		case '}':
			fz_readbyte(f);
			*tok = PDF_TCBRACE;
			break;

		default:
			if (isnumber(c))
			{
				/* A '.' anywhere in the text makes the number a real. */
				lexnumber(f, buf, n);
				*sl = strlen(buf);
				*tok = strchr(buf, '.') ? PDF_TREAL : PDF_TINT;
			}
			else if (isregular(c))
			{
				lexname(f, buf, n);
				*sl = strlen(buf);
				*tok = pdf_tokenfromkeyword(buf);
			}
			else
			{
				*tok = PDF_TERROR;
				lexfailed = 1;
			}
			break;
		}

		/* A token or a lexical error has been produced: stop scanning. */
		break;
	}

	/* A stream read error takes precedence over a lexical error. */
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}

	if (lexfailed)
	{
		*tok = PDF_TERROR;
		return fz_throw("lexical error");
	}

	return fz_okay;
}