/*
 * Scan the next token from the lexer state and return its token code.
 *
 * Single-character punctuators are returned as their own character value,
 * multi-character operators as TK_* codes, identifiers and keywords as
 * whatever jsY_findkeyword() yields, and end of input as 0.
 *
 * J->newline is cleared on entry and set to 1 once a '\n' has been
 * accepted.  When isnlthcontext(J->lasttoken) is true at that point, the
 * newline itself is reported as a ';' token.
 *
 * Comments are skipped.  Any character that cannot start a token is
 * reported through jsY_error().
 */
static int jsY_lexx(js_State *J)
{
	int punct;

	J->newline = 0;

	for (;;) {
		/* Record the line on which the upcoming token starts. */
		J->lexline = J->line;

		while (jsY_iswhite(J->lexchar))
			jsY_next(J);

		if (jsY_accept(J, '\n')) {
			J->newline = 1;
			if (isnlthcontext(J->lasttoken))
				return ';';
			continue;
		}

		/* '/' may open a comment, a regular expression, or be a divide. */
		if (jsY_accept(J, '/')) {
			if (jsY_accept(J, '/')) {
				lexlinecomment(J);
				continue;
			}
			if (jsY_accept(J, '*')) {
				if (lexcomment(J))
					jsY_error(J, "multi-line comment not terminated");
				continue;
			}
			if (isregexpcontext(J->lasttoken))
				return lexregexp(J);
			return jsY_accept(J, '=') ? TK_DIV_ASS : '/';
		}

		switch (J->lexchar) {
		/* Numbers start with a digit or with a '.'. */
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		case '.':
			return lexnumber(J);

		/* Punctuators that are always a single character. */
		case '(': case ')': case ',': case ':': case ';': case '?':
		case '[': case ']': case '{': case '}': case '~':
			punct = J->lexchar;
			jsY_next(J);
			return punct;

		case '\'':
		case '"':
			return lexstring(J);

		case '<':
			jsY_next(J);
			if (jsY_accept(J, '<'))
				return jsY_accept(J, '=') ? TK_SHL_ASS : TK_SHL;
			return jsY_accept(J, '=') ? TK_LE : '<';

		case '>':
			jsY_next(J);
			if (!jsY_accept(J, '>'))
				return jsY_accept(J, '=') ? TK_GE : '>';
			if (jsY_accept(J, '>'))
				return jsY_accept(J, '=') ? TK_USHR_ASS : TK_USHR;
			return jsY_accept(J, '=') ? TK_SHR_ASS : TK_SHR;

		case '=':
			jsY_next(J);
			if (!jsY_accept(J, '='))
				return '=';
			return jsY_accept(J, '=') ? TK_STRICTEQ : TK_EQ;

		case '!':
			jsY_next(J);
			if (!jsY_accept(J, '='))
				return '!';
			return jsY_accept(J, '=') ? TK_STRICTNE : TK_NE;

		case '+':
			jsY_next(J);
			if (jsY_accept(J, '+'))
				return TK_INC;
			return jsY_accept(J, '=') ? TK_ADD_ASS : '+';

		case '-':
			jsY_next(J);
			if (jsY_accept(J, '-'))
				return TK_DEC;
			return jsY_accept(J, '=') ? TK_SUB_ASS : '-';

		case '*':
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_MUL_ASS : '*';

		case '%':
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_MOD_ASS : '%';

		case '&':
			jsY_next(J);
			if (jsY_accept(J, '&'))
				return TK_AND;
			return jsY_accept(J, '=') ? TK_AND_ASS : '&';

		case '|':
			jsY_next(J);
			if (jsY_accept(J, '|'))
				return TK_OR;
			return jsY_accept(J, '=') ? TK_OR_ASS : '|';

		case '^':
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_XOR_ASS : '^';

		case 0:
			return 0; /* EOF */
		}

		/* Handle \uXXXX escapes in identifiers */
		jsY_unescape(J);
		if (jsY_isidentifierstart(J->lexchar)) {
			textinit(J);
			/* The first pass stores the start character, later passes
			 * store each following identifier-part character. */
			do {
				textpush(J, J->lexchar);
				jsY_next(J);
				jsY_unescape(J);
			} while (jsY_isidentifierpart(J->lexchar));
			textend(J);

			return jsY_findkeyword(J, J->lexbuf.text);
		}

		/* Printable ASCII is shown literally, anything else as \uXXXX. */
		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
/*
 * Read the next token from the stream `f` and return its PDF_T* code.
 *
 * Bytes matching iswhite() and comments introduced by '%' are skipped.
 * Tokens that carry text have it written into `buf` by the lex* helpers,
 * which are handed the capacity `n`.  `*sl` is then set to:
 *   - strlen() of the text for names, numbers and keywords;
 *   - the value returned by lexstring()/lexhexstring() for strings.
 * `*sl` is left untouched for EOF, delimiters and errors.
 *
 * Returns PDF_TEOF at end of input, and PDF_TERROR for a '>' that is not
 * followed by a second '>' or for any byte no branch recognises.
 */
int pdf_lex(fz_stream *f, unsigned char *buf, int n, int *sl)
{
	/*
	 * strlen() and strchr() take `const char *`; view the byte buffer as
	 * text once here rather than passing a pointer of mismatched
	 * signedness at every call site.
	 */
	const char *text = (const char *)buf;
	int c;

	while (1) {
		c = fz_peekbyte(f);

		if (c == EOF) {
			return PDF_TEOF;
		} else if (iswhite(c)) {
			lexwhite(f);
		} else if (c == '%') {
			lexcomment(f);
		} else if (c == '/') {
			fz_readbyte(f); /* drop the leading '/' */
			lexname(f, buf, n);
			*sl = (int)strlen(text);
			return PDF_TNAME;
		} else if (c == '(') {
			fz_readbyte(f); /* drop the opening '(' */
			*sl = lexstring(f, buf, n);
			return PDF_TSTRING;
		} else if (c == '<') {
			/* "<<" opens a dictionary, a single '<' a hex string. */
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<') {
				fz_readbyte(f);
				return PDF_TODICT;
			}
			*sl = lexhexstring(f, buf, n);
			return PDF_TSTRING;
		} else if (c == '>') {
			/* Only ">>" is valid here; both bytes are consumed. */
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>')
				return PDF_TCDICT;
			return PDF_TERROR;
		} else if (c == '[') {
			fz_readbyte(f);
			return PDF_TOARRAY;
		} else if (c == ']') {
			fz_readbyte(f);
			return PDF_TCARRAY;
		} else if (c == '{') {
			fz_readbyte(f);
			return PDF_TOBRACE;
		} else if (c == '}') {
			fz_readbyte(f);
			return PDF_TCBRACE;
		} else if (isnumber(c)) {
			lexnumber(f, buf, n);
			*sl = (int)strlen(text);
			/* A decimal point is what separates reals from integers. */
			return strchr(text, '.') ? PDF_TREAL : PDF_TINT;
		} else if (isregular(c)) {
			lexname(f, buf, n);
			*sl = (int)strlen(text);
			return tokenfromkeyword(buf);
		} else {
			/*
			 * Unrecognised byte (for example an unbalanced ')').  It
			 * was only peeked so far; consume it so that a caller
			 * which keeps lexing after an error makes progress
			 * instead of receiving the same error forever.
			 */
			fz_readbyte(f);
			return PDF_TERROR;
		}
	}
}
/*
 * Read the next token from the stream `f`.
 *
 * On success returns fz_okay with the token code stored in `*tok`.
 * Tokens that carry text have it written into `buf` by the lex* helpers,
 * which are handed the capacity `n`.  `*sl` is then set to:
 *   - strlen() of the text for names and keywords;
 *   - the value returned by lexstring(), lexhexstring() or lexnumber()
 *     for strings and numbers (lexnumber() also stores the token code).
 * `*sl` is left untouched for EOF, delimiters and errors.
 *
 * A stray ')' or a '>' not followed by a second '>' stores PDF_TERROR in
 * `*tok` and returns fz_throw("lexical error").
 */
fz_error pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
	while (1) {
		int c = fz_readbyte(f);

		switch (c) {
		case EOF:
			*tok = PDF_TEOF;
			return fz_okay;

		case ISWHITE:
			lexwhite(f);
			break;

		case '%':
			lexcomment(f);
			break;

		case '/':
			lexname(f, buf, n);
			*sl = (int)strlen(buf);
			*tok = PDF_TNAME;
			return fz_okay;

		case '(':
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			return fz_okay;

		case ')':
			goto cleanuperror;

		case '<':
			/* "<<" opens a dictionary, a single '<' a hex string. */
			c = fz_readbyte(f);
			if (c == '<') {
				*tok = PDF_TODICT;
				return fz_okay;
			}
			/*
			 * Push the lookahead byte back for lexhexstring(), but
			 * only if one was actually read: at EOF nothing was
			 * consumed, so an unread could step back onto the '<'
			 * itself.
			 */
			if (c != EOF)
				fz_unreadbyte(f);
			*sl = lexhexstring(f, buf, n);
			*tok = PDF_TSTRING;
			return fz_okay;

		case '>':
			c = fz_readbyte(f);
			if (c == '>') {
				*tok = PDF_TCDICT;
				return fz_okay;
			}
			goto cleanuperror;

		case '[':
			*tok = PDF_TOARRAY;
			return fz_okay;

		case ']':
			*tok = PDF_TCARRAY;
			return fz_okay;

		case '{':
			*tok = PDF_TOBRACE;
			return fz_okay;

		case '}':
			*tok = PDF_TCBRACE;
			return fz_okay;

		case ISNUMBER:
			/* lexnumber() wants to see the first character itself. */
			fz_unreadbyte(f);
			*sl = lexnumber(f, buf, n, tok);
			return fz_okay;

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unreadbyte(f);
			lexname(f, buf, n);
			*sl = (int)strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			return fz_okay;
		}
	}

cleanuperror:
	*tok = PDF_TERROR;
	return fz_throw("lexical error");
}
/*
 * Read the next token from the stream `f`.
 *
 * On success returns fz_okay with the token code stored in `*tok`.
 * Bytes matching iswhite() and comments introduced by '%' are skipped.
 * Tokens that carry text have it written into `buf` by the lex* helpers,
 * which are handed the capacity `n`.  `*sl` is then set to:
 *   - strlen() of the text for names, numbers and keywords;
 *   - the value returned by lexstring()/lexhexstring() for strings.
 * `*sl` is left untouched for EOF, delimiters and errors.
 *
 * Every exit path checks fz_readerror(f).  If the stream reports an
 * error, `*tok` becomes PDF_TERROR and the error is rethrown as
 * "cannot read token".  Otherwise a '>' not followed by a second '>' or
 * an unrecognised byte yields PDF_TERROR and fz_throw("lexical error").
 */
fz_error pdf_lex(pdf_token_e *tok, fz_stream *f, char *buf, int n, int *sl)
{
	fz_error error;
	int c;

	while (1) {
		c = fz_peekbyte(f);

		if (c == EOF) {
			*tok = PDF_TEOF;
			goto cleanupokay;
		} else if (iswhite(c)) {
			lexwhite(f);
		} else if (c == '%') {
			lexcomment(f);
		} else if (c == '/') {
			fz_readbyte(f); /* drop the leading '/' */
			lexname(f, buf, n);
			*sl = (int)strlen(buf);
			*tok = PDF_TNAME;
			goto cleanupokay;
		} else if (c == '(') {
			fz_readbyte(f); /* drop the opening '(' */
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			goto cleanupokay;
		} else if (c == '<') {
			/* "<<" opens a dictionary, a single '<' a hex string. */
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<') {
				fz_readbyte(f);
				*tok = PDF_TODICT;
			} else {
				*sl = lexhexstring(f, buf, n);
				*tok = PDF_TSTRING;
			}
			goto cleanupokay;
		} else if (c == '>') {
			/* Only ">>" is valid here; both bytes are consumed. */
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>') {
				*tok = PDF_TCDICT;
				goto cleanupokay;
			}
			goto cleanuperror;
		} else if (c == '[') {
			fz_readbyte(f);
			*tok = PDF_TOARRAY;
			goto cleanupokay;
		} else if (c == ']') {
			fz_readbyte(f);
			*tok = PDF_TCARRAY;
			goto cleanupokay;
		} else if (c == '{') {
			fz_readbyte(f);
			*tok = PDF_TOBRACE;
			goto cleanupokay;
		} else if (c == '}') {
			fz_readbyte(f);
			*tok = PDF_TCBRACE;
			goto cleanupokay;
		} else if (isnumber(c)) {
			lexnumber(f, buf, n);
			*sl = (int)strlen(buf);
			/* A decimal point is what separates reals from integers. */
			*tok = strchr(buf, '.') ? PDF_TREAL : PDF_TINT;
			goto cleanupokay;
		} else if (isregular(c)) {
			lexname(f, buf, n);
			*sl = (int)strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			goto cleanupokay;
		} else {
			/*
			 * Unrecognised byte (for example an unbalanced ')').  It
			 * was only peeked so far; consume it so that a caller
			 * which keeps lexing after an error makes progress
			 * instead of receiving the same error forever.
			 */
			fz_readbyte(f);
			goto cleanuperror;
		}
	}

cleanupokay:
	/* A token was produced; still surface any pending stream error. */
	error = fz_readerror(f);
	if (error) {
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}
	return fz_okay;

cleanuperror:
	/* A stream error takes precedence over the lexical error. */
	*tok = PDF_TERROR;
	error = fz_readerror(f);
	if (error)
		return fz_rethrow(error, "cannot read token");
	return fz_throw("lexical error");
}