/*
 * Read the next token from stream 'f'.
 *
 * Bytes matching IS_WHITE and '%' are handed to lex_white() and
 * lex_comment() respectively, after which lexing resumes with the next
 * byte. Names, strings, hex strings and numbers are delegated to their
 * lex_*() helpers, which receive 'buf'. Any other byte starts a keyword,
 * which is read with lex_name() and classified from buf->scratch by
 * pdf_token_from_keyword().
 *
 * A stray ')' or a lone '>' is reported with fz_warn() and skipped.
 *
 * Returns the token code, or PDF_TOK_EOF once fz_read_byte() reports EOF.
 */
int pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			/* One byte of look-ahead separates "<<" from a hex string. */
			c = fz_read_byte(f);
			if (c == '<')
				return PDF_TOK_OPEN_DICT;
			/* Only push back a real byte; EOF was never taken from the stream. */
			if (c != EOF)
				fz_unread_byte(f);
			return lex_hex_string(f, buf);
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
				return PDF_TOK_CLOSE_DICT;
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
				return PDF_TOK_EOF;
			/*
			 * The look-ahead byte belongs to the next token. Give it
			 * back so that skipping the stray '>' does not also eat
			 * the first byte of whatever follows.
			 */
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			return lex_number(f, buf, c);
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			/* lex_name() must see the first keyword byte, so return it first. */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
/*
 * Read the next token from stream 'f'.
 *
 * Bytes matching IS_WHITE and '%' are handed to lex_white() and
 * lex_comment() respectively, after which lexing resumes with the next
 * byte. Names, strings, hex strings and numbers are delegated to their
 * lex_*() helpers, which receive 'buf'. Any other byte starts a keyword,
 * which is read with lex_name() and classified from buf->scratch by
 * pdf_token_from_keyword().
 *
 * A stray ')' or a lone '>' is reported with fz_warn() and skipped.
 * Bytes matching IS_NUMBER that directly follow a number token are
 * reported and dropped.
 *
 * Returns the token code, or PDF_TOK_EOF once fz_read_byte() reports EOF.
 */
pdf_token pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			/* One byte of look-ahead separates "<<" from a hex string. */
			c = fz_read_byte(f);
			if (c == '<')
				return PDF_TOK_OPEN_DICT;
			/*
			 * Same rule as the '>' branch below: only push back a
			 * real byte, never after EOF.
			 */
			if (c != EOF)
				fz_unread_byte(f);
			return lex_hex_string(f, buf);
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
				return PDF_TOK_CLOSE_DICT;
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
				return PDF_TOK_EOF;
			/* The look-ahead byte belongs to the next token. */
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			/* cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2231 */
			{
				int tok = lex_number(f, buf, c);
				/*
				 * Discard any further number characters glued onto
				 * the end of the number, warning about each one, and
				 * stop at the first byte of any other kind (it is
				 * only peeked, so it stays in the stream).
				 */
				while (1)
				{
					c = fz_peek_byte(f);
					switch (c)
					{
					case IS_NUMBER:
						fz_warn(f->ctx, "ignoring invalid character after number: '%c'", c);
						fz_read_byte(f);
						continue;
					default:
						return tok;
					}
				}
			}
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			/* lex_name() must see the first keyword byte, so return it first. */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
/*
 * Read the next token from stream 'f'.
 *
 * tok: receives the token code.
 * f:   stream to read from.
 * buf: scratch buffer of 'n' bytes handed to the lex_*() helpers.
 * n:   size of 'buf'.
 * sl:  receives a length for name, string, hex string, number and keyword
 *      tokens (strlen(buf) for names and keywords, the helper's return
 *      value otherwise). It is left untouched for every other token.
 *
 * Bytes matching IS_WHITE and '%' are handed to lex_white() and
 * lex_comment() respectively, after which lexing resumes with the next
 * byte.
 *
 * Returns fz_okay on success. A stray ')' or a lone '>' sets *tok to
 * PDF_TOK_ERROR and returns the result of fz_throw("lexical error").
 */
fz_error pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			*tok = PDF_TOK_EOF;
			return fz_okay;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TOK_NAME;
			return fz_okay;
		case '(':
			*sl = lex_string(f, buf, n);
			*tok = PDF_TOK_STRING;
			return fz_okay;
		case ')':
			goto cleanuperror;
		case '<':
			/* One byte of look-ahead separates "<<" from a hex string. */
			c = fz_read_byte(f);
			if (c == '<')
			{
				*tok = PDF_TOK_OPEN_DICT;
				return fz_okay;
			}
			/* Only push back a real byte; EOF was never taken from the stream. */
			if (c != EOF)
				fz_unread_byte(f);
			*sl = lex_hex_string(f, buf, n);
			*tok = PDF_TOK_STRING;
			return fz_okay;
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				*tok = PDF_TOK_CLOSE_DICT;
				return fz_okay;
			}
			goto cleanuperror;
		case '[':
			*tok = PDF_TOK_OPEN_ARRAY;
			return fz_okay;
		case ']':
			*tok = PDF_TOK_CLOSE_ARRAY;
			return fz_okay;
		case '{':
			*tok = PDF_TOK_OPEN_BRACE;
			return fz_okay;
		case '}':
			*tok = PDF_TOK_CLOSE_BRACE;
			return fz_okay;
		case IS_NUMBER:
			/* lex_number() re-reads the first byte and stores the token kind in *tok. */
			fz_unread_byte(f);
			*sl = lex_number(f, buf, n, tok);
			return fz_okay;
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			/* lex_name() must see the first keyword byte, so return it first. */
			fz_unread_byte(f);
			lex_name(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_token_from_keyword(buf);
			return fz_okay;
		}
	}

cleanuperror:
	/* Single exit for lexical errors: flag the token, then raise. */
	*tok = PDF_TOK_ERROR;
	return fz_throw("lexical error");
}