/*
 * Read the next token from the stream `f`.
 *
 * Whitespace and '%' comments are skipped. Names, strings, hex strings and
 * numbers are handed to the matching lex_* helper together with `buf`.
 * Any other (regular) character starts a keyword: it is read with
 * lex_name() and mapped through pdf_token_from_keyword(buf->scratch).
 *
 * A stray ')' or '>' only produces a warning; lexing then resumes with the
 * following input.
 *
 * Returns a PDF_TOK_* value, or PDF_TOK_EOF once the input is exhausted.
 */
int pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);

		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;

		case IS_WHITE:
			lex_white(f);
			break;

		case '%':
			lex_comment(f);
			break;

		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;

		case '(':
			return lex_string(f, buf);

		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;

		case '<':
			/* "<<" opens a dictionary; a single '<' starts a hex string. */
			c = fz_read_byte(f);
			if (c == '<')
			{
				return PDF_TOK_OPEN_DICT;
			}
			/* Only push back a real byte: EOF is not a byte that was consumed. */
			if (c != EOF)
			{
				fz_unread_byte(f);
			}
			return lex_hex_string(f, buf);

		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
			{
				return PDF_TOK_EOF;
			}
			/*
			 * The look-ahead byte belongs to the next token; put it back so
			 * that it is not silently swallowed together with the stray '>'.
			 */
			fz_unread_byte(f);
			continue;

		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;

		case IS_NUMBER:
			return lex_number(f, buf, c);

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
/*
 * Read the next token from the stream `f`.
 *
 * Whitespace and '%' comments are skipped. Names, strings, hex strings and
 * numbers are handed to the matching lex_* helper together with `buf`.
 * Any other (regular) character starts a keyword: it is read with
 * lex_name() and mapped through pdf_token_from_keyword(buf->scratch).
 *
 * Malformed input is tolerated where possible: a stray ')' or '>' and
 * number characters glued to the end of a number only produce a warning.
 *
 * Returns the token kind, or PDF_TOK_EOF once the input is exhausted.
 */
pdf_token pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);

		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;

		case IS_WHITE:
			lex_white(f);
			break;

		case '%':
			lex_comment(f);
			break;

		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;

		case '(':
			return lex_string(f, buf);

		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;

		case '<':
			/* "<<" opens a dictionary; a single '<' starts a hex string. */
			c = fz_read_byte(f);
			if (c == '<')
			{
				return PDF_TOK_OPEN_DICT;
			}
			/*
			 * Only push back a real byte: EOF is not a byte that was
			 * consumed (the '>' branch below already treats EOF this way).
			 */
			if (c != EOF)
			{
				fz_unread_byte(f);
			}
			return lex_hex_string(f, buf);

		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
			{
				return PDF_TOK_EOF;
			}
			/* The look-ahead byte belongs to the next token; put it back. */
			fz_unread_byte(f);
			continue;

		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;

		case IS_NUMBER:
			/* cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2231 */
			{
				int tok = lex_number(f, buf, c);

				/*
				 * Discard (with a warning) any further number characters
				 * that directly follow the number just lexed, then return
				 * the number token unchanged.
				 */
				while (1)
				{
					c = fz_peek_byte(f);
					switch (c)
					{
					case IS_NUMBER:
						fz_warn(f->ctx, "ignoring invalid character after number: '%c'", c);
						fz_read_byte(f);
						continue;
					default:
						return tok;
					}
				}
			}

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *
 *   LEXICAL SCANNER
 */
/*
 * Scan the next token from the current scan context (pCurCtx) and return
 * its event code.
 *
 * The scan position is pCurCtx->pzScan.  The function dispatches on the
 * character found there after optional whitespace trimming:
 *   - NUL ends the current context; an enclosing context is resumed via
 *     pop_context(), otherwise the end-of-input sequence at "lex_done" runs.
 *   - '#' hands the rest of the line to processDirective() and rescans.
 *   - single-character punctuation is handled by SET_LIT_TKN()
 *     (macro not visible here; presumably sets lastToken and advances).
 *   - quotes, here strings, Scheme expressions, escapes, back quotes and
 *     comments are delegated to their helper routines.
 *   - everything else is passed to assembleName().
 *
 * Returns lastToken on the normal path, DP_EV_INVALID on the error paths
 * and on the second end-of-input call, and DP_EV_END on the first
 * end-of-input call.
 */
LOCAL te_dp_event
yylex(void)
{
    /* Start pessimistic; the helpers below are expected to overwrite this. */
    lastToken = DP_EV_INVALID;

scanAgain:
    /*
     *  Start the process of locating a token.
     *  We branch here after skipping over a comment
     *  or processing a directive (which may change our context).
     */
    if (IS_WHITESPACE_CHAR(*pCurCtx->pzScan))
        trim_whitespace();

    switch (*pCurCtx->pzScan) {
    case NUL:
        /*
         *  IF we are not inside an include context,
         *  THEN go finish.
         *  Otherwise drop back to the enclosing context and keep scanning.
         */
        if (pCurCtx->pCtx == NULL)
            goto lex_done;

        pop_context();
        goto scanAgain;

    case '#':
    {
        extern char * processDirective(char*);
        /* Skip the '#' itself; the directive text starts right after it. */
        char * pz = processDirective(pCurCtx->pzScan+1);
        /*
         *  Ensure that the compiler doesn't try to save a copy of
         *  "pCurCtx" in a register.  It must be reloaded from memory.
         *  (The directive may have changed the current context, so the
         *  returned position is stored through the reloaded pointer.)
         */
        pCurCtx->pzScan = pz;
        goto scanAgain;
    }

    /* Single-character literal tokens. */
    case '{': SET_LIT_TKN(O_BRACE);   break;
    case '=': SET_LIT_TKN(EQ);        break;
    case '}': SET_LIT_TKN(C_BRACE);   break;
    case '[': SET_LIT_TKN(OPEN_BKT);  break;
    case ']': SET_LIT_TKN(CLOSE_BKT); break;
    case ';': SET_LIT_TKN(SEMI);      break;
    case ',': SET_LIT_TKN(COMMA);     break;

    case '\'':
    case '"':
    {
        /*
         *  Quoted string.  ao_string_cook() returns the position to resume
         *  scanning from, or NULL, which is reported as an unterminated
         *  quote.  The token text starts at the old scan position.
         */
        char* pz = ao_string_cook(pCurCtx->pzScan, &(pCurCtx->lineNo));
        if (pz == NULL)
            goto NUL_error;

        pz_token = pCurCtx->pzScan;

        lastToken = DP_EV_STRING;
        pCurCtx->pzScan = pz;
        break;
    }

    case '<':
        /*
         *  Possible here string.  On FAILURE the text is re-scanned as an
         *  ordinary name; PROBLEM aborts this call with DP_EV_INVALID.
         *  NOTE(review): on SUCCESS lex_here_string() presumably sets
         *  lastToken and advances pzScan -- confirm against the helper.
         */
        switch (lex_here_string()) {
        case SUCCESS: break;
        case FAILURE: goto BrokenToken;
        case PROBLEM: return DP_EV_INVALID;
        }
        break;

    /* NOTE(review): loadScheme() presumably sets lastToken -- confirm. */
    case '(': loadScheme(); break;

    case '\\':
        /* A backslash followed by "'(" introduces an alist definition. */
        if (strncmp(pCurCtx->pzScan+1, "'(", (size_t)2) == 0) {
            alist_to_autogen_def();
            goto scanAgain;
        }
        lex_escaped_char();
        break;

    case '`':
        /*
         *  Back-quoted text.  FAILURE is reported through the
         *  unterminated-quote error path; PROBLEM restarts the scan.
         */
        switch (lex_backquote()) {
        case FAILURE: goto NUL_error;
        case PROBLEM: goto scanAgain;
        case SUCCESS: break;
        }
        break;

    case '/':
        /*
         *  A successfully skipped comment restarts the scan.  Any other
         *  result means the '/' was not a comment start, so it is treated
         *  like any other unrecognized character below.
         */
        switch (lex_comment()) {
        case SUCCESS: goto scanAgain;
        default: break;
        }
        /* FALLTHROUGH */ /* to Invalid input char */

    default:
    BrokenToken:
        /*
         *  Anything not recognized above: assembleName() stores the token
         *  kind in lastToken and returns the new scan position.
         */
        pCurCtx->pzScan = assembleName(pCurCtx->pzScan, &lastToken);
        break;
    }   /* switch (*pCurCtx->pzScan) */

    return lastToken;

NUL_error:
    /*
     *  Report an unterminated quote with the current file name and line.
     *  NOTE(review): AG_ABEND presumably does not return; the return below
     *  covers the case where it does.
     */
    AG_ABEND(aprf(zErrMsg, pzProg, "unterminated quote in definition",
                  pCurCtx->pzCtxFname, pCurCtx->lineNo));
    return DP_EV_INVALID;

lex_done:
    /*
     *  First time through, return the DP_EV_END token.
     *  Second time through, we really finish.
     *  The scan pointer being set to zNil is the marker that the first
     *  pass already happened; on the second pass the current context is
     *  linked in front of pDoneCtx.
     */
    if (pCurCtx->pzScan == zNil) {
        pCurCtx->pCtx = pDoneCtx;
        pDoneCtx      = pCurCtx;

        return DP_EV_INVALID;
    }

    pCurCtx->pzScan = (char*)zNil;
    return DP_EV_END;
}
/*
 * Read the next token from the stream `f`.
 *
 *   tok  receives the token kind (a PDF_TOK_* value).
 *   f    stream to read from.
 *   buf  scratch buffer of `n` bytes that receives the token text for
 *        names, strings, hex strings, numbers and keywords.
 *   n    size of `buf`.
 *   sl   receives the length reported for the text in `buf`; it is only
 *        written for names, strings, hex strings, numbers and keywords and
 *        is left untouched for every other token.
 *
 * Whitespace and '%' comments are skipped.  A stray ')' or a '>' that is not
 * followed by a second '>' is a lexical error.
 *
 * Returns fz_okay on success.  On a lexical error *tok is set to
 * PDF_TOK_ERROR and the result of fz_throw("lexical error") is returned.
 */
fz_error pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
	while (1)
	{
		int c = fz_read_byte(f);

		switch (c)
		{
		case EOF:
			*tok = PDF_TOK_EOF;
			return fz_okay;

		case IS_WHITE:
			lex_white(f);
			break;

		case '%':
			lex_comment(f);
			break;

		case '/':
			lex_name(f, buf, n);
			*sl = (int)strlen(buf);
			*tok = PDF_TOK_NAME;
			return fz_okay;

		case '(':
			*sl = lex_string(f, buf, n);
			*tok = PDF_TOK_STRING;
			return fz_okay;

		case ')':
			/* The shared error exit sets *tok. */
			goto cleanuperror;

		case '<':
			/* "<<" opens a dictionary; a single '<' starts a hex string. */
			c = fz_read_byte(f);
			if (c == '<')
			{
				*tok = PDF_TOK_OPEN_DICT;
			}
			else
			{
				/* Only push back a real byte: EOF is not a byte that was consumed. */
				if (c != EOF)
				{
					fz_unread_byte(f);
				}
				*sl = lex_hex_string(f, buf, n);
				*tok = PDF_TOK_STRING;
			}
			return fz_okay;

		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				*tok = PDF_TOK_CLOSE_DICT;
				return fz_okay;
			}
			goto cleanuperror;

		case '[':
			*tok = PDF_TOK_OPEN_ARRAY;
			return fz_okay;
		case ']':
			*tok = PDF_TOK_CLOSE_ARRAY;
			return fz_okay;
		case '{':
			*tok = PDF_TOK_OPEN_BRACE;
			return fz_okay;
		case '}':
			*tok = PDF_TOK_CLOSE_BRACE;
			return fz_okay;

		case IS_NUMBER:
			/* lex_number() re-reads the first character and stores the kind in *tok. */
			fz_unread_byte(f);
			*sl = lex_number(f, buf, n, tok);
			return fz_okay;

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf, n);
			*sl = (int)strlen(buf);
			*tok = pdf_token_from_keyword(buf);
			return fz_okay;
		}
	}

cleanuperror:
	*tok = PDF_TOK_ERROR;
	return fz_throw("lexical error");
}