/*
 * Read one subsection of a cross-reference stream into xref->table.
 *
 * stm supplies the entry data, i0 is the index of the first table slot to
 * fill and i1 the number of entries.  w0, w1 and w2 are the byte widths of
 * the three big-endian fields of each entry; a field of width zero takes
 * its default (type 1, offset 0, generation 0).  Slots that already have a
 * type are read past but left untouched.
 *
 * Returns fz_okay, or a thrown error when the range does not fit in the
 * table or the stream ends before all entries were read.
 */
static fz_error
pdf_readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
{
	int i, n;

	if (i0 < 0 || i1 < 0)
		return fz_throw("negative xref stream entry index");

	/* compare against len - i1 instead of forming i0 + i1, which could overflow */
	if (i0 > xref->len - i1)
		return fz_throw("xref stream has too many entries");

	for (i = i0; i < i0 + i1; i++)
	{
		int a = 0;
		int b = 0;
		int c = 0;
		int ch;

		if (fz_peekbyte(stm) == EOF)
			return fz_throw("truncated xref stream");

		/* an EOF in the middle of an entry must not be folded into the value */
		for (n = 0; n < w0; n++)
		{
			ch = fz_readbyte(stm);
			if (ch == EOF)
				return fz_throw("truncated xref stream");
			a = (a << 8) + ch;
		}
		for (n = 0; n < w1; n++)
		{
			ch = fz_readbyte(stm);
			if (ch == EOF)
				return fz_throw("truncated xref stream");
			b = (b << 8) + ch;
		}
		for (n = 0; n < w2; n++)
		{
			ch = fz_readbyte(stm);
			if (ch == EOF)
				return fz_throw("truncated xref stream");
			c = (c << 8) + ch;
		}

		if (!xref->table[i].type)
		{
			int t = w0 ? a : 1;
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
			xref->table[i].ofs = w1 ? b : 0;
			xref->table[i].gen = w2 ? c : 0;
		}
	}

	return fz_okay;
}
/*
 * Read a name into s (capacity n, including the terminator).
 *
 * First copies regular characters from f until a non-regular character is
 * peeked or only room for the terminator is left.  Then rewrites the
 * buffer in place, replacing each '#' that is followed by two more bytes
 * with the byte those two hex digits encode.
 */
static void
lexname(fz_stream *f, unsigned char *s, int n)
{
	unsigned char *src = s;
	unsigned char *dst = s;

	for (; n > 1 && isregular(fz_peekbyte(f)); n--)
		*s++ = fz_readbyte(f);
	*s = '\0';

	/* dst never overtakes src, so decoding in place is safe */
	while (*src != 0)
	{
		if (src[0] != '#' || src[1] == 0 || src[2] == 0)
		{
			*dst++ = *src++;
			continue;
		}
		*dst++ = fromhex(src[1]) * 16 + fromhex(src[2]);
		src += 3;
	}
	*dst = '\0';
}
/*
 * Read the cross-reference section found at file offset ofs.
 *
 * After skipping leading white space, a section starting with 'x' is
 * handed to pdf_readoldxref and one starting with a decimal digit to
 * pdf_readnewxref.  Anything else is an error.  buf/cap is the scratch
 * buffer passed through to the reader; trailerp is filled in by it.
 */
static fz_error
pdf_readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error error;
	int c;

	fz_seek(xref->file, ofs, 0);

	while (iswhite(fz_peekbyte(xref->file)))
		fz_readbyte(xref->file);

	c = fz_peekbyte(xref->file);
	if (c == 'x')
		error = pdf_readoldxref(trailerp, xref, buf, cap);
	else if (c >= '0' && c <= '9')
		error = pdf_readnewxref(trailerp, xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format");

	if (error)
		return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);
	return fz_okay;
}
/*
 * Decode a hexadecimal string from f into buf (capacity n bytes).
 *
 * White space between digits is skipped.  Decoding stops at '>', at any
 * other non-hex character (including EOF), or when buf is full.  If the
 * string holds an odd number of digits the missing final digit is taken
 * to be 0, as the PDF specification requires.
 *
 * Returns the number of bytes stored in buf; buf is not terminated.
 */
static int
lexhexstring(fz_stream *f, char *buf, int n)
{
	char *s = buf;
	char *e = buf + n;
	int a = 0, x = 0;	/* a: pending high nibble, x: set while one is pending */
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);
		switch (c)
		{
		case ISWHITE:
			break;
		case ISHEX:
			if (x)
				*s++ = a * 16 + fromhex(c);
			else
				a = fromhex(c);
			x = !x;
			break;
		case '>':
		default:
			goto end;
		}
	}

end:
	/* flush a dangling high nibble instead of silently dropping it */
	if (x && s < e)
		*s++ = a * 16;
	return s - buf;
}
/*
 * Read the trailer belonging to the cross-reference section at
 * xref->startxref.
 *
 * After skipping leading white space, a section starting with 'x' is
 * handed to pdf_readoldtrailer and one starting with a decimal digit to
 * pdf_readnewtrailer.  Any other first character is reported as an
 * unrecognized format.  buf/cap is the scratch buffer for the reader.
 */
static fz_error
pdf_readtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int c;

	fz_seek(xref->file, xref->startxref, 0);

	while (iswhite(fz_peekbyte(xref->file)))
		fz_readbyte(xref->file);

	c = fz_peekbyte(xref->file);
	if (c == 'x')
		error = pdf_readoldtrailer(xref, buf, cap);
	else if (c >= '0' && c <= '9')
		error = pdf_readnewtrailer(xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format: '%c'", c);

	if (error)
		return fz_rethrow(error, "cannot read trailer");
	return fz_okay;
}
/*
 * Decode a hexadecimal string from f into buf (capacity n bytes).
 *
 * White space between digits is skipped.  Decoding stops at '>', at any
 * other non-hex character, or when buf is full.  If the string holds an
 * odd number of digits the missing final digit is taken to be 0, as the
 * PDF specification requires.
 *
 * Returns the number of bytes stored in buf; buf is not terminated.
 */
static int
lexhexstring(fz_stream *f, unsigned char *buf, int n)
{
	unsigned char *s = buf;
	unsigned char *e = buf + n;
	int a = 0, x = 0;	/* a: pending high nibble, x: set while one is pending */
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);
		if (c == '>')
			break;
		else if (iswhite(c))
			continue;
		else if (ishex(c))
		{
			if (x)
				*s++ = a * 16 + fromhex(c);
			else
				a = fromhex(c);
			x = !x;
		}
		else
			break;
	}

	/* flush a dangling high nibble instead of silently dropping it */
	if (x && s < e)
		*s++ = a * 16;

	return s - buf;
}
/*
 * Load a calculator function from the stream of object (oid gen R).
 *
 * The stream must start with '{'; the rest is handed to parsecode, which
 * works on func with func->u.p.code and func->u.p.cap reset beforehand.
 * The stream is dropped on every path once it has been opened.
 */
static fz_error
loadpostscriptfunc(pdf_function *func, pdf_xref *xref, fz_obj *dict, int oid, int gen)
{
	fz_error error;
	fz_stream *stream;
	int codeptr = 0;
	int first;

	pdf_logrsrc("load postscript function (%d %d R)\n", oid, gen);

	error = pdf_openstream(&stream, xref, oid, gen);
	if (error)
		return fz_rethrow(error, "cannot open calculator function stream");

	first = fz_readbyte(stream);
	if (first != '{')
	{
		fz_dropstream(stream);
		return fz_throw("stream is not a calculator function");
	}

	func->u.p.code = nil;
	func->u.p.cap = 0;

	error = parsecode(func, stream, &codeptr);
	fz_dropstream(stream);
	if (error)
		return fz_rethrow(error, "cannot parse calculator function");

	return fz_okay;
}
/*
 * Skip a comment: consume bytes from f up to and including the first
 * line feed or carriage return, or until EOF.
 */
static inline void
lexcomment(fz_stream *f)
{
	for (;;)
	{
		int c = fz_readbyte(f);
		if (c == '\012' || c == '\015' || c == EOF)
			return;
	}
}
/*
 * Copy the run of number characters at the head of f into s and
 * terminate it.  n is the capacity of s including the terminator; the
 * copy stops early when only room for the terminator is left.
 */
static void
lexnumber(fz_stream *f, unsigned char *s, int n)
{
	unsigned char *out = s;

	for (; n > 1 && isnumber(fz_peekbyte(f)); n--)
		*out++ = fz_readbyte(f);

	*out = '\0';
}
/*
 * Skip a comment: consume bytes from f up to and including the first
 * line feed or carriage return, or until EOF.
 */
static inline void
lexcomment(fz_stream *f)
{
	int c;

	do
		c = fz_readbyte(f);
	while (c != '\012' && c != '\015' && c != EOF);
}
/*
 * Skip white space: consume bytes from f for as long as the next byte,
 * as reported by fz_peekbyte, is white space.
 */
static inline void
lexwhite(fz_stream *f)
{
	while (iswhite(fz_peekbyte(f)))
		fz_readbyte(f);
}
/*
 * Skip white space: read bytes from f until one is not white space, then
 * push that byte back unless it was EOF.
 */
static inline void
lexwhite(fz_stream *f)
{
	int c;

	for (;;)
	{
		c = fz_readbyte(f);
		/* the cheap range test comes first so iswhite only sees values up to 32 */
		if (c > 32 || !iswhite(c))
			break;
	}

	if (c == EOF)
		return;
	fz_unreadbyte(f);
}
/*
 * Top up lzw->word with bytes read from lzw->chain.
 *
 * While lzw->bidx is at least 8, one byte is read, bidx is lowered by 8
 * and the byte is ORed into word at bit position bidx.
 *
 * Returns 0 when bidx has dropped below 8, or EOF as soon as the chained
 * stream has no more data (word and bidx keep what was filled so far).
 */
static inline int
fillbits(fz_lzwd *lzw)
{
	while (lzw->bidx >= 8)
	{
		int c = fz_readbyte(lzw->chain);
		if (c == EOF)
			return EOF;
		lzw->bidx -= 8;
		/* shift as unsigned: a byte >= 0x80 moved up 24 bits overflows a signed int */
		lzw->word |= (unsigned int)c << lzw->bidx;
	}
	return 0;
}
/*
 * Read one line from stm into mem (capacity n, including the terminator).
 *
 * The line ends at "\n", "\r" or "\r\n"; the line ending is consumed but
 * not stored.  Reading also stops at EOF or when only room for the
 * terminator is left.  The result is terminated unless n is 0.
 *
 * Returns the number of characters stored, not counting the terminator.
 */
int
fz_readline(fz_stream *stm, char *mem, int n)
{
	char *out = mem;
	int room;

	for (room = n; room > 1; room--)
	{
		int c = fz_readbyte(stm);

		if (c == EOF || c == '\n')
			break;

		if (c == '\r')
		{
			/* swallow the line feed of a CR LF pair */
			if (fz_peekbyte(stm) == '\n')
				fz_readbyte(stm);
			break;
		}

		*out++ = c;
	}

	if (room != 0)
		*out = '\0';

	return out - mem;
}
/*
 * Decode hex-encoded data from the chained stream into buf, at most len
 * bytes.
 *
 * Pairs of hex digits become one byte, white space is skipped, and '>'
 * marks end of data: a pending single digit is emitted as the high nibble
 * and state->eod is set so that later calls return immediately.
 *
 * Returns the number of bytes produced, which is short when the chained
 * stream runs out or end of data is reached, or the value of fz_throw on
 * any other character.
 */
static int
readahxd(fz_stream *stm, unsigned char *buf, int len)
{
	fz_ahxd *state = stm->state;
	unsigned char *out = buf;
	unsigned char *end = buf + len;
	int hi = 0;
	int havehi = 0;
	int c;

	while (out < end)
	{
		if (state->eod)
			break;

		c = fz_readbyte(state->chain);
		if (c < 0)
			break;

		if (ishex(c))
		{
			if (havehi)
			{
				*out++ = (hi << 4) | fromhex(c);
				havehi = 0;
			}
			else
			{
				hi = fromhex(c);
				havehi = 1;
			}
		}
		else if (c == '>')
		{
			if (havehi)
				*out++ = (hi << 4);
			state->eod = 1;
		}
		else if (!iswhite(c))
		{
			return fz_throw("bad data in ahxd: '%c'", c);
		}
	}

	return out - buf;
}
/*
 * Read one sample of bps bits from stream.
 *
 * Whole bytes are read, most significant first, until at least bps bits
 * are buffered; the top bps of those bits are returned and any surplus
 * low bits of the last byte are discarded.  End of stream is not detected
 * here: the low 8 bits of whatever fz_readbyte returns are used as data.
 */
static int
getdata(fz_stream *stream, int bps)
{
	unsigned int buf = 0;
	int bits = 0;
	int s;

	while (bits < bps)
	{
		buf = (buf << 8) | (fz_readbyte(stream) & 0xff);
		bits += 8;
	}

	s = buf >> (bits - bps);

	/*
	 * Build the mask only when it is needed, and in unsigned arithmetic:
	 * shifting 1 by 31 overflows a signed int and shifting by 32 or more
	 * is undefined.
	 */
	if (bps < 32)
		s = s & ((1u << bps) - 1);

	return s;
}
/*
 * Decode a hexadecimal string from f into buf (capacity n bytes).
 *
 * White space between digits is skipped and invalid characters are
 * reported with fz_warn and ignored.  Decoding stops at '>', at EOF, or
 * when buf is full.  If the string holds an odd number of digits the
 * missing final digit is taken to be 0, as the PDF specification
 * requires.
 *
 * Returns the number of bytes stored in buf; buf is not terminated.
 */
static int
lexhexstring(fz_stream *f, char *buf, int n)
{
	char *s = buf;
	char *e = buf + n;
	int a = 0, x = 0;	/* a: pending high nibble, x: set while one is pending */
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);
		if (c == '>')
			break;
		else if (iswhite(c))
			continue;
		else if (ishex(c))
		{
			if (x)
				*s++ = a * 16 + fromhex(c);
			else
				a = fromhex(c);
			x = !x;
		}
		else
		{
			if (c == EOF)
				break;
			/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=624 */
			fz_warn("Ignoring invalid character in hexstring: %c", c);
		}
	}

	/* flush a dangling high nibble instead of silently dropping it */
	if (x && s < e)
		*s++ = a * 16;

	return s - buf;
}
/*
 * Rebuild xref->table and xref->trailer by scanning the whole file.
 *
 * The file is tokenized from the '%PDF' marker (or from the start when no
 * marker is found in the first bytes).  Every "num gen obj" found is
 * recorded with its offset and stream position; Encrypt, ID, Root and
 * Info entries are collected from any dictionary met at the top level and
 * from XRef stream dictionaries seen by fz_repairobj.  The table is then
 * resized to hold the largest object number, stream lengths that were
 * determined by scanning are written back as Length, the free list is
 * relinked, and a new trailer is created from the collected entries.
 *
 * buf/bufsize is scratch space for the lexer.  Returns fz_okay, or a
 * rethrown error after releasing everything collected so far.
 */
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/* look for '%PDF' version marker within first kilobyte of file */
	/* read at most one kilobyte and never more than buf can hold */
	n = fz_read(xref->file, (unsigned char *)buf, bufsize < 1024 ? bufsize : 1024);
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	for (i = 0; i < n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		/* keep the last two integers and their offsets: they are num and gen once 'obj' shows up */
		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			pdf_logxref("found object: (%d %d R)\n", num, gen);

			/*
			 * The object number later indexes xref->table, so an object
			 * with a negative number is parsed past but not recorded.
			 */
			if (num >= 0)
			{
				if (listlen + 1 == listcap)
				{
					listcap = (listcap * 3) / 2;
					list = fz_realloc(list, listcap, sizeof(struct entry));
				}

				list[listlen].num = num;
				list[listlen].gen = gen;
				list[listlen].ofs = numofs;
				list[listlen].stmofs = stmofs;
				list[listlen].stmlen = stmlen;
				listlen ++;

				if (num > maxnum)
					maxnum = num;
			}
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			/* a later dictionary overrides what an earlier one supplied */
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		/* step over the byte the lexer could not make sense of */
		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}
	}

	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	/* walk backwards so each free entry's ofs names the next higher free entry */
	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer, Root will be added later */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}
/*
 * Scan one object body during xref repair, starting just after 'obj'.
 *
 * If the object is a dictionary it is parsed without resolving
 * references; for a dictionary of Type XRef its Encrypt and ID entries
 * replace *encrypt and *id, and an integer Length is remembered.  Tokens
 * are then skipped up to 'stream', 'endobj', a lexical error or EOF.
 *
 * For a stream, *stmofsp receives the offset of the stream data.  When
 * Length is positive and 'endstream' is found right after that many
 * bytes, the length is trusted and *stmlenp stays -1.  Otherwise the file
 * is scanned byte by byte for "endstream" and *stmlenp receives the
 * distance found.  For objects without a stream *stmofsp is 0 and
 * *stmlenp is -1.
 */
static fz_error
fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
	fz_error error;
	int tok;
	int declared = 0;	/* Length from the dictionary, 0 when absent */
	int lengthok = 0;	/* set when 'endstream' sits where Length says */
	int len;
	int n;
	int c;

	*stmofsp = 0;
	*stmlenp = -1;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse object");

	if (tok == PDF_TODICT)
	{
		fz_obj *dict, *obj;

		/* Send nil xref so we don't try to resolve references */
		error = pdf_parsedict(&dict, nil, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");

		obj = fz_dictgets(dict, "Type");
		if (fz_isname(obj) && !strcmp(fz_toname(obj), "XRef"))
		{
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (*encrypt)
					fz_dropobj(*encrypt);
				*encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (*id)
					fz_dropobj(*id);
				*id = fz_keepobj(obj);
			}
		}

		obj = fz_dictgets(dict, "Length");
		if (fz_isint(obj))
			declared = fz_toint(obj);

		fz_dropobj(dict);
	}

	while (tok != PDF_TSTREAM && tok != PDF_TENDOBJ && tok != PDF_TERROR && tok != PDF_TEOF)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj or stream token");
	}

	if (tok != PDF_TSTREAM)
		return fz_okay;

	/* consume the byte after 'stream', and the line feed of a CR LF pair */
	c = fz_readbyte(file);
	if (c == '\r' && fz_peekbyte(file) == '\n')
		fz_readbyte(file);

	*stmofsp = fz_tell(file);
	if (*stmofsp < 0)
		return fz_throw("cannot seek in file");

	if (declared > 0)
	{
		fz_seek(file, *stmofsp + declared, 0);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			fz_catch(error, "cannot find endstream token, falling back to scanning");
		if (tok == PDF_TENDSTREAM)
			lengthok = 1;
		else
			fz_seek(file, *stmofsp, 0);
	}

	if (!lengthok)
	{
		/* slide a nine byte window over the data until it reads "endstream" */
		n = fz_read(file, (unsigned char *) buf, 9);
		if (n < 0)
			return fz_rethrow(n, "cannot read from file");

		while (memcmp(buf, "endstream", 9) != 0)
		{
			c = fz_readbyte(file);
			if (c == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = c;
		}

		*stmlenp = fz_tell(file) - *stmofsp - 9;
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot scan for endobj token");
	if (tok != PDF_TENDOBJ)
		fz_warn("object missing 'endobj' token");

	return fz_okay;
}
/*
 * Parse an indirect object "num gen obj <value> [stream|endobj]" from file.
 *
 * On success *op receives the parsed value (a null object when 'endobj'
 * follows 'obj' directly) and, where the pointers are non-nil, *onum and
 * *ogen receive the object and generation numbers and *ostmofs the offset
 * of the stream data, or 0 when the object has no stream.  A value that
 * is followed by neither 'stream' nor 'endobj' only produces a warning.
 *
 * buf/cap is scratch space for the lexer; xref is passed on to the array
 * and dictionary parsers and stored in indirect references.
 */
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0, stmofs = 0;
	pdf_token_e tok;
	int len;
	int refnum, refgen;
	int havetok = 0;	/* set when the token after the value is already in tok */

	/* object number */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	/* generation number */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	/* 'obj' keyword */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	/* the value itself */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TNULL:
		obj = fz_newnull();
		break;
	case PDF_TTRUE:
		obj = fz_newbool(1);
		break;
	case PDF_TFALSE:
		obj = fz_newbool(0);
		break;
	case PDF_TNAME:
		obj = fz_newname(buf);
		break;
	case PDF_TREAL:
		obj = fz_newreal(atof(buf));
		break;
	case PDF_TSTRING:
		obj = fz_newstring(buf, len);
		break;

	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TINT:
		/* either a plain integer or the start of an "a b R" reference */
		refnum = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			obj = fz_newint(refnum);
			havetok = 1;
			break;
		}
		if (tok != PDF_TINT)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
		refgen = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok != PDF_TR)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
		obj = fz_newindirect(refnum, refgen, xref);
		break;

	case PDF_TENDOBJ:
		obj = fz_newnull();
		havetok = 1;
		break;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	if (!havetok)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(obj);
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		}
	}

	if (tok == PDF_TSTREAM)
	{
		/* skip spaces after the keyword, then the end of line marker */
		int c = fz_readbyte(file);
		while (c == ' ')
			c = fz_readbyte(file);
		if (c == '\r')
		{
			if (fz_peekbyte(file) == '\n')
				fz_readbyte(file);
			else
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stmofs = fz_tell(file);
	}
	else if (tok != PDF_TENDOBJ)
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return fz_okay;
}
/*
 * Read a number from f into s (capacity n, including the terminator).
 *
 * Accepted shape: an optional leading sign, digits, at most one dot, more
 * digits.  The first character that does not fit is pushed back; EOF just
 * ends the number.  *tok is set to PDF_TINT, or to PDF_TREAL once a dot
 * has been stored.
 *
 * Returns the number of characters stored, not counting the terminator.
 */
static int
lexnumber(fz_stream *f, char *s, int n, int *tok)
{
	enum { st_start, st_int, st_frac };
	char *out = s;
	int state = st_start;

	*tok = PDF_TINT;

	while (n > 1)
	{
		int c = fz_readbyte(f);

		switch (c)
		{
		case EOF:
			goto done;

		case RANGE_0_9:
			if (state == st_start)
				state = st_int;
			break;

		case '.':
			/* only one dot per number */
			if (state == st_frac)
				goto reject;
			*tok = PDF_TREAL;
			state = st_frac;
			break;

		case '+':
		case '-':
			/* a sign is only valid as the very first character */
			if (state != st_start)
				goto reject;
			state = st_int;
			break;

		default:
			goto reject;
		}

		*out++ = c;
		n--;
	}
	goto done;

reject:
	fz_unreadbyte(f);
done:
	*out = '\0';
	return out - s;
}
/*
 * Read the next token from f.
 *
 * White space and comments are skipped.  *tok receives the token type;
 * for names, strings, numbers and keywords the text is left in buf
 * (capacity n) and its length in *sl.
 *
 * Returns fz_okay, a rethrown stream error when fz_readerror reports one,
 * or a "lexical error" for a lone '>' or a character that starts no
 * token.  In both error cases *tok is PDF_TERROR.
 */
fz_error
pdf_lex(pdf_token_e *tok, fz_stream *f, char *buf, int n, int *sl)
{
	fz_error error;
	int lexfail = 0;
	int c;

	for (;;)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
		{
			*tok = PDF_TEOF;
			break;
		}

		if (iswhite(c))
		{
			lexwhite(f);
			continue;
		}

		if (c == '%')
		{
			lexcomment(f);
			continue;
		}

		if (c == '/')
		{
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			break;
		}

		if (c == '(')
		{
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			break;
		}

		if (c == '<')
		{
			/* "<<" opens a dictionary, a single '<' a hex string */
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<')
			{
				fz_readbyte(f);
				*tok = PDF_TODICT;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				*tok = PDF_TSTRING;
			}
			break;
		}

		if (c == '>')
		{
			/* only ">>" is valid here; the second byte is consumed either way */
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>')
				*tok = PDF_TCDICT;
			else
				lexfail = 1;
			break;
		}

		if (c == '[')
		{
			fz_readbyte(f);
			*tok = PDF_TOARRAY;
			break;
		}

		if (c == ']')
		{
			fz_readbyte(f);
			*tok = PDF_TCARRAY;
			break;
		}

		if (c == '{')
		{
			fz_readbyte(f);
			*tok = PDF_TOBRACE;
			break;
		}

		if (c == '}')
		{
			fz_readbyte(f);
			*tok = PDF_TCBRACE;
			break;
		}

		if (isnumber(c))
		{
			lexnumber(f, buf, n);
			*sl = strlen(buf);
			if (strchr(buf, '.'))
				*tok = PDF_TREAL;
			else
				*tok = PDF_TINT;
			break;
		}

		if (isregular(c))
		{
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			break;
		}

		lexfail = 1;
		break;
	}

	/* a stream error takes precedence over whatever was lexed */
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}

	if (lexfail)
	{
		*tok = PDF_TERROR;
		return fz_throw("lexical error");
	}

	return fz_okay;
}
/*
 * Decode a literal string from f into buf (capacity n bytes), starting
 * just after the opening parenthesis.
 *
 * Nested parentheses are kept and counted; the parenthesis that balances
 * the opening one ends the string.  Backslash escapes are decoded:
 * n r t b f ( ) \, one to three octal digits, and a backslash followed by
 * an end of line, which is dropped.  For any other character after a
 * backslash the backslash is ignored.  Decoding also stops at EOF and
 * when buf is full.
 *
 * Returns the number of bytes stored in buf; buf is not terminated.
 */
static int
lexstring(fz_stream *f, unsigned char *buf, int n)
{
	unsigned char *s = buf;
	unsigned char *e = buf + n;
	int bal = 1;
	int oct;
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);

		/* stop at end of input instead of storing EOF as data */
		if (c == EOF)
			break;

		if (c == '(')
		{
			bal++;
			*s++ = c;
		}
		else if (c == ')')
		{
			bal --;
			if (bal == 0)
				break;
			*s++ = c;
		}
		else if (c == '\\')
		{
			c = fz_readbyte(f);
			if (c == EOF)
				break;

			if (c == 'n') *s++ = '\n';
			else if (c == 'r') *s++ = '\r';
			else if (c == 't') *s++ = '\t';
			else if (c == 'b') *s++ = '\b';
			else if (c == 'f') *s++ = '\f';
			else if (c == '(') *s++ = '(';
			else if (c == ')') *s++ = ')';
			else if (c == '\\') *s++ = '\\';
			/* octal escape: only the digits 0 to 7 belong to it */
			else if (c >= '0' && c <= '7')
			{
				oct = c - '0';
				c = fz_peekbyte(f);
				if (c >= '0' && c <= '7')
				{
					fz_readbyte(f);
					oct = oct * 8 + (c - '0');
					c = fz_peekbyte(f);
					if (c >= '0' && c <= '7')
					{
						fz_readbyte(f);
						oct = oct * 8 + (c - '0');
					}
				}
				*s++ = oct;
			}
			else if (c == '\n')
				;
			else if (c == '\r')
			{
				c = fz_peekbyte(f);
				if (c == '\n')
					fz_readbyte(f);
			}
			else
				*s++ = c;
		}
		else
		{
			*s++ = c;
		}
	}

	return s - buf;
}
/*
 * Read the next token from f.
 *
 * White space and comments are skipped.  *tok receives the token type;
 * for names, strings, numbers and keywords the text is left in buf
 * (capacity n) and its length in *sl.
 *
 * Returns fz_okay, or a "lexical error" with *tok set to PDF_TERROR for
 * an unmatched ')' or a '>' that is not followed by another '>'.
 */
fz_error
pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
	int c, next;

	for (;;)
	{
		c = fz_readbyte(f);

		switch (c)
		{
		case EOF:
			*tok = PDF_TEOF;
			return fz_okay;

		case ISWHITE:
			lexwhite(f);
			continue;

		case '%':
			lexcomment(f);
			continue;

		case '[':
			*tok = PDF_TOARRAY;
			return fz_okay;
		case ']':
			*tok = PDF_TCARRAY;
			return fz_okay;
		case '{':
			*tok = PDF_TOBRACE;
			return fz_okay;
		case '}':
			*tok = PDF_TCBRACE;
			return fz_okay;

		case '/':
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			return fz_okay;

		case '(':
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			return fz_okay;

		case ')':
			goto lexerror;

		case '<':
			/* "<<" opens a dictionary, a single '<' a hex string */
			next = fz_readbyte(f);
			if (next == '<')
			{
				*tok = PDF_TODICT;
				return fz_okay;
			}
			fz_unreadbyte(f);
			*sl = lexhexstring(f, buf, n);
			*tok = PDF_TSTRING;
			return fz_okay;

		case '>':
			next = fz_readbyte(f);
			if (next == '>')
			{
				*tok = PDF_TCDICT;
				return fz_okay;
			}
			goto lexerror;

		case ISNUMBER:
			/* the number lexer wants to see the first character itself */
			fz_unreadbyte(f);
			*sl = lexnumber(f, buf, n, tok);
			return fz_okay;

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unreadbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			return fz_okay;
		}
	}

lexerror:
	*tok = PDF_TERROR;
	return fz_throw("lexical error");
}
/*
 * Read the next token from f and return its type.
 *
 * White space and comments are skipped.  For names, strings, numbers and
 * keywords the text is left in buf (capacity n) and its length in *sl.
 * Returns PDF_TEOF at end of input and PDF_TERROR for a '>' that is not
 * followed by another '>' or for a character that starts no token.
 */
int
pdf_lex(fz_stream *f, unsigned char *buf, int n, int *sl)
{
	for (;;)
	{
		int c = fz_peekbyte(f);

		if (c == EOF)
			return PDF_TEOF;

		if (iswhite(c))
		{
			lexwhite(f);
			continue;
		}

		switch (c)
		{
		case '%':
			lexcomment(f);
			continue;

		case '/':
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			return PDF_TNAME;

		case '(':
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			return PDF_TSTRING;

		case '<':
			/* "<<" opens a dictionary, a single '<' a hex string */
			fz_readbyte(f);
			if (fz_peekbyte(f) == '<')
			{
				fz_readbyte(f);
				return PDF_TODICT;
			}
			*sl = lexhexstring(f, buf, n);
			return PDF_TSTRING;

		case '>':
			/* the byte after '>' is consumed whether or not it matches */
			fz_readbyte(f);
			if (fz_readbyte(f) == '>')
				return PDF_TCDICT;
			return PDF_TERROR;

		case '[':
			fz_readbyte(f);
			return PDF_TOARRAY;

		case ']':
			fz_readbyte(f);
			return PDF_TCARRAY;

		case '{':
			fz_readbyte(f);
			return PDF_TOBRACE;

		case '}':
			fz_readbyte(f);
			return PDF_TCBRACE;

		default:
			if (isnumber(c))
			{
				lexnumber(f, buf, n);
				*sl = strlen(buf);
				return strchr(buf, '.') ? PDF_TREAL : PDF_TINT;
			}
			if (isregular(c))
			{
				lexname(f, buf, n);
				*sl = strlen(buf);
				return tokenfromkeyword(buf);
			}
			return PDF_TERROR;
		}
	}
}
/*
 * Decode a literal string from f into buf (capacity n bytes), starting
 * just after the opening parenthesis.
 *
 * Nested parentheses are kept and counted; the parenthesis that balances
 * the opening one ends the string.  Backslash escapes are decoded:
 * n r t b f ( ) \, one to three octal digits, and a backslash followed by
 * an end of line, which is dropped.  For any other character after a
 * backslash the backslash is ignored.  Decoding also stops at EOF and
 * when buf is full.
 *
 * Returns the number of bytes stored in buf; buf is not terminated.
 */
static int
lexstring(fz_stream *f, char *buf, int n)
{
	char *s = buf;
	char *e = buf + n;
	int bal = 1;
	int oct;
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);
		switch (c)
		{
		case EOF:
			goto end;
		case '(':
			bal++;
			*s++ = c;
			break;
		case ')':
			bal --;
			if (bal == 0)
				goto end;
			*s++ = c;
			break;
		case '\\':
			c = fz_readbyte(f);
			switch (c)
			{
			case EOF:
				goto end;
			case 'n':
				*s++ = '\n';
				break;
			case 'r':
				*s++ = '\r';
				break;
			case 't':
				*s++ = '\t';
				break;
			case 'b':
				*s++ = '\b';
				break;
			case 'f':
				*s++ = '\f';
				break;
			case '(':
				*s++ = '(';
				break;
			case ')':
				*s++ = ')';
				break;
			case '\\':
				*s++ = '\\';
				break;
			/* octal escape: only 0 to 7 are octal digits, so 8 and 9 take the default path */
			case '0': case '1': case '2': case '3':
			case '4': case '5': case '6': case '7':
				oct = c - '0';
				c = fz_readbyte(f);
				if (c >= '0' && c <= '7')
				{
					oct = oct * 8 + (c - '0');
					c = fz_readbyte(f);
					if (c >= '0' && c <= '7')
						oct = oct * 8 + (c - '0');
					else if (c != EOF)
						fz_unreadbyte(f);
				}
				else if (c != EOF)
					fz_unreadbyte(f);
				*s++ = oct;
				break;
			case '\n':
				break;
			case '\r':
				c = fz_readbyte(f);
				if ((c != '\n') && (c != EOF))
					fz_unreadbyte(f);
				break;
			default:
				*s++ = c;
			}
			break;
		default:
			*s++ = c;
			break;
		}
	}
end:
	return s - buf;
}
/*
 * Read a name from f into s (capacity n, including the terminator).
 *
 * Characters are copied until white space or a delimiter, which is pushed
 * back, or EOF.  "#xy" with two hex digits is stored as the byte it
 * encodes.  A '#' followed by a non-hex character ends the name without
 * storing anything for it; a '#' followed by a single hex digit stores
 * that digit as the high nibble and ends the name.  In both cases the
 * offending character is pushed back unless it was EOF.
 */
static void
lexname(fz_stream *f, char *s, int n)
{
	char *out = s;
	int room = n;

	while (room > 1)
	{
		int hi, lo;
		int c = fz_readbyte(f);

		switch (c)
		{
		case EOF:
			goto done;
		case ISWHITE:
		case ISDELIM:
			fz_unreadbyte(f);
			goto done;
		case '#':
			break;
		default:
			*out++ = c;
			room--;
			continue;
		}

		/* only '#' gets here: first hex digit */
		c = fz_readbyte(f);
		switch (c)
		{
		case RANGE_0_9:
			hi = c - '0';
			break;
		case RANGE_a_f:
			hi = c - 'a' + 10;
			break;
		case RANGE_A_F:
			hi = c - 'A' + 10;
			break;
		case EOF:
			goto done;
		default:
			fz_unreadbyte(f);
			goto done;
		}

		/* second hex digit */
		c = fz_readbyte(f);
		switch (c)
		{
		case RANGE_0_9:
			lo = c - '0';
			break;
		case RANGE_a_f:
			lo = c - 'a' + 10;
			break;
		case RANGE_A_F:
			lo = c - 'A' + 10;
			break;
		case EOF:
			*out++ = hi << 4;
			goto done;
		default:
			fz_unreadbyte(f);
			*out++ = hi << 4;
			goto done;
		}

		*out++ = (hi << 4) + lo;
		room--;
	}

done:
	*out = '\0';
}
/*
 * Reposition stm.  whence is 0 for an absolute offset, 1 for an offset
 * relative to the current position (turned into an absolute one up
 * front); any other value is handled per stream kind.
 *
 * File streams seek with lseek and empty the buffer.  Filter streams can
 * only move forward to an absolute offset, which is done by reading and
 * discarding bytes.  Buffer streams move the read pointer, clamped to the
 * buffer, measuring from the start for whence 0 and from the end
 * otherwise.  A failed lseek and an unsupported filter seek mark the
 * stream dead.
 */
fz_error
fz_seek(fz_stream *stm, int offset, int whence)
{
	fz_error error;
	fz_buffer *buf = stm->buffer;
	int t, c;

	if (stm->dead)
		return fz_throw("assert: seek in dead stream");

	if (whence == 1)
	{
		int cur = fz_tell(stm);
		if (cur < 0)
			return fz_throw("cannot tell current position");
		offset = cur + offset;
		whence = 0;
	}

	buf->eof = 0;

	if (stm->kind == FZ_SFILE)
	{
		t = lseek(stm->file, offset, whence);
		if (t < 0)
		{
			stm->dead = 1;
			return fz_throw("syserr: lseek: %s", strerror(errno));
		}

		/* whatever was buffered belongs to the old position */
		buf->rp = buf->bp;
		buf->wp = buf->bp;
		return fz_okay;
	}

	if (stm->kind == FZ_SFILTER)
	{
		if (whence != 0)
		{
			stm->dead = 1;
			return fz_throw("assert: relative seek in filter");
		}

		if (offset < fz_tell(stm))
		{
			stm->dead = 1;
			return fz_throw("assert: seek backwards in filter");
		}

		while (fz_tell(stm) < offset)
		{
			c = fz_readbyte(stm);
			if (c != EOF)
				continue;

			/* running out of data is fine, a read error is not */
			error = fz_readerror(stm);
			if (error)
				return fz_rethrow(error, "cannot seek forward in filter");
			break;
		}
		return fz_okay;
	}

	if (stm->kind == FZ_SBUFFER)
	{
		buf->rp = CLAMP((whence == 0 ? buf->bp : buf->ep) + offset, buf->bp, buf->ep);
		return fz_okay;
	}

	return fz_throw("unknown stream type");
}
/*
 * Parse an indirect object "oid gid obj <value> stream|endobj" from file.
 *
 * On success *op receives the parsed value and, where the pointers are
 * non-nil, *ooid and *ogid receive the object and generation numbers and
 * *ostmofs the offset of the stream data, or 0 when the object ends with
 * 'endobj'.
 *
 * Returns nil on success.  On failure the partially built value is
 * dropped and either the error of the failing constructor or parser, or a
 * "corrupt indirect object" syntax error, is returned.
 */
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap,
	int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0, stmofs = 0;
	int tok, len;
	int refnum, refgen;
	int havetok = 0;	/* set when the token after the value is already in tok */

	/* header: two integers and the 'obj' keyword */
	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto cleanup;

	/* the value itself */
	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
	case PDF_TNULL:
		error = fz_newnull(&obj);
		break;
	case PDF_TTRUE:
		error = fz_newbool(&obj, 1);
		break;
	case PDF_TFALSE:
		error = fz_newbool(&obj, 0);
		break;
	case PDF_TNAME:
		error = fz_newname(&obj, buf);
		break;
	case PDF_TREAL:
		error = fz_newreal(&obj, atof(buf));
		break;
	case PDF_TSTRING:
		error = fz_newstring(&obj, buf, len);
		break;
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, file, buf, cap);
		break;
	case PDF_TODICT:
		error = pdf_parsedict(&obj, file, buf, cap);
		break;

	case PDF_TINT:
		/* either a plain integer or the start of an "a b R" reference */
		refnum = atoi(buf);
		tok = pdf_lex(file, buf, cap, &len);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			error = fz_newint(&obj, refnum);
			havetok = 1;
			break;
		}
		if (tok != PDF_TINT)
			goto cleanup;
		refgen = atoi(buf);
		tok = pdf_lex(file, buf, cap, &len);
		if (tok != PDF_TR)
			goto cleanup;
		error = fz_newindirect(&obj, refnum, refgen);
		break;

	default:
		goto cleanup;
	}

	if (error)
		goto cleanup;

	if (!havetok)
		tok = pdf_lex(file, buf, cap, &len);

	if (tok == PDF_TSTREAM)
	{
		/* consume the end of line marker after the keyword */
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			if (fz_peekbyte(file) == '\n')
				fz_readbyte(file);
			else
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
		}
		stmofs = fz_tell(file);
	}
	else if (tok != PDF_TENDOBJ)
	{
		goto cleanup;
	}

	if (ooid) *ooid = oid;
	if (ogid) *ogid = gid;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return nil;

cleanup:
	if (obj)
		fz_dropobj(obj);
	if (error)
		return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}