/*
 * Read one subsection of a cross-reference stream into xref->table.
 *
 * stm        decoded xref stream to read entries from
 * i0         index of the first table slot covered by the subsection
 * i1         number of entries in the subsection
 * w0,w1,w2   byte widths of the three big-endian fields of each entry
 *
 * Slots whose type is already set are left untouched, but the bytes of
 * the entry are still consumed from the stream. A zero-width first field
 * defaults the type to 1; zero-width second and third fields yield 0.
 *
 * Returns fz_okay, or an error if the range does not fit the table or the
 * stream ends early.
 */
static fz_error
readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
{
	fz_error error;
	int i, n;

	if (i0 < 0)
		return fz_throw("xref stream has too many entries");
	if (i1 < 0)
		return fz_throw("xref stream has negative entry count");
	/*
	 * Compare against the remaining room instead of computing i0 + i1:
	 * both values come from the file, and a wrapped sum would slip past
	 * the check and index outside the table.
	 */
	if (i1 > xref->len - i0)
		return fz_throw("xref stream has too many entries");

	for (i = i0; i < i0 + i1; i++)
	{
		/* Unsigned accumulators: shifting into the sign bit of an int is undefined. */
		unsigned int a = 0;
		unsigned int b = 0;
		unsigned int c = 0;

		if (fz_peekbyte(stm) == EOF)
		{
			error = fz_readerror(stm);
			if (error)
				return fz_rethrow(error, "truncated xref stream");
			return fz_throw("truncated xref stream");
		}

		for (n = 0; n < w0; n++)
			a = (a << 8) + (unsigned int)fz_readbyte(stm);
		for (n = 0; n < w1; n++)
			b = (b << 8) + (unsigned int)fz_readbyte(stm);
		for (n = 0; n < w2; n++)
			c = (c << 8) + (unsigned int)fz_readbyte(stm);

		error = fz_readerror(stm);
		if (error)
			return fz_rethrow(error, "truncated xref stream");

		/* Only fill slots that have not been set yet. */
		if (!xref->table[i].type)
		{
			int t = w0 ? (int)a : 1;
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
			xref->table[i].ofs = w1 ? (int)b : 0;
			xref->table[i].gen = w2 ? (int)c : 0;
		}
	}

	return fz_okay;
}
/*
 * Read the cross-reference section located at file offset ofs.
 *
 * After seeking and skipping white space, the first byte decides which
 * reader is used: 'x' selects readoldxref, a decimal digit selects
 * readnewxref. Any other byte is reported as an unrecognized format.
 */
static fz_error
readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error error;
	int first;

	error = fz_seek(xref->file, ofs, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to xref");

	/* Discard leading white space in front of the section. */
	while (iswhite(fz_peekbyte(xref->file)))
	{
		fz_readbyte(xref->file);
	}

	first = fz_peekbyte(xref->file);

	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	if (first == 'x')
		error = readoldxref(trailerp, xref, buf, cap);
	else if (first >= '0' && first <= '9')
		error = readnewxref(trailerp, xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format");

	/* Both readers share the same failure report. */
	if (error)
		return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);

	return fz_okay;
}
/*
 * Read the trailer that belongs to the xref section at xref->startxref.
 *
 * Seeks to startxref, skips white space, then dispatches on the first
 * byte: 'x' goes to readoldtrailer, a decimal digit goes to
 * readnewtrailer. Anything else is reported as an unrecognized format.
 */
static fz_error
readtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int c;
	int isdigit0to9;

	error = fz_seek(xref->file, xref->startxref, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to startxref");

	/* Discard leading white space. */
	while (iswhite(fz_peekbyte(xref->file)))
	{
		fz_readbyte(xref->file);
	}

	c = fz_peekbyte(xref->file);

	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	isdigit0to9 = (c >= '0' && c <= '9');
	if (c != 'x' && !isdigit0to9)
		return fz_throw("cannot recognize xref format: '%c'", c);

	if (c == 'x')
		error = readoldtrailer(xref, buf, cap);
	else
		error = readnewtrailer(xref, buf, cap);

	if (error)
		return fz_rethrow(error, "cannot read trailer");

	return fz_okay;
}
/*
 * Reposition a stream.
 *
 * whence == 1 (relative to the current position) is first rewritten into
 * an absolute seek using fz_tell. After that:
 *
 *   FZ_SFILE    offset and whence are handed to lseek and the buffer is
 *               emptied; a failing lseek marks the stream dead.
 *   FZ_SFILTER  only absolute forward seeks are possible; they are done
 *               by reading and discarding bytes. Hitting the end of data
 *               without a read error is not treated as a failure. Any
 *               other request marks the stream dead.
 *   FZ_SBUFFER  the read pointer is moved inside [bp, ep]; whence == 0
 *               counts from bp, any other whence counts from ep.
 *               Out-of-range offsets are clamped to the buffer bounds.
 *
 * Returns fz_okay on success, otherwise an error.
 */
fz_error
fz_seek(fz_stream *stm, int offset, int whence)
{
	fz_error error;
	fz_buffer *buf = stm->buffer;
	int t, c;
	int size;

	if (stm->dead)
		return fz_throw("assert: seek in dead stream");

	if (whence == 1)
	{
		int cur = fz_tell(stm);
		if (cur < 0)
			return fz_throw("cannot tell current position");
		offset = cur + offset;
		whence = 0;
	}

	buf->eof = 0;

	switch (stm->kind)
	{
	case FZ_SFILE:
		t = lseek(stm->file, offset, whence);
		if (t < 0)
		{
			stm->dead = 1;
			return fz_throw("syserr: lseek: %s", strerror(errno));
		}
		/* Buffered data no longer matches the file position. */
		buf->rp = buf->bp;
		buf->wp = buf->bp;
		return fz_okay;

	case FZ_SFILTER:
		if (whence == 0)
		{
			if (offset < fz_tell(stm))
			{
				stm->dead = 1;
				return fz_throw("assert: seek backwards in filter");
			}
			while (fz_tell(stm) < offset)
			{
				c = fz_readbyte(stm);
				if (c == EOF)
				{
					error = fz_readerror(stm);
					if (error)
						return fz_rethrow(error, "cannot seek forward in filter");
					break;
				}
			}
			return fz_okay;
		}
		stm->dead = 1;
		return fz_throw("assert: relative seek in filter");

	case FZ_SBUFFER:
		/*
		 * Clamp the integer offset before doing pointer arithmetic.
		 * Forming a pointer outside [bp, ep] is undefined and may wrap
		 * around, which would defeat a clamp applied afterwards.
		 */
		size = (int)(buf->ep - buf->bp);
		if (whence == 0)
		{
			if (offset < 0)
				offset = 0;
			else if (offset > size)
				offset = size;
			buf->rp = buf->bp + offset;
		}
		else
		{
			if (offset > 0)
				offset = 0;
			else if (offset < -size)
				offset = -size;
			buf->rp = buf->ep + offset;
		}
		return fz_okay;

	default:
		return fz_throw("unknown stream type");
	}
}
/*
 * Parse an indirect object definition: "<num> <gen> obj <value>" followed
 * by either "endobj" or "stream".
 *
 * op       receives the parsed value object
 * onum     if non-null, receives the object number
 * ogen     if non-null, receives the generation number
 * ostmofs  if non-null, receives the position reported by fz_tell right
 *          after the stream keyword and its line ending, or 0 when the
 *          object is not followed by a stream
 *
 * A missing value ("obj" directly followed by "endobj") yields a null
 * object. A trailing token that is neither "endobj" nor "stream" only
 * produces a warning.
 */
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0;
	int streamofs = 0;
	int havetail = 0;	/* set once the token after the value is already in tok */
	int refnum, refgen;
	int slen;
	pdf_token_e tok;

	/* Header: object number, generation number, "obj". */
	error = pdf_lex(&tok, file, buf, cap, &slen);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &slen);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &slen);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	/* First token of the value. */
	error = pdf_lex(&tok, file, buf, cap, &slen);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TNAME:
		obj = fz_newname(buf);
		break;

	case PDF_TREAL:
		obj = fz_newreal(atof(buf));
		break;

	case PDF_TSTRING:
		obj = fz_newstring(buf, slen);
		break;

	case PDF_TTRUE:
		obj = fz_newbool(1);
		break;

	case PDF_TFALSE:
		obj = fz_newbool(0);
		break;

	case PDF_TNULL:
		obj = fz_newnull();
		break;

	case PDF_TINT:
		/* Either a plain integer or the start of an "a b R" reference. */
		refnum = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &slen);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			/* Plain integer; the look-ahead token is the trailing keyword. */
			obj = fz_newint(refnum);
			havetail = 1;
			break;
		}

		if (tok != PDF_TINT)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

		refgen = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &slen);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

		if (tok != PDF_TR)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

		obj = fz_newindirect(refnum, refgen, xref);
		break;

	case PDF_TENDOBJ:
		/* No value at all; the trailing keyword is already in tok. */
		obj = fz_newnull();
		havetail = 1;
		break;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	if (!havetail)
	{
		error = pdf_lex(&tok, file, buf, cap, &slen);
		if (error)
		{
			fz_dropobj(obj);
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		}
	}

	if (tok == PDF_TSTREAM)
	{
		/* Consume the line ending after "stream": one byte, or CR LF. */
		int ch = fz_readbyte(file);
		if (ch == '\r')
		{
			if (fz_peekbyte(file) == '\n')
				fz_readbyte(file);
			else
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
		}

		error = fz_readerror(file);
		if (error)
		{
			fz_dropobj(obj);
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		}

		streamofs = fz_tell(file);
	}
	else if (tok != PDF_TENDOBJ)
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = streamofs;
	*op = obj;

	return fz_okay;
}
/*
 * Read the trailer dictionary that follows an old-format ("xref" keyword)
 * cross-reference table, without loading the table entries.
 *
 * Each subsection header "<first> <count>" is read and its entries are
 * skipped by seeking forward 20 bytes per entry, until a line that does
 * not start with a digit is reached. The "trailer" keyword and the
 * dictionary that follows are then parsed into xref->trailer.
 *
 * buf/cap is scratch space for line reading and lexing.
 */
static fz_error
readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
	/* A value guaranteed to fit in an int, obtained without <limits.h>. */
	const int maxint = (int)(~0u >> 1);
	fz_error error;
	int len;
	char *s;
	int n;
	int t;
	pdf_token_e tok;
	int c;

	pdf_logxref("load old xref format trailer\n");

	error = fz_readline(xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot read xref marker");
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		error = fz_readline(xref->file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot read xref count");

		s = buf;
		/* The first field is not needed here; step over it. */
		(void) strsep(&s, " ");
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(strsep(&s, " "));

		/*
		 * The count comes straight from the file. A negative value would
		 * seek backwards and could make this loop re-read the same header
		 * forever.
		 */
		if (len < 0)
			return fz_throw("invalid range marker in xref");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			/* Negate as int; negating the size_t from strlen would wrap. */
			error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
			if (error)
				return fz_rethrow(error, "cannot seek in file");
		}

		t = fz_tell(xref->file);
		if (t < 0)
			return fz_throw("cannot tell in file");

		/* Make sure t + 20 * len cannot overflow before computing it. */
		if (len > (maxint - t) / 20)
			return fz_throw("xref has too many entries");

		error = fz_seek(xref->file, t + 20 * len, 0);
		if (error)
			return fz_rethrow(error, "cannot seek in file");
	}

	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read from file");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TTRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TODICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}
/*
 * Read the next token from stream f.
 *
 * White space and '%' comments are skipped. The token kind is stored in
 * *tok. For names, numbers and keywords the helpers are handed buf (with
 * capacity n) and *sl is set to strlen(buf); for literal and hex strings
 * *sl is the value returned by the string helper. Other tokens leave *sl
 * untouched. End of input yields PDF_TEOF.
 *
 * Returns fz_okay on success. On a read error or an unrecognized
 * character *tok is set to PDF_TERROR and an error is returned.
 */
fz_error
pdf_lex(pdf_token_e *tok, fz_stream *f, char *buf, int n, int *sl)
{
	fz_error error;
	int badinput = 0;
	int ch;

	for (;;)
	{
		ch = fz_peekbyte(f);

		if (ch == EOF)
		{
			*tok = PDF_TEOF;
			break;
		}

		/* Separators produce no token; go round again. */
		if (iswhite(ch))
		{
			lexwhite(f);
			continue;
		}
		if (ch == '%')
		{
			lexcomment(f);
			continue;
		}

		switch (ch)
		{
		case '/':
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			break;

		case '(':
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			break;

		case '<':
			/* "<<" opens a dictionary, a single '<' starts a hex string. */
			fz_readbyte(f);
			ch = fz_peekbyte(f);
			if (ch == '<')
			{
				fz_readbyte(f);
				*tok = PDF_TODICT;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				*tok = PDF_TSTRING;
			}
			break;

		case '>':
			/* Only ">>" is valid here. */
			fz_readbyte(f);
			ch = fz_readbyte(f);
			if (ch == '>')
				*tok = PDF_TCDICT;
			else
				badinput = 1;
			break;

		case '[':
			fz_readbyte(f);
			*tok = PDF_TOARRAY;
			break;

		case ']':
			fz_readbyte(f);
			*tok = PDF_TCARRAY;
			break;

		case '{':
			fz_readbyte(f);
			*tok = PDF_TOBRACE;
			break;

		case '}':
			fz_readbyte(f);
			*tok = PDF_TCBRACE;
			break;

		default:
			if (isnumber(ch))
			{
				lexnumber(f, buf, n);
				*sl = strlen(buf);
				/* A decimal point marks a real, otherwise an integer. */
				*tok = strchr(buf, '.') ? PDF_TREAL : PDF_TINT;
			}
			else if (isregular(ch))
			{
				lexname(f, buf, n);
				*sl = strlen(buf);
				*tok = pdf_tokenfromkeyword(buf);
			}
			else
			{
				badinput = 1;
			}
			break;
		}

		/* A token or a lexical failure was produced. */
		break;
	}

	/* A read error takes precedence over everything else. */
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}

	if (badinput)
	{
		*tok = PDF_TERROR;
		return fz_throw("lexical error");
	}

	return fz_okay;
}