/*
 * Reposition a stream.
 *
 * whence 0 is treated as an absolute offset and whence 1 as relative to
 * the current position. When the stream has a seek callback the request
 * is delegated to it (relative seeks are converted to absolute first).
 * Without a callback, forward movement is emulated by reading and
 * discarding bytes; whence 2 cannot be emulated and only produces a
 * warning.
 */
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	int skip;

	/* Reset bit reading */
	stm->avail = 0;

	if (stm->seek)
	{
		if (whence == 1)
		{
			offset += fz_tell(stm);
			whence = 0;
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
		return;
	}

	if (whence == 2)
	{
		fz_warn(stm->ctx, "cannot seek");
		return;
	}

	/* Distance still to travel from the current position. */
	skip = (whence == 0) ? offset - fz_tell(stm) : offset;
	if (skip < 0)
		fz_warn(stm->ctx, "cannot seek backwards");

	/* dog slow, but rare enough */
	for (; skip > 0; skip--)
	{
		if (fz_read_byte(stm) == EOF)
		{
			fz_warn(stm->ctx, "seek failed");
			break;
		}
	}
}
/*
 * Locate the "startxref" keyword in the last kilobyte of the file and
 * store the number that follows it in xref->startxref. Also records the
 * file size in xref->filesize.
 *
 * Returns fz_okay on success, or an error if the tail of the file cannot
 * be read or contains no "startxref" keyword.
 */
static fz_error
pdf_readstartxref(pdf_xref *xref)
{
	unsigned char buf[1024];
	char digits[32];
	int t, n;
	int i, len;

	fz_seek(xref->file, 0, 2);

	xref->filesize = fz_tell(xref->file);

	t = MAX(0, xref->filesize - (int)sizeof buf);
	fz_seek(xref->file, t, 0);

	n = fz_read(xref->file, buf, sizeof buf);
	if (n < 0)
		return fz_rethrow(n, "cannot read from file");

	/* Search backwards so that the last "startxref" in the file wins. */
	for (i = n - 9; i >= 0; i--)
	{
		if (memcmp(buf + i, "startxref", 9) == 0)
		{
			i += 9;

			/* Check the bound before touching buf[i]. */
			while (i < n && iswhite(buf[i]))
				i ++;

			/*
			 * buf is not NUL terminated, so copy the (bounded) tail into
			 * a terminated scratch buffer before handing it to atoi.
			 */
			len = n - i;
			if (len > (int)sizeof digits - 1)
				len = (int)sizeof digits - 1;
			memcpy(digits, buf + i, len);
			digits[len] = '\0';

			xref->startxref = atoi(digits);
			pdf_logxref("startxref %d\n", xref->startxref);
			return fz_okay;
		}
	}

	return fz_throw("cannot find startxref");
}
/*
 * Find the ZIP "end of central directory" record ("PK\5\6") by scanning
 * backwards from the end of the file, then hand its offset to
 * xps_read_zip_dir().
 *
 * The search covers at most the last 0xFFFF + sizeof buf bytes. Windows
 * of sizeof buf bytes are read from the end towards the start; each
 * window overlaps the previous one by at least 4 bytes so a signature
 * straddling a window boundary is still found.
 *
 * Returns the result of xps_read_zip_dir() when the signature is found,
 * otherwise an error.
 */
static int
xps_find_and_read_zip_dir(xps_context *ctx)
{
	unsigned char buf[512];
	const int bufsize = (int)sizeof buf;
	int file_size, back, maxback;
	int i, n;

	fz_seek(ctx->file, 0, SEEK_END);
	file_size = fz_tell(ctx->file);

	maxback = MIN(file_size, 0xFFFF + bufsize);
	back = MIN(maxback, bufsize);

	/*
	 * Always scan at least one window (files no larger than the buffer
	 * would otherwise never be searched), and clamp the final window to
	 * maxback so the leading part of the search range is not skipped.
	 */
	for (;;)
	{
		fz_seek(ctx->file, file_size - back, 0);

		n = fz_read(ctx->file, buf, sizeof buf);
		if (n < 0)
			return fz_error_make(ctx->ctx, "cannot read end of central directory");

		/* Include offset 0: the record may start exactly at the window start. */
		for (i = n - 4; i >= 0; i--)
			if (!memcmp(buf + i, "PK\5\6", 4))
				return xps_read_zip_dir(ctx, file_size - back + i);

		if (back >= maxback)
			break;

		back = MIN(back + bufsize - 4, maxback);
	}

	return fz_error_make(ctx->ctx, "cannot find end of central directory");
}
/*
 * Locate the "startxref" keyword in the last kilobyte of the file and
 * store the number that follows it in xref->startxref.
 *
 * Returns fz_okay on success, or an error if seeking/reading fails or no
 * "startxref" keyword is present in the tail of the file.
 */
static fz_error
readstartxref(pdf_xref *xref)
{
	fz_error error;
	unsigned char buf[1024];
	char digits[32];
	int t, n;
	int i, len;

	error = fz_seek(xref->file, 0, 2);
	if (error)
		return fz_rethrow(error, "cannot seek to end of file");

	t = MAX(0, fz_tell(xref->file) - ((int)sizeof buf));

	error = fz_seek(xref->file, t, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to offset %d", t);

	error = fz_read(&n, xref->file, buf, sizeof buf);
	if (error)
		return fz_rethrow(error, "cannot read from file");

	/* Search backwards so that the last "startxref" in the file wins. */
	for (i = n - 9; i >= 0; i--)
	{
		if (memcmp(buf + i, "startxref", 9) == 0)
		{
			i += 9;

			/* Check the bound before touching buf[i]. */
			while (i < n && iswhite(buf[i]))
				i ++;

			/*
			 * buf is not NUL terminated, so copy the (bounded) tail into
			 * a terminated scratch buffer before handing it to atoi.
			 */
			len = n - i;
			if (len > (int)sizeof digits - 1)
				len = (int)sizeof digits - 1;
			memcpy(digits, buf + i, len);
			digits[len] = '\0';

			xref->startxref = atoi(digits);
			return fz_okay;
		}
	}

	return fz_throw("cannot find startxref");
}
/*
 * Read the trailer dictionary that follows a classic ("xref" keyword)
 * cross-reference table, skipping over the table entries themselves.
 *
 * Each subsection header "<first> <count>" is read and the file position
 * is advanced by 20 bytes per entry. The parsed trailer dictionary is
 * stored in xref->trailer.
 *
 * buf/cap is scratch space used for line reading and lexing.
 * Returns fz_okay on success or an error describing what was malformed.
 */
static fz_error
pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int len;
	char *s;
	int n;
	int t;
	pdf_token_e tok;
	int c;

	pdf_logxref("load old xref format trailer\n");

	fz_readline(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* A subsection header starts with a digit; anything else ends the table. */
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_readline(xref->file, buf, cap);
		s = buf;
		fz_strsep(&s, " "); /* ignore ofs */
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(fz_strsep(&s, " "));

		/*
		 * A negative count would seek backwards and could make this loop
		 * re-read the same subsection header forever.
		 */
		if (len < 0)
			return fz_throw("negative entry count in xref range marker");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);

		t = fz_tell(xref->file);
		if (t < 0)
			return fz_throw("cannot tell in file");

		/* Skip the entries of this subsection: 20 bytes each. */
		fz_seek(xref->file, t + 20 * len, 0);
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TTRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TODICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}
/* SumatraPDF: allow to clone a stream */
/*
 * Create a second stream over the same source by calling the stream's
 * reopen callback, then position the copy at the original's current
 * offset. Throws if the stream has no reopen callback.
 */
fz_stream *
fz_clone_stream(fz_context *ctx, fz_stream *stm)
{
	fz_stream *copy;

	if (stm->reopen == NULL)
		fz_throw(ctx, FZ_ERROR_GENERIC, "can't clone stream without reopening");

	copy = stm->reopen(ctx, stm);

	/* Bring the copy to the same read position as the original. */
	fz_seek(copy, fz_tell(stm), 0);

	return copy;
}
/*
 * Reposition a stream.
 *
 * whence 0 is treated as an absolute offset and whence 1 as relative to
 * the current position. When the stream has a seek callback, an absolute
 * target that still lies inside the buffered data is served by moving
 * the read pointer only; otherwise the callback is invoked. Without a
 * callback, forward movement is emulated by reading and discarding
 * bytes; whence 2 cannot be emulated and only prints a message.
 */
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	if (stm->seek)
	{
		if (whence == 1)
		{
			offset = fz_tell(stm) + offset;
			whence = 0;
		}
		if (whence == 0)
		{
			/*
			 * NOTE(review): stm->pos appears to be the stream offset that
			 * corresponds to wp; dist is then how far before the end of
			 * the buffered data the target lies -- confirm.
			 */
			int dist = stm->pos - offset;
			if (dist >= 0 && dist <= stm->wp - stm->bp)
			{
				stm->rp = stm->wp - dist;
				stm->eof = 0;
				return;
			}
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
	}
	else if (whence != 2)
	{
		if (whence == 0)
			offset -= fz_tell(stm);
		if (offset < 0)
			printf("cannot seek backwards\n");
		/* dog slow, but rare enough */
		while (offset-- > 0)
		{
			/* Stop at end of data instead of spinning for the whole distance. */
			if (fz_read_byte(stm) == EOF)
			{
				printf("seek failed\n");
				break;
			}
		}
	}
	else
		printf("cannot seek\n");
}
/*
 * Build a decoding stream for inline stream data, without decryption.
 *
 * The filter specification is read from "Filter" (or its abbreviation
 * "F") and the parameters from "DecodeParms" (or "DP") in stmobj. A
 * single filter name or a non-empty filter array selects the matching
 * builder; with no filters, a null filter over chain is returned using
 * length and the chain's current offset.
 */
fz_stream *
pdf_open_inline_stream(pdf_document *xref, pdf_obj *stmobj, int length, fz_stream *chain, pdf_image_params *imparams)
{
	pdf_obj *filter_spec = pdf_dict_getsa(stmobj, "Filter", "F");
	pdf_obj *decode_parms = pdf_dict_getsa(stmobj, "DecodeParms", "DP");
	fz_stream *result;

	/* don't close chain when we close this filter */
	fz_keep_stream(chain);

	if (pdf_is_name(filter_spec))
		result = build_filter(chain, xref, filter_spec, decode_parms, 0, 0, imparams);
	else if (pdf_array_len(filter_spec) > 0)
		result = build_filter_chain(chain, xref, filter_spec, decode_parms, 0, 0, imparams);
	else
		result = fz_open_null(chain, length, fz_tell(chain));

	return result;
}
/*
 * Build a decoding stream for inline stream data, without decryption.
 *
 * The filter specification is read from Filter (or its abbreviation F)
 * and the parameters from DecodeParms (or DP) in stmobj. A single filter
 * name or a non-empty filter array selects the matching builder; with no
 * filters, imparams (when given) is marked FZ_IMAGE_RAW and a null
 * filter over chain is returned using length and the chain's current
 * offset.
 */
fz_stream *
pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, fz_compression_params *imparams)
{
	pdf_obj *filter_spec = pdf_dict_geta(ctx, stmobj, PDF_NAME_Filter, PDF_NAME_F);
	pdf_obj *decode_parms = pdf_dict_geta(ctx, stmobj, PDF_NAME_DecodeParms, PDF_NAME_DP);

	/* don't close chain when we close this filter */
	fz_keep_stream(ctx, chain);

	if (pdf_is_name(ctx, filter_spec))
	{
		return build_filter(ctx, chain, doc, filter_spec, decode_parms, 0, 0, imparams);
	}
	else if (pdf_array_len(ctx, filter_spec) > 0)
	{
		return build_filter_chain(ctx, chain, doc, filter_spec, decode_parms, 0, 0, imparams);
	}
	else
	{
		/* No filters at all: the data is passed through undecoded. */
		if (imparams)
			imparams->type = FZ_IMAGE_RAW;
		return fz_open_null(ctx, chain, length, fz_tell(ctx, chain));
	}
}
/*
 * Parse an indirect object definition "<num> <gen> obj <value> ..." from
 * file.
 *
 * On success *op receives the parsed value and, when the corresponding
 * pointers are non-null, *onum / *ogen receive the object and generation
 * numbers and *ostmofs the offset of the stream data (0 when the object
 * is not followed by a "stream" keyword).
 *
 * buf/cap is scratch space for the lexer. Returns fz_okay or an error.
 */
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap, int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0;
	int stmofs = 0;
	pdf_token_e tok;
	int len;
	int first, second;
	int ch;

	/* Object number. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	/* Generation number. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	/* "obj" keyword. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	/* First token of the value. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TNULL:
		obj = fz_newnull();
		break;
	case PDF_TTRUE:
		obj = fz_newbool(1);
		break;
	case PDF_TFALSE:
		obj = fz_newbool(0);
		break;
	case PDF_TREAL:
		obj = fz_newreal(atof(buf));
		break;
	case PDF_TNAME:
		obj = fz_newname(buf);
		break;
	case PDF_TSTRING:
		obj = fz_newstring(buf, len);
		break;

	case PDF_TOARRAY:
	case PDF_TODICT:
		if (tok == PDF_TOARRAY)
			error = pdf_parsearray(&obj, xref, file, buf, cap);
		else
			error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TINT:
		/*
		 * An integer may be a plain number (followed directly by
		 * "stream"/"endobj") or the start of a reference "a b R".
		 */
		first = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			/* The terminating token has already been consumed. */
			obj = fz_newint(first);
			goto skip;
		}
		if (tok != PDF_TINT)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
		second = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok != PDF_TR)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
		obj = fz_newindirect(first, second, xref);
		break;

	case PDF_TENDOBJ:
		/* Empty object body: treated as null. */
		obj = fz_newnull();
		goto skip;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	/* Token following the value: expected to be "stream" or "endobj". */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_dropobj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TSTREAM)
	{
		/* Skip spaces and the end-of-line after the "stream" keyword. */
		do
			ch = fz_readbyte(file);
		while (ch == ' ');

		if (ch == '\r')
		{
			if (fz_peekbyte(file) == '\n')
				fz_readbyte(file);
			else
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stmofs = fz_tell(file);
	}
	else if (tok != PDF_TENDOBJ)
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stmofs;
	*op = obj;
	return fz_okay;
}
/*
 * Reposition a stream; the strategy depends on the stream kind.
 *
 * A relative seek (whence 1) is first converted to an absolute one.
 *  - FZ_SFILE: delegated to lseek(); the buffer is emptied.
 *  - FZ_SFILTER: only absolute forward seeks are possible, done by
 *    reading and discarding bytes.
 *  - FZ_SBUFFER: the read pointer is moved relative to the buffer start
 *    (whence 0) or buffer end (otherwise), clamped to the buffer.
 *
 * Returns fz_okay or an error; some failures also mark the stream dead.
 */
fz_error
fz_seek(fz_stream *stm, int offset, int whence)
{
	fz_buffer *buf = stm->buffer;
	fz_error error;
	int where, ch;

	if (stm->dead)
		return fz_throw("assert: seek in dead stream");

	if (whence == 1)
	{
		int here = fz_tell(stm);
		if (here < 0)
			return fz_throw("cannot tell current position");
		offset += here;
		whence = 0;
	}

	buf->eof = 0;

	if (stm->kind == FZ_SFILE)
	{
		where = lseek(stm->file, offset, whence);
		if (where < 0)
		{
			stm->dead = 1;
			return fz_throw("syserr: lseek: %s", strerror(errno));
		}
		/* Buffered data no longer matches the file position. */
		buf->wp = buf->bp;
		buf->rp = buf->bp;
		return fz_okay;
	}

	if (stm->kind == FZ_SFILTER)
	{
		if (whence != 0)
		{
			stm->dead = 1;
			return fz_throw("assert: relative seek in filter");
		}
		if (offset < fz_tell(stm))
		{
			stm->dead = 1;
			return fz_throw("assert: seek backwards in filter");
		}
		while (fz_tell(stm) < offset)
		{
			ch = fz_readbyte(stm);
			if (ch != EOF)
				continue;
			/* EOF may mean end of data or a read error; only the latter fails. */
			error = fz_readerror(stm);
			if (error)
				return fz_rethrow(error, "cannot seek forward in filter");
			break;
		}
		return fz_okay;
	}

	if (stm->kind == FZ_SBUFFER)
	{
		buf->rp = CLAMP((whence == 0 ? buf->bp : buf->ep) + offset, buf->bp, buf->ep);
		return fz_okay;
	}

	return fz_throw("unknown stream type");
}
pdf_obj * pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair) { pdf_obj *obj = NULL; int num = 0, gen = 0; fz_off_t stm_ofs; pdf_token tok; fz_off_t a, b; fz_var(obj); tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number"); } num = buf->i; tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num); } gen = buf->i; tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_OBJ) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen); } tok = pdf_lex(ctx, file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: obj = pdf_parse_array(ctx, doc, file, buf); break; case PDF_TOK_OPEN_DICT: obj = pdf_parse_dict(ctx, doc, file, buf); break; case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break; case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break; case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break; case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break; case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break; case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break; case PDF_TOK_INT: a = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) { obj = pdf_new_int_offset(ctx, doc, a); goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_R) { obj = pdf_new_indirect(ctx, doc, a, b); break; } } fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen); case PDF_TOK_ENDOBJ: obj = pdf_new_null(ctx, doc); goto skip; default: fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen); } fz_try(ctx) { tok = pdf_lex(ctx, file, buf); } fz_catch(ctx) { pdf_drop_obj(ctx, obj); 
fz_rethrow(ctx); } skip: if (tok == PDF_TOK_STREAM) { int c = fz_read_byte(ctx, file); while (c == ' ') c = fz_read_byte(ctx, file); if (c == '\r') { c = fz_peek_byte(ctx, file); if (c != '\n') fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen); else fz_read_byte(ctx, file); } stm_ofs = fz_tell(ctx, file); } else if (tok == PDF_TOK_ENDOBJ) { stm_ofs = 0; } else { fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen); stm_ofs = 0; } if (onum) *onum = num; if (ogen) *ogen = gen; if (ostmofs) *ostmofs = stm_ofs; return obj; }
/*
 * Parse an indirect object definition "<oid> <gid> obj <value> ..." from
 * file.
 *
 * On success *op receives the parsed value and, when the corresponding
 * pointers are non-null, *ooid / *ogid receive the object and generation
 * numbers and *ostmofs the offset of the stream data (0 when the object
 * ends with "endobj").
 *
 * Returns nil on success. On failure any partially built value is
 * dropped and either the error from the failing constructor/parser or a
 * generic "corrupt indirect object" error is returned.
 */
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap, int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0;
	int stmofs;
	int tok, len;
	int first, second;
	int ch;

	/* Header: object number, generation number, "obj". */
	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto cleanup;

	/* First token of the value. */
	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
	case PDF_TNULL:
		error = fz_newnull(&obj);
		break;
	case PDF_TTRUE:
		error = fz_newbool(&obj, 1);
		break;
	case PDF_TFALSE:
		error = fz_newbool(&obj, 0);
		break;
	case PDF_TREAL:
		error = fz_newreal(&obj, atof(buf));
		break;
	case PDF_TNAME:
		error = fz_newname(&obj, buf);
		break;
	case PDF_TSTRING:
		error = fz_newstring(&obj, buf, len);
		break;
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, file, buf, cap);
		break;
	case PDF_TODICT:
		error = pdf_parsedict(&obj, file, buf, cap);
		break;

	case PDF_TINT:
		/*
		 * An integer may be a plain number (followed directly by
		 * "stream"/"endobj") or the start of a reference "a b R".
		 */
		first = atoi(buf);
		tok = pdf_lex(file, buf, cap, &len);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			error = fz_newint(&obj, first);
			if (error)
				goto cleanup;
			/* The terminating token has already been consumed. */
			goto skip;
		}
		if (tok != PDF_TINT)
			goto cleanup;
		second = atoi(buf);
		tok = pdf_lex(file, buf, cap, &len);
		if (tok != PDF_TR)
			goto cleanup;
		error = fz_newindirect(&obj, first, second);
		break;

	default:
		goto cleanup;
	}

	if (error)
		goto cleanup;

	/* Token following the value: must be "stream" or "endobj". */
	tok = pdf_lex(file, buf, cap, &len);

skip:
	if (tok == PDF_TSTREAM)
	{
		/* Consume the end-of-line after the "stream" keyword. */
		ch = fz_readbyte(file);
		if (ch == '\r')
		{
			ch = fz_peekbyte(file);
			if (ch == '\n')
				ch = fz_readbyte(file);
			else
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
	{
		stmofs = 0;
	}
	else
	{
		goto cleanup;
	}

	if (ooid)
		*ooid = oid;
	if (ogid)
		*ogid = gid;
	if (ostmofs)
		*ostmofs = stmofs;
	*op = obj;
	return nil;

cleanup:
	if (obj)
		fz_dropobj(obj);
	/* A specific error from a callee takes precedence over the generic one. */
	if (error)
		return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}
/*
 * Write the whole document to a new file at path: header, every object,
 * a single cross-reference table and the trailer.
 *
 * When encrypt is non-nil an object is allocated for the encryption
 * dictionary and /Encrypt and /ID entries are added to the trailer.
 * On success xref->startxref is updated to the offset of the new table.
 *
 * Returns nil on success or the first error encountered.
 */
fz_error *
pdf_savexref(pdf_xref *xref, char *path, pdf_crypt *encrypt)
{
	fz_error *error;
	fz_stream *out;
	fz_obj *ref;
	int *offsets;
	int num;
	int xrefpos;
	int eoid, egen;

	pdf_logxref("savexref '%s' %p\n", path, xref);

	/* need to add encryption object for acrobat < 6 */
	if (encrypt)
	{
		pdf_logxref("make encryption dict\n");

		error = pdf_allocobject(xref, &eoid, &egen);
		if (error)
			return error;

		pdf_cryptobj(encrypt, encrypt->encrypt, eoid, egen);

		error = pdf_updateobject(xref, eoid, egen, encrypt->encrypt);
		if (error)
			return error;
	}

	/* One file offset per xref entry, filled in while writing objects. */
	offsets = fz_malloc(sizeof(int) * xref->len);
	if (!offsets)
		return fz_outofmem;

	error = fz_openwfile(&out, path);
	if (error)
	{
		fz_free(offsets);
		return error;
	}

	fz_print(out, "%%PDF-%1.1f\n", xref->version);
	fz_print(out, "%%\342\343\317\323\n\n");

	for (num = 0; num < xref->len; num++)
	{
		pdf_xrefentry *entry = &xref->table[num];

		switch (entry->type)
		{
		case 'n':
		case 'o':
		case 'a':
			/* Entries of type 'o' are written with generation 0. */
			offsets[num] = fz_tell(out);
			error = writeobject(out, xref, encrypt, num, entry->type == 'o' ? 0 : entry->gen);
			if (error)
				goto cleanup;
			break;
		default:
			offsets[num] = entry->ofs;
			break;
		}
	}

	xrefpos = fz_tell(out);
	fz_print(out, "xref\n");
	fz_print(out, "0 %d\n", xref->len);

	for (num = 0; num < xref->len; num++)
	{
		int gen = xref->table[num].gen;
		int type = xref->table[num].type;

		/* Map internal entry types onto the 'n'/'f' written to the file. */
		switch (type)
		{
		case 'o':
			gen = 0;
			type = 'n';
			break;
		case 'a':
			type = 'n';
			break;
		case 'd':
			type = 'f';
			break;
		default:
			break;
		}

		fz_print(out, "%010d %05d %c \n", offsets[num], gen, type);
	}

	fz_print(out, "\n");

	fz_print(out, "trailer\n<<\n /Size %d", xref->len);

	ref = fz_dictgets(xref->trailer, "Root");
	fz_print(out, "\n /Root %d %d R", fz_tonum(ref), fz_togen(ref));

	ref = fz_dictgets(xref->trailer, "Info");
	if (ref)
		fz_print(out, "\n /Info %d %d R", fz_tonum(ref), fz_togen(ref));

	if (encrypt)
	{
		fz_print(out, "\n /Encrypt %d %d R", eoid, egen);
		fz_print(out, "\n /ID [");
		fz_printobj(out, encrypt->id, 1);
		fz_printobj(out, encrypt->id, 1);
		fz_print(out, "]");

		pdf_cryptobj(encrypt, encrypt->encrypt, eoid, egen);
	}

	fz_print(out, "\n>>\n\n");

	fz_print(out, "startxref\n");
	fz_print(out, "%d\n", xrefpos);
	fz_print(out, "%%%%EOF\n");

	xref->startxref = xrefpos;
	error = nil;

cleanup:
	/* Shared exit: offsets is known to be non-nil once we get here. */
	fz_free(offsets);
	fz_dropstream(out);
	return error;
}
/*
 * Append an incremental update to the file at path: every entry of type
 * 'a' is written out as an object, followed by a cross-reference table
 * covering the modified entries and a trailer whose /Prev is the
 * previous xref->startxref.
 *
 * Entry types are rewritten in xref->table while the table is emitted
 * ('d' becomes 'f', 'a' becomes 'n'), and on success xref->startxref is
 * set to the offset of the new table.
 *
 * Returns nil on success or the first error encountered.
 */
fz_error *
pdf_updatexref(pdf_xref *xref, char *path)
{
	fz_error *error;
	fz_stream *out;
	fz_obj *ref;
	int num;
	int k, count;
	int xrefpos;

	pdf_logxref("updatexref '%s' %p\n", path, xref);

	error = fz_openafile(&out, path);
	if (error)
		return error;

	fz_print(out, "\n");

	for (num = 0; num < xref->len; num++)
	{
		if (xref->table[num].type != 'a')
			continue;

		xref->table[num].ofs = fz_tell(out);
		error = writeobject(out, xref, xref->crypt, num, xref->table[num].gen);
		if (error)
			goto cleanup;
	}

	/* always write out entry 0 in appended xref sections */
	xref->table[0].type = 'd';

	xrefpos = fz_tell(out);
	fz_print(out, "xref\n");

	num = 0;
	while (num < xref->len)
	{
		count = countmodified(xref, num);

		pdf_logxref(" section %d +%d\n", num, count);

		fz_print(out, "%d %d\n", num, count);

		for (k = num; k < num + count; k++)
		{
			if (xref->table[k].type == 'd')
				xref->table[k].type = 'f';
			if (xref->table[k].type == 'a')
				xref->table[k].type = 'n';

			fz_print(out, "%010d %05d %c \n",
				xref->table[k].ofs,
				xref->table[k].gen,
				xref->table[k].type);
		}

		num += count;

		/* Advance to the start of the next run of modified entries. */
		while (num < xref->len &&
			xref->table[num].type != 'a' &&
			xref->table[num].type != 'd')
			num ++;
	}

	fz_print(out, "\n");

	fz_print(out, "trailer\n<<\n /Size %d\n /Prev %d", xref->len, xref->startxref);

	ref = fz_dictgets(xref->trailer, "Root");
	fz_print(out,"\n /Root %d %d R", fz_tonum(ref), fz_togen(ref));

	ref = fz_dictgets(xref->trailer, "Info");
	if (ref)
		fz_print(out,"\n /Info %d %d R", fz_tonum(ref), fz_togen(ref));

	ref = fz_dictgets(xref->trailer, "Encrypt");
	if (ref)
	{
		fz_print(out,"\n /Encrypt ");
		fz_printobj(out, ref, TIGHT);
	}

	ref = fz_dictgets(xref->trailer, "ID");
	if (ref)
	{
		fz_print(out,"\n /ID ");
		fz_printobj(out, ref, TIGHT);
	}

	fz_print(out, "\n>>\n\n");

	fz_print(out, "startxref\n");
	fz_print(out, "%d\n", xrefpos);
	fz_print(out, "%%%%EOF\n");

	xref->startxref = xrefpos;
	error = nil;

cleanup:
	fz_dropstream(out);
	return error;
}
/*
 * Rebuild the cross-reference table of a damaged file by scanning the
 * whole file for "<num> <gen> obj" sequences and trailer dictionaries.
 *
 * Every object found is recorded with its offset, stream offset and (if
 * it had to be measured) corrected stream length. Afterwards the xref
 * table is resized and filled in, the free list is rebuilt and a new
 * trailer is created from the Root/Info/Encrypt/ID entries seen while
 * scanning.
 *
 * buf/bufsize is scratch space for the lexer; no more than bufsize bytes
 * are ever read into it.
 *
 * Returns fz_okay on success or an error.
 */
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/*
	 * look for '%PDF' version marker within first kilobyte of file;
	 * never read more than the scratch buffer can hold.
	 */
	n = fz_read(xref->file, (unsigned char *)buf, bufsize < 1024 ? bufsize : 1024);
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	/* i <= n - 4 so that a marker in the last four bytes read is seen too */
	for (i = 0; i <= n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		/*
		 * Remember the two most recent integers and where they started:
		 * when "obj" follows they are the object and generation numbers.
		 */
		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			/* A negative number would index before the start of xref->table. */
			if (num < 0)
			{
				pdf_logxref("ignoring object with invalid number (%d %d R)\n", num, gen);
				continue;
			}

			pdf_logxref("found object: (%d %d R)\n", num, gen);

			if (listlen + 1 == listcap)
			{
				listcap = (listcap * 3) / 2;
				list = fz_realloc(list, listcap, sizeof(struct entry));
			}

			list[listlen].num = num;
			list[listlen].gen = gen;
			list[listlen].ofs = numofs;
			list[listlen].stmofs = stmofs;
			list[listlen].stmlen = stmlen;
			listlen ++;

			if (num > maxnum)
				maxnum = num;
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			/* Later dictionaries override entries found in earlier ones. */
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		/* Skip one byte after a lexing error so the scan makes progress. */
		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}
	}

	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	/* Chain the free entries together, walking from the end of the table. */
	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer from the entries collected while scanning */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}
/*
 * Scan over the body of one object, starting at the current position of
 * file, up to and including its "endobj" token (if present).
 *
 * Outputs:
 *   *stmofsp  0 unless a "stream" keyword is found, in which case it is
 *             the file offset right after the keyword's end-of-line.
 *   *stmlenp  -1 unless the stream length had to be measured by
 *             searching for "endstream", in which case it is the
 *             measured length.
 *   *encrypt, *id  replaced (old value dropped, new value kept) when the
 *             object is a dictionary whose Type is the name XRef and it
 *             has Encrypt / ID entries.
 *
 * buf/cap is scratch space for the lexer; its first 9 bytes are also
 * used as a sliding window while searching for "endstream".
 *
 * Returns fz_okay, or an error if lexing or reading fails.
 */
static fz_error
fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
	fz_error error;
	int tok;
	int stmlen;
	int len;
	int n;

	*stmofsp = 0;
	*stmlenp = -1;

	stmlen = 0;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse object");
	if (tok == PDF_TODICT)
	{
		fz_obj *dict, *obj;

		/* Send nil xref so we don't try to resolve references */
		error = pdf_parsedict(&dict, nil, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");

		/* Only dictionaries typed XRef contribute Encrypt / ID here. */
		obj = fz_dictgets(dict, "Type");
		if (fz_isname(obj) && !strcmp(fz_toname(obj), "XRef"))
		{
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (*encrypt)
					fz_dropobj(*encrypt);
				*encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (*id)
					fz_dropobj(*id);
				*id = fz_keepobj(obj);
			}
		}

		/* Declared stream length; only trusted if it is a direct integer. */
		obj = fz_dictgets(dict, "Length");
		if (fz_isint(obj))
			stmlen = fz_toint(obj);

		fz_dropobj(dict);
	}

	/* Skip tokens until the object ends, a stream begins, or lexing stops. */
	while ( tok != PDF_TSTREAM &&
		tok != PDF_TENDOBJ &&
		tok != PDF_TERROR &&
		tok != PDF_TEOF )
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj or stream token");
	}

	if (tok == PDF_TSTREAM)
	{
		/* Consume the end-of-line after the "stream" keyword. */
		int c = fz_readbyte(file);
		if (c == '\r') {
			c = fz_peekbyte(file);
			if (c == '\n')
				fz_readbyte(file);
		}

		*stmofsp = fz_tell(file);
		if (*stmofsp < 0)
			return fz_throw("cannot seek in file");

		/*
		 * First trust the declared length: jump past the data and check
		 * that "endstream" follows. If it does, *stmlenp stays -1.
		 */
		if (stmlen > 0)
		{
			fz_seek(file, *stmofsp + stmlen, 0);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				fz_catch(error, "cannot find endstream token, falling back to scanning");
			if (tok == PDF_TENDSTREAM)
				goto atobjend;
			fz_seek(file, *stmofsp, 0);
		}

		/*
		 * Fall back to a byte-by-byte search: buf[0..8] is a 9 byte
		 * window that is shifted left by one for every byte read until
		 * it holds "endstream" or the input ends.
		 */
		n = fz_read(file, (unsigned char *) buf, 9);
		if (n < 0)
			return fz_rethrow(n, "cannot read from file");

		while (memcmp(buf, "endstream", 9) != 0)
		{
			c = fz_readbyte(file);
			if (c == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = c;
		}

		/* Measured length: current position minus stream start minus the keyword. */
		*stmlenp = fz_tell(file) - *stmofsp - 9;

atobjend:
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj token");
		if (tok != PDF_TENDOBJ)
			fz_warn("object missing 'endobj' token");
	}

	return fz_okay;
}
/*
 * Parse an indirect object definition "<num> <gen> obj <value> ..." from
 * file and return the parsed value.
 *
 * When the corresponding pointers are non-null, *onum / *ogen receive
 * the object and generation numbers and *ostmofs the offset of the
 * stream data (0 when the object is not followed by a "stream" keyword).
 *
 * Errors are reported by throwing through the context taken from file.
 */
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, int *ostmofs)
{
	pdf_obj *obj = NULL;
	int num = 0, gen = 0;
	int stm_ofs = 0;
	int tok;
	int first, second;
	int ch;
	fz_context *ctx = file->ctx;

	fz_var(obj);

	/* Object number. */
	tok = pdf_lex(file, buf);
	if (tok != PDF_TOK_INT)
		fz_throw(ctx, "expected object number (%d %d R)", num, gen);
	num = buf->i;

	/* Generation number. */
	tok = pdf_lex(file, buf);
	if (tok != PDF_TOK_INT)
		fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
	gen = buf->i;

	/* "obj" keyword. */
	tok = pdf_lex(file, buf);
	if (tok != PDF_TOK_OBJ)
		fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

	/* First token of the value. */
	tok = pdf_lex(file, buf);

	switch (tok)
	{
	case PDF_TOK_NULL:
		obj = pdf_new_null(ctx);
		break;
	case PDF_TOK_TRUE:
		obj = pdf_new_bool(ctx, 1);
		break;
	case PDF_TOK_FALSE:
		obj = pdf_new_bool(ctx, 0);
		break;
	case PDF_TOK_REAL:
		obj = pdf_new_real(ctx, buf->f);
		break;
	case PDF_TOK_NAME:
		obj = fz_new_name(ctx, buf->scratch);
		break;
	case PDF_TOK_STRING:
		obj = pdf_new_string(ctx, buf->scratch, buf->len);
		break;
	case PDF_TOK_OPEN_ARRAY:
		obj = pdf_parse_array(xref, file, buf);
		break;
	case PDF_TOK_OPEN_DICT:
		obj = pdf_parse_dict(xref, file, buf);
		break;

	case PDF_TOK_INT:
		/*
		 * An integer may be a plain number (followed directly by
		 * "stream"/"endobj") or the start of a reference "a b R".
		 */
		first = buf->i;
		tok = pdf_lex(file, buf);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* The terminating token has already been consumed. */
			obj = pdf_new_int(ctx, first);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			second = buf->i;
			tok = pdf_lex(file, buf);
			if (tok == PDF_TOK_R)
			{
				obj = pdf_new_indirect(ctx, first, second, xref);
				break;
			}
		}
		fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		/* Empty object body: treated as null. */
		obj = pdf_new_null(ctx);
		goto skip;

	default:
		fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
	}

	/* Token following the value; drop the value if the lexer throws. */
	fz_try(ctx)
	{
		tok = pdf_lex(file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(obj);
		fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		/* Skip spaces and the end-of-line after the "stream" keyword. */
		do
			ch = fz_read_byte(file);
		while (ch == ' ');

		if (ch == '\r')
		{
			if (fz_peek_byte(file) == '\n')
				fz_read_byte(file);
			else
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stm_ofs = fz_tell(file);
	}
	else if (tok != PDF_TOK_ENDOBJ)
	{
		fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stm_ofs;
	return obj;
}
/*
 * Parse an indirect object definition "<num> <gen> obj <value> ..." from
 * file.
 *
 * On success *op receives the parsed value and, when the corresponding
 * pointers are non-null, *onum / *ogen receive the object and generation
 * numbers and *ostmofs the offset of the stream data (0 when the object
 * is not followed by a "stream" keyword).
 *
 * buf/cap is scratch space for the lexer. Returns fz_okay or an error.
 */
fz_error
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap, int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = NULL;
	int num = 0, gen = 0;
	int stm_ofs = 0;
	int tok;
	int len;
	int first, second;
	int ch;

	/* Object number. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	/* Generation number. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	/* "obj" keyword. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	/* First token of the value. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOK_NULL:
		obj = fz_new_null();
		break;
	case PDF_TOK_TRUE:
		obj = fz_new_bool(1);
		break;
	case PDF_TOK_FALSE:
		obj = fz_new_bool(0);
		break;
	case PDF_TOK_REAL:
		obj = fz_new_real(fz_atof(buf));
		break;
	case PDF_TOK_NAME:
		obj = fz_new_name(buf);
		break;
	case PDF_TOK_STRING:
		obj = fz_new_string(buf, len);
		break;

	case PDF_TOK_OPEN_ARRAY:
	case PDF_TOK_OPEN_DICT:
		if (tok == PDF_TOK_OPEN_ARRAY)
			error = pdf_parse_array(&obj, xref, file, buf, cap);
		else
			error = pdf_parse_dict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_INT:
		/*
		 * An integer may be a plain number (followed directly by
		 * "stream"/"endobj") or the start of a reference "a b R".
		 */
		first = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* The terminating token has already been consumed. */
			obj = fz_new_int(first);
			goto skip;
		}
		if (tok != PDF_TOK_INT)
			return fz_throw("expected 'R' keyword (%d %d R)", num, gen);
		second = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok != PDF_TOK_R)
			return fz_throw("expected 'R' keyword (%d %d R)", num, gen);
		obj = fz_new_indirect(first, second, xref);
		break;

	case PDF_TOK_ENDOBJ:
		/* Empty object body: treated as null. */
		obj = fz_new_null();
		goto skip;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	/* Token following the value: expected to be "stream" or "endobj". */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_drop_obj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		/* Skip spaces and the end-of-line after the "stream" keyword. */
		do
			ch = fz_read_byte(file);
		while (ch == ' ');

		if (ch == '\r')
		{
			if (fz_peek_byte(file) == '\n')
				fz_read_byte(file);
			else
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stm_ofs = fz_tell(file);
	}
	else if (tok != PDF_TOK_ENDOBJ)
	{
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stm_ofs;
	*op = obj;
	return fz_okay;
}