/*
 * Record a resource value in the store under the given key.
 *
 * A new pdf_item is always allocated and keeps a reference to `key`.
 * Items whose key is an indirect reference go into store->hash under a
 * (kind, object number, generation) key; all others are pushed onto the
 * front of the store->root list. keepitem() is called on the value last.
 */
void
pdf_storeitem(pdf_store *store, pdf_itemkind kind, fz_obj *key, void *val)
{
	pdf_item *entry = fz_malloc(sizeof(pdf_item));

	entry->kind = kind;
	entry->key = fz_keepobj(key);
	entry->val = val;
	entry->age = 0;
	entry->next = nil;

	if (!fz_isindirect(key))
	{
		/* direct object key: prepend to the linked list */
		pdf_logrsrc("store item %s: ... = %p\n", kindstr(kind), val);
		entry->next = store->root;
		store->root = entry;
	}
	else
	{
		/* indirect reference key: hash on kind + object id */
		struct refkey hkey;

		pdf_logrsrc("store item %s (%d %d R) ptr=%p\n", kindstr(kind), fz_tonum(key), fz_togen(key), val);
		hkey.kind = kind;
		hkey.oid = fz_tonum(key);
		hkey.gen = fz_togen(key);
		fz_hashinsert(store->hash, &hkey, entry);
	}

	keepitem(kind, val);
}
void openxref(char *filename, char *password, int dieonbadpass) { fz_error error; int okay; basename = strrchr(filename, '/'); if (!basename) basename = filename; else basename++; xref = pdf_newxref(); error = pdf_loadxref(xref, filename); if (error) { fz_catch(error, "trying to repair"); error = pdf_repairxref(xref, filename); if (error) die(error); } error = pdf_decryptxref(xref); if (error) die(error); if (pdf_needspassword(xref)) { okay = pdf_authenticatepassword(xref, password); if (!okay && !dieonbadpass) fz_warn("invalid password, attempting to continue."); else if (!okay && dieonbadpass) die(fz_throw("invalid password")); } xref->root = fz_dictgets(xref->trailer, "Root"); if (xref->root) fz_keepobj(xref->root); xref->info = fz_dictgets(xref->trailer, "Info"); if (xref->info) fz_keepobj(xref->info); pagecount = pdf_getpagecount(xref); }
/*
 * Allocate a link node of the given kind covering `bbox`.
 * The node keeps its own reference to `dest` and is returned unlinked
 * (next == nil).
 */
pdf_link *
pdf_newlink(pdf_linkkind kind, fz_rect bbox, fz_obj *dest)
{
	pdf_link *node;

	node = fz_malloc(sizeof(pdf_link));
	node->next = nil;
	node->dest = fz_keepobj(dest);
	node->rect = bbox;
	node->kind = kind;

	return node;
}
/*
 * Record a resource value in the store under the given key.
 *
 * Indirect-reference keys are stored in store->hash under a
 * (kind, object number, generation) key; other keys get a pdf_item
 * pushed onto the front of store->root.
 *
 * Returns nil on success, the hash-insert error, or fz_outofmem.
 * The store takes a reference on `val` only on success, so a failed
 * call leaves the value's reference count untouched (the reference used
 * to be taken up front and was leaked on both failure paths).
 */
fz_error *
pdf_storeitem(pdf_store *store, pdf_itemkind kind, fz_obj *key, void *val)
{
	fz_error *error;

	if (fz_isindirect(key))
	{
		struct refkey item;

		pdf_logrsrc("store item %d: %d %d R = %p\n", kind, fz_tonum(key), fz_togen(key), val);

		item.kind = kind;
		item.oid = fz_tonum(key);
		item.gen = fz_togen(key);

		error = fz_hashinsert(store->hash, &item, val);
		if (error)
			return error;
	}
	else
	{
		pdf_item *item;

		item = fz_malloc(sizeof(pdf_item));
		if (!item)
			return fz_outofmem;

		pdf_logrsrc("store item %d: ... = %p\n", kind, val);

		item->kind = kind;
		item->key = fz_keepobj(key);
		item->val = val;

		item->next = store->root;
		store->root = item;
	}

	/* The value is now reachable from the store: take the store's reference. */
	switch (kind)
	{
	case PDF_KCOLORSPACE: fz_keepcolorspace(val); break;
	case PDF_KFUNCTION: pdf_keepfunction(val); break;
	case PDF_KXOBJECT: pdf_keepxobject(val); break;
	case PDF_KIMAGE: fz_keepimage(val); break;
	case PDF_KPATTERN: pdf_keeppattern(val); break;
	case PDF_KSHADE: fz_keepshade(val); break;
	case PDF_KCMAP: pdf_keepcmap(val); break;
	case PDF_KFONT: fz_keepfont(val); break;
	}

	return nil;
}
/*
 * Fetch object (num gen R) through the xref cache.
 *
 * On success *objp receives a new reference to the cached object and
 * fz_okay is returned; a caching failure is rethrown with the object id.
 */
fz_error
pdf_loadobject(fz_obj **objp, pdf_xref *xref, int num, int gen)
{
	fz_error status;
	fz_obj *cached;

	status = pdf_cacheobject(xref, num, gen);
	if (status)
		return fz_rethrow(status, "cannot load object (%d %d R) into cache", num, gen);

	/* a successful cache call is expected to have filled the slot */
	cached = xref->table[num].obj;
	assert(cached);

	*objp = fz_keepobj(cached);
	return fz_okay;
}
/*
 * Allocate a link node and hand it back through *linkp.
 *
 * The node keeps its own reference to `dest` and starts unlinked.
 * Returns nil on success or fz_outofmem if the allocation fails, in
 * which case *linkp is not written.
 */
fz_error *
pdf_newlink(pdf_link **linkp, fz_rect bbox, fz_obj *dest, pdf_linkkind kind)
{
	pdf_link *node = fz_malloc(sizeof(pdf_link));

	if (!node)
		return fz_outofmem;

	node->next = nil;
	node->kind = kind;
	node->dest = fz_keepobj(dest);
	node->rect = bbox;

	*linkp = node;
	return nil;
}
/*
 * Allocate a link node and hand it back through *linkp.
 *
 * The node keeps its own reference to `dest` and starts unlinked.
 * Returns fz_okay on success; an allocation failure is reported as a
 * rethrown "out of memory" error and *linkp is left untouched.
 */
fz_error
pdf_newlink(pdf_link **linkp, pdf_linkkind kind, fz_rect bbox, fz_obj *dest)
{
	pdf_link *node = fz_malloc(sizeof(pdf_link));

	if (!node)
		return fz_rethrow(-1, "out of memory");

	node->next = nil;
	node->dest = fz_keepobj(dest);
	node->rect = bbox;
	node->kind = kind;

	*linkp = node;
	return fz_okay;
}
/*
 * Replace numbered object -- for use by pdfclean and similar tools.
 *
 * Stores a new reference to `newobj` in xref slot `num`, releases the
 * object previously held there, and marks the slot as an in-use entry
 * with no file offset. `gen` is only used in the out-of-range warning.
 * Out-of-range object numbers are reported and ignored.
 */
void
pdf_updateobject(pdf_xref *xref, int num, int gen, fz_obj *newobj)
{
	pdf_xrefentry *x;
	fz_obj *oldobj;

	if (num < 0 || num >= xref->len)
	{
		fz_warn("object out of range (%d %d R); xref size %d", num, gen, xref->len);
		return;
	}

	x = &xref->table[num];

	/*
	 * Keep the new object before dropping the old one: if the caller
	 * passes the object already stored in this slot, dropping first
	 * could free it before we take our reference.
	 */
	oldobj = x->obj;
	x->obj = fz_keepobj(newobj);
	if (oldobj)
		fz_dropobj(oldobj);

	x->type = 'n';
	x->ofs = 0;
}
/*
 * Rebuild the cross-reference table by scanning the whole file.
 *
 * The file is tokenized from the '%PDF' marker (searched for in the
 * first kilobyte) to the end. Every "num gen obj" found is recorded with
 * its offset and stream position; every top-level dictionary is treated
 * as a candidate trailer from which Encrypt, ID, Root and Info are
 * remembered (the last occurrence wins). The xref table is then resized,
 * filled from the recorded entries, stream /Length values are corrected,
 * the free list is relinked and a fresh trailer is created.
 *
 * `buf`/`bufsize` is scratch space for the lexer.
 * Returns fz_okay on success; on failure every object collected so far
 * is released and the (already rethrown) error is returned.
 */
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/*
	 * look for '%PDF' version marker within first kilobyte of file.
	 * Never read more than the scratch buffer can hold: the bound used to
	 * be the larger of bufsize and 1024, which overruns a small buffer.
	 */
	n = fz_read(xref->file, (unsigned char *)buf, bufsize < 1024 ? bufsize : 1024);
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	/* i <= n - 4 so a marker in the last four bytes read is also found */
	for (i = 0; i <= n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		/* remember the last two integers seen: they are "num gen" when 'obj' follows */
		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			if (num < 0)
			{
				/* a negative number would index before the start of xref->table below */
				fz_warn("ignoring object with invalid object number (%d %d R)", num, gen);
			}
			else
			{
				pdf_logxref("found object: (%d %d R)\n", num, gen);

				if (listlen + 1 == listcap)
				{
					listcap = (listcap * 3) / 2;
					list = fz_realloc(list, listcap, sizeof(struct entry));
				}

				list[listlen].num = num;
				list[listlen].gen = gen;
				list[listlen].ofs = numofs;
				list[listlen].stmofs = stmofs;
				list[listlen].stmlen = stmlen;
				listlen ++;

				if (num > maxnum)
					maxnum = num;
			}
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		/* skip one byte past whatever the lexer could not tokenize */
		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}
	}

	/* object 0 is always the head of the free list */
	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	/* chain the free entries together, walking backwards so each points at the next higher one */
	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer, Root will be added later */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}
/*
 * Build a font descriptor for a Type 3 font dictionary.
 *
 * Reads Name, FontMatrix, FontBBox, Encoding (name or dictionary with
 * BaseEncoding/Differences), ToUnicode, FirstChar/LastChar/Widths,
 * Resources (falling back to `rdb` when the font has none) and finally
 * loads the CharProcs stream for every encoded glyph name.
 *
 * On success *fontdescp receives the descriptor and fz_okay is returned.
 * A missing Encoding, Widths or CharProcs entry, or a failure loading
 * ToUnicode or a glyph stream, is rethrown with the font's object id.
 */
fz_error
pdf_loadtype3font(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict)
{
	fz_error error;
	char buf[256];
	char *estrings[256];
	pdf_fontdesc *fontdesc;
	fz_obj *encoding;
	fz_obj *widths;
	fz_obj *charprocs;
	fz_obj *obj;
	int first, last;
	int i, k, n;
	fz_rect bbox;
	fz_matrix matrix;

	obj = fz_dictgets(dict, "Name");
	if (fz_isname(obj))
		fz_strlcpy(buf, fz_toname(obj), sizeof buf);
	else
		fz_strlcpy(buf, "Unnamed-T3", sizeof buf);

	fontdesc = pdf_newfontdesc();

	pdf_logfont("load type3 font (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), fontdesc);
	pdf_logfont("name %s\n", buf);

	obj = fz_dictgets(dict, "FontMatrix");
	matrix = pdf_tomatrix(obj);

	pdf_logfont("matrix [%g %g %g %g %g %g]\n",
		matrix.a, matrix.b,
		matrix.c, matrix.d,
		matrix.e, matrix.f);

	obj = fz_dictgets(dict, "FontBBox");
	bbox = pdf_torect(obj);

	pdf_logfont("bbox [%g %g %g %g]\n",
		bbox.x0, bbox.y0,
		bbox.x1, bbox.y1);

	fontdesc->font = fz_newtype3font(buf, matrix);

	fz_setfontbbox(fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1);

	/* Encoding */

	for (i = 0; i < 256; i++)
		estrings[i] = nil;

	encoding = fz_dictgets(dict, "Encoding");
	if (!encoding)
	{
		error = fz_throw("syntaxerror: Type3 font missing Encoding");
		goto cleanup;
	}

	if (fz_isname(encoding))
		pdf_loadencoding(estrings, fz_toname(encoding));

	if (fz_isdict(encoding))
	{
		fz_obj *base, *diff, *item;

		base = fz_dictgets(encoding, "BaseEncoding");
		if (fz_isname(base))
			pdf_loadencoding(estrings, fz_toname(base));

		diff = fz_dictgets(encoding, "Differences");
		if (fz_isarray(diff))
		{
			n = fz_arraylen(diff);
			k = 0;
			for (i = 0; i < n; i++)
			{
				item = fz_arrayget(diff, i);
				if (fz_isint(item))
					k = fz_toint(item);
				if (fz_isname(item))
					estrings[k++] = fz_toname(item);
				/* clamp after every item so k is always a valid index on the next pass */
				if (k < 0) k = 0;
				if (k > 255) k = 255;
			}
		}
	}

	fontdesc->encoding = pdf_newidentitycmap(0, 1);

	error = pdf_loadtounicode(fontdesc, xref, estrings, nil, fz_dictgets(dict, "ToUnicode"));
	if (error)
		goto cleanup;

	/* Widths */

	pdf_setdefaulthmtx(fontdesc, 0);

	first = fz_toint(fz_dictgets(dict, "FirstChar"));
	last = fz_toint(fz_dictgets(dict, "LastChar"));

	/*
	 * FirstChar/LastChar come straight from the file and are used to
	 * index t3widths below; reject anything outside a single-byte code
	 * range instead of writing out of bounds.
	 * NOTE(review): assumes t3widths holds 256 entries like t3procs -- confirm.
	 */
	if (first < 0 || last > 255 || first > last)
		first = last = 0;

	widths = fz_dictgets(dict, "Widths");
	if (!widths)
	{
		error = fz_throw("syntaxerror: Type3 font missing Widths");
		goto cleanup;
	}

	for (i = first; i <= last; i++)
	{
		float w = fz_toreal(fz_arrayget(widths, i - first));
		w = fontdesc->font->t3matrix.a * w * 1000;
		fontdesc->font->t3widths[i] = w * 0.001f;
		pdf_addhmtx(fontdesc, i, i, w);
	}

	pdf_endhmtx(fontdesc);

	/* Resources -- inherit page resources if the font doesn't have its own */

	fontdesc->font->t3resources = fz_dictgets(dict, "Resources");
	if (!fontdesc->font->t3resources)
		fontdesc->font->t3resources = rdb;
	if (fontdesc->font->t3resources)
		fz_keepobj(fontdesc->font->t3resources);
	if (!fontdesc->font->t3resources)
		fz_warn("no resource dictionary for type 3 font!");

	fontdesc->font->t3xref = xref;
	fontdesc->font->t3run = pdf_runcontents;

	/* CharProcs */

	charprocs = fz_dictgets(dict, "CharProcs");
	if (!charprocs)
	{
		error = fz_throw("syntaxerror: Type3 font missing CharProcs");
		goto cleanup;
	}

	for (i = 0; i < 256; i++)
	{
		if (estrings[i])
		{
			obj = fz_dictgets(charprocs, estrings[i]);
			if (pdf_isstream(xref, fz_tonum(obj), fz_togen(obj)))
			{
				error = pdf_loadstream(&fontdesc->font->t3procs[i], xref, fz_tonum(obj), fz_togen(obj));
				if (error)
					goto cleanup;
			}
		}
	}

	pdf_logfont("}\n");

	*fontdescp = fontdesc;
	return fz_okay;

cleanup:
	/* NOTE(review): only the font and the descriptor itself are released here;
	 * whether fontdesc->encoding needs dropping too should be confirmed. */
	fz_dropfont(fontdesc->font);
	fz_free(fontdesc);
	return fz_rethrow(error, "cannot load type3 font (%d %d R)", fz_tonum(dict), fz_togen(dict));
}
/*
 * Recursively walk one node of the page tree.
 *
 * A /Page node gets any missing Resources, MediaBox, CropBox and Rotate
 * entries filled in from `inherit`, and is appended (reference and
 * object) at pages->cursor. A /Pages node updates `inherit` from its own
 * entries (passed by value, so siblings are unaffected), resolves /Kids
 * and recurses into each child. Any other /Type is an error.
 *
 * `ref` is the indirect reference through which `obj` was reached and
 * `*pagenum` counts the leaf pages seen so far.
 */
static fz_error *
loadpagetree(pdf_xref *xref, pdf_pagetree *pages,
	struct stuff inherit, fz_obj *obj, fz_obj *ref, int *pagenum)
{
	fz_error *error;
	fz_obj *type;
	fz_obj *kids;
	fz_obj *kref, *kobj;
	fz_obj *inh;
	int i;

	type = fz_dictgets(obj, "Type");

	if (strcmp(fz_toname(type), "Page") == 0)
	{
		pdf_logpage("page %d, %d %d\n", *pagenum, ref->u.r.oid, ref->u.r.gid);
		(*pagenum)++;

		if (inherit.resources && !fz_dictgets(obj, "Resources"))
		{
			pdf_logpage("inherit resources (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "Resources", inherit.resources);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree resources");
		}

		if (inherit.mediabox && !fz_dictgets(obj, "MediaBox"))
		{
			pdf_logpage("inherit mediabox (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "MediaBox", inherit.mediabox);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree mediabox");
		}

		if (inherit.cropbox && !fz_dictgets(obj, "CropBox"))
		{
			pdf_logpage("inherit cropbox (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "CropBox", inherit.cropbox);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree cropbox");
		}

		if (inherit.rotate && !fz_dictgets(obj, "Rotate"))
		{
			pdf_logpage("inherit rotate (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "Rotate", inherit.rotate);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree rotate");
		}

		pages->pref[pages->cursor] = fz_keepobj(ref);
		pages->pobj[pages->cursor] = fz_keepobj(obj);
		pages->cursor ++;
	}

	else if (strcmp(fz_toname(type), "Pages") == 0)
	{
		inh = fz_dictgets(obj, "Resources");
		if (inh) inherit.resources = inh;

		inh = fz_dictgets(obj, "MediaBox");
		if (inh) inherit.mediabox = inh;

		inh = fz_dictgets(obj, "CropBox");
		if (inh) inherit.cropbox = inh;

		inh = fz_dictgets(obj, "Rotate");
		if (inh) inherit.rotate = inh;

		kids = fz_dictgets(obj, "Kids");
		error = pdf_resolve(&kids, xref);
		if (error)
			return fz_rethrow(error, "cannot resolve /Kids");

		pdf_logpage("subtree %d pages, %d %d {\n",
			fz_arraylen(kids), ref->u.r.oid, ref->u.r.gid);

		for (i = 0; i < fz_arraylen(kids); i++)
		{
			kref = fz_arrayget(kids, i);

			error = pdf_loadindirect(&kobj, xref, kref);
			if (error)
			{
				fz_dropobj(kids);
				return fz_rethrow(error, "cannot load kid");
			}

			if (kobj == obj)
			{
				/* prevent infinite recursion possible in maliciously crafted PDFs */
				/* release the kid reference we just loaded; it used to leak on this path */
				fz_dropobj(kobj);
				fz_dropobj(kids);
				return fz_throw("corrupted pdf file");
			}

			error = loadpagetree(xref, pages, inherit, kobj, kref, pagenum);
			fz_dropobj(kobj);
			if (error)
			{
				fz_dropobj(kids);
				return fz_rethrow(error, "cannot load subtree");
			}
		}

		fz_dropobj(kids);

		pdf_logpage("}\n");
	}

	else
		return fz_throw("pagetree node has unexpected type %s", fz_toname(type));

	return fz_okay;
}
void cleanmain(int argc, char **argv) { int doencrypt = 0; int dogarbage = 0; int doexpand = 0; pdf_crypt *encrypt = nil; char *infile; char *outfile = "out.pdf"; char *userpw = ""; char *ownerpw = ""; unsigned perms = 0xfffff0c0; /* nothing allowed */ int keylen = 40; char *password = ""; fz_error *error; int c; while ((c = getopt(argc, argv, "d:egn:o:p:u:x")) != -1) { switch (c) { case 'p': /* see TABLE 3.15 User access permissions */ perms = 0xfffff0c0; if (strchr(optarg, 'p')) /* print */ perms |= (1 << 2) | (1 << 11); if (strchr(optarg, 'm')) /* modify */ perms |= (1 << 3) | (1 << 10); if (strchr(optarg, 'c')) /* copy */ perms |= (1 << 4) | (1 << 9); if (strchr(optarg, 'a')) /* annotate / forms */ perms |= (1 << 5) | (1 << 8); break; case 'd': password = optarg; break; case 'e': doencrypt ++; break; case 'g': dogarbage ++; break; case 'n': keylen = atoi(optarg); break; case 'o': ownerpw = optarg; break; case 'u': userpw = optarg; break; case 'x': doexpand ++; break; default: cleanusage(); break; } } if (argc - optind < 1) cleanusage(); infile = argv[optind++]; if (argc - optind > 0) outfile = argv[optind++]; opensrc(infile, password, 0); if (doencrypt) { fz_obj *id = fz_dictgets(src->trailer, "ID"); if (!id) { error = fz_packobj(&id, "[(ABCDEFGHIJKLMNOP)(ABCDEFGHIJKLMNOP)]"); if (error) die(error); } else fz_keepobj(id); error = pdf_newencrypt(&encrypt, userpw, ownerpw, perms, keylen, id); if (error) die(error); fz_dropobj(id); } if (doexpand) cleanexpand(); if (dogarbage) { preloadobjstms(); pdf_garbagecollect(src); } error = pdf_savexref(src, outfile, encrypt); if (error) die(error); if (encrypt) pdf_dropcrypt(encrypt); pdf_closexref(src); }
/*
 * Read a cross-reference stream at the current file position.
 *
 * Parses the stream object, grows and zero-fills the xref table to the
 * stream's /Size, registers the stream object itself in the table, and
 * then decodes either the whole range [0, Size) or each (start, count)
 * pair listed in /Index using the field widths from /W.
 *
 * On success *trailerp receives the stream dictionary (caller owns the
 * reference). On failure the dictionary and any opened stream are
 * released and an error is returned.
 */
static fz_error
readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *trailer;
	fz_obj *index;
	fz_obj *obj;
	pdf_xrefentry *self;
	int oid, gen, stmofs;
	int size, w0, w1, w2;
	int t;
	int i;

	pdf_logxref("load new xref format\n");

	error = pdf_parseindobj(&trailer, xref, xref->file, buf, cap, &oid, &gen, &stmofs);
	if (error)
		return fz_rethrow(error, "cannot parse compressed xref stream object");

	obj = fz_dictgets(trailer, "Size");
	if (!obj)
	{
		fz_dropobj(trailer);
		return fz_throw("xref stream missing Size entry");
	}
	size = fz_toint(obj);

	if (size >= xref->cap)
	{
		/* one spare slot: hack to allow broken pdf generators with off-by-one errors */
		xref->cap = size + 1;
		xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry));
	}

	if (size > xref->len)
	{
		/* clear every slot between the old length and the capacity */
		for (i = xref->len; i < xref->cap; i++)
		{
			xref->table[i].type = 0;
			xref->table[i].obj = nil;
			xref->table[i].stmofs = 0;
			xref->table[i].gen = 0;
			xref->table[i].ofs = 0;
		}
		xref->len = size;
	}

	if (oid < 0 || oid >= xref->len)
	{
		if (oid != xref->len || oid >= xref->cap)
		{
			fz_dropobj(trailer);
			return fz_throw("object id (%d %d R) out of range (0..%d)", oid, gen, xref->len - 1);
		}

		/* allow broken pdf files that have off-by-one errors in the xref */
		fz_warn("object id (%d %d R) out of range (0..%d)", oid, gen, xref->len - 1);
		xref->len ++;
	}

	/* the xref stream is itself an object: enter it in the table */
	self = &xref->table[oid];
	self->type = 'n';
	self->gen = gen;
	self->obj = fz_keepobj(trailer);
	self->stmofs = stmofs;
	self->ofs = 0;

	obj = fz_dictgets(trailer, "W");
	if (!obj)
	{
		fz_dropobj(trailer);
		return fz_throw("xref stream missing W entry");
	}
	w0 = fz_toint(fz_arrayget(obj, 0));
	w1 = fz_toint(fz_arrayget(obj, 1));
	w2 = fz_toint(fz_arrayget(obj, 2));

	index = fz_dictgets(trailer, "Index");

	error = pdf_openstream(&stm, xref, oid, gen);
	if (error)
	{
		fz_dropobj(trailer);
		return fz_rethrow(error, "cannot open compressed xref stream");
	}

	if (index)
	{
		/* /Index holds (first object, count) pairs */
		for (t = 0; t < fz_arraylen(index); t += 2)
		{
			int i0 = fz_toint(fz_arrayget(index, t + 0));
			int i1 = fz_toint(fz_arrayget(index, t + 1));

			error = readnewxrefsection(xref, stm, i0, i1, w0, w1, w2);
			if (error)
			{
				fz_dropstream(stm);
				fz_dropobj(trailer);
				return fz_rethrow(error, "cannot read xref stream section");
			}
		}
	}
	else
	{
		error = readnewxrefsection(xref, stm, 0, size, w0, w1, w2);
		if (error)
		{
			fz_dropstream(stm);
			fz_dropobj(trailer);
			return fz_rethrow(error, "cannot read xref stream");
		}
	}

	fz_dropstream(stm);

	*trailerp = trailer;

	return fz_okay;
}
/*
 * Load a page from its page dictionary.
 *
 * Computes the page box as MediaBox (defaulting to 612x792 when empty)
 * intersected with CropBox when one is present, reads Rotate, loads
 * links and annotations from Annots, keeps a reference to Resources and
 * loads the content stream. The transparency flag is set when the
 * resources use blending.
 *
 * On success *pagep receives the page and fz_okay is returned. On any
 * failure the partially built page is freed and an error is returned.
 */
fz_error
pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *dict)
{
	fz_error error;
	pdf_page *page;
	fz_obj *obj;
	fz_bbox bbox;

	pdf_logpage("load page {\n");

	// TODO: move this to a more appropriate place
	/* Ensure that we have a store for resource objects */
	if (!xref->store)
		xref->store = pdf_newstore();

	page = fz_malloc(sizeof(pdf_page));
	page->resources = nil;
	page->contents = nil;
	page->transparency = 0;
	page->list = nil;
	page->text = nil;
	page->links = nil;
	page->annots = nil;

	obj = fz_dictgets(dict, "MediaBox");
	bbox = fz_roundrect(pdf_torect(obj));
	if (fz_isemptyrect(pdf_torect(obj)))
	{
		fz_warn("cannot find page bounds, guessing page bounds.");
		bbox.x0 = 0;
		bbox.y0 = 0;
		bbox.x1 = 612;
		bbox.y1 = 792;
	}

	obj = fz_dictgets(dict, "CropBox");
	if (fz_isarray(obj))
	{
		fz_bbox cropbox = fz_roundrect(pdf_torect(obj));
		bbox = fz_intersectbbox(bbox, cropbox);
	}

	/* normalise so that (x0,y0) is the lower corner whatever order the file used */
	page->mediabox.x0 = MIN(bbox.x0, bbox.x1);
	page->mediabox.y0 = MIN(bbox.y0, bbox.y1);
	page->mediabox.x1 = MAX(bbox.x0, bbox.x1);
	page->mediabox.y1 = MAX(bbox.y0, bbox.y1);

	if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1)
	{
		/* release the page before bailing out; it used to leak on this path */
		pdf_freepage(page);
		return fz_throw("invalid page size");
	}

	page->rotate = fz_toint(fz_dictgets(dict, "Rotate"));

	pdf_logpage("bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1);
	pdf_logpage("rotate %d\n", page->rotate);

	obj = fz_dictgets(dict, "Annots");
	if (obj)
	{
		pdf_loadlinks(&page->links, xref, obj);
		pdf_loadannots(&page->annots, xref, obj);
	}

	page->resources = fz_dictgets(dict, "Resources");
	if (page->resources)
		fz_keepobj(page->resources);

	obj = fz_dictgets(dict, "Contents");
	error = pdf_loadpagecontents(&page->contents, xref, obj);
	if (error)
	{
		pdf_freepage(page);
		return fz_rethrow(error, "cannot load page contents (%d %d R)", fz_tonum(obj), fz_togen(obj));
	}

	if (page->resources && pdf_resourcesuseblending(page->resources))
		page->transparency = 1;

	pdf_logpage("} %p\n", page);

	*pagep = page;
	return fz_okay;
}
/*
 * Load a tiling pattern from its dictionary, using the resource store
 * as a cache.
 *
 * A store hit returns the cached pattern with an extra reference.
 * Otherwise a new pattern is created, registered in the store before
 * its contents are read, filled in from PaintType, XStep, YStep, BBox,
 * Matrix and Resources, and its content stream is loaded. If the stream
 * cannot be loaded the store entry is removed again and the pattern is
 * dropped.
 */
fz_error
pdf_loadpattern(pdf_pattern **patp, pdf_xref *xref, fz_obj *dict)
{
	fz_error error;
	pdf_pattern *pat;
	fz_obj *obj;

	pat = pdf_finditem(xref->store, pdf_droppattern, dict);
	*patp = pat;
	if (pat)
	{
		pdf_keeppattern(*patp);
		return fz_okay;
	}

	pdf_logrsrc("load pattern (%d %d R) {\n", fz_tonum(dict), fz_togen(dict));

	pat = fz_malloc(sizeof(pdf_pattern));
	pat->refs = 1;
	pat->resources = nil;
	pat->contents = nil;

	/* Store pattern now, to avoid possible recursion if objects refer back to this one */
	pdf_storeitem(xref->store, pdf_keeppattern, pdf_droppattern, dict, pat);

	/* PaintType 2 is an uncoloured pattern, i.e. used as a mask */
	pat->ismask = fz_toint(fz_dictgets(dict, "PaintType")) == 2;
	pat->xstep = fz_toreal(fz_dictgets(dict, "XStep"));
	pat->ystep = fz_toreal(fz_dictgets(dict, "YStep"));

	pdf_logrsrc("mask %d\n", pat->ismask);
	pdf_logrsrc("xstep %g\n", pat->xstep);
	pdf_logrsrc("ystep %g\n", pat->ystep);

	pat->bbox = pdf_torect(fz_dictgets(dict, "BBox"));

	pdf_logrsrc("bbox [%g %g %g %g]\n",
		pat->bbox.x0, pat->bbox.y0,
		pat->bbox.x1, pat->bbox.y1);

	obj = fz_dictgets(dict, "Matrix");
	if (!obj)
		pat->matrix = fz_identity;
	else
		pat->matrix = pdf_tomatrix(obj);

	pdf_logrsrc("matrix [%g %g %g %g %g %g]\n",
		pat->matrix.a, pat->matrix.b,
		pat->matrix.c, pat->matrix.d,
		pat->matrix.e, pat->matrix.f);

	pat->resources = fz_dictgets(dict, "Resources");
	if (pat->resources)
		fz_keepobj(pat->resources);

	error = pdf_loadstream(&pat->contents, xref, fz_tonum(dict), fz_togen(dict));
	if (error)
	{
		pdf_removeitem(xref->store, pdf_droppattern, dict);
		pdf_droppattern(pat);
		return fz_rethrow(error, "cannot load pattern stream (%d %d R)", fz_tonum(dict), fz_togen(dict));
	}

	pdf_logrsrc("}\n");

	*patp = pat;
	return fz_okay;
}
/*
 * Skip over one object body while repairing a file, collecting what the
 * repair pass needs from it.
 *
 * Called right after "num gen obj" has been lexed. If the object is a
 * dictionary, its /Length is noted, and for /Type /XRef dictionaries the
 * Encrypt and ID entries replace *encrypt / *id. If a stream follows,
 * *stmofsp is set to where the stream data starts; the declared length
 * is trusted when an 'endstream' token is found right after it,
 * otherwise the data is scanned byte by byte for "endstream" and
 * *stmlenp receives the measured length. *stmlenp stays -1 when no
 * correction was needed.
 */
static fz_error
fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
	fz_error error;
	int tok;
	int declared;
	int toklen;
	int nread;
	int trusted;
	int ch;

	*stmofsp = 0;
	*stmlenp = -1;

	declared = 0;

	error = pdf_lex(&tok, file, buf, cap, &toklen);
	if (error)
		return fz_rethrow(error, "cannot parse object");

	if (tok == PDF_TODICT)
	{
		fz_obj *dict, *obj;

		/* Send nil xref so we don't try to resolve references */
		error = pdf_parsedict(&dict, nil, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");

		obj = fz_dictgets(dict, "Type");
		if (fz_isname(obj) && strcmp(fz_toname(obj), "XRef") == 0)
		{
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (*encrypt)
					fz_dropobj(*encrypt);
				*encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (*id)
					fz_dropobj(*id);
				*id = fz_keepobj(obj);
			}
		}

		obj = fz_dictgets(dict, "Length");
		if (fz_isint(obj))
			declared = fz_toint(obj);

		fz_dropobj(dict);
	}

	/* skip tokens until something ends the object or starts a stream */
	while (!(tok == PDF_TSTREAM || tok == PDF_TENDOBJ || tok == PDF_TERROR || tok == PDF_TEOF))
	{
		error = pdf_lex(&tok, file, buf, cap, &toklen);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj or stream token");
	}

	if (tok != PDF_TSTREAM)
		return fz_okay;

	/* swallow the end-of-line after the 'stream' keyword (one byte, or CR LF) */
	ch = fz_readbyte(file);
	if (ch == '\r')
	{
		ch = fz_peekbyte(file);
		if (ch == '\n')
			fz_readbyte(file);
	}

	*stmofsp = fz_tell(file);
	if (*stmofsp < 0)
		return fz_throw("cannot seek in file");

	/* first try the declared length: it is right if 'endstream' follows it */
	trusted = 0;
	if (declared > 0)
	{
		fz_seek(file, *stmofsp + declared, 0);
		error = pdf_lex(&tok, file, buf, cap, &toklen);
		if (error)
			fz_catch(error, "cannot find endstream token, falling back to scanning");
		if (tok == PDF_TENDSTREAM)
			trusted = 1;
		else
			fz_seek(file, *stmofsp, 0);
	}

	if (!trusted)
	{
		/* slide a 9-byte window over the data until it reads "endstream" */
		nread = fz_read(file, (unsigned char *) buf, 9);
		if (nread < 0)
			return fz_rethrow(nread, "cannot read from file");

		while (memcmp(buf, "endstream", 9) != 0)
		{
			ch = fz_readbyte(file);
			if (ch == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = ch;
		}

		*stmlenp = fz_tell(file) - *stmofsp - 9;
	}

	error = pdf_lex(&tok, file, buf, cap, &toklen);
	if (error)
		return fz_rethrow(error, "cannot scan for endobj token");
	if (tok != PDF_TENDOBJ)
		fz_warn("object missing 'endobj' token");

	return fz_okay;
}
/*
 * Load a form XObject from its dictionary, using the resource store as
 * a cache.
 *
 * A store hit returns the cached form with an extra reference.
 * Otherwise a new form is created, registered in the store before its
 * contents are read, filled in from BBox, Matrix, the Group attributes
 * (I, K and S == Transparency) and Resources, and its content stream is
 * loaded. If the stream cannot be loaded the store entry is removed
 * again and the form is dropped.
 */
fz_error
pdf_loadxobject(pdf_xobject **formp, pdf_xref *xref, fz_obj *dict)
{
	fz_error error;
	pdf_xobject *form;
	fz_obj *obj;

	if ((*formp = pdf_finditem(xref->store, PDF_KXOBJECT, dict)))
	{
		pdf_keepxobject(*formp);
		return fz_okay;
	}

	form = fz_malloc(sizeof(pdf_xobject));
	form->refs = 1;
	form->resources = nil;
	form->contents = nil;

	/* Store item immediately, to avoid possible recursion if objects refer back to this one */
	pdf_storeitem(xref->store, PDF_KXOBJECT, dict, form);

	pdf_logrsrc("load xobject (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), form);

	obj = fz_dictgets(dict, "BBox");
	form->bbox = pdf_torect(obj);

	pdf_logrsrc("bbox [%g %g %g %g]\n",
		form->bbox.x0, form->bbox.y0,
		form->bbox.x1, form->bbox.y1);

	obj = fz_dictgets(dict, "Matrix");
	if (obj)
		form->matrix = pdf_tomatrix(obj);
	else
		form->matrix = fz_identity();

	pdf_logrsrc("matrix [%g %g %g %g %g %g]\n",
		form->matrix.a, form->matrix.b,
		form->matrix.c, form->matrix.d,
		form->matrix.e, form->matrix.f);

	form->isolated = 0;
	form->knockout = 0;
	form->transparency = 0;

	obj = fz_dictgets(dict, "Group");
	if (obj)
	{
		fz_obj *attrs = obj;

		form->isolated = fz_tobool(fz_dictgets(attrs, "I"));
		form->knockout = fz_tobool(fz_dictgets(attrs, "K"));

		obj = fz_dictgets(attrs, "S");
		if (fz_isname(obj) && !strcmp(fz_toname(obj), "Transparency"))
			form->transparency = 1;
	}

	pdf_logrsrc("isolated %d\n", form->isolated);
	pdf_logrsrc("knockout %d\n", form->knockout);
	pdf_logrsrc("transparency %d\n", form->transparency);

	form->resources = fz_dictgets(dict, "Resources");
	if (form->resources)
		fz_keepobj(form->resources);

	error = pdf_loadstream(&form->contents, xref, fz_tonum(dict), fz_togen(dict));
	if (error)
	{
		pdf_removeitem(xref->store, PDF_KXOBJECT, dict);
		pdf_dropxobject(form);
		return fz_rethrow(error, "cannot load xobject content stream (%d %d R)", fz_tonum(dict), fz_togen(dict));
	}

	/* a pointer difference is not an int: cast so the argument matches %d */
	pdf_logrsrc("stream %d bytes\n", (int)(form->contents->wp - form->contents->rp));

	pdf_logrsrc("}\n");

	*formp = form;
	return fz_okay;
}
/*
 * Open a PDF in the browser plugin instance.
 *
 * Creates the renderer, loads (or repairs) and decrypts the xref,
 * records every page's object and displayed size (CropBox, falling back
 * to MediaBox, with width and height swapped for 90/270 degree
 * rotations), resolves Root and Info, derives the document title from
 * the Info dictionary or the file name, and resets the vertical scroll
 * bar to the first page.
 */
void
pdfmoz_open(pdfmoz_t *moz, char *filename)
{
	SCROLLINFO si;
	fz_error error;
	fz_obj *obj;
	fz_irect bbox;
	page_t *pg;
	char *sep;
	int rot;
	int i;

	moz->error[0] = '\0';

	error = fz_newrenderer(&moz->rast, pdf_devicergb, 0, 1024 * 512);
	if (error)
		pdfmoz_error(moz, error);

	/*
	 * Open PDF and load xref table
	 */

	moz->filename = filename;

	moz->xref = pdf_newxref();
	error = pdf_loadxref(moz->xref, filename);
	if (error)
	{
		error = pdf_repairxref(moz->xref, filename);
		if (error)
			pdfmoz_error(moz, error);
	}

	/*
	 * Handle encrypted PDF files
	 */

	error = pdf_decryptxref(moz->xref);
	if (error)
		pdfmoz_error(moz, error);

	if (pdf_needspassword(moz->xref))
		pdfmoz_warn(moz, "PDF file is encrypted and needs a password.");

	/*
	 * Record the object and on-screen size of every page
	 */

	moz->pagecount = pdf_getpagecount(moz->xref);
	moz->pages = fz_malloc(sizeof(page_t) * moz->pagecount);

	for (i = 0; i < moz->pagecount; i++)
	{
		pg = &moz->pages[i];

		pg->obj = fz_keepobj(pdf_getpageobject(moz->xref, i));
		pg->page = nil;
		pg->image = nil;

		obj = fz_dictgets(pg->obj, "CropBox");
		if (!obj)
			obj = fz_dictgets(pg->obj, "MediaBox");
		bbox = fz_roundrect(pdf_torect(obj));
		pg->w = bbox.x1 - bbox.x0;
		pg->h = bbox.y1 - bbox.y0;

		/* an odd number of quarter turns swaps width and height */
		rot = fz_toint(fz_dictgets(pg->obj, "Rotate"));
		if ((rot / 90) % 2)
		{
			int t = pg->w;
			pg->w = pg->h;
			pg->h = t;
		}

		pg->px = 1 + PAD;
	}

	/*
	 * Load meta information
	 * TODO: move this into mupdf library
	 */

	moz->xref->root = fz_resolveindirect(fz_dictgets(moz->xref->trailer, "Root"));
	if (moz->xref->root)
		fz_keepobj(moz->xref->root);
	else
		pdfmoz_error(moz, fz_throw("syntaxerror: missing Root object"));

	moz->xref->info = fz_resolveindirect(fz_dictgets(moz->xref->trailer, "Info"));
	if (moz->xref->info)
		fz_keepobj(moz->xref->info);

	/* default title: file name with any directory part stripped */
	moz->doctitle = filename;
	sep = strrchr(moz->doctitle, '\\');
	if (sep)
		moz->doctitle = sep + 1;
	sep = strrchr(moz->doctitle, '/');
	if (sep)
		moz->doctitle = sep + 1;

	if (moz->xref->info)
	{
		obj = fz_dictgets(moz->xref->info, "Title");
		if (obj)
			moz->doctitle = pdf_toutf8(obj);
	}

	/*
	 * Start at first page
	 */

	si.cbSize = sizeof(si);
	si.fMask = SIF_POS | SIF_RANGE; // XXX | SIF_PAGE;
	si.nPos = 0;
	si.nMin = 0;
	si.nMax = 1;
	si.nPage = 1;
	SetScrollInfo(moz->hwnd, SB_VERT, &si, TRUE);

	moz->scrollpage = 0;
	moz->scrollyofs = 0;

	InvalidateRect(moz->hwnd, NULL, FALSE);
}
/*
 * Open `filename` in the viewer: load the xref from a file descriptor,
 * authenticate, load the outline, derive the document title (Info/Title
 * if present, otherwise the file name without its directory), load the
 * page tree and reset the rotation.
 *
 * Failures are reported through pdfapp_error(). A document that needs a
 * password other than the empty one cannot be opened, because this build
 * has no password prompt; that case warns once and exits.
 *
 * The disabled (#if 0) copies of the pre-rename API code were removed;
 * they were never compiled.
 */
void
pdfapp_open(pdfapp_t *app, char *filename)
{
	fz_error error;
	fz_stream *file;
	char *password = "";
	fz_obj *obj;
	fz_obj *info;
	int fd;

	fd = open(filename, O_BINARY | O_RDONLY, 0666);
	if (fd < 0)
	{
		/* do not go on to wrap an invalid descriptor in a stream */
		fprintf(stderr, "error, file %s does not exist\n", filename);
		pdfapp_error(app, fz_throw("cannot open file '%s'", filename));
		return;
	}

	/*
	 * Open PDF and load xref table
	 */

	app->filename = filename;

	file = fz_open_fd(fd);
	error = pdf_open_xref_with_stream(&app->xref, file, NULL);
	if (error)
		pdfapp_error(app, fz_rethrow(error, "cannot open document '%s'", filename));
	fz_close(file);

	/*
	 * Handle encrypted PDF files
	 */

	if (pdf_needs_password(app->xref))
	{
		int okay = pdf_authenticate_password(app->xref, password);
		if (!okay)
		{
			/*
			 * There is no way to ask for another password here, so
			 * retrying the same one (as the old loop did) would never
			 * terminate. Report once and give up.
			 */
			pdfapp_warn(app, "Invalid password.");
			exit(1);
		}
	}

	/*
	 * Load meta information
	 * TODO: move this into mupdf library
	 */

	app->outline = pdf_load_outline(app->xref);

	app->doctitle = filename;
	if (strrchr(app->doctitle, '\\'))
		app->doctitle = strrchr(app->doctitle, '\\') + 1;
	if (strrchr(app->doctitle, '/'))
		app->doctitle = strrchr(app->doctitle, '/') + 1;

	info = fz_dict_gets(app->xref->trailer, "Info");
	if (info)
	{
		obj = fz_dict_gets(info, "Title");
		if (obj)
			app->doctitle = pdf_to_utf8(obj);
	}

	/*
	 * Start at first page
	 */

	error = pdf_load_page_tree(app->xref);
	if (error)
		pdfapp_error(app, fz_rethrow(error, "cannot load page tree"));

	app->pagecount = pdf_count_pages(app->xref);

	app->rotate = 0;
}
/*
 * Create a document object for `filename`.
 *
 * Allocates the document, creates the renderer, loads (or repairs) and
 * decrypts the xref, resolves Root and Info, loads the outline, copies
 * the file name, picks up Info/Title as the document title and reads
 * the page count. A document that needs a password is returned with
 * `locked` set.
 *
 * Returns the new document, NULL if the allocation fails, or whatever
 * pdfdoc_error() returns on any other failure. Every failure now returns
 * immediately: two paths used to call pdfdoc_error() and then keep
 * working on the document.
 */
Epdf_Document* epdf_document_new(const char* filename)
{
	Epdf_Document* doc;
	fz_error error;
	fz_obj* obj;

	doc = (Epdf_Document*)malloc(sizeof(Epdf_Document));
	if(!doc)
		return NULL;

	doc->xref = NULL;
	doc->outline = NULL;
	doc->rast = NULL;
	/* give these a defined value too: they are only assigned late (or never) below */
	doc->filename = NULL;
	doc->doctitle = NULL;
	doc->pagecount = 0;
	doc->zoom = 1.0;
	doc->rotate = 0;
	doc->locked = false;

	error = fz_newrenderer(&doc->rast, pdf_devicergb, 0, 1024 * 512);
	if(error)
		return pdfdoc_error(doc);

	// Open PDF and load xref table

	doc->xref = pdf_newxref();
	if(!doc->xref)
		return pdfdoc_error(doc);

	error = pdf_loadxref(doc->xref, (char*)filename);
	if(error)
	{
		fz_catch(error, "trying to repair");
		fprintf(stderr, "There was a problem with file \"%s\".\nIt may be corrupted or generated by faulty software.\nTrying to repair the file.\n", filename);
		error = pdf_repairxref(doc->xref, (char*)filename);
		if(error)
			return pdfdoc_error(doc);
	}

	error = pdf_decryptxref(doc->xref);
	if(error)
		return pdfdoc_error(doc);

	// Handle encrypted PDF files

	if(pdf_needspassword(doc->xref))
		doc->locked = true;

	/*
	 * Load meta information
	 * TODO: move this into mupdf library
	 */

	obj = fz_dictgets(doc->xref->trailer, (char*)"Root");
	doc->xref->root = fz_resolveindirect(obj);
	if(!doc->xref->root)
	{
		fz_throw("syntaxerror: missing Root object");
		return pdfdoc_error(doc);
	}
	fz_keepobj(doc->xref->root);

	obj = fz_dictgets(doc->xref->trailer, "Info");
	doc->xref->info = fz_resolveindirect(obj);
	if(!doc->xref->info)
		fprintf(stderr, "Could not load PDF meta information.\n");
	if(doc->xref->info)
		fz_keepobj(doc->xref->info);

	doc->outline = pdf_loadoutline(doc->xref);

	doc->filename = strdup(filename);
	if(doc->xref->info)
	{
		obj = fz_dictgets(doc->xref->info, "Title");
		if(obj)
			doc->doctitle = pdf_toutf8(obj);
	}

	/*
	 * Start at first page
	 */

	doc->pagecount = pdf_getpagecount(doc->xref);

	/* keep the zoom factor within the supported range */
	if(doc->zoom < 0.1)
		doc->zoom = 0.1;
	if(doc->zoom > 3.0)
		doc->zoom = 3.0;

	return doc;
}
/*
 * Walk one node of the page tree, collecting leaf pages into
 * xref->pagerefs / xref->pageobjs.
 *
 * A node with a Kids array and an integer Count is treated as an
 * interior node: its Resources, MediaBox, CropBox and Rotate override
 * the inherited values in `info` (passed by value, so siblings are not
 * affected) and each kid is visited in turn. Anything else is treated as
 * a page: missing inheritable entries are written into its dictionary
 * and the page is appended, growing the arrays by one if more pages turn
 * up than were allocated for.
 *
 * Interior nodes are tagged with a temporary ".seen" entry while their
 * kids are being visited so that a cyclic tree cannot recurse forever.
 */
void
pdf_loadpagetreenode(pdf_xref *xref, fz_obj *node, struct info info)
{
	fz_obj *dict, *kids, *count;
	fz_obj *obj, *tmp;
	int i, n;

	/* prevent infinite recursion */
	if (fz_dictgets(node, ".seen"))
		return;

	kids = fz_dictgets(node, "Kids");
	count = fz_dictgets(node, "Count");

	if (!(fz_isarray(kids) && fz_isint(count)))
	{
		/* leaf: a page */
		dict = fz_resolveindirect(node);

		if (info.resources && !fz_dictgets(dict, "Resources"))
			fz_dictputs(dict, "Resources", info.resources);
		if (info.mediabox && !fz_dictgets(dict, "MediaBox"))
			fz_dictputs(dict, "MediaBox", info.mediabox);
		if (info.cropbox && !fz_dictgets(dict, "CropBox"))
			fz_dictputs(dict, "CropBox", info.cropbox);
		if (info.rotate && !fz_dictgets(dict, "Rotate"))
			fz_dictputs(dict, "Rotate", info.rotate);

		if (xref->pagelen == xref->pagecap)
		{
			fz_warn("found more pages than expected");
			xref->pagecap ++;
			xref->pagerefs = fz_realloc(xref->pagerefs, sizeof(fz_obj*) * xref->pagecap);
			xref->pageobjs = fz_realloc(xref->pageobjs, sizeof(fz_obj*) * xref->pagecap);
		}

		xref->pagerefs[xref->pagelen] = fz_keepobj(node);
		xref->pageobjs[xref->pagelen] = fz_keepobj(dict);
		xref->pagelen ++;
		return;
	}

	/* interior node: update the inherited attributes for this subtree */
	obj = fz_dictgets(node, "Resources");
	if (obj)
		info.resources = obj;
	obj = fz_dictgets(node, "MediaBox");
	if (obj)
		info.mediabox = obj;
	obj = fz_dictgets(node, "CropBox");
	if (obj)
		info.cropbox = obj;
	obj = fz_dictgets(node, "Rotate");
	if (obj)
		info.rotate = obj;

	/* mark the node as being visited for the duration of the recursion */
	tmp = fz_newnull();
	fz_dictputs(node, ".seen", tmp);
	fz_dropobj(tmp);

	n = fz_arraylen(kids);
	for (i = 0; i < n; i++)
		pdf_loadpagetreenode(xref, fz_arrayget(kids, i), info);

	fz_dictdels(node, ".seen");
}