/*
 * Collect the link annotations found in an /Annots array.
 *
 * Only entries whose /Subtype is the name "Link" are considered; each one
 * is handed to pdf_loadlink() and, if a link comes back, it is pushed onto
 * the front of the result list (so the list is in reverse array order).
 *
 * cp     receives the comment list; this revision never creates comments,
 *        so it is always set to nil.
 * lp     receives the head of the link list (nil if no link was loaded).
 * xref   passed through to pdf_loadlink().
 * annots the annotation array to scan.
 */
void
pdf_loadannots(pdf_comment **cp, pdf_link **lp, pdf_xref *xref, fz_obj *annots)
{
	pdf_comment *comments = nil;
	pdf_link *head = nil;
	int idx;

	pdf_logpage("load annotations {\n");

	for (idx = 0; idx < fz_arraylen(annots); idx++)
	{
		fz_obj *annot = fz_arrayget(annots, idx);
		fz_obj *kind = fz_dictgets(annot, "Subtype");
		pdf_link *loaded;

		/* anything that is not a Link annotation is skipped */
		if (!fz_isname(kind) || strcmp(fz_toname(kind), "Link") != 0)
			continue;

		loaded = pdf_loadlink(xref, annot);
		if (loaded)
		{
			loaded->next = head;
			head = loaded;
		}
	}

	pdf_logpage("}\n");

	*cp = comments;
	*lp = head;
}
/*
 * Load the document outline starting at xref->root /Outlines /First.
 *
 * nodep  receives the first outline node, or nil when the document has no
 *        /Outlines dictionary or that dictionary has no /First entry.
 *        It is not written when an error is returned.
 * xref   document whose root dictionary is consulted.
 *
 * Returns fz_okay on success, otherwise the rethrown error from
 * loadoutline().
 */
fz_error
pdf_loadoutline(pdf_outline **nodep, pdf_xref *xref)
{
	fz_error err;
	pdf_outline *tree = nil;
	fz_obj *outlines;
	fz_obj *head = nil;

	pdf_logpage("load outlines {\n");

	outlines = fz_dictgets(xref->root, "Outlines");
	if (outlines)
		head = fz_dictgets(outlines, "First");

	if (head)
	{
		err = loadoutline(&tree, xref, head);
		if (err)
			return fz_rethrow(err, "cannot load outline");
	}

	pdf_logpage("}\n");

	*nodep = tree;
	return fz_okay;
}
/*
 * Build one outline node from an outline item dictionary, then recurse
 * into its /First (children) and /Next (following siblings) entries.
 *
 * nodep  receives the new node on success only.
 * xref   used to resolve /First and /Next and to load the link.
 * dict   the outline item dictionary.
 *
 * Returns nil on success, or the first error encountered.
 *
 * NOTE(review): every error return leaves the freshly allocated node (and
 * anything already attached to it) unreleased; no release routine is
 * visible from here -- confirm and fix at the caller or with a drop helper.
 */
static fz_error *
loadoutline(pdf_outline **nodep, pdf_xref *xref, fz_obj *dict)
{
	fz_error *err;
	pdf_outline *entry;
	fz_obj *val;

	entry = fz_malloc(sizeof *entry);
	entry->title = "<unknown>";	/* placeholder kept when there is no /Title */
	entry->link = nil;
	entry->child = nil;
	entry->next = nil;

	pdf_logpage("load outline {\n");

	val = fz_dictgets(dict, "Title");
	if (val)
	{
		err = pdf_toutf8(&entry->title, val);
		if (err)
			return err;
		pdf_logpage("title %s\n", entry->title);
	}

	/* the item itself doubles as the link dictionary */
	if (fz_dictgets(dict, "Dest") || fz_dictgets(dict, "A"))
	{
		err = pdf_loadlink(&entry->link, xref, dict);
		if (err)
			return err;
	}

	val = fz_dictgets(dict, "First");
	if (val)
	{
		err = pdf_resolve(&val, xref);
		if (err)
			return err;
		err = loadoutline(&entry->child, xref, val);
		fz_dropobj(val);	/* drop the resolved object before checking the result */
		if (err)
			return err;
	}

	pdf_logpage("}\n");

	val = fz_dictgets(dict, "Next");
	if (val)
	{
		err = pdf_resolve(&val, xref);
		if (err)
			return err;
		err = loadoutline(&entry->next, xref, val);
		fz_dropobj(val);
		if (err)
			return err;
	}

	*nodep = entry;
	return nil;
}
/*
 * Load the annotations of a page from its /Annots array.
 *
 * Each array entry is resolved through the xref. Entries whose /Subtype
 * compares equal to "Link" are loaded with pdf_loadlink() and pushed onto
 * the front of the link list; every other entry is passed to
 * loadcomment().
 *
 * cp     receives the comment list on success.
 * lp     receives the link list on success (reverse array order).
 * xref   used to resolve and load the annotations.
 * annots the annotation array.
 *
 * Returns nil on success. On failure the links collected so far are
 * dropped and the error is returned; *cp and *lp are left untouched.
 *
 * NOTE(review): comments accumulated before a failure are not released
 * here; no release routine for them is visible from this block.
 */
fz_error *
pdf_loadannots(pdf_comment **cp, pdf_link **lp, pdf_xref *xref, fz_obj *annots)
{
	fz_error *error;
	pdf_comment *comment;
	pdf_link *link;
	fz_obj *subtype;
	fz_obj *obj;
	int i;

	comment = nil;
	link = nil;

	pdf_logpage("load annotations {\n");

	for (i = 0; i < fz_arraylen(annots); i++)
	{
		obj = fz_arrayget(annots, i);
		error = pdf_resolve(&obj, xref);
		if (error)
			goto cleanup;

		subtype = fz_dictgets(obj, "Subtype");
		if (!strcmp(fz_toname(subtype), "Link"))
		{
			pdf_link *temp = nil;

			error = pdf_loadlink(&temp, xref, obj);
			fz_dropobj(obj);
			if (error)
				goto cleanup;

			if (temp)
			{
				temp->next = link;
				link = temp;
			}
		}
		else
		{
			error = loadcomment(&comment, xref, obj);
			fz_dropobj(obj);
			if (error)
				goto cleanup;
		}
	}

	pdf_logpage("}\n");

	*cp = comment;
	*lp = link;
	return nil;

cleanup:
	/* a failure may happen before any link was loaded; never drop nil */
	if (link)
		pdf_droplink(link);
	return error;
}
/*
 * Load the annotations of a page from its /Annots array.
 *
 * Entries whose /Subtype is the name "Link" are loaded with
 * pdf_loadlink() and pushed onto the front of the link list; all other
 * entries are handed to loadcomment().
 *
 * cp     receives the comment list on success.
 * lp     receives the link list on success (reverse array order).
 * xref   passed through to the loaders.
 * annots the annotation array.
 *
 * Returns fz_okay on success. On failure any links collected so far are
 * dropped, the error is rethrown, and *cp / *lp are not written.
 */
fz_error
pdf_loadannots(pdf_comment **cp, pdf_link **lp, pdf_xref *xref, fz_obj *annots)
{
	fz_error err;
	pdf_comment *comments = nil;
	pdf_link *links = nil;
	int idx;

	pdf_logpage("load annotations {\n");

	for (idx = 0; idx < fz_arraylen(annots); idx++)
	{
		fz_obj *annot = fz_arrayget(annots, idx);
		fz_obj *kind = fz_dictgets(annot, "Subtype");
		pdf_link *loaded = nil;

		if (!fz_isname(kind) || strcmp(fz_toname(kind), "Link") != 0)
		{
			/* not a link: treat it as a comment annotation */
			err = loadcomment(&comments, xref, annot);
			if (err)
			{
				if (links)
					pdf_droplink(links);
				return fz_rethrow(err, "cannot load annotation comment");
			}
			continue;
		}

		err = pdf_loadlink(&loaded, xref, annot);
		if (err)
		{
			if (links)
				pdf_droplink(links);
			return fz_rethrow(err, "cannot load annotation link");
		}

		if (loaded)
		{
			loaded->next = links;
			links = loaded;
		}
	}

	pdf_logpage("}\n");

	*cp = comments;
	*lp = links;
	return fz_okay;
}
/*
 * Build one outline node from an outline item dictionary, then recurse
 * into its /First (children) and /Next (following siblings) entries.
 *
 * nodep  receives the new node on success only.
 * xref   passed to pdf_loadlink() and to the recursive calls.
 * dict   the outline item dictionary.
 *
 * Returns fz_okay on success, otherwise a rethrown error.
 *
 * NOTE(review): the error returns do not release the freshly allocated
 * node or anything already attached to it; no release routine is visible
 * from here -- confirm whether that is handled elsewhere.
 */
static fz_error
loadoutline(pdf_outline **nodep, pdf_xref *xref, fz_obj *dict)
{
	fz_error err;
	pdf_outline *entry;
	fz_obj *val;

	entry = fz_malloc(sizeof *entry);
	entry->title = nil;
	entry->link = nil;
	entry->child = nil;
	entry->next = nil;

	pdf_logpage("load outline {\n");

	val = fz_dictgets(dict, "Title");
	if (val)
	{
		err = pdf_toutf8(&entry->title, val);
		if (err)
			return fz_rethrow(err, "cannot convert Title to UTF-8");
		pdf_logpage("title %s\n", entry->title);
	}

	/* the item itself doubles as the link dictionary */
	if (fz_dictgets(dict, "Dest") || fz_dictgets(dict, "A"))
	{
		err = pdf_loadlink(&entry->link, xref, dict);
		if (err)
			return fz_rethrow(err, "cannot load link");
	}

	val = fz_dictgets(dict, "First");
	if (val)
	{
		err = loadoutline(&entry->child, xref, val);
		if (err)
			return fz_rethrow(err, "cannot load outline");
	}

	pdf_logpage("}\n");

	val = fz_dictgets(dict, "Next");
	if (val)
	{
		err = loadoutline(&entry->next, xref, val);
		if (err)
			return fz_rethrow(err, "cannot load outline");
	}

	*nodep = entry;
	return fz_okay;
}
/*
 * Recursively build an outline node from an outline item dictionary.
 *
 * Reads /Title (converted with pdf_toutf8), /Count, an optional link
 * (when /Dest or /A is present), then recurses into /First for the child
 * list and /Next for the following sibling.
 *
 * xref  passed to pdf_loadlink() and to the recursive calls.
 * dict  the outline item dictionary.
 *
 * Returns the new node, or nil when dict is a null object.
 */
static pdf_outline *
pdf_loadoutlineimp(pdf_xref *xref, fz_obj *dict)
{
	pdf_outline *item;
	fz_obj *val;

	if (fz_isnull(dict))
		return nil;

	item = fz_malloc(sizeof *item);
	item->title = nil;
	item->link = nil;
	item->child = nil;
	item->next = nil;
	item->count = 0;

	pdf_logpage("load outline {\n");

	val = fz_dictgets(dict, "Title");
	if (val)
	{
		item->title = pdf_toutf8(val);
		pdf_logpage("title %s\n", item->title);
	}

	val = fz_dictgets(dict, "Count");
	if (val)
		item->count = fz_toint(val);

	/* the item itself doubles as the link dictionary */
	if (fz_dictgets(dict, "Dest") || fz_dictgets(dict, "A"))
		item->link = pdf_loadlink(xref, dict);

	val = fz_dictgets(dict, "First");
	if (val)
		item->child = pdf_loadoutlineimp(xref, val);

	/* the closing brace is logged before siblings are visited */
	pdf_logpage("}\n");

	val = fz_dictgets(dict, "Next");
	if (val)
		item->next = pdf_loadoutlineimp(xref, val);

	return item;
}
/*
 * Load the document outline starting at xref->root /Outlines /First.
 *
 * Both the /Outlines dictionary and its /First entry are resolved through
 * the xref; each resolved object is dropped once it is no longer needed.
 *
 * nodep  receives the first outline node, or nil when there is no
 *        /Outlines or no /First entry. Not written on error.
 * xref   document to read from.
 *
 * Returns nil on success, or the error from pdf_resolve() / loadoutline().
 */
fz_error *
pdf_loadoutline(pdf_outline **nodep, pdf_xref *xref)
{
	fz_error *err;
	pdf_outline *tree = nil;
	fz_obj *outlines;
	fz_obj *head;

	pdf_logpage("load outlines {\n");

	outlines = fz_dictgets(xref->root, "Outlines");
	if (outlines)
	{
		err = pdf_resolve(&outlines, xref);
		if (err)
			return err;

		head = fz_dictgets(outlines, "First");
		if (!head)
		{
			fz_dropobj(outlines);
		}
		else
		{
			/* resolve /First before letting go of its parent dictionary */
			err = pdf_resolve(&head, xref);
			fz_dropobj(outlines);
			if (err)
				return err;

			err = loadoutline(&tree, xref, head);
			fz_dropobj(head);
			if (err)
				return err;
		}
	}

	pdf_logpage("}\n");

	*nodep = tree;
	return nil;
}
/*
 * Load the document outline.
 *
 * Follows trailer /Root -> /Outlines -> /First and, when that chain
 * exists, builds the outline tree with pdf_loadoutlineimp().
 *
 * Returns the first outline node, or nil when the document has no
 * /Outlines dictionary or it has no /First entry.
 */
pdf_outline *
pdf_loadoutline(pdf_xref *xref)
{
	pdf_outline *tree = nil;
	fz_obj *catalog;
	fz_obj *outlines;
	fz_obj *head;

	pdf_logpage("load outlines {\n");

	catalog = fz_dictgets(xref->trailer, "Root");
	outlines = fz_dictgets(catalog, "Outlines");
	if (outlines)
	{
		head = fz_dictgets(outlines, "First");
		if (head)
			tree = pdf_loadoutlineimp(xref, head);
	}

	pdf_logpage("}\n");

	return tree;
}
/*
 * Release a page tree: drop every non-nil page reference and page object
 * in the first pages->count slots, then free both arrays and the tree
 * structure itself.
 */
void
pdf_droppagetree(pdf_pagetree *pages)
{
	int slot;

	pdf_logpage("drop pagetree %p\n", pages);

	for (slot = 0; slot < pages->count; slot++)
	{
		fz_obj **refp = &pages->pref[slot];
		fz_obj **objp = &pages->pobj[slot];

		if (*refp)
			fz_dropobj(*refp);
		if (*objp)
			fz_dropobj(*objp);
	}

	fz_free(pages->pref);
	fz_free(pages->pobj);
	fz_free(pages);
}
/*
 * Release a page and everything attached to it.
 *
 * Each member (resources, contents, display list, text, links,
 * annotations) is released only when it is set, so a partially
 * initialised page may be passed in. The page structure itself is freed
 * last.
 */
void
pdf_freepage(pdf_page *page)
{
	pdf_logpage("drop page %p\n", page);

	if (page->resources)
	{
		fz_dropobj(page->resources);
	}
	if (page->contents)
	{
		fz_dropbuffer(page->contents);
	}
	if (page->list)
	{
		fz_freedisplaylist(page->list);
	}
	if (page->text)
	{
		fz_freetextspan(page->text);
	}
	if (page->links)
	{
		pdf_freelink(page->links);
	}
	if (page->annots)
	{
		pdf_freeannot(page->annots);
	}

	fz_free(page);
}
/*
 * Concatenate the content streams listed in an array into one buffer.
 *
 * Every stream is loaded with pdf_loadstream(), appended to the result
 * and followed by a single space character.
 *
 * bigbufp receives the combined buffer on success.
 * xref    used to load each stream.
 * list    array of stream references.
 *
 * Returns fz_okay on success. If a stream cannot be loaded, the combined
 * buffer is dropped and the error is rethrown with the part number and
 * the object number/generation of the failing stream.
 */
static fz_error
pdf_loadpagecontentsarray(fz_buffer **bigbufp, pdf_xref *xref, fz_obj *list)
{
	fz_error err;
	fz_buffer *joined;
	fz_buffer *part;
	int idx;

	pdf_logpage("multiple content streams: %d\n", fz_arraylen(list));

	/* TODO: openstream, read, close into big buffer at once */

	joined = fz_newbuffer(32 * 1024);

	for (idx = 0; idx < fz_arraylen(list); idx++)
	{
		fz_obj *stm = fz_arrayget(list, idx);

		err = pdf_loadstream(&part, xref, fz_tonum(stm), fz_togen(stm));
		if (err)
		{
			fz_dropbuffer(joined);
			return fz_rethrow(err, "cannot load content stream part %d/%d (%d %d R)", idx + 1, fz_arraylen(list), fz_tonum(stm), fz_togen(stm));
		}

		/* make room for this part plus the separating space */
		if (joined->cap < joined->len + part->len + 1)
			fz_resizebuffer(joined, joined->len + part->len + 1);

		memcpy(joined->data + joined->len, part->data, part->len);
		joined->len += part->len;
		joined->data[joined->len] = ' ';
		joined->len += 1;

		fz_dropbuffer(part);
	}

	*bigbufp = joined;
	return fz_okay;
}
/*
 * Create a link from an annotation or outline dictionary.
 *
 * /Rect supplies the bounding box (fz_emptyrect when absent). The
 * destination comes from /Dest or, when /A is present, from the action:
 *   GoTo   -> PDF_LGOTO   with the resolved /D destination
 *   URI    -> PDF_LURI    with the /URI entry
 *   Launch -> PDF_LLAUNCH with the /F entry
 * Any other action discards the destination, including one already taken
 * from /Dest.
 *
 * Returns the new link, or nil when no destination was found.
 */
pdf_link *
pdf_loadlink(pdf_xref *xref, fz_obj *dict)
{
	fz_obj *target = nil;
	fz_obj *act;
	fz_obj *val;
	fz_rect area;
	pdf_linkkind kind;

	pdf_logpage("load link {\n");

	val = fz_dictgets(dict, "Rect");
	if (!val)
	{
		area = fz_emptyrect;
	}
	else
	{
		area = pdf_torect(val);
		pdf_logpage("rect [%g %g %g %g]\n", area.x0, area.y0, area.x1, area.y1);
	}

	val = fz_dictgets(dict, "Dest");
	if (val)
	{
		kind = PDF_LGOTO;
		target = resolvedest(xref, val);
		pdf_logpage("dest (%d %d R)\n", fz_tonum(target), fz_togen(target));
	}

	act = fz_dictgets(dict, "A");
	if (act)
	{
		val = fz_dictgets(act, "S");

		if (fz_isname(val) && strcmp(fz_toname(val), "GoTo") == 0)
		{
			kind = PDF_LGOTO;
			target = resolvedest(xref, fz_dictgets(act, "D"));
			pdf_logpage("action goto (%d %d R)\n", fz_tonum(target), fz_togen(target));
		}
		else if (fz_isname(val) && strcmp(fz_toname(val), "URI") == 0)
		{
			kind = PDF_LURI;
			target = fz_dictgets(act, "URI");
			pdf_logpage("action uri %s\n", fz_tostrbuf(target));
		}
		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=726 */
		else if (fz_isname(val) && strcmp(fz_toname(val), "Launch") == 0)
		{
			kind = PDF_LLAUNCH;
			target = fz_dictgets(act, "F");
			pdf_logpage("action launch (%d %d R)\n", fz_tonum(target), fz_togen(target));
		}
		else
		{
			pdf_logpage("unhandled link action, ignoring link\n");
			target = nil;
		}
	}

	pdf_logpage("}\n");

	/* kind is only meaningful when a destination was found */
	if (!target)
		return nil;

	return pdf_newlink(kind, area, target);
}
/*
 * Create a link from an annotation or outline dictionary.
 *
 * /Rect supplies the bounding box (fz_emptyrect when absent). /Dest and
 * /A are resolved through the xref. A GoTo action yields PDF_LGOTO with
 * the resolved /D destination, a URI action yields PDF_LURI with the /URI
 * entry; any other action is only logged.
 *
 * linkp  receives the new link; written only when a destination exists.
 * xref   used to resolve indirect objects.
 * dict   the dictionary describing the link.
 *
 * Returns nil on success (including the "no destination" case), or the
 * error from pdf_resolve() / pdf_newlink().
 *
 * NOTE(review): a link defined only by /Dest keeps kind PDF_LUNKNOWN
 * because kind is reset after /Dest is processed -- confirm this is
 * intended.
 */
fz_error *
pdf_loadlink(pdf_link **linkp, pdf_xref *xref, fz_obj *dict)
{
	fz_error *err;
	pdf_link *created = nil;
	fz_obj *target = nil;
	fz_obj *act;
	fz_obj *val;
	fz_rect area;
	pdf_linkkind kind;

	pdf_logpage("load link {\n");

	val = fz_dictgets(dict, "Rect");
	if (!val)
	{
		area = fz_emptyrect;
	}
	else
	{
		area = pdf_torect(val);
		pdf_logpage("rect [%g %g %g %g]\n", area.x0, area.y0, area.x1, area.y1);
	}

	val = fz_dictgets(dict, "Dest");
	if (val)
	{
		err = pdf_resolve(&val, xref);
		if (err)
			return err;
		target = resolvedest(xref, val);
		pdf_logpage("dest %d %d R\n", fz_tonum(target), fz_togen(target));
		fz_dropobj(val);
	}

	kind = PDF_LUNKNOWN;

	act = fz_dictgets(dict, "A");
	if (act)
	{
		err = pdf_resolve(&act, xref);
		if (err)
			return err;

		val = fz_dictgets(act, "S");
		if (strcmp(fz_toname(val), "GoTo") == 0)
		{
			kind = PDF_LGOTO;
			target = resolvedest(xref, fz_dictgets(act, "D"));
			pdf_logpage("action goto %d %d R\n", fz_tonum(target), fz_togen(target));
		}
		else if (strcmp(fz_toname(val), "URI") == 0)
		{
			kind = PDF_LURI;
			target = fz_dictgets(act, "URI");
			pdf_logpage("action uri %s\n", fz_tostrbuf(target));
		}
		else
		{
			pdf_logpage("action ... ?\n");
		}

		fz_dropobj(act);
	}

	pdf_logpage("}\n");

	if (!target)
		return nil;

	err = pdf_newlink(&created, area, target, kind);
	if (err)
		return err;

	*linkp = created;
	return nil;
}
/*
 * Load the page tree of a document into a flat pdf_pagetree.
 *
 * The catalog (trailer /Root) and its /Pages node are loaded, two arrays
 * of /Count entries are allocated, and loadpagetree() walks the tree to
 * fill them with kept references to every page.
 *
 * pp    receives the new page tree on success only.
 * xref  document to read from.
 *
 * Returns fz_okay on success. On failure every object acquired here is
 * released, including page objects already stored in the arrays, and the
 * error is returned.
 */
fz_error *
pdf_loadpagetree(pdf_pagetree **pp, pdf_xref *xref)
{
	fz_error *error;
	struct stuff inherit;
	pdf_pagetree *p = nil;
	fz_obj *catalog = nil;
	fz_obj *pages = nil;
	fz_obj *trailer;
	fz_obj *ref;
	fz_obj *treeref;
	int count;
	int pagenum = 1;
	int i;

	inherit.resources = nil;
	inherit.mediabox = nil;
	inherit.cropbox = nil;
	inherit.rotate = nil;

	trailer = xref->trailer;

	ref = fz_dictgets(trailer, "Root");
	error = pdf_loadindirect(&catalog, xref, ref);
	if (error)
	{
		error = fz_rethrow(error, "cannot load Root object");
		goto cleanup;
	}

	treeref = fz_dictgets(catalog, "Pages");
	error = pdf_loadindirect(&pages, xref, treeref);
	if (error)
	{
		error = fz_rethrow(error, "cannot load Pages object");
		goto cleanup;
	}

	ref = fz_dictgets(pages, "Count");
	count = fz_toint(ref);
	if (count < 0)
	{
		/* a negative count would turn into a huge allocation size below */
		error = fz_throw("invalid page count %d", count);
		goto cleanup;
	}

	p = fz_malloc(sizeof(pdf_pagetree));
	if (!p)
	{
		error = fz_throw("outofmem: page tree struct");
		goto cleanup;
	}

	pdf_logpage("load pagetree %d pages, %d %d (%p) {\n",
		count, treeref->u.r.oid, treeref->u.r.gid, p);

	p->pref = nil;
	p->pobj = nil;
	p->count = count;
	p->cursor = 0;

	p->pref = fz_malloc(sizeof(fz_obj*) * count);
	if (!p->pref)
	{
		error = fz_throw("outofmem: page tree reference array");
		goto cleanup;
	}

	p->pobj = fz_malloc(sizeof(fz_obj*) * count);
	if (!p->pobj)
	{
		error = fz_throw("outofmem: page tree object array");
		goto cleanup;
	}

	/*
	 * Start with empty slots: the tree may hold fewer pages than /Count
	 * claims, and pdf_droppagetree() inspects all count slots.
	 */
	for (i = 0; i < count; i++)
	{
		p->pref[i] = nil;
		p->pobj[i] = nil;
	}

	error = loadpagetree(xref, p, inherit, pages, treeref, &pagenum);
	if (error)
	{
		error = fz_rethrow(error, "cannot load pagetree");
		goto cleanup;
	}

	fz_dropobj(pages);
	fz_dropobj(catalog);

	pdf_logpage("}\n");

	*pp = p;
	return fz_okay;

cleanup:
	if (pages)
		fz_dropobj(pages);
	if (catalog)
		fz_dropobj(catalog);
	if (p)
	{
		/* release the pages that were stored before the failure */
		if (p->pref && p->pobj)
		{
			for (i = 0; i < p->cursor && i < p->count; i++)
			{
				if (p->pref[i])
					fz_dropobj(p->pref[i]);
				if (p->pobj[i])
					fz_dropobj(p->pobj[i]);
			}
		}
		fz_free(p->pref);
		fz_free(p->pobj);
		fz_free(p);
	}
	return error; /* already rethrown */
}
/*
 * Walk one node of the page tree.
 *
 * A /Page node receives any inherited Resources, MediaBox, CropBox and
 * Rotate entries it does not define itself, and is then stored (reference
 * and object, both kept) in the next free slot of the page tree. A /Pages
 * node updates the inherited attributes and recurses into each of its
 * /Kids. Any other /Type is an error.
 *
 * xref     used to resolve /Kids and load each kid.
 * pages    page tree being filled; pages->cursor is the next free slot.
 * inherit  inheritable attributes, passed by value so that changes made
 *          by a /Pages node only reach its own subtree.
 * obj      the node dictionary.
 * ref      the reference obj was loaded from.
 * pagenum  running page number, used for logging only.
 *
 * Returns fz_okay on success, otherwise a thrown or rethrown error.
 */
static fz_error *
loadpagetree(pdf_xref *xref, pdf_pagetree *pages,
	struct stuff inherit, fz_obj *obj, fz_obj *ref, int *pagenum)
{
	fz_error *error;
	fz_obj *type;
	fz_obj *kids;
	fz_obj *kref, *kobj;
	fz_obj *inh;
	int i;

	type = fz_dictgets(obj, "Type");

	if (strcmp(fz_toname(type), "Page") == 0)
	{
		/*
		 * The arrays were sized from the root /Count; a tree with more
		 * leaves than that must not write past their end.
		 */
		if (pages->cursor >= pages->count)
			return fz_throw("page tree contains more pages than its Count (%d)", pages->count);

		pdf_logpage("page %d, %d %d\n", *pagenum, ref->u.r.oid, ref->u.r.gid);
		(*pagenum)++;

		if (inherit.resources && !fz_dictgets(obj, "Resources"))
		{
			pdf_logpage("inherit resources (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "Resources", inherit.resources);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree resources");
		}

		if (inherit.mediabox && !fz_dictgets(obj, "MediaBox"))
		{
			pdf_logpage("inherit mediabox (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "MediaBox", inherit.mediabox);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree mediabox");
		}

		if (inherit.cropbox && !fz_dictgets(obj, "CropBox"))
		{
			pdf_logpage("inherit cropbox (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "CropBox", inherit.cropbox);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree cropbox");
		}

		if (inherit.rotate && !fz_dictgets(obj, "Rotate"))
		{
			pdf_logpage("inherit rotate (%d)\n", pages->cursor);
			error = fz_dictputs(obj, "Rotate", inherit.rotate);
			if (error)
				return fz_rethrow(error, "cannot inherit page tree rotate");
		}

		pages->pref[pages->cursor] = fz_keepobj(ref);
		pages->pobj[pages->cursor] = fz_keepobj(obj);
		pages->cursor ++;
	}

	else if (strcmp(fz_toname(type), "Pages") == 0)
	{
		inh = fz_dictgets(obj, "Resources");
		if (inh) inherit.resources = inh;

		inh = fz_dictgets(obj, "MediaBox");
		if (inh) inherit.mediabox = inh;

		inh = fz_dictgets(obj, "CropBox");
		if (inh) inherit.cropbox = inh;

		inh = fz_dictgets(obj, "Rotate");
		if (inh) inherit.rotate = inh;

		kids = fz_dictgets(obj, "Kids");
		error = pdf_resolve(&kids, xref);
		if (error)
			return fz_rethrow(error, "cannot resolve /Kids");

		pdf_logpage("subtree %d pages, %d %d {\n",
			fz_arraylen(kids), ref->u.r.oid, ref->u.r.gid);

		for (i = 0; i < fz_arraylen(kids); i++)
		{
			kref = fz_arrayget(kids, i);

			error = pdf_loadindirect(&kobj, xref, kref);
			if (error)
			{
				fz_dropobj(kids);
				return fz_rethrow(error, "cannot load kid");
			}

			if (kobj == obj)
			{
				/* prevent infinite recursion possible in maliciously crafted PDFs */
				fz_dropobj(kobj);	/* the loaded kid must be released here too */
				fz_dropobj(kids);
				return fz_throw("corrupted pdf file");
			}

			error = loadpagetree(xref, pages, inherit, kobj, kref, pagenum);
			fz_dropobj(kobj);
			if (error)
			{
				fz_dropobj(kids);
				return fz_rethrow(error, "cannot load subtree");
			}
		}

		fz_dropobj(kids);

		pdf_logpage("}\n");
	}

	else
		return fz_throw("pagetree node has unexpected type %s", fz_toname(type));

	return fz_okay;
}
/*
 * Create a link from an annotation or outline dictionary.
 *
 * /Rect supplies the bounding box (fz_emptyrect when absent). The
 * destination comes from /Dest or, when /A is present, from the action:
 * GoTo yields PDF_LGOTO with the resolved /D destination, URI yields
 * PDF_LURI with the /URI entry. Any other action discards the
 * destination, including one already taken from /Dest.
 *
 * linkp  receives the new link; written only when a destination exists.
 * xref   passed to resolvedest().
 * dict   the dictionary describing the link.
 *
 * Returns fz_okay on success (including the "no destination" case), or
 * the rethrown error from pdf_newlink().
 */
fz_error
pdf_loadlink(pdf_link **linkp, pdf_xref *xref, fz_obj *dict)
{
	fz_error err;
	pdf_link *created = nil;
	fz_obj *target = nil;
	fz_obj *act;
	fz_obj *val;
	fz_rect area;
	pdf_linkkind kind;

	pdf_logpage("load link {\n");

	val = fz_dictgets(dict, "Rect");
	if (!val)
	{
		area = fz_emptyrect;
	}
	else
	{
		area = pdf_torect(val);
		pdf_logpage("rect [%g %g %g %g]\n", area.x0, area.y0, area.x1, area.y1);
	}

	val = fz_dictgets(dict, "Dest");
	if (val)
	{
		kind = PDF_LGOTO;
		target = resolvedest(xref, val);
		pdf_logpage("dest (%d %d R)\n", fz_tonum(target), fz_togen(target));
	}

	act = fz_dictgets(dict, "A");
	if (act)
	{
		val = fz_dictgets(act, "S");

		if (fz_isname(val) && strcmp(fz_toname(val), "GoTo") == 0)
		{
			kind = PDF_LGOTO;
			target = resolvedest(xref, fz_dictgets(act, "D"));
			pdf_logpage("action goto (%d %d R)\n", fz_tonum(target), fz_togen(target));
		}
		else if (fz_isname(val) && strcmp(fz_toname(val), "URI") == 0)
		{
			kind = PDF_LURI;
			target = fz_dictgets(act, "URI");
			pdf_logpage("action uri %s\n", fz_tostrbuf(target));
		}
		else
		{
			pdf_logpage("unhandled link action, ignoring link\n");
			target = nil;
		}
	}

	pdf_logpage("}\n");

	/* kind is only meaningful when a destination was found */
	if (!target)
		return fz_okay;

	err = pdf_newlink(&created, kind, area, target);
	if (err)
		return fz_rethrow(err, "cannot create link");

	*linkp = created;
	return fz_okay;
}
/*
 * Load a page from its page dictionary.
 *
 * The page bounds are the rounded /MediaBox (612x792 when it is empty),
 * intersected with /CropBox when that is an array, and normalised so that
 * x0 <= x1 and y0 <= y1. Links and annotations are loaded from /Annots,
 * /Resources is kept, and /Contents is loaded into page->contents.
 *
 * pagep  receives the new page on success only.
 * xref   document the page belongs to; a resource store is created on it
 *        if it does not have one yet.
 * dict   the page dictionary.
 *
 * Returns fz_okay on success. Fails with "invalid page size" when the
 * resulting box is less than one unit wide or high, or with a rethrown
 * error when the contents cannot be loaded; the page is released on both
 * failure paths.
 */
fz_error
pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *dict)
{
	fz_error error;
	pdf_page *page;
	fz_obj *obj;
	fz_bbox bbox;

	pdf_logpage("load page {\n");

	// TODO: move this to a more appropriate place
	/* Ensure that we have a store for resource objects */
	if (!xref->store)
		xref->store = pdf_newstore();

	page = fz_malloc(sizeof(pdf_page));
	page->resources = nil;
	page->contents = nil;
	page->transparency = 0;
	page->list = nil;
	page->text = nil;
	page->links = nil;
	page->annots = nil;

	obj = fz_dictgets(dict, "MediaBox");
	bbox = fz_roundrect(pdf_torect(obj));
	if (fz_isemptyrect(pdf_torect(obj)))
	{
		fz_warn("cannot find page bounds, guessing page bounds.");
		bbox.x0 = 0;
		bbox.y0 = 0;
		bbox.x1 = 612;
		bbox.y1 = 792;
	}

	obj = fz_dictgets(dict, "CropBox");
	if (fz_isarray(obj))
	{
		fz_bbox cropbox = fz_roundrect(pdf_torect(obj));
		bbox = fz_intersectbbox(bbox, cropbox);
	}

	/* normalise so that (x0,y0) is the lower corner */
	page->mediabox.x0 = MIN(bbox.x0, bbox.x1);
	page->mediabox.y0 = MIN(bbox.y0, bbox.y1);
	page->mediabox.x1 = MAX(bbox.x0, bbox.x1);
	page->mediabox.y1 = MAX(bbox.y0, bbox.y1);

	if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1)
	{
		/* release the page allocated above instead of leaking it */
		pdf_freepage(page);
		return fz_throw("invalid page size");
	}

	page->rotate = fz_toint(fz_dictgets(dict, "Rotate"));

	pdf_logpage("bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1);
	pdf_logpage("rotate %d\n", page->rotate);

	obj = fz_dictgets(dict, "Annots");
	if (obj)
	{
		pdf_loadlinks(&page->links, xref, obj);
		pdf_loadannots(&page->annots, xref, obj);
	}

	page->resources = fz_dictgets(dict, "Resources");
	if (page->resources)
		fz_keepobj(page->resources);

	obj = fz_dictgets(dict, "Contents");
	error = pdf_loadpagecontents(&page->contents, xref, obj);
	if (error)
	{
		pdf_freepage(page);
		return fz_rethrow(error, "cannot load page contents (%d %d R)", fz_tonum(obj), fz_togen(obj));
	}

	if (page->resources && pdf_resourcesuseblending(page->resources))
		page->transparency = 1;

	pdf_logpage("} %p\n", page);

	*pagep = page;
	return fz_okay;
}