/*
	Decide whether a resource dictionary uses blending.

	The values of the "ExtGState", "Pattern" and "XObject" entries of rdb
	are tested, in that order, with the matching *_uses_blending helper.
	The scan stops at the first value that reports true.

	doc: Document owning rdb; supplies the context used for exceptions.

	rdb: Resource dictionary to examine. NULL yields 0.

	Returns 1 when a tested value reports blending, otherwise 0. The
	answer is stored on rdb with pdf_set_obj_memo and reused by later
	calls. When rdb is already marked (a cyclic resource dependency) 0 is
	returned and nothing is stored. Exceptions raised during the scan are
	rethrown after the mark has been removed.
*/
static int
pdf_resources_use_blending(pdf_document *doc, pdf_obj *rdb)
{
	fz_context *ctx = doc->ctx;
	pdf_obj *sub;
	int idx, count;
	int found = 0;
	int useBM = 0;

	if (!rdb)
		return 0;

	/* A remembered answer saves rescanning. */
	if (pdf_obj_memo(rdb, &useBM))
		return useBM;

	/* Already being scanned further up the call chain: break the cycle. */
	if (pdf_mark_obj(rdb))
		return 0;

	fz_try(ctx)
	{
		sub = pdf_dict_gets(rdb, "ExtGState");
		count = pdf_dict_len(sub);
		for (idx = 0; !found && idx < count; idx++)
		{
			if (pdf_extgstate_uses_blending(doc, pdf_dict_get_val(sub, idx)))
				found = 1;
		}

		if (!found)
		{
			sub = pdf_dict_gets(rdb, "Pattern");
			count = pdf_dict_len(sub);
			for (idx = 0; !found && idx < count; idx++)
			{
				if (pdf_pattern_uses_blending(doc, pdf_dict_get_val(sub, idx)))
					found = 1;
			}
		}

		if (!found)
		{
			sub = pdf_dict_gets(rdb, "XObject");
			count = pdf_dict_len(sub);
			for (idx = 0; !found && idx < count; idx++)
			{
				if (pdf_xobject_uses_blending(doc, pdf_dict_get_val(sub, idx)))
					found = 1;
			}
		}

		if (found)
			useBM = 1;
	}
	fz_always(ctx)
	{
		pdf_unmark_obj(rdb);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	pdf_set_obj_memo(rdb, useBM);
	return useBM;
}
/*
	Create a new dictionary holding the same entries as obj.

	obj is resolved first. If the result is not a dictionary an
	FZ_ERROR_GENERIC exception is thrown. Otherwise a dictionary is
	created in the same document and every key/value pair of obj is put
	into it in index order.

	Returns the new dictionary. If storing an entry throws, the partly
	filled copy is dropped and the exception is rethrown.
*/
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_document *owner;
	pdf_obj *copy;
	int idx, count;

	RESOLVE(obj);
	if (!OBJ_IS_DICT(obj))
		fz_throw(ctx, FZ_ERROR_GENERIC, "not a dict (%s)", pdf_objkindstr(obj));

	owner = DICT(obj)->doc;
	count = pdf_dict_len(ctx, obj);
	copy = pdf_new_dict(ctx, owner, count);

	fz_try(ctx)
	{
		idx = 0;
		while (idx < count)
		{
			pdf_dict_put(ctx, copy, pdf_dict_get_key(ctx, obj, idx), pdf_dict_get_val(ctx, obj, idx));
			idx++;
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, copy);
		fz_rethrow(ctx);
	}

	return copy;
}
/*
	Build doc->portfolio from the Root/Collection/Schema dictionary of
	the trailer.

	One pdf_portfolio record is created per schema entry. It keeps the
	entry's key and value and records:
	- sort: the integer value of O.
	- editable: the boolean value of E, 0 when E is absent.
	- visible: the boolean value of V, 1 when V is absent.
	- name: the N object.
	- type: derived from Subtype, PDF_SCHEMA_UNKNOWN when unrecognised.

	Records are linked into doc->portfolio in ascending sort order. A
	record goes after existing records with an equal sort value.
*/
static void
load_portfolio(fz_context *ctx, pdf_document *doc)
{
	/* Subtype name -> schema entry type. */
	static const struct
	{
		const char *subtype;
		int type;
	} schema_types[] = {
		{ "S", PDF_SCHEMA_TEXT },
		{ "D", PDF_SCHEMA_DATE },
		{ "N", PDF_SCHEMA_NUMBER },
		{ "F", PDF_SCHEMA_FILENAME },
		{ "Desc", PDF_SCHEMA_DESC },
		{ "ModDate", PDF_SCHEMA_MODDATE },
		{ "CreationDate", PDF_SCHEMA_CREATIONDATE },
		{ "Size", PDF_SCHEMA_SIZE },
	};
	pdf_obj *schema;
	int idx, count;

	schema = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Collection, PDF_NAME_Schema, NULL);

	count = pdf_dict_len(ctx, schema);
	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *key = pdf_dict_get_key(ctx, schema, idx);
		pdf_obj *val = pdf_dict_get_val(ctx, schema, idx);
		int sort = pdf_to_int(ctx, pdf_dict_get(ctx, val, PDF_NAME_O));
		pdf_obj *editable_obj = pdf_dict_get(ctx, val, PDF_NAME_E);
		int editable = 0;
		pdf_obj *visible_obj;
		int visible = 1;
		char *subtype;
		pdf_obj *name;
		pdf_portfolio *entry;
		pdf_portfolio **link;
		size_t t;

		if (editable_obj)
			editable = pdf_to_bool(ctx, editable_obj);
		visible_obj = pdf_dict_get(ctx, val, PDF_NAME_V);
		if (visible_obj)
			visible = pdf_to_bool(ctx, visible_obj);
		subtype = pdf_to_name(ctx, pdf_dict_get(ctx, val, PDF_NAME_Subtype));
		name = pdf_dict_get(ctx, val, PDF_NAME_N);

		entry = fz_malloc_struct(ctx, pdf_portfolio);
		entry->key = pdf_keep_obj(ctx, key);
		entry->val = pdf_keep_obj(ctx, val);
		entry->sort = sort;
		entry->entry.visible = visible;
		entry->entry.editable = editable;
		entry->entry.name = pdf_keep_obj(ctx, name);

		entry->entry.type = PDF_SCHEMA_UNKNOWN;
		for (t = 0; t < sizeof(schema_types) / sizeof(schema_types[0]); t++)
		{
			if (strcmp(subtype, schema_types[t].subtype) == 0)
			{
				entry->entry.type = schema_types[t].type;
				break;
			}
		}

		/* Walk past every record that sorts at or before the new one. */
		for (link = &doc->portfolio; *link && (*link)->sort <= entry->sort; link = &(*link)->next)
			;
		entry->next = *link;
		*link = entry;
	}
}
/*
	Record the form XObjects found in an XObject resource dictionary.

	A value of dict is recorded when it is a dictionary whose Subtype is
	"Form" and whose Subtype2 is not "PS". Values that are not
	dictionaries produce a warning and are skipped. An object that
	already compares equal (pdf_objcmp) to a recorded entry is not
	recorded again.

	Each new entry is appended to the file-level 'form' array and stores
	the page number, page reference, page object, the XObject itself, the
	S entry of its Group dictionary and its Ref entry.
*/
static void
gatherforms(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int idx;
	int count = pdf_dict_len(dict);

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *candidate = pdf_dict_get_val(dict, idx);
		pdf_obj *groupsubtype;
		pdf_obj *reference;
		struct info *slot;
		int k, known;

		if (!pdf_is_dict(candidate))
		{
			fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(candidate), pdf_to_gen(candidate));
			continue;
		}

		if (strcmp(pdf_to_name(pdf_dict_gets(candidate, "Subtype")), "Form") != 0)
			continue;

		/* PostScript forms are not recorded here. */
		if (strcmp(pdf_to_name(pdf_dict_gets(candidate, "Subtype2")), "PS") == 0)
			continue;

		groupsubtype = pdf_dict_gets(pdf_dict_gets(candidate, "Group"), "S");
		reference = pdf_dict_gets(candidate, "Ref");

		known = 0;
		for (k = 0; k < forms; k++)
		{
			if (!pdf_objcmp(form[k].u.form.obj, candidate))
			{
				known = 1;
				break;
			}
		}
		if (known)
			continue;

		form = fz_resize_array(ctx, form, forms+1, sizeof(struct info));
		slot = &form[forms];
		forms++;

		slot->page = page;
		slot->pageref = pageref;
		slot->pageobj = pageobj;
		slot->u.form.obj = candidate;
		slot->u.form.groupsubtype = groupsubtype;
		slot->u.form.reference = reference;
	}
}
/*
** Merge srcdict into dstdict.
**
** Every key/value pair of srcdict is passed, in index order, to
** wmupdf_dict_merge_keyval() together with dstdict. dictname is not used
** by the merge itself.
*/
static void wmupdf_dict_merge(fz_context *ctx,char *dictname,pdf_obj *dstdict,pdf_obj *srcdict)
{
	int idx = 0;
	int count = pdf_dict_len(srcdict);

	while (idx < count)
	{
		pdf_obj *key = pdf_dict_get_key(srcdict, idx);
		pdf_obj *value = pdf_dict_get_val(srcdict, idx);

		wmupdf_dict_merge_keyval(ctx, dstdict, key, value);
		idx++;
	}
}
/*
	Record the fonts found in a Font resource dictionary.

	Each value of dict that is a dictionary, and that does not already
	compare equal (pdf_objcmp) to a recorded font, is appended to the
	file-level 'font' array. Values that are not dictionaries produce a
	warning and are skipped.

	Each entry stores the page number, page reference, page object, the
	font dictionary, its Subtype and a name object. The name is BaseFont
	when that entry is present and not the null object, otherwise the
	Name entry.
*/
static void
gatherfonts(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int i, n;

	n = pdf_dict_len(dict);
	for (i = 0; i < n; i++)
	{
		pdf_obj *fontdict = NULL;
		pdf_obj *subtype = NULL;
		pdf_obj *name = NULL;
		int k;

		fontdict = pdf_dict_get_val(dict, i);
		if (!pdf_is_dict(fontdict))
		{
			fz_warn(ctx, "not a font dict (%d %d R)", pdf_to_num(fontdict), pdf_to_gen(fontdict));
			continue;
		}

		subtype = pdf_dict_gets(fontdict, "Subtype");

		/*
			Fall back to Name both when BaseFont is missing and when it
			is an explicit null object. Testing only for a NULL pointer
			when storing the result would record the null object and
			lose the fallback.
		*/
		name = pdf_dict_gets(fontdict, "BaseFont");
		if (!name || pdf_is_null(name))
			name = pdf_dict_gets(fontdict, "Name");

		for (k = 0; k < fonts; k++)
			if (!pdf_objcmp(font[k].u.font.obj, fontdict))
				break;

		if (k < fonts)
			continue;

		font = fz_resize_array(ctx, font, fonts+1, sizeof(struct info));
		fonts++;

		font[fonts - 1].page = page;
		font[fonts - 1].pageref = pageref;
		font[fonts - 1].pageobj = pageobj;
		font[fonts - 1].u.font.obj = fontdict;
		font[fonts - 1].u.font.subtype = subtype;
		font[fonts - 1].u.font.name = name;
	}
}
/*
	Create a new dictionary holding the same entries as obj.

	obj is resolved first; a NULL result yields NULL. A resolved object
	that is not a dictionary only produces a warning, and the copy then
	proceeds with whatever length pdf_dict_len reports for it.

	Returns the new dictionary.

	NOTE(review): nothing here drops the new dictionary if storing an
	entry throws - confirm whether that can happen in this revision.
*/
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *copy;
	int idx, count;

	RESOLVE(obj);
	if (!obj)
		return NULL; /* Can't warn :( */

	if (obj->kind != PDF_DICT)
		fz_warn(ctx, "assert: not a dict (%s)", pdf_objkindstr(obj));

	count = pdf_dict_len(obj);
	copy = pdf_new_dict(ctx, count);

	idx = 0;
	while (idx < count)
	{
		fz_dict_put(copy, pdf_dict_get_key(obj, idx), pdf_dict_get_val(obj, idx));
		idx++;
	}

	return copy;
}
/*
	Record the shadings found in a Shading resource dictionary.

	Each value of dict that is a dictionary, and that does not already
	compare equal (pdf_objcmp) to a recorded shading, is appended to the
	file-level 'shading' array. Values that are not dictionaries produce
	a warning and are skipped.

	The stored type is the ShadingType object when it is an integer in
	the range 1..7. Otherwise a warning is issued and NULL is stored; the
	shading is still recorded.
*/
static void
gathershadings(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int idx;
	int count = pdf_dict_len(dict);

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *shade = pdf_dict_get_val(dict, idx);
		pdf_obj *type;
		struct info *slot;
		int k, known;

		if (!pdf_is_dict(shade))
		{
			fz_warn(ctx, "not a shading dict (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade));
			continue;
		}

		type = pdf_dict_gets(shade, "ShadingType");
		if (!(pdf_is_int(type) && pdf_to_int(type) >= 1 && pdf_to_int(type) <= 7))
		{
			fz_warn(ctx, "not a shading type (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade));
			type = NULL;
		}

		known = 0;
		for (k = 0; k < shadings; k++)
		{
			if (!pdf_objcmp(shading[k].u.shading.obj, shade))
			{
				known = 1;
				break;
			}
		}
		if (known)
			continue;

		shading = fz_resize_array(ctx, shading, shadings+1, sizeof(struct info));
		slot = &shading[shadings];
		shadings++;

		slot->page = page;
		slot->pageref = pageref;
		slot->pageobj = pageobj;
		slot->u.shading.obj = shade;
		slot->u.shading.type = type;
	}
}
/*
	Record the PostScript XObjects found in an XObject resource
	dictionary.

	A value of dict is recorded when it is a dictionary whose Subtype is
	"PS", or whose Subtype is "Form" and whose Subtype2 is "PS". Values
	that are not dictionaries produce a warning and are skipped. An
	object that already compares equal (pdf_objcmp) to a recorded entry
	is not recorded again.

	Each new entry is appended to the file-level 'psobj' array with the
	page number, page reference, page object and the XObject itself.
*/
static void
gatherpsobjs(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int idx;
	int count = pdf_dict_len(dict);

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *candidate = pdf_dict_get_val(dict, idx);
		pdf_obj *type;
		pdf_obj *subtype;
		struct info *slot;
		int k, known;

		if (!pdf_is_dict(candidate))
		{
			fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(candidate), pdf_to_gen(candidate));
			continue;
		}

		type = pdf_dict_gets(candidate, "Subtype");
		subtype = pdf_dict_gets(candidate, "Subtype2");

		/* Anything that is not PS must be a Form whose Subtype2 is PS. */
		if (strcmp(pdf_to_name(type), "PS") != 0)
		{
			if (strcmp(pdf_to_name(type), "Form") != 0)
				continue;
			if (strcmp(pdf_to_name(subtype), "PS") != 0)
				continue;
		}

		known = 0;
		for (k = 0; k < psobjs; k++)
		{
			if (!pdf_objcmp(psobj[k].u.form.obj, candidate))
			{
				known = 1;
				break;
			}
		}
		if (known)
			continue;

		psobj = fz_resize_array(ctx, psobj, psobjs+1, sizeof(struct info));
		slot = &psobj[psobjs];
		psobjs++;

		slot->page = page;
		slot->pageref = pageref;
		slot->pageobj = pageobj;
		slot->u.form.obj = candidate;
	}
}
/* Performs the same task as pdf_clean_annot_contents, but with an optional text filter function. text_filter: Function to assess whether a given character should be kept (return 0) or removed (return 1). after_text: Function called after each text object is closed to allow other output to be sent. arg: Opaque value to be passed to callback functions. */ void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, fz_cookie *cookie, pdf_page_contents_process_fn *proc, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg, int sanitize, int ascii) { pdf_obj *ap; int i, n; ap = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP)); if (ap == NULL) return; n = pdf_dict_len(ctx, ap); for (i = 0; i < n; i++) { pdf_obj *v = pdf_dict_get_val(ctx, ap, i); if (v == NULL) continue; pdf_clean_stream_object(ctx, doc, v, NULL, cookie, 1, text_filter, after_text, arg, sanitize, ascii); } }
/*
	Create a new dictionary holding the same entries as obj.

	obj is resolved first. NULL is returned when the result lies below
	PDF_OBJ__LIMIT (no warning can be given in that case) or when it is
	not a dictionary (a warning is given). Otherwise a dictionary is
	created in the same document and every key/value pair of obj is put
	into it in index order.

	Returns the new dictionary, or NULL as described above.

	NOTE(review): if pdf_dict_put throws, the partly filled copy is not
	dropped here.
*/
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_document *doc;
	pdf_obj *dict;
	int i, n;

	RESOLVE(obj);
	if (obj < PDF_OBJ__LIMIT)
		return NULL; /* Can't warn :( */

	/*
		The kind must be checked before DICT(obj) is touched: reading
		the doc field of an object that is not a dictionary goes through
		the wrong structure layout.
	*/
	if (obj->kind != PDF_DICT)
	{
		fz_warn(ctx, "assert: not a dict (%s)", pdf_objkindstr(obj));
		return NULL;
	}

	doc = DICT(obj)->doc;
	n = pdf_dict_len(ctx, obj);
	dict = pdf_new_dict(ctx, doc, n);
	for (i = 0; i < n; i++)
		pdf_dict_put(ctx, dict, pdf_dict_get_key(ctx, obj, i), pdf_dict_get_val(ctx, obj, i));

	return dict;
}
/*
** From MuPDF pdfclean.c
**
** Rebuild the Names/Dests entry of the document root from olddests.
**
** For each entry of olddests the destination array is taken from its "D"
** entry when present, otherwise from the entry itself. The entry is kept
** only when the first element of that array is found in the "Kids" array
** of pages. Kept entries are stored as (string key, value) pairs in a new
** names list, which is installed as Root/Names/Dests/Names.
**
** olddests is dropped before returning.
*/
static void wmupdf_preserve_old_dests(pdf_obj *olddests,fz_context *ctx,pdf_document *xref,
                                      pdf_obj *pages)
{
	pdf_obj *names = pdf_new_dict(ctx, 1);
	pdf_obj *dests = pdf_new_dict(ctx, 1);
	pdf_obj *names_list = pdf_new_array(ctx, 32);
	int count = pdf_dict_len(olddests);
	pdf_obj *root;
	int idx;

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *key = pdf_dict_get_key(olddests, idx);
		pdf_obj *val = pdf_dict_get_val(olddests, idx);
		pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(key), strlen(pdf_to_name(key)));
		pdf_obj *target = pdf_dict_gets(val, "D");

		if (!target)
			target = val;
		target = pdf_array_get(target, 0);

		if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), target))
		{
			pdf_array_push(names_list, key_str);
			pdf_array_push(names_list, val);
		}
		pdf_drop_obj(key_str);
	}

	root = pdf_dict_gets(xref->trailer, "Root");
	pdf_dict_puts(dests, "Names", names_list);
	pdf_dict_puts(names, "Dests", dests);
	pdf_dict_puts(root, "Names", names);

	pdf_drop_obj(names);
	pdf_drop_obj(dests);
	pdf_drop_obj(names_list);
	pdf_drop_obj(olddests);
}
/*
	Rewrite every CharProcs stream of a Type3 font.

	Each value of the font's CharProcs dictionary is run through a buffer
	processor and its stream is replaced with the regenerated content.
	When sanitize is set, a filter processor sits in front of the buffer
	processor and collects the resources in use into a new dictionary.

	obj: The Type3 font dictionary.

	orig_res: Resources to use when the font has no Resources entry of
	its own; the font's own entry takes precedence.

	cookie: Passed to pdf_process_contents.

	sanitize: Non-zero to run the content through the filter processor.

	ascii: Passed to pdf_new_buffer_processor.

	Afterwards the font's Resources entry is replaced by a reference to
	the new dictionary, which also inherits ProcSet from the resources
	that were used. Any exception is rethrown after cleanup.

	NOTE(review): when sanitize is 0 no filter processor populates the
	new dictionary, so Resources ends up holding only ProcSet - confirm
	that this is intended.
*/
static void
pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	pdf_obj *charprocs;
	int i, l;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	fz_try(ctx)
	{
		res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
		if (res)
			orig_res = res;
		/* res was only borrowed above; it must not reach the cleanup drop. */
		res = NULL;

		res = pdf_new_dict(ctx, doc, 1);

		charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
		l = pdf_dict_len(ctx, charprocs);

		for (i = 0; i < l; i++)
		{
			pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
			fz_buffer *buffer = fz_new_buffer(ctx, 1024);

			fz_try(ctx)
			{
				proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
				if (sanitize)
				{
					proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, orig_res, res);
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
					pdf_close_processor(ctx, proc_filter);
				}
				else
				{
					/* No filter processor exists here; write straight to the buffer processor. */
					pdf_process_contents(ctx, proc_buffer, doc, orig_res, val, cookie);
				}
				pdf_close_processor(ctx, proc_buffer);

				pdf_update_stream(ctx, doc, val, buffer, 0);
			}
			fz_always(ctx)
			{
				pdf_drop_processor(ctx, proc_filter);
				pdf_drop_processor(ctx, proc_buffer);
				/*
					Forget the dropped processors. Otherwise a failure
					early in the next iteration would drop the same
					pointers a second time.
				*/
				proc_filter = NULL;
				proc_buffer = NULL;
				fz_drop_buffer(ctx, buffer);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		pdf_dict_put(ctx, res, PDF_NAME(ProcSet), pdf_dict_get(ctx, orig_res, PDF_NAME(ProcSet)));

		ref = pdf_add_object(ctx, doc, res);
		pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/* Performs the same task as pdf_clean_page_contents, but with an optional text filter function. text_filter: Function to assess whether a given character should be kept (return 0) or removed (return 1). after_text: Function called after each text object is closed to allow other output to be sent. arg: Opaque value to be passed to callback functions. */ void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg, int sanitize, int ascii) { pdf_processor *proc_buffer = NULL; pdf_processor *proc_filter = NULL; pdf_obj *new_obj = NULL; pdf_obj *new_ref = NULL; pdf_obj *res = NULL; pdf_obj *obj; pdf_obj *contents; pdf_obj *resources; fz_buffer *buffer; fz_var(new_obj); fz_var(new_ref); fz_var(res); fz_var(proc_buffer); fz_var(proc_filter); buffer = fz_new_buffer(ctx, 1024); fz_try(ctx) { contents = pdf_page_contents(ctx, page); resources = pdf_page_resources(ctx, page); proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii); if (sanitize) { res = pdf_new_dict(ctx, doc, 1); proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg); pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie); pdf_close_processor(ctx, proc_filter); } else { res = pdf_keep_obj(ctx, resources); pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie); } pdf_close_processor(ctx, proc_buffer); /* Deal with page content stream. 
*/ if (pdf_is_array(ctx, contents)) { /* create a new object to replace the array */ new_obj = pdf_new_dict(ctx, doc, 1); new_ref = pdf_add_object(ctx, doc, new_obj); contents = new_ref; pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents); } else { pdf_dict_del(ctx, contents, PDF_NAME(Filter)); pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms)); } pdf_update_stream(ctx, doc, contents, buffer, 0); /* Now deal with resources. The spec allows for Type3 fonts and form * XObjects to omit a resource dictionary and look in the parent. * Avoid that by flattening here as part of the cleaning. This could * conceivably cause changes in rendering, but we don't care. */ /* ExtGState */ obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask)); if (!o) continue; o = pdf_dict_get(ctx, o, PDF_NAME(G)); if (!o) continue; /* Transparency group XObject */ pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii); } } /* Pattern */ obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *pat_res; pdf_obj *pat = pdf_dict_get_val(ctx, obj, i); if (!pat) continue; pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources)); if (pat_res == NULL) pat_res = resources; if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1) pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii); } } /* XObject */ obj = pdf_dict_get(ctx, res, PDF_NAME(XObject)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *xobj_res; pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i); if (!xobj) continue; xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources)); if (xobj_res == NULL) xobj_res = resources; if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, 
xobj, PDF_NAME(Subtype)))) pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii); } } /* Font */ obj = pdf_dict_get(ctx, res, PDF_NAME(Font)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get_val(ctx, obj, i); if (!o) continue; if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype)))) pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii); } } /* ProcSet - no cleaning possible. Inherit this from the old dict. */ obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet)); if (obj) pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj); /* ColorSpace - no cleaning possible. */ /* Properties - no cleaning possible. */ if (proc_fn) (*proc_fn)(ctx, buffer, res, proc_arg); /* Update resource dictionary */ if (sanitize) { pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res); } } fz_always(ctx) { pdf_drop_processor(ctx, proc_filter); pdf_drop_processor(ctx, proc_buffer); fz_drop_buffer(ctx, buffer); pdf_drop_obj(ctx, new_obj); pdf_drop_obj(ctx, new_ref); pdf_drop_obj(ctx, res); } fz_catch(ctx) { fz_rethrow(ctx); } }
/*
	Gather the resources named in a resource dictionary, following
	nested resource dictionaries.

	page: 1-based page number the resources belong to.

	rsrc: Resource dictionary to inspect.

	show: Bit mask selecting which categories to gather (FONTS, XOBJS,
	SHADINGS, PATTERNS).

	For fonts, XObjects and patterns the function also recurses into the
	"Resources" entry of each value, unless that entry compares equal
	(pdf_objcmp) to rsrc itself.

	Throws FZ_ERROR_GENERIC when the page object cannot be resolved.
*/
static void
gatherresourceinfo(int page, pdf_obj *rsrc, int show)
{
	pdf_obj *pageref = pdf_lookup_page_obj(doc, page-1);
	pdf_obj *pageobj = pdf_resolve_indirect(pageref);
	pdf_obj *group;
	pdf_obj *nested;
	int idx, count;

	if (!pageobj)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);

	group = pdf_dict_gets(rsrc, "Font");
	if ((show & FONTS) && group)
	{
		gatherfonts(page, pageref, pageobj, group);
		count = pdf_dict_len(group);
		for (idx = 0; idx < count; idx++)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(group, idx), "Resources");
			if (nested && pdf_objcmp(rsrc, nested))
				gatherresourceinfo(page, nested, show);
		}
	}

	group = pdf_dict_gets(rsrc, "XObject");
	if ((show & XOBJS) && group)
	{
		gatherimages(page, pageref, pageobj, group);
		gatherforms(page, pageref, pageobj, group);
		gatherpsobjs(page, pageref, pageobj, group);
		count = pdf_dict_len(group);
		for (idx = 0; idx < count; idx++)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(group, idx), "Resources");
			if (nested && pdf_objcmp(rsrc, nested))
				gatherresourceinfo(page, nested, show);
		}
	}

	group = pdf_dict_gets(rsrc, "Shading");
	if ((show & SHADINGS) && group)
		gathershadings(page, pageref, pageobj, group);

	group = pdf_dict_gets(rsrc, "Pattern");
	if ((show & PATTERNS) && group)
	{
		gatherpatterns(page, pageref, pageobj, group);
		count = pdf_dict_len(group);
		for (idx = 0; idx < count; idx++)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(group, idx), "Resources");
			if (nested && pdf_objcmp(rsrc, nested))
				gatherresourceinfo(page, nested, show);
		}
	}
}
/*
	Record the patterns found in a Pattern resource dictionary.

	Each value of dict that is a dictionary, and that does not already
	compare equal (pdf_objcmp) to a recorded pattern, is appended to the
	file-level 'pattern' array. Values that are not dictionaries produce
	a warning and are skipped.

	Stored per entry:
	- type: the PatternType object when it is an integer in 1..2,
	  otherwise NULL (with a warning).
	- paint, tiling: only looked up when PatternType is 1. Each is the
	  PaintType (1..2) or TilingType (1..3) object, or NULL with a
	  warning when out of range.
	- shading: the Shading entry, looked up whenever PatternType is not
	  1, which includes a PatternType that was rejected above.
*/
static void
gatherpatterns(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int idx;
	int count = pdf_dict_len(dict);

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *patterndict = pdf_dict_get_val(dict, idx);
		pdf_obj *type;
		pdf_obj *paint = NULL;
		pdf_obj *tiling = NULL;
		pdf_obj *shade_obj = NULL;
		struct info *slot;
		int k, known;

		if (!pdf_is_dict(patterndict))
		{
			fz_warn(ctx, "not a pattern dict (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
			continue;
		}

		type = pdf_dict_gets(patterndict, "PatternType");
		if (!(pdf_is_int(type) && pdf_to_int(type) >= 1 && pdf_to_int(type) <= 2))
		{
			fz_warn(ctx, "not a pattern type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
			type = NULL;
		}

		if (pdf_to_int(type) != 1)
		{
			shade_obj = pdf_dict_gets(patterndict, "Shading");
		}
		else
		{
			paint = pdf_dict_gets(patterndict, "PaintType");
			if (!(pdf_is_int(paint) && pdf_to_int(paint) >= 1 && pdf_to_int(paint) <= 2))
			{
				fz_warn(ctx, "not a pattern paint type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
				paint = NULL;
			}

			tiling = pdf_dict_gets(patterndict, "TilingType");
			if (!(pdf_is_int(tiling) && pdf_to_int(tiling) >= 1 && pdf_to_int(tiling) <= 3))
			{
				fz_warn(ctx, "not a pattern tiling type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
				tiling = NULL;
			}
		}

		known = 0;
		for (k = 0; k < patterns; k++)
		{
			if (!pdf_objcmp(pattern[k].u.pattern.obj, patterndict))
			{
				known = 1;
				break;
			}
		}
		if (known)
			continue;

		pattern = fz_resize_array(ctx, pattern, patterns+1, sizeof(struct info));
		slot = &pattern[patterns];
		patterns++;

		slot->page = page;
		slot->pageref = pageref;
		slot->pageobj = pageobj;
		slot->u.pattern.obj = patterndict;
		slot->u.pattern.type = type;
		slot->u.pattern.paint = paint;
		slot->u.pattern.tiling = tiling;
		slot->u.pattern.shading = shade_obj;
	}
}
static void retainpages(int argc, char **argv) { pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ oldroot = pdf_dict_gets(xref->trailer, "Root"); pages = pdf_dict_gets(oldroot, "Pages"); olddests = pdf_load_name_tree(xref, "Dests"); root = pdf_new_dict(ctx, 2); pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type")); pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages")); pdf_update_object(xref, pdf_to_num(oldroot), root); pdf_drop_obj(root); /* Create a new kids array with only the pages we want to keep */ parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref); kids = pdf_new_array(ctx, 1); /* Retain pages specified */ while (argc - fz_optind) { int page, spage, epage, pagecount; char *spec, *dash; char *pagelist = argv[fz_optind]; pagecount = pdf_count_pages(xref); spec = fz_strsep(&pagelist, ","); while (spec) { dash = strchr(spec, '-'); if (dash == spec) spage = epage = pagecount; else spage = epage = atoi(spec); if (dash) { if (strlen(dash) > 1) epage = atoi(dash + 1); else epage = pagecount; } if (spage > epage) page = spage, spage = epage, epage = page; spage = fz_clampi(spage, 1, pagecount); epage = fz_clampi(epage, 1, pagecount); for (page = spage; page <= epage; page++) { pdf_obj *pageobj = xref->page_objs[page-1]; pdf_obj *pageref = xref->page_refs[page-1]; pdf_dict_puts(pageobj, "Parent", parent); /* Store page object in new kids array */ pdf_array_push(kids, pageref); } spec = fz_strsep(&pagelist, ","); } fz_optind++; } pdf_drop_obj(parent); /* Update page count and kids array */ countobj = pdf_new_int(ctx, pdf_array_len(kids)); pdf_dict_puts(pages, "Count", countobj); pdf_drop_obj(countobj); pdf_dict_puts(pages, "Kids", kids); pdf_drop_obj(kids); /* Also preserve the (partial) Dests name tree */ if (olddests) { int i; pdf_obj *names = pdf_new_dict(ctx, 1); pdf_obj *dests = pdf_new_dict(ctx, 1); pdf_obj 
*names_list = pdf_new_array(ctx, 32); int len = pdf_dict_len(olddests); for (i = 0; i < len; i++) { pdf_obj *key = pdf_dict_get_key(olddests, i); pdf_obj *val = pdf_dict_get_val(olddests, i); pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(key), strlen(pdf_to_name(key))); pdf_obj *dest = pdf_dict_gets(val, "D"); dest = pdf_array_get(dest ? dest : val, 0); if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), dest)) { pdf_array_push(names_list, key_str); pdf_array_push(names_list, val); } pdf_drop_obj(key_str); } root = pdf_dict_gets(xref->trailer, "Root"); pdf_dict_puts(dests, "Names", names_list); pdf_dict_puts(names, "Dests", dests); pdf_dict_puts(root, "Names", names); pdf_drop_obj(names); pdf_drop_obj(dests); pdf_drop_obj(names_list); pdf_drop_obj(olddests); } }
/*
	Record the image XObjects found in an XObject resource dictionary.

	A value of dict is recorded when it is a dictionary whose Subtype is
	"Image" and it does not already compare equal (pdf_objcmp) to a
	recorded image. Values that are not dictionaries produce a warning
	and are skipped.

	Each new entry is appended to the file-level 'image' array with the
	Width, Height, BitsPerComponent and Filter objects plus colour space
	information:
	- cs: the ColorSpace entry, or its first element when it is an array.
	- altcs: set only when that first element is the name "DeviceN" or
	  "Separation". It is then element 2 of the ColorSpace array, or that
	  element's first item when it is itself an array.
*/
static void
gatherimages(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int idx;
	int count = pdf_dict_len(dict);

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *imagedict = pdf_dict_get_val(dict, idx);
		pdf_obj *width;
		pdf_obj *height;
		pdf_obj *bpc;
		pdf_obj *filter;
		pdf_obj *cs;
		pdf_obj *altcs = NULL;
		struct info *slot;
		int k, known;

		if (!pdf_is_dict(imagedict))
		{
			fz_warn(ctx, "not an image dict (%d %d R)", pdf_to_num(imagedict), pdf_to_gen(imagedict));
			continue;
		}

		if (strcmp(pdf_to_name(pdf_dict_gets(imagedict, "Subtype")), "Image") != 0)
			continue;

		filter = pdf_dict_gets(imagedict, "Filter");

		cs = pdf_dict_gets(imagedict, "ColorSpace");
		if (pdf_is_array(cs))
		{
			pdf_obj *cs_array = cs;

			cs = pdf_array_get(cs_array, 0);
			if (pdf_is_name(cs) && (!strcmp(pdf_to_name(cs), "DeviceN") || !strcmp(pdf_to_name(cs), "Separation")))
			{
				altcs = pdf_array_get(cs_array, 2);
				if (pdf_is_array(altcs))
					altcs = pdf_array_get(altcs, 0);
			}
		}

		width = pdf_dict_gets(imagedict, "Width");
		height = pdf_dict_gets(imagedict, "Height");
		bpc = pdf_dict_gets(imagedict, "BitsPerComponent");

		known = 0;
		for (k = 0; k < images; k++)
		{
			if (!pdf_objcmp(image[k].u.image.obj, imagedict))
			{
				known = 1;
				break;
			}
		}
		if (known)
			continue;

		image = fz_resize_array(ctx, image, images+1, sizeof(struct info));
		slot = &image[images];
		images++;

		slot->page = page;
		slot->pageref = pageref;
		slot->pageobj = pageobj;
		slot->u.image.obj = imagedict;
		slot->u.image.width = width;
		slot->u.image.height = height;
		slot->u.image.bpc = bpc;
		slot->u.image.filter = filter;
		slot->u.image.cs = cs;
		slot->u.image.altcs = altcs;
	}
}
/*
	Regenerate a page's content stream and rebuild its resource
	dictionary.

	The page content is run through a filter processor into a buffer,
	which collects the resources in use into a new dictionary. Streams
	reachable from that dictionary are then cleaned in turn: ExtGState
	soft mask groups, tiling patterns (PatternType 1), form XObjects and
	Type3 fonts. Finally the buffer becomes the page's content stream and
	a reference to the new dictionary becomes the page's Resources.

	When page->contents is an array, a new stream object replaces it.

	proc_fn: Optional callback invoked with the buffer, the new resource
	dictionary and proc_arg before the stream is updated.

	cookie: Passed to the content processing calls.

	On failure the exception is rethrown with the message
	"Failed while cleaning page" after cleanup.

	NOTE(review): when the contents array is replaced, the Contents entry
	of page->me is not updated here - confirm whether that happens
	elsewhere.
*/
void
pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);

		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);

		contents = page->contents;
		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_new_ref(ctx, doc, new_obj);

			/*
				The page needs a reference of its own: new_ref is
				dropped during cleanup below, so storing the bare
				pointer would leave page->contents dangling. The
				previous reference is released first, as is done for
				page->resources below (assumes page->contents is owned
				by the page in the same way).
			*/
			pdf_drop_obj(ctx, page->contents);
			page->contents = pdf_keep_obj(ctx, new_ref);
			contents = new_ref;
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME_Filter);
			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
		}

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME_G);
				if (!o)
					continue;

				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1);
			}
		}

		/* ColorSpace - no cleaning possible */

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0);
			}
		}

		/* Shading - no cleaning possible */

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
					continue;

				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Font);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
				{
					pdf_clean_type3(ctx, doc, o, page->resources, cookie);
				}
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);

		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Swap the page's resources for a reference to the rebuilt dictionary. */
		pdf_drop_obj(ctx, page->resources);
		ref = pdf_new_ref(ctx, doc, res);
		page->resources = pdf_keep_obj(ctx, ref);
		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning page");
	}
}
/*
	Reduce the document to the pages named in argv.

	argv[0] .. argv[argc-1] are comma separated page specifications. Each
	is a page number or a range "A-B":
	- A leading '-' makes the range start at the last page.
	- A trailing '-' makes the range end at the last page.
	- Both ends are clamped to 1..pagecount.
	- A range whose start is greater than its end is retained in
	  descending order.

	The document root is replaced by a dictionary holding only Type,
	Pages and Outlines. The page tree's Kids and Count are rewritten to
	list the retained pages. Entries of the old Dests name tree that
	point to retained pages are preserved under Root/Names/Dests/Names.
	Link annotations whose destination is no longer valid are removed
	from each retained page. The Outlines entry is deleted from the new
	root when strip_outlines returns 0.

	NOTE(review): page_object_nums is released only on the normal path;
	an exception thrown after it is allocated would leak it.
*/
static void
retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A spec starting with '-' begins at the last page. */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* Nothing after the '-' means "to the last page". */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Remember the object number of every retained page. */
	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The destination array is either the D entry or the value itself. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation */
				pdf_array_delete(ctx, annots, j);
				/*
					The array is one shorter now. Shrink the bound and
					revisit index j, so the loop neither skips the
					element that moved down nor runs past the new end.
				*/
				len--;
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
/*
 * Reduce the document to the pages named on the command line.
 *
 * ctx:  context used for every MuPDF call made here.
 * glo:  tool globals; only glo->doc is used.
 * argc: number of entries in argv.
 * argv: page lists. Each entry is a comma-separated list of specs:
 *         "N"    page N
 *         "N-M"  pages N..M (emitted in descending order when N > M)
 *         "N-"   page N up to the last page
 *         "-M"   the last page down to page M
 *         "-"    the last page
 *       Page numbers are clamped to [1, pagecount].
 *
 * The function
 *   1. replaces the catalog with one holding only /Type and /Pages,
 *   2. rebuilds /Pages /Kids and /Count from the retained pages,
 *   3. re-adds a /Names /Dests tree restricted to destinations whose target
 *      page is among the new kids,
 *   4. removes GoTo link annotations for which string_in_names_list()
 *      does not report their /D entry as present in that reduced list.
 */
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	int pagecount;
	int i;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* A trailing dash means "up to the last page". */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		/* The kids array does not change while we filter, so look it up once. */
		pdf_obj *newkids = pdf_dict_get(ctx, pages, PDF_NAME_Kids);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			/* Use the /D entry if val has one, otherwise val itself;
			 * element 0 of that array is compared against the kids. */
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (pdf_array_contains(ctx, newkids, dest))
			{
				const char *keyname = pdf_to_name(ctx, key);
				pdf_obj *key_str = pdf_new_string(ctx, doc, keyname, strlen(keyname));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		/* names_list is still read by the link pruning below, so our
		 * reference to it is only released at the end of the function. */
		pdf_drop_obj(ctx, olddests);
	}

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	pagecount = pdf_count_pages(ctx, doc);
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);
			pdf_obj *p;

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			p = pdf_dict_get(ctx, o, PDF_NAME_A);
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo))
				continue;

			if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
				continue;

			/* FIXME: Should probably look at Next too */

			/* Remove this annotation. The array is now one entry
			 * shorter, so shrink the bound and revisit index j. */
			pdf_array_delete(ctx, annots, j);
			len--;
			j--;
		}
	}

	/* Release our reference to the reduced names list (NULL when the
	 * document had no Dests tree). */
	pdf_drop_obj(ctx, names_list);
}
/*
 * Gather information about the resources in rsrc on behalf of a page.
 *
 * page: 1-based page number (used as page-1 to index the xref page tables).
 * rsrc: resource dictionary to inspect.
 *
 * The /Font, /XObject, /Shading and /Pattern entries are handed to the
 * matching gather* helpers. Every font, xobject and pattern entry that has
 * its own /Resources dictionary is then visited recursively.
 *
 * Throws if the page object for the given page is missing.
 */
static void gatherresourceinfo(int page, pdf_obj *rsrc)
{
	pdf_obj *pageobj = xref->page_objs[page-1];
	pdf_obj *pageref = xref->page_refs[page-1];
	pdf_obj *fonts;
	pdf_obj *xobjects;
	pdf_obj *shadings;
	pdf_obj *patterns;
	pdf_obj *nested;
	int count;
	int k;

	if (pageobj == NULL)
		fz_throw(ctx, "cannot retrieve info from page %d", page);

	/* Fonts, then whatever resources each font entry carries itself. */
	fonts = pdf_dict_gets(rsrc, "Font");
	if (fonts != NULL)
	{
		gatherfonts(page, pageref, pageobj, fonts);

		count = pdf_dict_len(fonts);
		k = 0;
		while (k < count)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(fonts, k), "Resources");
			/* Recurse only when pdf_objcmp() reports a nonzero result
			 * for (rsrc, nested) -- presumably "differs"; TODO confirm. */
			if (nested != NULL && pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested);
			k++;
		}
	}

	/* XObjects: images, forms and PostScript objects share one dictionary. */
	xobjects = pdf_dict_gets(rsrc, "XObject");
	if (xobjects != NULL)
	{
		gatherimages(page, pageref, pageobj, xobjects);
		gatherforms(page, pageref, pageobj, xobjects);
		gatherpsobjs(page, pageref, pageobj, xobjects);

		count = pdf_dict_len(xobjects);
		k = 0;
		while (k < count)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(xobjects, k), "Resources");
			if (nested != NULL && pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested);
			k++;
		}
	}

	/* Shadings are gathered without descending any further. */
	shadings = pdf_dict_gets(rsrc, "Shading");
	if (shadings != NULL)
		gathershadings(page, pageref, pageobj, shadings);

	/* Patterns, then the resources of each pattern entry. */
	patterns = pdf_dict_gets(rsrc, "Pattern");
	if (patterns != NULL)
	{
		gatherpatterns(page, pageref, pageobj, patterns);

		count = pdf_dict_len(patterns);
		k = 0;
		while (k < count)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(patterns, k), "Resources");
			if (nested != NULL && pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested);
			k++;
		}
	}
}
// just copy one of the resource sub-entries (e.g. /Font) static int copy_and_rename_resource(fz_context *dest_ctx, pdf_obj *dest, fz_context *src_ctx, pdf_obj *src, char *prefix, struct put_info *info) { char new_name[64]; /* this buffer is big enough up to hold all digits for two 16-bit numbers */ int i; for(i = 0; i < pdf_dict_len(src_ctx, src); i++) { pdf_obj *src_key = pdf_dict_get_key(src_ctx, src, i); pdf_obj *src_val = pdf_dict_get_val(src_ctx, src, i); if(!pdf_is_name(src_ctx, src_key)) { return(2); } /* if this is an inline resource, just copy this object into the new resource dict */ if(!pdf_is_indirect(src_ctx, src_val)) { if(snprintf(new_name, sizeof(new_name) / sizeof(new_name[0]), "%sinline_%d", prefix, info->next_inline_id++) >= sizeof(new_name) / sizeof(new_name[0])) return(1); // not enough space pdf_obj *new_res = copy_unassigned_object_continue(dest_ctx, info->dest_doc, src_ctx, info->src_doc, src_val, &info->new_ids); //pdf_obj *new_res = pdf_new_dict(dest_ctx, info->dest_doc, 10); printf("dump it...\n"); pdf_fprint_obj(dest_ctx, stdout, new_res, 0); /* now reference this new object in the resource object of this sheet */ pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, dest, dest_key, new_res); pdf_drop_obj(dest_ctx, dest_key); pdf_drop_obj(dest_ctx, new_res); } else { /* The new name of resource objects is always the num/gen of the referenced object in the src-file. Thus we can check by that name if the object was already referenced by another page of this sheet. */ if(snprintf(new_name, sizeof(new_name) / sizeof(new_name[0]), "%s%d_%d", prefix, pdf_to_num(dest_ctx, src_val), pdf_to_gen(dest_ctx, src_val)) >= sizeof(new_name) / sizeof(new_name[0])) return(1); // not enough space if(pdf_dict_gets(dest_ctx, dest, new_name) == NULL) { /* if this resource is not inlined and not already in the resource-dict of the current sheet... */ /* ...copy the referenced resource to the new document! 
If this object has copied already (for another sheet in dest_doc), copy_object_continue() will do nothing */ pdf_obj *new_res = copy_object_continue(dest_ctx, info->dest_doc, src_ctx, info->src_doc, src_val, &info->new_ids); /* now reference this new object in the resource object of this sheet */ pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, dest, dest_key, new_res); pdf_drop_obj(dest_ctx, dest_key); pdf_drop_obj(dest_ctx, new_res); } } /* even if it was used on another sheet or on this sheet, add it to the rename-dict for this sheet! Because it could have different names on different source-pages */ pdf_obj *rename_key = pdf_new_name(dest_ctx, info->dest_doc, pdf_to_name(dest_ctx, src_key)); pdf_obj *rename_val = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, info->rename_dict, rename_key, rename_val); pdf_drop_obj(dest_ctx, rename_key); pdf_drop_obj(dest_ctx, rename_val); } return(0); }