/* * Create a filter given a name and param dictionary. */ static fz_stream * build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params) { fz_compression_params local_params; if (params == NULL) params = &local_params; build_compression_params(ctx, f, p, params); /* If we were using params we were passed in, and we successfully * recognised the image type, we can use the existing filter and * shortstop here. */ if (params != &local_params && params->type != FZ_IMAGE_RAW) return chain; if (params->type != FZ_IMAGE_RAW) return fz_open_image_decomp_stream(ctx, chain, params, NULL); if (pdf_name_eq(ctx, f, PDF_NAME_ASCIIHexDecode) || pdf_name_eq(ctx, f, PDF_NAME_AHx)) return fz_open_ahxd(ctx, chain); else if (pdf_name_eq(ctx, f, PDF_NAME_ASCII85Decode) || pdf_name_eq(ctx, f, PDF_NAME_A85)) return fz_open_a85d(ctx, chain); else if (pdf_name_eq(ctx, f, PDF_NAME_JBIG2Decode)) { fz_jbig2_globals *globals = NULL; pdf_obj *obj = pdf_dict_get(ctx, p, PDF_NAME_JBIG2Globals); if (pdf_is_indirect(ctx, obj)) globals = pdf_load_jbig2_globals(ctx, doc, obj); /* fz_open_jbig2d takes possession of globals */ return fz_open_jbig2d(ctx, chain, globals); } else if (pdf_name_eq(ctx, f, PDF_NAME_JPXDecode)) return chain; /* JPX decoding is special cased in the image loading code */ else if (pdf_name_eq(ctx, f, PDF_NAME_Crypt)) { pdf_obj *name; if (!doc->crypt) { fz_warn(ctx, "crypt filter in unencrypted document"); return chain; } name = pdf_dict_get(ctx, p, PDF_NAME_Name); if (pdf_is_name(ctx, name)) return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen); return chain; } fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f)); return chain; }
/*
 * Write the appearance content for a widget annotation, dispatching on the
 * (inheritable) field type /FT: Tx, Ch and Sig are handled; anything else
 * throws FZ_ERROR_GENERIC.
 *
 * For text fields the text written is the result of the field's format event
 * when trigger events are enabled and the event yields a string; otherwise
 * it is the plain field value.
 */
static void
pdf_write_widget_appearance(fz_context *ctx, pdf_annot *annot, fz_buffer *buf,
	fz_rect *rect, fz_rect *bbox, fz_matrix *matrix, pdf_obj **res)
{
	pdf_obj *field_type = pdf_dict_get_inheritable(ctx, annot->obj, PDF_NAME(FT));

	if (pdf_name_eq(ctx, field_type, PDF_NAME(Tx)))
	{
		int flags = pdf_field_flags(ctx, annot->obj);
		char *formatted = NULL;
		const char *text;

		if (!annot->ignore_trigger_events)
			formatted = pdf_field_event_format(ctx, annot->page->doc, annot->obj);

		/* Fall back to the raw value when there is no formatted string. */
		text = formatted ? formatted : pdf_field_value(ctx, annot->obj);

		fz_try(ctx)
			pdf_write_tx_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res, text, flags);
		fz_always(ctx)
			fz_free(ctx, formatted);
		fz_catch(ctx)
			fz_rethrow(ctx);
		return;
	}

	if (pdf_name_eq(ctx, field_type, PDF_NAME(Ch)))
	{
		pdf_write_ch_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res);
		return;
	}

	if (pdf_name_eq(ctx, field_type, PDF_NAME(Sig)))
	{
		pdf_write_sig_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res);
		return;
	}

	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create appearance stream for %s widgets", pdf_to_name(ctx, field_type));
}
/*
 * Return 1 if the annotation's /Subtype equals one of the names in the
 * NULL-terminated array `allowed`, 0 otherwise.
 * `property` is not used by this function.
 */
static int
is_allowed_subtype(fz_context *ctx, pdf_annot *annot, pdf_obj *property, pdf_obj **allowed)
{
	pdf_obj *subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype));
	pdf_obj **candidate;

	for (candidate = allowed; *candidate != NULL; candidate++)
	{
		if (pdf_name_eq(ctx, subtype, *candidate))
			return 1;
	}

	return 0;
}
/*
 * Throw FZ_ERROR_GENERIC unless the annotation's /Subtype is one of the
 * names in `allowed`.
 *
 * allowed: NULL-terminated array of name objects.
 * property: only used to name the property in the error message.
 */
static void
check_allowed_subtypes(fz_context *ctx, pdf_annot *annot, pdf_obj *property, pdf_obj **allowed)
{
	pdf_obj *subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME_Subtype);

	/* Stop at the NULL terminator: the loop must test the element, not
	 * the array pointer itself (which never becomes NULL and would walk
	 * past the end of the array). */
	while (*allowed)
	{
		if (pdf_name_eq(ctx, subtype, *allowed))
			return;
		allowed++;
	}

	fz_throw(ctx, FZ_ERROR_GENERIC, "%s annotations have no %s property", pdf_to_name(ctx, subtype), pdf_to_name(ctx, property));
}
/*
 * Return the icon name of an annotation.
 *
 * The annotation's subtype is first validated against icon_name_subtypes
 * via check_allowed_subtypes. If the annotation has a /Name entry that name
 * is returned; otherwise a fixed default is picked for Text, Stamp,
 * FileAttachment and Sound annotations.
 */
const char *
pdf_annot_icon_name(fz_context *ctx, pdf_annot *annot)
{
	pdf_obj *icon;
	pdf_obj *subtype;

	check_allowed_subtypes(ctx, annot, PDF_NAME(Name), icon_name_subtypes);

	icon = pdf_dict_get(ctx, annot->obj, PDF_NAME(Name));
	if (icon)
		return pdf_to_name(ctx, icon);

	/* No explicit icon: pick the default for the subtype. */
	subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype));
	if (pdf_name_eq(ctx, subtype, PDF_NAME(Text)))
		return "Note";
	if (pdf_name_eq(ctx, subtype, PDF_NAME(Stamp)))
		return "Draft";
	if (pdf_name_eq(ctx, subtype, PDF_NAME(FileAttachment)))
		return "PushPin";
	if (pdf_name_eq(ctx, subtype, PDF_NAME(Sound)))
		return "Speaker";

	/* icon is NULL here; defer to pdf_to_name for the result. */
	return pdf_to_name(ctx, icon);
}
/* Image specific methods */

/*
 * Build table->hash by scanning every object in the document (numbers 1 to
 * count-1): each object whose /Subtype is /Image is loaded as an image,
 * digested with res_image_get_md5, and inserted under that 16 byte digest
 * unless the digest is already present.
 *
 * On error the table is released with res_table_free and the error is
 * rethrown.
 */
static void
res_image_init(fz_context *ctx, pdf_document *doc, pdf_res_table *table)
{
	int len, k;
	/* Must start as NULL: fz_always drops it even if the very first call
	 * in the try block throws. */
	pdf_obj *obj = NULL;
	pdf_obj *type;
	fz_image *image = NULL;
	unsigned char digest[16];

	fz_var(obj);
	fz_var(image);

	fz_try(ctx)
	{
		table->hash = fz_new_hash_table(ctx, 4096, 16, -1);
		len = pdf_count_objects(ctx, doc);
		for (k = 1; k < len; k++)
		{
			obj = pdf_load_object(ctx, doc, k, 0);
			type = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);
			if (pdf_name_eq(ctx, type, PDF_NAME_Image))
			{
				image = pdf_load_image(ctx, doc, obj);
				res_image_get_md5(ctx, image, digest);
				fz_drop_image(ctx, image);
				image = NULL;

				/* Don't allow overwrites. */
				if (fz_hash_find(ctx, table->hash, digest) == NULL)
					fz_hash_insert(ctx, table->hash, digest, obj);
				else
					/* Duplicate digest: we still hold the reference
					 * from pdf_load_object, so release it. */
					pdf_drop_obj(ctx, obj);
			}
			else
			{
				pdf_drop_obj(ctx, obj);
			}
			/* The reference was either handed to the table or dropped. */
			obj = NULL;
		}
	}
	fz_always(ctx)
	{
		fz_drop_image(ctx, image);
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		res_table_free(ctx, table);
		fz_rethrow(ctx);
	}
}
/*
 * Turn a file specification (string or dictionary) plus an optional
 * destination into a newly allocated "file://" URI.
 *
 * dest may be an array (first element is used as a zero-based page number,
 * emitted as "#page=N+1"), a name or a string (emitted as "#<dest>");
 * any other kind of dest yields no fragment.
 *
 * Returns NULL (after a warning) if no filename string can be found.
 * The result comes from fz_asprintf.
 */
char *
pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_obj *dest)
{
	pdf_obj *name_obj = NULL;
	const char *local_path;
	char *result = NULL;
	char fragment[256];

	if (pdf_is_string(ctx, file_spec))
		name_obj = file_spec;

	if (pdf_is_dict(ctx, file_spec))
	{
		/* Prefer the platform specific entry, then UF/F. */
#ifdef _WIN32
		name_obj = pdf_dict_get(ctx, file_spec, PDF_NAME(DOS));
#else
		name_obj = pdf_dict_get(ctx, file_spec, PDF_NAME(Unix));
#endif
		if (!name_obj)
			name_obj = pdf_dict_geta(ctx, file_spec, PDF_NAME(UF), PDF_NAME(F));
	}

	if (!pdf_is_string(ctx, name_obj))
	{
		fz_warn(ctx, "cannot parse file specification");
		return NULL;
	}

	/* Default to an empty fragment; overwritten below when dest is usable. */
	fragment[0] = 0;
	if (pdf_is_array(ctx, dest))
		fz_snprintf(fragment, sizeof fragment, "#page=%d", pdf_array_get_int(ctx, dest, 0) + 1);
	else if (pdf_is_name(ctx, dest))
		fz_snprintf(fragment, sizeof fragment, "#%s", pdf_to_name(ctx, dest));
	else if (pdf_is_string(ctx, dest))
		fz_snprintf(fragment, sizeof fragment, "#%s", pdf_to_str_buf(ctx, dest));

	local_path = pdf_to_text_string(ctx, name_obj);

#ifdef _WIN32
	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, file_spec, PDF_NAME(FS)), PDF_NAME(URL)))
	{
		/* Fix up the drive letter (change "/C/Documents/Foo" to "C:/Documents/Foo") */
		if (local_path[0] == '/')
		{
			char drive = local_path[1];
			int is_letter = ('A' <= drive && drive <= 'Z') || ('a' <= drive && drive <= 'z');

			if (is_letter && local_path[2] == '/')
				result = fz_asprintf(ctx, "file://%c:%s%s", drive, local_path + 2, fragment);
		}
	}
#endif

	if (!result)
		result = fz_asprintf(ctx, "file://%s%s", local_path, fragment);

	return result;
}
/*
 * Execute the action dictionary `a` (does nothing when `a` is NULL).
 *
 * The action type is read from /S and three kinds are handled:
 *   JavaScript - the /JS entry is converted with pdf_to_utf8 and run via
 *                pdf_js_execute; the converted string is always freed.
 *   ResetForm  - reset_form is called with /Fields and bit 0 of /Flags.
 *   Named      - only /N == Print is acted on (issues a print event).
 * Any other type is ignored. `obj` is not referenced in the visible code.
 *
 * NOTE(review): the closing brace of this function is not present in this
 * view of the file.
 */
static void execute_action(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *a)
{
	if (a)
	{
		pdf_obj *type = pdf_dict_get(ctx, a, PDF_NAME_S);

		if (pdf_name_eq(ctx, type, PDF_NAME_JavaScript))
		{
			pdf_obj *js = pdf_dict_get(ctx, a, PDF_NAME_JS);
			if (js)
			{
				char *code = pdf_to_utf8(ctx, doc, js);
				fz_try(ctx)
				{
					pdf_js_execute(doc->js, code);
				}
				fz_always(ctx)
				{
					/* code is freed whether or not execution throws */
					fz_free(ctx, code);
				}
				fz_catch(ctx)
				{
					fz_rethrow(ctx);
				}
			}
		}
		else if (pdf_name_eq(ctx, type, PDF_NAME_ResetForm))
		{
			/* Only the lowest bit of /Flags is passed on. */
			reset_form(ctx, doc, pdf_dict_get(ctx, a, PDF_NAME_Fields), pdf_to_int(ctx, pdf_dict_get(ctx, a, PDF_NAME_Flags)) & 1);
		}
		else if (pdf_name_eq(ctx, type, PDF_NAME_Named))
		{
			pdf_obj *name = pdf_dict_get(ctx, a, PDF_NAME_N);

			if (pdf_name_eq(ctx, name, PDF_NAME_Print))
				pdf_event_issue_print(ctx, doc);
		}
	}
/*
 * Build an fz_link from an annotation dictionary.
 *
 * Returns NULL when the annotation is not a /Link, has no /Rect, or no URI
 * can be derived from its /Dest or action. The rectangle is transformed by
 * page_ctm. The intermediate URI string is always freed.
 */
static fz_link *
pdf_load_link(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int pagenum, const fz_matrix *page_ctm)
{
	pdf_obj *entry;
	fz_rect area;
	char *target;
	fz_link *result = NULL;

	entry = pdf_dict_get(ctx, dict, PDF_NAME_Subtype);
	if (!pdf_name_eq(ctx, entry, PDF_NAME_Link))
		return NULL;

	entry = pdf_dict_get(ctx, dict, PDF_NAME_Rect);
	if (!entry)
		return NULL;

	pdf_to_rect(ctx, entry, &area);
	fz_transform_rect(&area, page_ctm);

	entry = pdf_dict_get(ctx, dict, PDF_NAME_Dest);
	if (entry)
	{
		target = pdf_parse_link_dest(ctx, doc, entry);
	}
	else
	{
		pdf_obj *action = pdf_dict_get(ctx, dict, PDF_NAME_A);

		/* fall back to additional action button's down/up action */
		if (!action)
			action = pdf_dict_geta(ctx, pdf_dict_get(ctx, dict, PDF_NAME_AA), PDF_NAME_U, PDF_NAME_D);

		target = pdf_parse_link_action(ctx, doc, action, pagenum);
	}

	if (!target)
		return NULL;

	fz_try(ctx)
		result = fz_new_link(ctx, &area, doc, target);
	fz_always(ctx)
		fz_free(ctx, target);
	fz_catch(ctx)
		fz_rethrow(ctx);

	return result;
}
/*
 * Decide whether the destination of object `o` is still valid.
 *
 * Returns 0 when `o` has a GoTo action whose /D is not found by
 * string_in_names_list. Otherwise looks at /Dest: absent means valid, a
 * string is valid iff it is in names_list, and anything else is valid iff
 * its first array element passes dest_is_valid_page.
 */
static int
dest_is_valid(fz_context *ctx, pdf_obj *o, int page_count, int *page_object_nums, pdf_obj *names_list)
{
	pdf_obj *action = pdf_dict_get(ctx, o, PDF_NAME_A);
	pdf_obj *dest;

	if (pdf_name_eq(ctx, pdf_dict_get(ctx, action, PDF_NAME_S), PDF_NAME_GoTo))
	{
		if (!string_in_names_list(ctx, pdf_dict_get(ctx, action, PDF_NAME_D), names_list))
			return 0;
	}

	dest = pdf_dict_get(ctx, o, PDF_NAME_Dest);
	if (dest == NULL)
		return 1;

	if (pdf_is_string(ctx, dest))
		return string_in_names_list(ctx, dest, names_list);

	if (dest_is_valid_page(ctx, pdf_array_get(ctx, dest, 0), page_object_nums, page_count))
		return 1;

	return 0;
}
/*
	Performs the same task as pdf_clean_page_contents, but with an optional
	text filter function.

	The page content is run through a buffer processor (optionally behind a
	filter processor when sanitize is set), the resulting buffer replaces
	the page's content stream, and the stream-bearing resources (ExtGState
	soft masks, tiling patterns, form XObjects, Type3 fonts) are cleaned in
	turn.

	cookie: Passed through to the content processing and cleaning calls.

	proc_fn: Optional callback, called with (ctx, buffer, res, proc_arg)
	after the resources have been processed.

	text_filter: Function to assess whether a given character should be
	kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed to allow
	other output to be sent.

	proc_arg: Opaque value to be passed to callback functions.

	sanitize: If non-zero, a fresh resource dictionary is built by the
	filter processor and written back to the page; otherwise the existing
	resources are reused.

	ascii: Passed to pdf_new_buffer_processor and to the nested cleaning
	calls.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg, int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	pdf_obj *resources;
	fz_buffer *buffer;

	/* Everything released in fz_always is assigned inside fz_try. */
	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		contents = pdf_page_contents(ctx, page);
		resources = pdf_page_resources(ctx, page);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		if (sanitize)
		{
			/* Filter into a new, initially empty resource dictionary. */
			res = pdf_new_dict(ctx, doc, 1);
			proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg);
			pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
			pdf_close_processor(ctx, proc_filter);
		}
		else
		{
			/* No filtering: res is the page's own resources (kept so
			 * the drop in fz_always balances). */
			res = pdf_keep_obj(ctx, resources);
			pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie);
		}
		pdf_close_processor(ctx, proc_buffer);

		/* Deal with page content stream. */

		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_add_object(ctx, doc, new_obj);
			contents = new_ref;
			pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents);
		}
		else
		{
			/* Reusing the existing stream object: remove its filter
			 * entries before the stream data is replaced below. */
			pdf_dict_del(ctx, contents, PDF_NAME(Filter));
			pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms));
		}
		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME(G));
				if (!o)
					continue;
				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat_res;
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				/* Use the pattern's own resources, else the page's. */
				pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources));
				if (pat_res == NULL)
					pat_res = resources;
				/* Only patterns with PatternType 1 are cleaned. */
				if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj_res;
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				if (!xobj)
					continue;
				/* Use the XObject's own resources, else the page's. */
				xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources));
				if (xobj_res == NULL)
					xobj_res = resources;
				/* Only form XObjects are cleaned. */
				if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype))))
					pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				if (!o)
					continue;
				/* Only Type3 fonts are cleaned. */
				if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype))))
					pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet));
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj);

		/* ColorSpace - no cleaning possible. */
		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		/* Update resource dictionary */
		if (sanitize)
		{
			pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/* Return non-zero if obj's /Type is /FontDescriptor. Uses the file-scope ctx. */
static int isfontdesc(pdf_obj *obj)
{
	return pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME_Type), PDF_NAME_FontDescriptor);
}
/*
 * Write the font program referenced by a font descriptor to a file named
 * "<FontName>-<num>.<ext>" in the current directory.
 *
 * The stream is taken from /FontFile, /FontFile2 or /FontFile3 (a later
 * entry overrides an earlier one); the extension follows from which entry
 * was used and, for /FontFile3, from its /Subtype. Throws on an invalid or
 * unhandled /FontFile3 subtype; warns and returns if no font file entry is
 * present. Uses the file-scope ctx and doc.
 */
static void savefont(pdf_obj *dict, int num)
{
	char filename[1024];
	fz_buffer *contents;
	pdf_obj *font_stream = NULL;
	pdf_obj *entry;
	char *suffix = "";
	fz_output *sink;
	char *stem = "font";
	size_t size;
	unsigned char *bytes;

	entry = pdf_dict_get(ctx, dict, PDF_NAME_FontName);
	if (entry)
		stem = pdf_to_name(ctx, entry);

	entry = pdf_dict_get(ctx, dict, PDF_NAME_FontFile);
	if (entry)
	{
		font_stream = entry;
		suffix = "pfa";
	}

	entry = pdf_dict_get(ctx, dict, PDF_NAME_FontFile2);
	if (entry)
	{
		font_stream = entry;
		suffix = "ttf";
	}

	entry = pdf_dict_get(ctx, dict, PDF_NAME_FontFile3);
	if (entry)
	{
		pdf_obj *subtype = pdf_dict_get(ctx, entry, PDF_NAME_Subtype);

		font_stream = entry;

		if (subtype && !pdf_is_name(ctx, subtype))
			fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype");

		if (pdf_name_eq(ctx, subtype, PDF_NAME_Type1C))
			suffix = "cff";
		else if (pdf_name_eq(ctx, subtype, PDF_NAME_CIDFontType0C))
			suffix = "cid";
		else if (pdf_name_eq(ctx, subtype, PDF_NAME_OpenType))
			suffix = "otf";
		else
			fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", pdf_to_name(ctx, subtype));
	}

	if (!font_stream)
	{
		fz_warn(ctx, "unhandled font type");
		return;
	}

	contents = pdf_load_stream(ctx, doc, pdf_to_num(ctx, font_stream));
	size = fz_buffer_storage(ctx, contents, &bytes);

	/* Outer try owns the buffer, inner try owns the output. */
	fz_try(ctx)
	{
		snprintf(filename, sizeof(filename), "%s-%04d.%s", stem, num, suffix);
		printf("extracting font %s\n", filename);
		sink = fz_new_output_with_path(ctx, filename, 0);
		fz_try(ctx)
			fz_write(ctx, sink, bytes, size);
		fz_always(ctx)
			fz_drop_output(ctx, sink);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}
	fz_always(ctx)
		fz_drop_buffer(ctx, contents);
	fz_catch(ctx)
		fz_rethrow(ctx);
}
/*
 * Apply the entries of an ExtGState dictionary by invoking the matching
 * processor callbacks on proc. Each entry is only applied if it has the
 * expected type and the corresponding callback is non-NULL.
 *
 * Entries read: LW, LC, LJ, ML, D, RI, FL, Font, TR2/TR (warning only),
 * CA, ca, BM, SMask.
 *
 * NOTE(review): the closing brace of this function is not present in this
 * view of the file.
 */
static void pdf_process_extgstate(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, pdf_obj *dict)
{
	pdf_obj *obj;

	/* LW -> op_w */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_LW);
	if (pdf_is_number(ctx, obj) && proc->op_w)
		proc->op_w(ctx, proc, pdf_to_real(ctx, obj));

	/* LC -> op_J, value clamped to 0..2 */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_LC);
	if (pdf_is_int(ctx, obj) && proc->op_J)
		proc->op_J(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2));

	/* LJ -> op_j, value clamped to 0..2 */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_LJ);
	if (pdf_is_int(ctx, obj) && proc->op_j)
		proc->op_j(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2));

	/* ML -> op_M */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_ML);
	if (pdf_is_number(ctx, obj) && proc->op_M)
		proc->op_M(ctx, proc, pdf_to_real(ctx, obj));

	/* D -> op_d: element 0 is the dash array, element 1 the phase */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_D);
	if (pdf_is_array(ctx, obj) && proc->op_d)
	{
		pdf_obj *dash_array = pdf_array_get(ctx, obj, 0);
		pdf_obj *dash_phase = pdf_array_get(ctx, obj, 1);
		proc->op_d(ctx, proc, dash_array, pdf_to_real(ctx, dash_phase));
	}

	/* RI -> op_ri */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_RI);
	if (pdf_is_name(ctx, obj) && proc->op_ri)
		proc->op_ri(ctx, proc, pdf_to_name(ctx, obj));

	/* FL -> op_i */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_FL);
	if (pdf_is_number(ctx, obj) && proc->op_i)
		proc->op_i(ctx, proc, pdf_to_real(ctx, obj));

	/* Font -> op_Tf: element 0 is the font reference, element 1 the size.
	 * The loaded font is dropped whether or not the callback throws. */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_Font);
	if (pdf_is_array(ctx, obj) && proc->op_Tf)
	{
		pdf_obj *font_ref = pdf_array_get(ctx, obj, 0);
		pdf_obj *font_size = pdf_array_get(ctx, obj, 1);
		pdf_font_desc *font = load_font_or_hail_mary(ctx, csi->doc, csi->rdb, font_ref, 0, csi->cookie);
		fz_try(ctx)
			proc->op_Tf(ctx, proc, "ExtGState", font, pdf_to_real(ctx, font_size));
		fz_always(ctx)
			pdf_drop_font(ctx, font);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}

	/* transfer functions */

	/* Transfer functions are not applied; a warning is issued for a named
	 * TR2 other than Identity/Default. */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_TR2);
	if (pdf_is_name(ctx, obj))
		if (!pdf_name_eq(ctx, obj, PDF_NAME_Identity) && !pdf_name_eq(ctx, obj, PDF_NAME_Default))
			fz_warn(ctx, "ignoring transfer function");
	if (!obj) /* TR is ignored in the presence of TR2 */
	{
		pdf_obj *tr = pdf_dict_get(ctx, dict, PDF_NAME_TR);
		if (pdf_is_name(ctx, tr))
			if (!pdf_name_eq(ctx, tr, PDF_NAME_Identity))
				fz_warn(ctx, "ignoring transfer function");
	}

	/* transparency state */

	/* CA -> op_gs_CA */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_CA);
	if (pdf_is_number(ctx, obj) && proc->op_gs_CA)
		proc->op_gs_CA(ctx, proc, pdf_to_real(ctx, obj));

	/* ca -> op_gs_ca */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_ca);
	if (pdf_is_number(ctx, obj) && proc->op_gs_ca)
		proc->op_gs_ca(ctx, proc, pdf_to_real(ctx, obj));

	/* BM -> op_gs_BM: if BM is an array only its first entry is used */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_BM);
	if (pdf_is_array(ctx, obj))
		obj = pdf_array_get(ctx, obj, 0);
	if (pdf_is_name(ctx, obj) && proc->op_gs_BM)
		proc->op_gs_BM(ctx, proc, pdf_to_name(ctx, obj));

	/* SMask -> op_gs_SMask: a dictionary loads a soft mask, the name /None
	 * clears it; anything else is ignored. */
	obj = pdf_dict_get(ctx, dict, PDF_NAME_SMask);
	if (proc->op_gs_SMask)
	{
		if (pdf_is_dict(ctx, obj))
		{
			pdf_xobject *xobj;
			pdf_obj *group, *s, *bc, *tr;
			float softmask_bc[FZ_MAX_COLORS];
			fz_colorspace *colorspace;
			int colorspace_n = 1;
			int k, luminosity;

			fz_var(xobj);

			group = pdf_dict_get(ctx, obj, PDF_NAME_G);
			if (!group)
				fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load softmask xobject (%d 0 R)", pdf_to_num(ctx, obj));
			xobj = pdf_load_xobject(ctx, csi->doc, group);

			fz_try(ctx)
			{
				/* Component count defaults to 1 when the group has no
				 * colorspace. */
				colorspace = pdf_xobject_colorspace(ctx, xobj);
				if (colorspace)
				{
					colorspace_n = fz_colorspace_n(ctx, colorspace);
					fz_drop_colorspace(ctx, colorspace);
				}

				/* Default background color is black. */
				for (k = 0; k < colorspace_n; k++)
					softmask_bc[k] = 0;
				/* Which in CMYK means not all zeros! This should really be
				 * a test for subtractive color spaces, but this will have
				 * to do for now. */
				/* NOTE(review): colorspace is compared here after it was
				 * dropped above; only the pointer value is used - confirm
				 * this is intended. */
				if (colorspace == fz_device_cmyk(ctx))
					softmask_bc[3] = 1.0;

				/* An explicit /BC array overrides the default background. */
				bc = pdf_dict_get(ctx, obj, PDF_NAME_BC);
				if (pdf_is_array(ctx, bc))
				{
					for (k = 0; k < colorspace_n; k++)
						softmask_bc[k] = pdf_to_real(ctx, pdf_array_get(ctx, bc, k));
				}

				s = pdf_dict_get(ctx, obj, PDF_NAME_S);
				if (pdf_name_eq(ctx, s, PDF_NAME_Luminosity))
					luminosity = 1;
				else
					luminosity = 0;

				tr = pdf_dict_get(ctx, obj, PDF_NAME_TR);
				if (tr && !pdf_name_eq(ctx, tr, PDF_NAME_Identity))
					fz_warn(ctx, "ignoring transfer function");

				proc->op_gs_SMask(ctx, proc, xobj, csi->rdb, softmask_bc, luminosity);
			}
			fz_always(ctx)
			{
				pdf_drop_xobject(ctx, xobj);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}
		else if (pdf_is_name(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_None))
		{
			proc->op_gs_SMask(ctx, proc, NULL, NULL, NULL, 0);
		}
	}
/* Return non-zero if obj's /Subtype is /Image. Uses the file-scope ctx. */
static int isimage(pdf_obj *obj)
{
	return pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME_Subtype), PDF_NAME_Image);
}
/*
 * Write the font program referenced by a font descriptor to a file named
 * "<FontName>-<num>.<ext>" in the current directory.
 *
 * The stream is taken from /FontFile, /FontFile2 or /FontFile3 (a later
 * entry overrides an earlier one); the extension follows from which entry
 * was used and, for /FontFile3, from its /Subtype. Throws on an invalid or
 * unhandled /FontFile3 subtype and on any file create/write/close failure;
 * warns and returns if no font file entry is present.
 *
 * The loaded stream buffer and the output file are released on every path,
 * including when an error is thrown. Uses the file-scope ctx and doc.
 */
static void savefont(pdf_obj *dict, int num)
{
	char namebuf[1024];
	fz_buffer *buf;
	pdf_obj *stream = NULL;
	pdf_obj *obj;
	char *ext = "";
	FILE *f = NULL;
	char *fontname = "font";
	int n, len;
	int status;
	unsigned char *data;

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontName);
	if (obj)
		fontname = pdf_to_name(ctx, obj);

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile);
	if (obj)
	{
		stream = obj;
		ext = "pfa";
	}

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile2);
	if (obj)
	{
		stream = obj;
		ext = "ttf";
	}

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile3);
	if (obj)
	{
		stream = obj;

		obj = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);
		if (obj && !pdf_is_name(ctx, obj))
			fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype");

		if (pdf_name_eq(ctx, obj, PDF_NAME_Type1C))
			ext = "cff";
		else if (pdf_name_eq(ctx, obj, PDF_NAME_CIDFontType0C))
			ext = "cid";
		else if (pdf_name_eq(ctx, obj, PDF_NAME_OpenType))
			ext = "otf";
		else
			fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", pdf_to_name(ctx, obj));
	}

	if (!stream)
	{
		fz_warn(ctx, "unhandled font type");
		return;
	}

	buf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, stream), pdf_to_gen(ctx, stream));

	/* f is assigned inside the try block and inspected in fz_always. */
	fz_var(f);

	fz_try(ctx)
	{
		snprintf(namebuf, sizeof(namebuf), "%s-%04d.%s", fontname, num, ext);
		printf("extracting font %s\n", namebuf);

		f = fz_fopen(namebuf, "wb");
		if (!f)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create font file");

		len = fz_buffer_storage(ctx, buf, &data);
		n = fwrite(data, 1, len, f);
		if (n < len)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot write font file");

		/* Clear f before reporting a close failure so that fz_always
		 * does not close the same handle twice. */
		status = fclose(f);
		f = NULL;
		if (status < 0)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot close font file");
	}
	fz_always(ctx)
	{
		/* Only non-NULL here if an error occurred after opening. */
		if (f)
			fclose(f);
		fz_drop_buffer(ctx, buf);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}
/*
 * Reduce the document to the pages named on the command line.
 *
 * argv[0..argc-1] are page lists: comma separated items, each either a
 * single page number or a "start-end" range (1-based). A missing start or
 * end means the last page. Ranges may run backwards.
 *
 * The catalog is replaced by one holding only /Type and /Pages, the page
 * tree's /Kids and /Count are rebuilt from the retained pages, the Dests
 * name tree (if any) is reduced to entries pointing at retained pages, and
 * GoTo link annotations whose destination is not in that reduced list are
 * removed from each page.
 */
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	int pagecount;
	int i;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	/* Load the old Dests before the catalog is replaced below. */
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	/* Replace the catalog object in place, under the old object number. */
	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading '-' means no start was given: use the last page. */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* "a-b" ends at b; a trailing '-' ends at the last page. */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			/* Walk the range in the direction given. */
			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The destination is val's /D entry if it has one, else val
			 * itself; its first element is looked up in the new Kids. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (pdf_array_contains(ctx, pdf_dict_get(ctx, pages, PDF_NAME_Kids), dest))
			{
				/* names_list holds alternating key string / value entries. */
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		/* Re-fetch the (replaced) catalog and attach /Names/Dests/Names. */
		root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		/* NOTE(review): names_list is still read in the annotation loop
		 * below after this drop; presumably the dests dictionary keeps it
		 * alive - confirm. */
		pdf_drop_obj(ctx, names_list);
		pdf_drop_obj(ctx, olddests);
	}

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	pagecount = pdf_count_pages(ctx, doc);
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);
			pdf_obj *p;

			/* Only Link annotations with a GoTo action are candidates. */
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			p = pdf_dict_get(ctx, o, PDF_NAME_A);
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo))
				continue;

			if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
				continue;

			/* FIXME: Should probably look at Next too */

			/* Remove this annotation */
			pdf_array_delete(ctx, annots, j);
			/* Re-examine the element that moved into slot j.
			 * NOTE(review): len is not reduced after a deletion. */
			j--;
		}
	}
}
/*
 * Clean a page's content stream and resources.
 *
 * The page contents are run through a filter processor feeding a buffer
 * processor; the filter builds a new resource dictionary (res). The
 * resulting buffer replaces the page's content stream, the stream-bearing
 * resources (ExtGState soft masks, tiling patterns, form XObjects, Type3
 * fonts) are cleaned in turn, and the page is pointed at the new resources.
 *
 * cookie: Passed through to the content processing and cleaning calls.
 * proc_fn: Optional callback, called with (ctx, buffer, res, proc_arg)
 * before the stream is updated.
 * proc_arg: Opaque value passed to proc_fn.
 *
 * Errors are rethrown with the message "Failed while cleaning page".
 */
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	fz_buffer *buffer;

	/* Everything released in fz_always is assigned inside fz_try. */
	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);

		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);

		contents = page->contents;
		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_new_ref(ctx, doc, new_obj);
			/* NOTE(review): page->contents takes new_ref without a
			 * pdf_keep_obj, while fz_always drops new_ref, and the old
			 * page->contents is not dropped - confirm ownership. */
			page->contents = contents = new_ref;
		}
		else
		{
			/* Reusing the existing stream object: remove its filter
			 * entries before the stream data is replaced below. */
			pdf_dict_del(ctx, contents, PDF_NAME_Filter);
			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
		}

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME_G);
				if (!o)
					continue;

				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1);
			}
		}

		/* ColorSpace - no cleaning possible */

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				/* Only patterns with PatternType 1 are cleaned. */
				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0);
			}
		}

		/* Shading - no cleaning possible */

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				/* Only form XObjects are cleaned. */
				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
					continue;

				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Font);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				/* Only Type3 fonts are cleaned. */
				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
				{
					pdf_clean_type3(ctx, doc, o, page->resources, cookie);
				}
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);

		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		pdf_update_stream(ctx, doc, contents, buffer, 0);
		/* Swap the page over to the new resource dictionary: the page
		 * struct keeps its own reference, the local ref is dropped in
		 * fz_always. */
		pdf_drop_obj(ctx, page->resources);
		ref = pdf_new_ref(ctx, doc, res);
		page->resources = pdf_keep_obj(ctx, ref);
		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning page");
	}
}
/*
 * Examine the object that follows an '<int> <int> obj' header during
 * repair.
 *
 * Visible behaviour: *stmofsp is reset to 0 and *stmlenp (if given) to -1.
 * If the next token opens a dictionary it is parsed, and:
 *   - when the dictionary's /Type is /XRef, its /Encrypt, /ID and /Root
 *     entries are stored (kept) through encrypt, id and root, for each of
 *     those pointers that is non-NULL;
 *   - a direct integer /Length is remembered in stm_len;
 *   - when reading linearly and `page` is given, a /Type /Page dictionary
 *     is stored (kept) through `page`.
 *
 * NOTE(review): only the first part of this function is present in this
 * view of the file; the remainder (including the return value and the use
 * of tmpofs) is not shown.
 */
int pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs, pdf_obj **root)
{
	fz_stream *file = doc->file;
	pdf_token tok;
	int stm_len;

	*stmofsp = 0;
	if (stmlenp)
		*stmlenp = -1;

	stm_len = 0;

	/* On entry to this function, we know that we've just seen
	 * '<int> <int> obj'. We expect the next thing we see to be a
	 * pdf object. Regardless of the type of thing we meet next
	 * we only need to fully parse it if it is a dictionary. */
	tok = pdf_lex(ctx, file, buf);

	if (tok == PDF_TOK_OPEN_DICT)
	{
		pdf_obj *dict, *obj;

		fz_try(ctx)
		{
			dict = pdf_parse_dict(ctx, doc, file, buf);
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			/* Don't let a broken object at EOF overwrite a good one */
			if (file->eof)
				fz_rethrow(ctx);
			/* Silently swallow the error */
			dict = pdf_new_dict(ctx, NULL, 2);
		}

		/* We must be careful not to try to resolve any indirections
		 * here. We have just read dict, so we know it to be a non
		 * indirected dictionary. Before we look at any values that
		 * we get back from looking up in it, we need to check they
		 * aren't indirected. */

		if (encrypt || id || root)
		{
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_XRef))
			{
				if (encrypt)
				{
					/* Replace any previously recorded value. */
					obj = pdf_dict_get(ctx, dict, PDF_NAME_Encrypt);
					if (obj)
					{
						pdf_drop_obj(ctx, *encrypt);
						*encrypt = pdf_keep_obj(ctx, obj);
					}
				}

				if (id)
				{
					/* Replace any previously recorded value. */
					obj = pdf_dict_get(ctx, dict, PDF_NAME_ID);
					if (obj)
					{
						pdf_drop_obj(ctx, *id);
						*id = pdf_keep_obj(ctx, obj);
					}
				}

				/* NOTE(review): unlike encrypt and id above, the previous
				 * *root is not dropped and a missing /Root overwrites it
				 * with the result of keeping NULL - confirm intended. */
				if (root)
					*root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Root));
			}
		}

		/* Only a direct integer /Length is trusted. */
		obj = pdf_dict_get(ctx, dict, PDF_NAME_Length);
		if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj))
			stm_len = pdf_to_int(ctx, obj);

		if (doc->file_reading_linearly && page)
		{
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_Page))
			{
				pdf_drop_obj(ctx, *page);
				*page = pdf_keep_obj(ctx, dict);
			}
		}

		pdf_drop_obj(ctx, dict);
	}
/*
 * Add a new schema entry to the document's portfolio.
 *
 * doc: Document to modify; must not be NULL (throws FZ_ERROR_GENERIC).
 * entry: Position in the portfolio list at which to insert; the new entry
 * goes before the existing entry at that index, or at the end if the list
 * is shorter.
 * info: Description of the new entry (name, visible and editable are read
 * here; the whole structure is copied into the list node).
 *
 * The entry is given the first unused decimal name ("1", "2", ...), added
 * to Root/Collection/Schema, and all entries are renumbered via /O.
 */
void pdf_add_portfolio_schema(fz_context *ctx, pdf_document *doc, int entry, const pdf_portfolio_schema *info)
{
	pdf_portfolio **pp;
	pdf_portfolio *p;
	pdf_obj *s;
	pdf_obj *sc = NULL;
	int num;
	char str_name[32];
	pdf_obj *num_name = NULL;

	if (!doc)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Bad pdf_add_portfolio_schema call");

	if (doc->portfolio == NULL)
		load_portfolio(ctx, doc);

	/* Both are assigned inside fz_try and released in fz_always. */
	fz_var(num_name);
	fz_var(sc);

	/* Find the link at which the new node is to be inserted. */
	pp = &doc->portfolio;
	while (*pp && entry > 0)
		pp = &(*pp)->next, entry--;

	fz_try(ctx)
	{
		/* Find a name for the new schema entry */
		num = 0;
		do
		{
			pdf_drop_obj(ctx, num_name);
			num_name = NULL;
			num++;
			snprintf(str_name, sizeof str_name, "%d", num);
			num_name = pdf_new_name(ctx, doc, str_name);
			/* p ends non-NULL iff the name is already in use. */
			for (p = doc->portfolio; p; p = p->next)
				if (pdf_name_eq(ctx, num_name, p->key))
					break;
		}
		while (p);

		sc = pdf_new_dict(ctx, doc, 4);
		pdf_dict_put_drop(ctx, sc, PDF_NAME_E, pdf_new_bool(ctx, doc, !!info->editable));
		pdf_dict_put_drop(ctx, sc, PDF_NAME_V, pdf_new_bool(ctx, doc, !!info->visible));
		/* NOTE(review): this drops the caller's reference to info->name,
		 * and the same pointer is copied into p->entry below - confirm
		 * the intended ownership. */
		pdf_dict_put_drop(ctx, sc, PDF_NAME_N, info->name);
		pdf_dict_put(ctx, sc, PDF_NAME_Subtype, PDF_NAME_S);

		/* Add to our linked list (in the correct sorted place) */
		p = fz_malloc_struct(ctx, pdf_portfolio);
		p->entry = *info;
		p->sort = 0; /* Will be rewritten in a mo */
		p->key = pdf_keep_obj(ctx, num_name);
		p->val = pdf_keep_obj(ctx, sc);
		p->next = *pp;
		*pp = p;

		/* Add the key to the schema */
		s = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Collection, PDF_NAME_Schema, NULL);
		pdf_dict_put(ctx, s, num_name, sc);

		/* Renumber the schema entries */
		for (num = 0, p = doc->portfolio; p; num++, p = p->next)
		{
			pdf_dict_put_drop(ctx, p->val, PDF_NAME_O, pdf_new_int(ctx, doc, num));
			p->sort = num;
		}
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, num_name);
		/* The list node and the schema dictionary hold their own
		 * references; release the local one. */
		pdf_drop_obj(ctx, sc);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}
/*
 * Reduce the document to the pages selected on the command line.
 *
 * ctx:  context used for allocation and error reporting.
 * glo:  tool state; glo->doc is the document being edited.
 * argc: number of entries in argv.
 * argv: page list arguments. Each argument is a comma separated list of
 *       items; an item is "N", "N-M", "N-" (N to the last page) or "-M"
 *       (last page down to M). Values are clamped to 1..pagecount, and a
 *       range whose start is not below its end is walked in descending
 *       order. The strings are split in place by fz_strsep.
 *
 * Steps, in order:
 *  1. Replace the catalog with a new dictionary carrying only Type, Pages
 *     and Outlines.
 *  2. Rebuild the page tree root's Kids/Count from the selected pages.
 *  3. Rebuild the Dests name tree, keeping entries accepted by
 *     dest_is_valid_page.
 *  4. Delete Link annotations rejected by dest_is_valid.
 *  5. Run strip_outlines and remove Outlines from the catalog when it
 *     returns 0.
 */
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* "N-M" ends at M; a trailing dash ends at the last page. */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Record the object number of every retained page for the
	 * destination checks below. */
	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The destination array is either val's /D entry or val
			 * itself; its first element is what gets checked. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each page's /Annots list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation. The array is now one
				 * shorter, so shrink the bound and revisit the
				 * same index. */
				pdf_array_delete(ctx, annots, j);
				len--;
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
/*
 * Convert a link destination into a newly allocated string.
 *
 * The destination is first passed through resolve_dest. A name or string
 * result is duplicated as-is. Otherwise it is treated as an array: element 0
 * identifies the page (an integer index, or an object that is looked up in
 * the page tree) and element 1, when present, names the fit type used to
 * pick the coordinates.
 *
 * Returns "#page,x,y" when a fit type is present, "#page" when it is not
 * (page is 1-based in the output), or NULL if the destination is undefined
 * or the page number is negative.
 */
char *
pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest)
{
	pdf_obj *fit, *target;
	fz_rect box;
	fz_matrix ctm;
	int pageno, left, top, height;

	dest = resolve_dest(ctx, doc, dest);
	if (!dest)
	{
		fz_warn(ctx, "undefined link destination");
		return NULL;
	}

	if (pdf_is_name(ctx, dest))
		return fz_strdup(ctx, pdf_to_name(ctx, dest));
	if (pdf_is_string(ctx, dest))
		return fz_strdup(ctx, pdf_to_str_buf(ctx, dest));

	target = pdf_array_get(ctx, dest, 0);
	if (!pdf_is_int(ctx, target))
	{
		/* A failed page lookup is reported as "no page" instead of
		 * being propagated. */
		fz_try(ctx)
			pageno = pdf_lookup_page_number(ctx, doc, target);
		fz_catch(ctx)
			pageno = -1;
	}
	else
	{
		pageno = pdf_to_int(ctx, target);
		target = pdf_lookup_page_obj(ctx, doc, pageno);
	}

	if (pageno < 0)
		return NULL;

	fit = pdf_array_get(ctx, dest, 1);
	if (!fit)
		return fz_asprintf(ctx, "#%d", pageno + 1);

	/* Link coords use a coordinate space that does not seem to respect Rotate or UserUnit. */
	/* All we need to do is figure out the page height to flip the coordinate space. */
	pdf_page_obj_transform(ctx, target, &box, &ctm);
	box = fz_transform_rect(box, ctm);
	height = box.y1 - box.y0;

	left = 0;
	top = 0;
	if (pdf_name_eq(ctx, fit, PDF_NAME(XYZ)))
	{
		left = pdf_array_get_int(ctx, dest, 2);
		top = height - pdf_array_get_int(ctx, dest, 3);
	}
	else if (pdf_name_eq(ctx, fit, PDF_NAME(FitR)))
	{
		left = pdf_array_get_int(ctx, dest, 2);
		top = height - pdf_array_get_int(ctx, dest, 5);
	}
	else if (pdf_name_eq(ctx, fit, PDF_NAME(FitH)) || pdf_name_eq(ctx, fit, PDF_NAME(FitBH)))
	{
		top = height - pdf_array_get_int(ctx, dest, 2);
	}
	else if (pdf_name_eq(ctx, fit, PDF_NAME(FitV)) || pdf_name_eq(ctx, fit, PDF_NAME(FitBV)))
	{
		left = pdf_array_get_int(ctx, dest, 2);
	}

	return fz_asprintf(ctx, "#%d,%d,%d", pageno + 1, left, top);
}
/*
 * Convert a link destination into a newly allocated string.
 *
 * The destination is first passed through resolve_dest. A name or string
 * result is duplicated as-is. Otherwise it is treated as an array: element 0
 * identifies the page (an integer index, or an object that is looked up in
 * the page tree) and element 1 names the fit type used to pick x and y from
 * the remaining elements.
 *
 * Returns "#page,x,y" when either coordinate is non-zero, "#page" otherwise
 * (page is 1-based in the output), or NULL if the destination is undefined
 * or the page number is negative.
 */
char *
pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest)
{
	pdf_obj *item;
	char uri[256];
	int pageno;
	int xpos, ypos;

	dest = resolve_dest(ctx, doc, dest);
	if (!dest)
	{
		fz_warn(ctx, "undefined link destination");
		return NULL;
	}

	if (pdf_is_name(ctx, dest))
		return fz_strdup(ctx, pdf_to_name(ctx, dest));
	if (pdf_is_string(ctx, dest))
		return fz_strdup(ctx, pdf_to_str_buf(ctx, dest));

	item = pdf_array_get(ctx, dest, 0);
	if (!pdf_is_int(ctx, item))
	{
		/* A failed page lookup is reported as "no page" instead of
		 * being propagated. */
		fz_try(ctx)
			pageno = pdf_lookup_page_number(ctx, doc, item);
		fz_catch(ctx)
			pageno = -1;
	}
	else
	{
		pageno = pdf_to_int(ctx, item);
	}

	/* Coordinates stay at zero unless the fit type supplies them. */
	xpos = 0;
	ypos = 0;
	item = pdf_array_get(ctx, dest, 1);
	if (pdf_name_eq(ctx, item, PDF_NAME_XYZ))
	{
		xpos = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));
		ypos = pdf_to_int(ctx, pdf_array_get(ctx, dest, 3));
	}
	else if (pdf_name_eq(ctx, item, PDF_NAME_FitR))
	{
		xpos = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));
		ypos = pdf_to_int(ctx, pdf_array_get(ctx, dest, 5));
	}
	else if (pdf_name_eq(ctx, item, PDF_NAME_FitH) || pdf_name_eq(ctx, item, PDF_NAME_FitBH))
	{
		ypos = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));
	}
	else if (pdf_name_eq(ctx, item, PDF_NAME_FitV) || pdf_name_eq(ctx, item, PDF_NAME_FitBV))
	{
		xpos = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));
	}

	if (pageno < 0)
		return NULL;

	if (xpos == 0 && ypos == 0)
		fz_snprintf(uri, sizeof uri, "#%d", pageno + 1);
	else
		fz_snprintf(uri, sizeof uri, "#%d,%d,%d", pageno + 1, xpos, ypos);

	return fz_strdup(ctx, uri);
}
/*
 * Convert an action dictionary into a newly allocated link string.
 *
 * The action's /S entry selects the handling:
 *   GoTo   - /D is converted by pdf_parse_link_dest.
 *   URI    - /URI is returned as-is when fz_is_external_link accepts it;
 *            otherwise it is prefixed with Root/URI/Base from the trailer,
 *            or with "file://" when no base is present.
 *   Launch - /F is converted by pdf_parse_file_spec with no destination.
 *   GoToR  - /F and /D are converted together by pdf_parse_file_spec.
 *   Named  - FirstPage, LastPage, PrevPage and NextPage are resolved
 *            relative to pagenum and returned as "#page" (1-based).
 *            PrevPage/NextPage need pagenum >= 0 and do not move past the
 *            first/last page.
 *
 * Returns NULL when action is NULL, the action type is not one of the
 * above, or a Named action cannot be resolved.
 */
char *
pdf_parse_link_action(fz_context *ctx, pdf_document *doc, pdf_obj *action, int pagenum)
{
	pdf_obj *type, *target, *spec;

	if (action == NULL)
		return NULL;

	type = pdf_dict_get(ctx, action, PDF_NAME_S);

	if (pdf_name_eq(ctx, PDF_NAME_GoTo, type))
	{
		target = pdf_dict_get(ctx, action, PDF_NAME_D);
		return pdf_parse_link_dest(ctx, doc, target);
	}

	if (pdf_name_eq(ctx, PDF_NAME_URI, type))
	{
		/* URI entries are ASCII strings */
		const char *uri = pdf_to_str_buf(ctx, pdf_dict_get(ctx, action, PDF_NAME_URI));
		pdf_obj *base_obj;
		const char *base;
		size_t base_len, uri_len;
		char *joined;

		if (fz_is_external_link(ctx, uri))
			return fz_strdup(ctx, uri);

		base_obj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/URI/Base");
		base = base_obj ? pdf_to_str_buf(ctx, base_obj) : "file://";

		/* Concatenate base and uri into one exactly sized buffer; the
		 * second copy carries the terminating NUL along. */
		base_len = strlen(base);
		uri_len = strlen(uri);
		joined = fz_malloc(ctx, base_len + uri_len + 1);
		memcpy(joined, base, base_len);
		memcpy(joined + base_len, uri, uri_len + 1);
		return joined;
	}

	if (pdf_name_eq(ctx, PDF_NAME_Launch, type))
	{
		spec = pdf_dict_get(ctx, action, PDF_NAME_F);
		return pdf_parse_file_spec(ctx, doc, spec, NULL);
	}

	if (pdf_name_eq(ctx, PDF_NAME_GoToR, type))
	{
		target = pdf_dict_get(ctx, action, PDF_NAME_D);
		spec = pdf_dict_get(ctx, action, PDF_NAME_F);
		return pdf_parse_file_spec(ctx, doc, spec, target);
	}

	if (!pdf_name_eq(ctx, PDF_NAME_Named, type))
		return NULL;

	target = pdf_dict_get(ctx, action, PDF_NAME_N);
	if (pdf_name_eq(ctx, PDF_NAME_FirstPage, target))
	{
		pagenum = 0;
	}
	else if (pdf_name_eq(ctx, PDF_NAME_LastPage, target))
	{
		pagenum = pdf_count_pages(ctx, doc) - 1;
	}
	else if (pdf_name_eq(ctx, PDF_NAME_PrevPage, target) && pagenum >= 0)
	{
		/* Already on the first page: stay there. */
		if (pagenum > 0)
			pagenum--;
	}
	else if (pdf_name_eq(ctx, PDF_NAME_NextPage, target) && pagenum >= 0)
	{
		/* Already on the last page: stay there. */
		if (pagenum < pdf_count_pages(ctx, doc) - 1)
			pagenum++;
	}
	else
	{
		return NULL;
	}

	return fz_asprintf(ctx, "#%d", pagenum + 1);
}