static int pdf_extgstate_uses_blending(pdf_document *doc, pdf_obj *dict) { pdf_obj *obj = pdf_dict_gets(dict, "BM"); /* SumatraPDF: properly support /BM arrays */ if (pdf_is_array(obj)) { int k; for (k = 0; k < pdf_array_len(obj); k++) { char *bm = pdf_to_name(pdf_array_get(obj, k)); if (!strcmp(bm, "Normal") || fz_lookup_blendmode(bm) > 0) break; } obj = pdf_array_get(obj, k); } if (pdf_is_name(obj) && strcmp(pdf_to_name(obj), "Normal")) return 1; /* SumatraPDF: support transfer functions */ obj = pdf_dict_getsa(dict, "TR", "TR2"); if (obj && !pdf_is_name(obj)) return 1; return 0; }
static void pdf_load_font_descriptor(pdf_font_desc *fontdesc, pdf_document *xref, pdf_obj *dict, char *collection, char *basefont) { pdf_obj *obj1, *obj2, *obj3, *obj; char *fontname; char *origname; FT_Face face; fz_context *ctx = xref->ctx; if (!strchr(basefont, ',') || strchr(basefont, '+')) origname = pdf_to_name(pdf_dict_gets(dict, "FontName")); else origname = basefont; fontname = clean_font_name(origname); fontdesc->flags = pdf_to_int(pdf_dict_gets(dict, "Flags")); fontdesc->italic_angle = pdf_to_real(pdf_dict_gets(dict, "ItalicAngle")); fontdesc->ascent = pdf_to_real(pdf_dict_gets(dict, "Ascent")); fontdesc->descent = pdf_to_real(pdf_dict_gets(dict, "Descent")); fontdesc->cap_height = pdf_to_real(pdf_dict_gets(dict, "CapHeight")); fontdesc->x_height = pdf_to_real(pdf_dict_gets(dict, "XHeight")); fontdesc->missing_width = pdf_to_real(pdf_dict_gets(dict, "MissingWidth")); obj1 = pdf_dict_gets(dict, "FontFile"); obj2 = pdf_dict_gets(dict, "FontFile2"); obj3 = pdf_dict_gets(dict, "FontFile3"); obj = obj1 ? obj1 : obj2 ? obj2 : obj3; if (pdf_is_indirect(obj)) { fz_try(ctx) { pdf_load_embedded_font(fontdesc, xref, obj); } fz_catch(ctx) { fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font"); if (origname != fontname) pdf_load_builtin_font(ctx, fontdesc, fontname); else pdf_load_system_font(ctx, fontdesc, fontname, collection); /* RJW: "cannot load font descriptor (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict) */ } } else { if (origname != fontname)
pdfout_data * pdfout_data_scalar_from_pdf (fz_context *ctx, pdf_obj *obj) { const char *s; if (pdf_is_null (ctx, obj)) return pdfout_data_scalar_new (ctx, "null", strlen ("null")); else if (pdf_is_bool (ctx, obj)) { if (pdf_to_bool (ctx, obj)) s = "true"; else s = "false"; return pdfout_data_scalar_new (ctx, s, strlen (s)); } else if (pdf_is_name (ctx, obj)) { s = pdf_to_name (ctx, obj); return pdfout_data_scalar_new (ctx, s, strlen (s)); } else if (pdf_is_string (ctx, obj)) { int len; char *str = pdfout_str_obj_to_utf8 (ctx, obj, &len); pdfout_data *result = pdfout_data_scalar_new (ctx, str, len); free (str); return result; } else if (pdf_is_int (ctx, obj)) { int n = pdf_to_int (ctx, obj); char buf[200]; int len = pdfout_snprintf (ctx, buf, "%d", n); return pdfout_data_scalar_new (ctx, buf, len); } else if (pdf_is_real (ctx, obj)) { float f = pdf_to_real (ctx, obj); char buf[200]; int len = pdfout_snprintf (ctx, buf, "%g", f); return pdfout_data_scalar_new (ctx, buf, len); } else abort(); }
static void pdf_write_widget_appearance(fz_context *ctx, pdf_annot *annot, fz_buffer *buf, fz_rect *rect, fz_rect *bbox, fz_matrix *matrix, pdf_obj **res) { pdf_obj *ft = pdf_dict_get_inheritable(ctx, annot->obj, PDF_NAME(FT)); if (pdf_name_eq(ctx, ft, PDF_NAME(Tx))) { int ff = pdf_field_flags(ctx, annot->obj); char *format = NULL; const char *text = NULL; if (!annot->ignore_trigger_events) { format = pdf_field_event_format(ctx, annot->page->doc, annot->obj); if (format) text = format; else text = pdf_field_value(ctx, annot->obj); } else { text = pdf_field_value(ctx, annot->obj); } fz_try(ctx) pdf_write_tx_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res, text, ff); fz_always(ctx) fz_free(ctx, format); fz_catch(ctx) fz_rethrow(ctx); } else if (pdf_name_eq(ctx, ft, PDF_NAME(Ch))) { pdf_write_ch_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res); } else if (pdf_name_eq(ctx, ft, PDF_NAME(Sig))) { pdf_write_sig_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res); } else { fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create appearance stream for %s widgets", pdf_to_name(ctx, ft)); } }
int pdf_lookup_page_number(pdf_document *doc, pdf_obj *node) { fz_context *ctx = doc->ctx; int needle = pdf_to_num(node); int total = 0; pdf_obj *parent, *parent2; if (strcmp(pdf_to_name(pdf_dict_gets(node, "Type")), "Page") != 0) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid page object"); parent2 = parent = pdf_dict_gets(node, "Parent"); fz_var(parent); fz_try(ctx) { while (pdf_is_dict(parent)) { if (pdf_mark_obj(parent)) fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree (parents)"); total += pdf_count_pages_before_kid(doc, parent, needle); needle = pdf_to_num(parent); parent = pdf_dict_gets(parent, "Parent"); } } fz_always(ctx) { /* Run back and unmark */ while (parent2) { pdf_unmark_obj(parent2); if (parent2 == parent) break; parent2 = pdf_dict_gets(parent2, "Parent"); } } fz_catch(ctx) { fz_rethrow(ctx); } return total; }
const char * pdf_annot_icon_name(fz_context *ctx, pdf_annot *annot) { pdf_obj *name; check_allowed_subtypes(ctx, annot, PDF_NAME(Name), icon_name_subtypes); name = pdf_dict_get(ctx, annot->obj, PDF_NAME(Name)); if (!name) { pdf_obj *subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)); if (pdf_name_eq(ctx, subtype, PDF_NAME(Text))) return "Note"; if (pdf_name_eq(ctx, subtype, PDF_NAME(Stamp))) return "Draft"; if (pdf_name_eq(ctx, subtype, PDF_NAME(FileAttachment))) return "PushPin"; if (pdf_name_eq(ctx, subtype, PDF_NAME(Sound))) return "Speaker"; } return pdf_to_name(ctx, name); }
static int pdf_count_pages_before_kid(pdf_document *doc, pdf_obj *parent, int kid_num) { pdf_obj *kids = pdf_dict_gets(parent, "Kids"); int i, total = 0, len = pdf_array_len(kids); for (i = 0; i < len; i++) { pdf_obj *kid = pdf_array_get(kids, i); if (pdf_to_num(kid) == kid_num) return total; if (!strcmp(pdf_to_name(pdf_dict_gets(kid, "Type")), "Pages")) { pdf_obj *count = pdf_dict_gets(kid, "Count"); int n = pdf_to_int(count); if (count == NULL || n <= 0) fz_throw(doc->ctx, FZ_ERROR_GENERIC, "illegal or missing count in pages tree"); total += n; } else total++; } fz_throw(doc->ctx, FZ_ERROR_GENERIC, "kid not found in parent's kids array"); }
/* * Load CMap stream in PDF file */ pdf_cmap * pdf_load_embedded_cmap(pdf_document *doc, pdf_obj *stmobj) { fz_stream *file = NULL; pdf_cmap *cmap = NULL; pdf_cmap *usecmap; pdf_obj *wmode; pdf_obj *obj = NULL; fz_context *ctx = doc->ctx; int phase = 0; fz_var(phase); fz_var(obj); fz_var(file); fz_var(cmap); if (pdf_obj_marked(stmobj)) fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in embedded cmap"); if ((cmap = pdf_find_item(ctx, pdf_free_cmap_imp, stmobj)) != NULL) { return cmap; } fz_try(ctx) { file = pdf_open_stream(doc, pdf_to_num(stmobj), pdf_to_gen(stmobj)); phase = 1; cmap = pdf_load_cmap(ctx, file); phase = 2; fz_close(file); file = NULL; wmode = pdf_dict_gets(stmobj, "WMode"); if (pdf_is_int(wmode)) pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(wmode)); obj = pdf_dict_gets(stmobj, "UseCMap"); if (pdf_is_name(obj)) { usecmap = pdf_load_system_cmap(ctx, pdf_to_name(obj)); pdf_set_usecmap(ctx, cmap, usecmap); pdf_drop_cmap(ctx, usecmap); } else if (pdf_is_indirect(obj)) { phase = 3; pdf_mark_obj(obj); usecmap = pdf_load_embedded_cmap(doc, obj); pdf_unmark_obj(obj); phase = 4; pdf_set_usecmap(ctx, cmap, usecmap); pdf_drop_cmap(ctx, usecmap); } pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); } fz_catch(ctx) { if (file) fz_close(file); if (cmap) pdf_drop_cmap(ctx, cmap); if (phase < 1) fz_rethrow_message(ctx, "cannot open cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); else if (phase < 2) fz_rethrow_message(ctx, "cannot parse cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); else if (phase < 3) fz_rethrow_message(ctx, "cannot load system usecmap '%s'", pdf_to_name(obj)); else { if (phase == 3) pdf_unmark_obj(obj); fz_rethrow_message(ctx, "cannot load embedded usecmap (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); } } return cmap; }
static void pdf_process_extgstate(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, pdf_obj *dict) { pdf_obj *obj; obj = pdf_dict_get(ctx, dict, PDF_NAME_LW); if (pdf_is_number(ctx, obj) && proc->op_w) proc->op_w(ctx, proc, pdf_to_real(ctx, obj)); obj = pdf_dict_get(ctx, dict, PDF_NAME_LC); if (pdf_is_int(ctx, obj) && proc->op_J) proc->op_J(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2)); obj = pdf_dict_get(ctx, dict, PDF_NAME_LJ); if (pdf_is_int(ctx, obj) && proc->op_j) proc->op_j(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2)); obj = pdf_dict_get(ctx, dict, PDF_NAME_ML); if (pdf_is_number(ctx, obj) && proc->op_M) proc->op_M(ctx, proc, pdf_to_real(ctx, obj)); obj = pdf_dict_get(ctx, dict, PDF_NAME_D); if (pdf_is_array(ctx, obj) && proc->op_d) { pdf_obj *dash_array = pdf_array_get(ctx, obj, 0); pdf_obj *dash_phase = pdf_array_get(ctx, obj, 1); proc->op_d(ctx, proc, dash_array, pdf_to_real(ctx, dash_phase)); } obj = pdf_dict_get(ctx, dict, PDF_NAME_RI); if (pdf_is_name(ctx, obj) && proc->op_ri) proc->op_ri(ctx, proc, pdf_to_name(ctx, obj)); obj = pdf_dict_get(ctx, dict, PDF_NAME_FL); if (pdf_is_number(ctx, obj) && proc->op_i) proc->op_i(ctx, proc, pdf_to_real(ctx, obj)); obj = pdf_dict_get(ctx, dict, PDF_NAME_Font); if (pdf_is_array(ctx, obj) && proc->op_Tf) { pdf_obj *font_ref = pdf_array_get(ctx, obj, 0); pdf_obj *font_size = pdf_array_get(ctx, obj, 1); pdf_font_desc *font = load_font_or_hail_mary(ctx, csi->doc, csi->rdb, font_ref, 0, csi->cookie); fz_try(ctx) proc->op_Tf(ctx, proc, "ExtGState", font, pdf_to_real(ctx, font_size)); fz_always(ctx) pdf_drop_font(ctx, font); fz_catch(ctx) fz_rethrow(ctx); } /* transfer functions */ obj = pdf_dict_get(ctx, dict, PDF_NAME_TR2); if (pdf_is_name(ctx, obj)) if (!pdf_name_eq(ctx, obj, PDF_NAME_Identity) && !pdf_name_eq(ctx, obj, PDF_NAME_Default)) fz_warn(ctx, "ignoring transfer function"); if (!obj) /* TR is ignored in the presence of TR2 */ { pdf_obj *tr = pdf_dict_get(ctx, dict, PDF_NAME_TR); if (pdf_is_name(ctx, tr)) if (!pdf_name_eq(ctx, tr, PDF_NAME_Identity)) fz_warn(ctx, "ignoring transfer function"); } /* transparency state */ obj = pdf_dict_get(ctx, dict, PDF_NAME_CA); if (pdf_is_number(ctx, obj) && proc->op_gs_CA) proc->op_gs_CA(ctx, proc, pdf_to_real(ctx, obj)); obj = pdf_dict_get(ctx, dict, PDF_NAME_ca); if (pdf_is_number(ctx, obj) && proc->op_gs_ca) proc->op_gs_ca(ctx, proc, pdf_to_real(ctx, obj)); obj = pdf_dict_get(ctx, dict, PDF_NAME_BM); if (pdf_is_array(ctx, obj)) obj = pdf_array_get(ctx, obj, 0); if (pdf_is_name(ctx, obj) && proc->op_gs_BM) proc->op_gs_BM(ctx, proc, pdf_to_name(ctx, obj)); obj = pdf_dict_get(ctx, dict, PDF_NAME_SMask); if (proc->op_gs_SMask) { if (pdf_is_dict(ctx, obj)) { pdf_xobject *xobj; pdf_obj *group, *s, *bc, *tr; float softmask_bc[FZ_MAX_COLORS]; fz_colorspace *colorspace; int colorspace_n = 1; int k, luminosity; fz_var(xobj); group = pdf_dict_get(ctx, obj, PDF_NAME_G); if (!group) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load softmask xobject (%d 0 R)", pdf_to_num(ctx, obj)); xobj = pdf_load_xobject(ctx, csi->doc, group); fz_try(ctx) { colorspace = pdf_xobject_colorspace(ctx, xobj); if (colorspace) { colorspace_n = fz_colorspace_n(ctx, colorspace); fz_drop_colorspace(ctx, colorspace); } /* Default background color is black. */ for (k = 0; k < colorspace_n; k++) softmask_bc[k] = 0; /* Which in CMYK means not all zeros! This should really be * a test for subtractive color spaces, but this will have * to do for now. */ if (colorspace == fz_device_cmyk(ctx)) softmask_bc[3] = 1.0; bc = pdf_dict_get(ctx, obj, PDF_NAME_BC); if (pdf_is_array(ctx, bc)) { for (k = 0; k < colorspace_n; k++) softmask_bc[k] = pdf_to_real(ctx, pdf_array_get(ctx, bc, k)); } s = pdf_dict_get(ctx, obj, PDF_NAME_S); if (pdf_name_eq(ctx, s, PDF_NAME_Luminosity)) luminosity = 1; else luminosity = 0; tr = pdf_dict_get(ctx, obj, PDF_NAME_TR); if (tr && !pdf_name_eq(ctx, tr, PDF_NAME_Identity)) fz_warn(ctx, "ignoring transfer function"); proc->op_gs_SMask(ctx, proc, xobj, csi->rdb, softmask_bc, luminosity); } fz_always(ctx) { pdf_drop_xobject(ctx, xobj); } fz_catch(ctx) { fz_rethrow(ctx); } } else if (pdf_is_name(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_None)) { proc->op_gs_SMask(ctx, proc, NULL, NULL, NULL, 0); } }
pdf_xobject * pdf_load_xobject(pdf_document *xref, pdf_obj *dict) { pdf_xobject *form; pdf_obj *obj; fz_context *ctx = xref->ctx; if ((form = pdf_find_item(ctx, pdf_free_xobject_imp, dict))) { return form; } form = fz_malloc_struct(ctx, pdf_xobject); FZ_INIT_STORABLE(form, 1, pdf_free_xobject_imp); form->resources = NULL; form->contents = NULL; form->colorspace = NULL; form->me = NULL; /* Store item immediately, to avoid possible recursion if objects refer back to this one */ pdf_store_item(ctx, dict, form, pdf_xobject_size(form)); obj = pdf_dict_gets(dict, "BBox"); form->bbox = pdf_to_rect(ctx, obj); obj = pdf_dict_gets(dict, "Matrix"); if (obj) form->matrix = pdf_to_matrix(ctx, obj); else form->matrix = fz_identity; form->isolated = 0; form->knockout = 0; form->transparency = 0; obj = pdf_dict_gets(dict, "Group"); if (obj) { pdf_obj *attrs = obj; form->isolated = pdf_to_bool(pdf_dict_gets(attrs, "I")); form->knockout = pdf_to_bool(pdf_dict_gets(attrs, "K")); obj = pdf_dict_gets(attrs, "S"); if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "Transparency")) form->transparency = 1; obj = pdf_dict_gets(attrs, "CS"); if (obj) { form->colorspace = pdf_load_colorspace(xref, obj); if (!form->colorspace) fz_throw(ctx, "cannot load xobject colorspace"); } } form->resources = pdf_dict_gets(dict, "Resources"); if (form->resources) pdf_keep_obj(form->resources); fz_try(ctx) { form->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); } fz_catch(ctx) { pdf_remove_item(ctx, pdf_free_xobject_imp, dict); pdf_drop_xobject(ctx, form); fz_throw(ctx, "cannot load xobject content stream (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } form->me = pdf_keep_obj(dict); return form; }
static void gatherimages(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) { int i, n; n = pdf_dict_len(dict); for (i = 0; i < n; i++) { pdf_obj *imagedict; pdf_obj *type; pdf_obj *width; pdf_obj *height; pdf_obj *bpc = NULL; pdf_obj *filter = NULL; pdf_obj *cs = NULL; pdf_obj *altcs; int k; imagedict = pdf_dict_get_val(dict, i); if (!pdf_is_dict(imagedict)) { fz_warn(ctx, "not an image dict (%d %d R)", pdf_to_num(imagedict), pdf_to_gen(imagedict)); continue; } type = pdf_dict_gets(imagedict, "Subtype"); if (strcmp(pdf_to_name(type), "Image")) continue; filter = pdf_dict_gets(imagedict, "Filter"); altcs = NULL; cs = pdf_dict_gets(imagedict, "ColorSpace"); if (pdf_is_array(cs)) { pdf_obj *cses = cs; cs = pdf_array_get(cses, 0); if (pdf_is_name(cs) && (!strcmp(pdf_to_name(cs), "DeviceN") || !strcmp(pdf_to_name(cs), "Separation"))) { altcs = pdf_array_get(cses, 2); if (pdf_is_array(altcs)) altcs = pdf_array_get(altcs, 0); } } width = pdf_dict_gets(imagedict, "Width"); height = pdf_dict_gets(imagedict, "Height"); bpc = pdf_dict_gets(imagedict, "BitsPerComponent"); for (k = 0; k < images; k++) if (!pdf_objcmp(image[k].u.image.obj, imagedict)) break; if (k < images) continue; image = fz_resize_array(ctx, image, images+1, sizeof(struct info)); images++; image[images - 1].page = page; image[images - 1].pageref = pageref; image[images - 1].pageobj = pageobj; image[images - 1].u.image.obj = imagedict; image[images - 1].u.image.width = width; image[images - 1].u.image.height = height; image[images - 1].u.image.bpc = bpc; image[images - 1].u.image.filter = filter; image[images - 1].u.image.cs = cs; image[images - 1].u.image.altcs = altcs; } }
pdf_font_desc * pdf_load_type3_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_font *font; fz_var(fontdesc); /* Make a new type3 font entry in the document */ if (doc->num_type3_fonts == doc->max_type3_fonts) { int new_max = doc->max_type3_fonts * 2; if (new_max == 0) new_max = 4; doc->type3_fonts = fz_resize_array(ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts)); doc->max_type3_fonts = new_max; } fz_try(ctx) { obj = pdf_dict_get(ctx, dict, PDF_NAME_Name); if (pdf_is_name(ctx, obj)) fz_strlcpy(buf, pdf_to_name(ctx, obj), sizeof buf); else fz_strlcpy(buf, "Unnamed-T3", sizeof buf); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_get(ctx, dict, PDF_NAME_FontMatrix); pdf_to_matrix(ctx, obj, &matrix); obj = pdf_dict_get(ctx, dict, PDF_NAME_FontBBox); fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); font = fz_new_type3_font(ctx, buf, &matrix); fontdesc->font = font; fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_get(ctx, dict, PDF_NAME_Encoding); if (!encoding) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(ctx, encoding)) pdf_load_encoding(estrings, pdf_to_name(ctx, encoding)); if (pdf_is_dict(ctx, encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_get(ctx, encoding, PDF_NAME_BaseEncoding); if (pdf_is_name(ctx, base)) pdf_load_encoding(estrings, pdf_to_name(ctx, base)); diff = pdf_dict_get(ctx, encoding, PDF_NAME_Differences); if (pdf_is_array(ctx, diff)) { n = pdf_array_len(ctx, diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(ctx, diff, i); if (pdf_is_int(ctx, item)) k = pdf_to_int(ctx, item); if (pdf_is_name(ctx, item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(ctx, item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME_ToUnicode)); /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_FirstChar)); last = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_LastChar)); if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_get(ctx, dict, PDF_NAME_Widths); if (!widths) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(ctx, pdf_array_get(ctx, widths, i - first)); w = font->t3matrix.a * w * 1000; font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ font->t3freeres = pdf_t3_free_resources; font->t3resources = pdf_dict_get(ctx, dict, PDF_NAME_Resources); if (!font->t3resources) font->t3resources = rdb; if (font->t3resources) pdf_keep_obj(ctx, font->t3resources); if (!font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); font->t3doc = doc; font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_get(ctx, dict, PDF_NAME_CharProcs); if (!charprocs) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { obj = pdf_dict_gets(ctx, charprocs, estrings[i]); if (pdf_is_stream(ctx, obj)) { font->t3procs[i] = pdf_load_stream(ctx, obj); fz_trim_buffer(ctx, font->t3procs[i]); fontdesc->size += fz_buffer_storage(ctx, font->t3procs[i], NULL); fontdesc->size += 0; // TODO: display list size calculation } } } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_rethrow(ctx); } doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, font); return fontdesc; }
static pdf_obj * pdf_lookup_page_loc_imp(pdf_document *doc, pdf_obj *node, int *skip, pdf_obj **parentp, int *indexp) { fz_context *ctx = doc->ctx; pdf_obj *kids, *hit; int i, len; kids = pdf_dict_gets(node, "Kids"); len = pdf_array_len(kids); if (pdf_mark_obj(node)) fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree"); hit = NULL; fz_var(hit); fz_try(ctx) { for (i = 0; i < len; i++) { pdf_obj *kid = pdf_array_get(kids, i); char *type = pdf_to_name(pdf_dict_gets(kid, "Type")); if (!strcmp(type, "Page")) { if (*skip == 0) { if (parentp) *parentp = node; if (indexp) *indexp = i; hit = kid; break; } else { (*skip)--; } } else if (!strcmp(type, "Pages")) { int count = pdf_to_int(pdf_dict_gets(kid, "Count")); if (*skip < count) { hit = pdf_lookup_page_loc_imp(doc, kid, skip, parentp, indexp); if (hit) break; } else { *skip -= count; } } else { fz_throw(ctx, FZ_ERROR_GENERIC, "non-page object in page tree"); } } } fz_always(ctx) { pdf_unmark_obj(node); } fz_catch(ctx) { fz_rethrow(ctx); } return hit; }
/* * Create a filter given a name and param dictionary. */ static fz_stream * build_filter(fz_stream *chain, pdf_document * xref, pdf_obj * f, pdf_obj * p, int num, int gen, pdf_image_params *params) { fz_context *ctx = chain->ctx; char *s = pdf_to_name(f); int predictor = pdf_to_int(pdf_dict_gets(p, "Predictor")); int columns = pdf_to_int(pdf_dict_gets(p, "Columns")); int colors = pdf_to_int(pdf_dict_gets(p, "Colors")); int bpc = pdf_to_int(pdf_dict_gets(p, "BitsPerComponent")); if (predictor == 0) predictor = 1; if (columns == 0) columns = 1; if (colors == 0) colors = 1; if (bpc == 0) bpc = 8; if (!strcmp(s, "ASCIIHexDecode") || !strcmp(s, "AHx")) return fz_open_ahxd(chain); else if (!strcmp(s, "ASCII85Decode") || !strcmp(s, "A85")) return fz_open_a85d(chain); else if (!strcmp(s, "CCITTFaxDecode") || !strcmp(s, "CCF")) { pdf_obj *k = pdf_dict_gets(p, "K"); pdf_obj *eol = pdf_dict_gets(p, "EndOfLine"); pdf_obj *eba = pdf_dict_gets(p, "EncodedByteAlign"); pdf_obj *columns = pdf_dict_gets(p, "Columns"); pdf_obj *rows = pdf_dict_gets(p, "Rows"); pdf_obj *eob = pdf_dict_gets(p, "EndOfBlock"); pdf_obj *bi1 = pdf_dict_gets(p, "BlackIs1"); if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_FAX; params->u.fax.k = (k ? pdf_to_int(k) : 0); params->u.fax.eol = (eol ? pdf_to_bool(eol) : 0); params->u.fax.eba = (eba ? pdf_to_bool(eba) : 0); params->u.fax.columns = (columns ? pdf_to_int(columns) : 1728); params->u.fax.rows = (rows ? pdf_to_int(rows) : 0); params->u.fax.eob = (eob ? pdf_to_bool(eob) : 1); params->u.fax.bi1 = (bi1 ? pdf_to_bool(bi1) : 0); return chain; } return fz_open_faxd(chain, k ? pdf_to_int(k) : 0, eol ? pdf_to_bool(eol) : 0, eba ? pdf_to_bool(eba) : 0, columns ? pdf_to_int(columns) : 1728, rows ? pdf_to_int(rows) : 0, eob ? pdf_to_bool(eob) : 1, bi1 ? pdf_to_bool(bi1) : 0); } else if (!strcmp(s, "DCTDecode") || !strcmp(s, "DCT")) { pdf_obj *ct = pdf_dict_gets(p, "ColorTransform"); if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_JPEG; params->u.jpeg.ct = (ct ? pdf_to_int(ct) : -1); return chain; } return fz_open_dctd(chain, ct ? pdf_to_int(ct) : -1); } else if (!strcmp(s, "RunLengthDecode") || !strcmp(s, "RL")) { if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_RLD; return chain; } return fz_open_rld(chain); } else if (!strcmp(s, "FlateDecode") || !strcmp(s, "Fl")) { if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_FLATE; params->u.flate.predictor = predictor; params->u.flate.columns = columns; params->u.flate.colors = colors; params->u.flate.bpc = bpc; return chain; } chain = fz_open_flated(chain); if (predictor > 1) chain = fz_open_predict(chain, predictor, columns, colors, bpc); return chain; } else if (!strcmp(s, "LZWDecode") || !strcmp(s, "LZW")) { pdf_obj *ec = pdf_dict_gets(p, "EarlyChange"); if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_LZW; params->u.lzw.predictor = predictor; params->u.lzw.columns = columns; params->u.lzw.colors = colors; params->u.lzw.bpc = bpc; params->u.lzw.ec = (ec ? pdf_to_int(ec) : 1); return chain; } chain = fz_open_lzwd(chain, ec ? pdf_to_int(ec) : 1); if (predictor > 1) chain = fz_open_predict(chain, predictor, columns, colors, bpc); return chain; } else if (!strcmp(s, "JBIG2Decode")) { fz_buffer *globals = NULL; pdf_obj *obj = pdf_dict_gets(p, "JBIG2Globals"); if (obj) globals = pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); /* fz_open_jbig2d takes possession of globals */ return fz_open_jbig2d(chain, globals); } else if (!strcmp(s, "JPXDecode")) return chain; /* JPX decoding is special cased in the image loading code */ else if (!strcmp(s, "Crypt")) { pdf_obj *name; if (!xref->crypt) { fz_warn(ctx, "crypt filter in unencrypted document"); return chain; } name = pdf_dict_gets(p, "Name"); if (pdf_is_name(name)) return pdf_open_crypt_with_filter(chain, xref->crypt, pdf_to_name(name), num, gen); return chain; } fz_warn(ctx, "unknown filter name (%s)", s); return chain; }
pdf_font_desc * pdf_load_type3_font(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_context *ctx = doc->ctx; fz_var(fontdesc); /* Make a new type3 font entry in the document */ if (doc->num_type3_fonts == doc->max_type3_fonts) { int new_max = doc->max_type3_fonts * 2; if (new_max == 0) new_max = 4; doc->type3_fonts = fz_resize_array(doc->ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts)); doc->max_type3_fonts = new_max; } fz_try(ctx) { obj = pdf_dict_gets(dict, "Name"); if (pdf_is_name(obj)) fz_strlcpy(buf, pdf_to_name(obj), sizeof buf); else sprintf(buf, "Unnamed-T3"); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_gets(dict, "FontMatrix"); pdf_to_matrix(ctx, obj, &matrix); obj = pdf_dict_gets(dict, "FontBBox"); fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); fontdesc->font = fz_new_type3_font(ctx, buf, &matrix); fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* SumatraPDF: expose Type3 FontDescriptor flags */ fontdesc->flags = pdf_to_int(pdf_dict_gets(pdf_dict_gets(dict, "FontDescriptor"), "Flags")); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_gets(dict, "Encoding"); if (!encoding) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); /* SumatraPDF: trying to match Adobe Reader's behavior */ if (!(fontdesc->flags & PDF_FD_SYMBOLIC) && fontdesc->cid_to_ucs_len >= 128) for (i = 32; i < 128; i++) if (fontdesc->cid_to_ucs[i] == '?' || fontdesc->cid_to_ucs[i] == '\0') fontdesc->cid_to_ucs[i] = i; /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1966 */ if (first >= 256 && last - first < 256) { fz_warn(ctx, "ignoring out-of-bound values for FirstChar/LastChar: %d/%d", first, last); last -= first; first = 0; } if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_gets(dict, "Widths"); if (!widths) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(pdf_array_get(widths, i - first)); w = fontdesc->font->t3matrix.a * w * 1000; fontdesc->font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ fontdesc->font->t3freeres = pdf_t3_free_resources; fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources"); if (!fontdesc->font->t3resources) fontdesc->font->t3resources = rdb; if (fontdesc->font->t3resources) pdf_keep_obj(fontdesc->font->t3resources); if (!fontdesc->font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); fontdesc->font->t3doc = doc; fontdesc->font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_gets(dict, "CharProcs"); if (!charprocs) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { /* SumatraPDF: don't reject fonts with few broken glyphs */ fz_try(ctx) { obj = pdf_dict_gets(charprocs, estrings[i]); if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj))) { fontdesc->font->t3procs[i] = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)); fontdesc->size += fontdesc->font->t3procs[i]->cap; fontdesc->size += 0; // TODO: display list size calculation } } fz_catch(ctx) { fz_warn(ctx, "failed to get data for type 3 glyph '%s'", estrings[i]); } } } } fz_catch(ctx) { if (fontdesc) pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, fontdesc->font); return fontdesc; }
static void check_allowed_subtypes(fz_context *ctx, pdf_annot *annot, pdf_obj *property, pdf_obj **allowed) { pdf_obj *subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)); if (!is_allowed_subtype(ctx, annot, property, allowed)) fz_throw(ctx, FZ_ERROR_GENERIC, "%s annotations have no %s property", pdf_to_name(ctx, subtype), pdf_to_name(ctx, property)); }
// just copy one of the resource sub-entries (e.g. /Font) static int copy_and_rename_resource(fz_context *dest_ctx, pdf_obj *dest, fz_context *src_ctx, pdf_obj *src, char *prefix, struct put_info *info) { char new_name[64]; /* this buffer is big enough up to hold all digits for two 16-bit numbers */ int i; for(i = 0; i < pdf_dict_len(src_ctx, src); i++) { pdf_obj *src_key = pdf_dict_get_key(src_ctx, src, i); pdf_obj *src_val = pdf_dict_get_val(src_ctx, src, i); if(!pdf_is_name(src_ctx, src_key)) { return(2); } /* if this is an inline resource, just copy this object into the new resource dict */ if(!pdf_is_indirect(src_ctx, src_val)) { if(snprintf(new_name, sizeof(new_name) / sizeof(new_name[0]), "%sinline_%d", prefix, info->next_inline_id++) >= sizeof(new_name) / sizeof(new_name[0])) return(1); // not enough space pdf_obj *new_res = copy_unassigned_object_continue(dest_ctx, info->dest_doc, src_ctx, info->src_doc, src_val, &info->new_ids); //pdf_obj *new_res = pdf_new_dict(dest_ctx, info->dest_doc, 10); printf("dump it...\n"); pdf_fprint_obj(dest_ctx, stdout, new_res, 0); /* now reference this new object in the resource object of this sheet */ pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, dest, dest_key, new_res); pdf_drop_obj(dest_ctx, dest_key); pdf_drop_obj(dest_ctx, new_res); } else { /* The new name of resource objects is always the num/gen of the referenced object in the src-file. Thus we can check by that name if the object was already referenced by another page of this sheet. */ if(snprintf(new_name, sizeof(new_name) / sizeof(new_name[0]), "%s%d_%d", prefix, pdf_to_num(dest_ctx, src_val), pdf_to_gen(dest_ctx, src_val)) >= sizeof(new_name) / sizeof(new_name[0])) return(1); // not enough space if(pdf_dict_gets(dest_ctx, dest, new_name) == NULL) { /* if this resource is not inlined and not already in the resource-dict of the current sheet... */ /* ...copy the referenced resource to the new document! If this object has copied already (for another sheet in dest_doc), copy_object_continue() will do nothing */ pdf_obj *new_res = copy_object_continue(dest_ctx, info->dest_doc, src_ctx, info->src_doc, src_val, &info->new_ids); /* now reference this new object in the resource object of this sheet */ pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, dest, dest_key, new_res); pdf_drop_obj(dest_ctx, dest_key); pdf_drop_obj(dest_ctx, new_res); } } /* even if it was used on another sheet or on this sheet, add it to the rename-dict for this sheet! Because it could have different names on different source-pages */ pdf_obj *rename_key = pdf_new_name(dest_ctx, info->dest_doc, pdf_to_name(dest_ctx, src_key)); pdf_obj *rename_val = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, info->rename_dict, rename_key, rename_val); pdf_drop_obj(dest_ctx, rename_key); pdf_drop_obj(dest_ctx, rename_val); } return(0); }
char * pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_obj *dest) { pdf_obj *filename=NULL; char *path = NULL; char *uri = NULL; char buf[256]; size_t n; if (pdf_is_string(ctx, file_spec)) filename = file_spec; if (pdf_is_dict(ctx, file_spec)) { #if defined(_WIN32) || defined(_WIN64) filename = pdf_dict_get(ctx, file_spec, PDF_NAME_DOS); #else filename = pdf_dict_get(ctx, file_spec, PDF_NAME_Unix); #endif if (!filename) filename = pdf_dict_geta(ctx, file_spec, PDF_NAME_UF, PDF_NAME_F); } if (!pdf_is_string(ctx, filename)) { fz_warn(ctx, "cannot parse file specification"); return NULL; } path = pdf_to_utf8(ctx, filename); #if defined(_WIN32) || defined(_WIN64) if (strcmp(pdf_to_name(ctx, pdf_dict_gets(ctx, file_spec, "FS")), "URL") != 0) { /* move the file name into the expected place and use the expected path separator */ char *c; if (path[0] == '/' && (('A' <= path[1] && path[1] <= 'Z') || ('a' <= path[1] && path[1] <= 'z')) && path[2] == '/') { path[0] = path[1]; path[1] = ':'; } for (c = path; *c; c++) { if (*c == '/') *c = '\\'; } } #endif if (pdf_is_array(ctx, dest)) fz_snprintf(buf, sizeof buf, "#page=%d", pdf_to_int(ctx, pdf_array_get(ctx, dest, 0)) + 1); else if (pdf_is_name(ctx, dest)) fz_snprintf(buf, sizeof buf, "#%s", pdf_to_name(ctx, dest)); else if (pdf_is_string(ctx, dest)) fz_snprintf(buf, sizeof buf, "#%s", pdf_to_str_buf(ctx, dest)); else buf[0] = 0; n = 7 + strlen(path) + strlen(buf) + 1; uri = fz_malloc(ctx, n); fz_strlcpy(uri, "file://", n); fz_strlcat(uri, path, n); fz_strlcat(uri, buf, n); fz_free(ctx, path); return uri; }
static pdf_obj * pdf_lookup_page_loc_imp(pdf_document *doc, pdf_obj *node, int *skip, pdf_obj **parentp, int *indexp) { fz_context *ctx = doc->ctx; pdf_obj *kids; pdf_obj *hit = NULL; int i, len; pdf_obj *local_stack[LOCAL_STACK_SIZE]; pdf_obj **stack = &local_stack[0]; int stack_max = LOCAL_STACK_SIZE; int stack_len = 0; fz_var(hit); fz_var(stack); fz_var(stack_len); fz_var(stack_max); fz_try(ctx) { do { kids = pdf_dict_gets(node, "Kids"); len = pdf_array_len(kids); if (len == 0) fz_throw(ctx, FZ_ERROR_GENERIC, "Malformed pages tree"); /* Every node we need to unmark goes into the stack */ if (stack_len == stack_max) { if (stack == &local_stack[0]) { stack = fz_malloc_array(ctx, stack_max * 2, sizeof(*stack)); memcpy(stack, &local_stack[0], stack_max * sizeof(*stack)); } else stack = fz_resize_array(ctx, stack, stack_max * 2, sizeof(*stack)); stack_max *= 2; } stack[stack_len++] = node; if (pdf_mark_obj(node)) fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree"); for (i = 0; i < len; i++) { pdf_obj *kid = pdf_array_get(kids, i); char *type = pdf_to_name(pdf_dict_gets(kid, "Type")); if (!strcmp(type, "Page") || (!*type && pdf_dict_gets(kid, "MediaBox"))) { if (*skip == 0) { if (parentp) *parentp = node; if (indexp) *indexp = i; hit = kid; break; } else { (*skip)--; } } else if (!strcmp(type, "Pages") || (!*type && pdf_dict_gets(kid, "Kids"))) { int count = pdf_to_int(pdf_dict_gets(kid, "Count")); if (*skip < count) { node = kid; break; } else { *skip -= count; } } else { fz_throw(ctx, FZ_ERROR_GENERIC, "non-page object in page tree (%s)", type); } } } while (hit == NULL); } fz_always(ctx) { for (i = stack_len; i > 0; i--) pdf_unmark_obj(stack[i-1]); if (stack != &local_stack[0]) fz_free(ctx, stack); } fz_catch(ctx) { fz_rethrow(ctx); } return hit; }
pdf_obj * pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf) { pdf_obj *dict; pdf_obj *key = NULL; pdf_obj *val = NULL; int tok; int a, b; fz_context *ctx = file->ctx; dict = pdf_new_dict(ctx, 8); fz_var(key); fz_var(val); fz_try(ctx) { while (1) { tok = pdf_lex(file, buf); skip: if (tok == PDF_TOK_CLOSE_DICT) break; /* for BI .. ID .. EI in content streams */ if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")) break; if (tok != PDF_TOK_NAME) fz_throw(ctx, "invalid key in dict"); key = fz_new_name(ctx, buf->scratch); tok = pdf_lex(file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1643 */ fz_try(ctx) { val = pdf_parse_array(xref, file, buf); } fz_catch(ctx) { fz_warn(ctx, "ignoring broken array for '%s'", pdf_to_name(key)); pdf_drop_obj(key); val = key = NULL; do tok = pdf_lex(file, buf); while (tok != PDF_TOK_CLOSE_DICT && tok != PDF_TOK_CLOSE_ARRAY && tok != PDF_TOK_EOF && tok != PDF_TOK_OPEN_ARRAY && tok != PDF_TOK_OPEN_DICT); if (tok == PDF_TOK_CLOSE_DICT) goto skip; if (tok == PDF_TOK_CLOSE_ARRAY) continue; fz_throw(ctx, "cannot make sense of broken array after all"); } break; case PDF_TOK_OPEN_DICT: val = pdf_parse_dict(xref, file, buf); break; case PDF_TOK_NAME: val = fz_new_name(ctx, buf->scratch); break; case PDF_TOK_REAL: val = pdf_new_real(ctx, buf->f); break; case PDF_TOK_STRING: val = pdf_new_string(ctx, buf->scratch, buf->len); break; case PDF_TOK_TRUE: val = pdf_new_bool(ctx, 1); break; case PDF_TOK_FALSE: val = pdf_new_bool(ctx, 0); break; case PDF_TOK_NULL: val = pdf_new_null(ctx); break; case PDF_TOK_INT: /* 64-bit to allow for numbers > INT_MAX and overflow */ a = buf->i; tok = pdf_lex(file, buf); if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME || (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))) { val = pdf_new_int(ctx, a); fz_dict_put(dict, key, val); pdf_drop_obj(val); val = NULL; pdf_drop_obj(key); key = NULL; goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; tok = pdf_lex(file, buf); if (tok == PDF_TOK_R) { val = pdf_new_indirect(ctx, a, b, xref); break; } } fz_throw(ctx, "invalid indirect reference in dict"); default: fz_throw(ctx, "unknown token in dict"); } fz_dict_put(dict, key, val); pdf_drop_obj(val); val = NULL; pdf_drop_obj(key); key = NULL; } } fz_catch(ctx) { pdf_drop_obj(dict); pdf_drop_obj(key); pdf_drop_obj(val); fz_throw(ctx, "cannot parse dict"); } return dict; }
pdf_page * pdf_load_page_by_obj(pdf_document *doc, int number, pdf_obj *pageref) { fz_context *ctx = doc->ctx; pdf_page *page; pdf_annot *annot; pdf_obj *pageobj, *obj; fz_rect mediabox, cropbox, realbox; float userunit; fz_matrix mat; /* SumatraPDF: allow replacing potentially slow pdf_lookup_page_obj */ pageobj = pdf_resolve_indirect(pageref); page = fz_malloc_struct(ctx, pdf_page); page->resources = NULL; page->contents = NULL; page->transparency = 0; page->links = NULL; page->annots = NULL; page->annot_tailp = &page->annots; page->deleted_annots = NULL; page->tmp_annots = NULL; page->me = pdf_keep_obj(pageobj); page->incomplete = 0; obj = pdf_dict_gets(pageobj, "UserUnit"); if (pdf_is_real(obj)) userunit = pdf_to_real(obj); else userunit = 1; pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "MediaBox"), &mediabox); if (fz_is_empty_rect(&mediabox)) { fz_warn(ctx, "cannot find page size for page %d", number + 1); mediabox.x0 = 0; mediabox.y0 = 0; mediabox.x1 = 612; mediabox.y1 = 792; } pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "CropBox"), &cropbox); if (!fz_is_empty_rect(&cropbox)) fz_intersect_rect(&mediabox, &cropbox); page->mediabox.x0 = fz_min(mediabox.x0, mediabox.x1) * userunit; page->mediabox.y0 = fz_min(mediabox.y0, mediabox.y1) * userunit; page->mediabox.x1 = fz_max(mediabox.x0, mediabox.x1) * userunit; page->mediabox.y1 = fz_max(mediabox.y0, mediabox.y1) * userunit; if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1) { fz_warn(ctx, "invalid page size in page %d", number + 1); page->mediabox = fz_unit_rect; } page->rotate = pdf_to_int(pdf_lookup_inherited_page_item(doc, pageobj, "Rotate")); /* Snap page->rotate to 0, 90, 180 or 270 */ if (page->rotate < 0) page->rotate = 360 - ((-page->rotate) % 360); if (page->rotate >= 360) page->rotate = page->rotate % 360; page->rotate = 90*((page->rotate + 45)/90); if (page->rotate > 360) page->rotate = 0; fz_pre_rotate(fz_scale(&page->ctm, 1, -1), -page->rotate); realbox = page->mediabox; fz_transform_rect(&realbox, &page->ctm); fz_pre_scale(fz_translate(&mat, -realbox.x0, -realbox.y0), userunit, userunit); fz_concat(&page->ctm, &page->ctm, &mat); fz_try(ctx) { obj = pdf_dict_gets(pageobj, "Annots"); if (obj) { page->links = pdf_load_link_annots(doc, obj, &page->ctm); pdf_load_annots(doc, page, obj); } } fz_catch(ctx) { if (fz_caught(ctx) != FZ_ERROR_TRYLATER) /* SumatraPDF: ignore annotations in case of unexpected errors */ fz_warn(ctx, "unexpectedly failed to load page annotations"); page->incomplete |= PDF_PAGE_INCOMPLETE_ANNOTS; } page->duration = pdf_to_real(pdf_dict_gets(pageobj, "Dur")); obj = pdf_dict_gets(pageobj, "Trans"); page->transition_present = (obj != NULL); if (obj) { pdf_load_transition(doc, page, obj); } // TODO: inherit page->resources = pdf_lookup_inherited_page_item(doc, pageobj, "Resources"); if (page->resources) pdf_keep_obj(page->resources); obj = pdf_dict_gets(pageobj, "Contents"); fz_try(ctx) { page->contents = pdf_keep_obj(obj); if (pdf_resources_use_blending(doc, page->resources)) page->transparency = 1; /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2107 */ else if (!strcmp(pdf_to_name(pdf_dict_getp(pageobj, "Group/S")), "Transparency")) page->transparency = 1; for (annot = page->annots; annot && !page->transparency; annot = annot->next) if (annot->ap && pdf_resources_use_blending(doc, annot->ap->resources)) page->transparency = 1; } fz_catch(ctx) { if (fz_caught(ctx) != FZ_ERROR_TRYLATER) { pdf_free_page(doc, page); fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref)); } page->incomplete |= PDF_PAGE_INCOMPLETE_CONTENTS; } return page; }
static pdf_font_desc * pdf_load_simple_font(pdf_document *xref, pdf_obj *dict) { pdf_obj *descriptor; pdf_obj *encoding; pdf_obj *widths; unsigned short *etable = NULL; pdf_font_desc *fontdesc = NULL; char *subtype; FT_Face face; FT_CharMap cmap; int symbolic; int kind; char *basefont; char *estrings[256]; char ebuffer[256][32]; int i, k, n; int fterr; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_var(etable); basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); /* Load font file */ fz_try(ctx) { fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (descriptor) pdf_load_font_descriptor(fontdesc, xref, descriptor, NULL, basefont); else pdf_load_builtin_font(ctx, fontdesc, basefont); /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ if (descriptor && pdf_is_string(pdf_dict_gets(descriptor, "FontName")) && !pdf_dict_gets(dict, "ToUnicode") && !strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") && pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4) { char *cp936fonts[] = { "\xCB\xCE\xCC\xE5", "SimSun,Regular", "\xBA\xDA\xCC\xE5", "SimHei,Regular", "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", "\xC1\xA5\xCA\xE9", "SimLi,Regular", NULL }; for (i = 0; cp936fonts[i]; i += 2) if (!strcmp(basefont, cp936fonts[i])) break; if (cp936fonts[i]) { fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); pdf_drop_font(ctx, fontdesc); fontdesc = pdf_new_font_desc(ctx); pdf_load_font_descriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1]); fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); face = fontdesc->font->ft_face; kind = ft_kind(face); goto skip_encoding; } } face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ symbolic = fontdesc->flags & 4; if (face->num_charmaps > 0) cmap = face->charmaps[0]; else cmap = NULL; for (i = 0; i < face->num_charmaps; i++) { FT_CharMap test = face->charmaps[i]; if (kind == TYPE1) { if (test->platform_id == 7) cmap = test; } if (kind == TRUETYPE) { if (test->platform_id == 1 && test->encoding_id == 0) cmap = test; if (test->platform_id == 3 && test->encoding_id == 1) cmap = test; if (symbolic && test->platform_id == 3 && test->encoding_id == 0) cmap = test; } } if (cmap) { fterr = FT_Set_Charmap(face, cmap); if (fterr) fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); } else fz_warn(ctx, "freetype could not find any cmaps"); etable = fz_malloc_array(ctx, 256, sizeof(unsigned short)); fontdesc->size += 256 * sizeof(unsigned short); for (i = 0; i < 256; i++) { estrings[i] = NULL; etable[i] = 0; } encoding = pdf_dict_gets(dict, "Encoding"); if (encoding) { if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); else if (!fontdesc->is_embedded && !symbolic) pdf_load_encoding(estrings, "StandardEncoding"); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } } /* start with the builtin encoding */ for (i = 0; i < 256; i++) etable[i] = ft_char_index(face, i); fz_lock(ctx, FZ_LOCK_FREETYPE); /* built-in and substitute fonts may be a different type than what the document expects */ subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype")); if (!strcmp(subtype, "Type1")) kind = TYPE1; else if (!strcmp(subtype, "MMType1")) kind = TYPE1; else if (!strcmp(subtype, "TrueType")) kind = TRUETYPE; else if (!strcmp(subtype, "CIDFontType0")) kind = TYPE1; else if (!strcmp(subtype, "CIDFontType2")) kind = TRUETYPE; /* encode by glyph name where we can */ if (kind == TYPE1) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) { int aglcode = pdf_lookup_agl(estrings[i]); const char **dupnames = pdf_lookup_agl_duplicates(aglcode); while (*dupnames) { etable[i] = FT_Get_Name_Index(face, (char*)*dupnames); if (etable[i]) break; dupnames++; } } } } } /* encode by glyph name where we can */ if (kind == TRUETYPE) { /* Unicode cmap */ if (!symbolic && face->charmap && face->charmap->platform_id == 3) { for (i = 0; i < 256; i++) { if (estrings[i]) { int aglcode = pdf_lookup_agl(estrings[i]); if (!aglcode) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, aglcode); } } } /* MacRoman cmap */ else if (!symbolic && face->charmap && face->charmap->platform_id == 1) { for (i = 0; i < 256; i++) { if (estrings[i]) { k = lookup_mre_code(estrings[i]); if (k <= 0) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, k); } } } /* Symbolic cmap */ else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) etable[i] = ft_char_index(face, i); } } } } /* try to reverse the glyph names from the builtin encoding */ for (i = 0; i < 256; i++) { if (etable[i] && !estrings[i]) { if (FT_HAS_GLYPH_NAMES(face)) { fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); if (fterr) fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); if (ebuffer[i][0]) estrings[i] = ebuffer[i]; } else { estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */ } } } /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ if (kind == TYPE1 && symbolic) { for (i = 0; i < 256; i++) if (etable[i] && estrings[i] && !pdf_lookup_agl(estrings[i])) estrings[i] = (char*) pdf_standard[i]; } fz_unlock(ctx, FZ_LOCK_FREETYPE); fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); fontdesc->cid_to_gid_len = 256; fontdesc->cid_to_gid = etable; fz_try(ctx) { pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); } fz_catch(ctx) { fz_warn(ctx, "cannot load ToUnicode CMap"); } skip_encoding: /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); widths = pdf_dict_gets(dict, "Widths"); if (widths) { int first, last; first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; for (i = 0; i < last - first + 1; i++) { int wid = pdf_to_int(pdf_array_get(widths, i)); pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); } } else { fz_lock(ctx, FZ_LOCK_FREETYPE); fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); if (fterr) fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr)); for (i = 0; i < 256; i++) { pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); } fz_unlock(ctx, FZ_LOCK_FREETYPE); } pdf_end_hmtx(ctx, fontdesc); } fz_catch(ctx) { if (fontdesc && etable != fontdesc->cid_to_gid) fz_free(ctx, etable); pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static int keyvalcmp(const void *ap, const void *bp) { const struct keyval *a = ap; const struct keyval *b = bp; return strcmp(pdf_to_name(a->k), pdf_to_name(b->k)); }
static void savefont(pdf_obj *dict, int num) { char namebuf[1024]; char *subtype; fz_buffer *buf; pdf_obj *stream = NULL; pdf_obj *obj; char *ext = ""; FILE *f; char *fontname = "font"; int n, len; unsigned char *data; obj = pdf_dict_gets(dict, "FontName"); if (obj) fontname = pdf_to_name(obj); obj = pdf_dict_gets(dict, "FontFile"); if (obj) { stream = obj; ext = "pfa"; } obj = pdf_dict_gets(dict, "FontFile2"); if (obj) { stream = obj; ext = "ttf"; } obj = pdf_dict_gets(dict, "FontFile3"); if (obj) { stream = obj; obj = pdf_dict_gets(obj, "Subtype"); if (obj && !pdf_is_name(obj)) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype"); subtype = pdf_to_name(obj); if (!strcmp(subtype, "Type1C")) ext = "cff"; else if (!strcmp(subtype, "CIDFontType0C")) ext = "cid"; else if (!strcmp(subtype, "OpenType")) ext = "otf"; else fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", subtype); } if (!stream) { fz_warn(ctx, "unhandled font type"); return; } buf = pdf_load_stream(doc, pdf_to_num(stream), pdf_to_gen(stream)); snprintf(namebuf, sizeof(namebuf), "%s-%04d.%s", fontname, num, ext); printf("extracting font %s\n", namebuf); f = fopen(namebuf, "wb"); if (!f) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create font file"); len = fz_buffer_storage(ctx, buf, &data); n = fwrite(data, 1, len, f); if (n < len) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot write font file"); if (fclose(f) < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot close font file"); fz_drop_buffer(ctx, buf); }
static void printinfo(char *filename, int show, int page) { int i; int j; #define PAGE_FMT "\t% 5d (% 7d %1d R): " if (show & DIMENSIONS && dims > 0) { printf("Mediaboxes (%d):\n", dims); for (i = 0; i < dims; i++) { printf(PAGE_FMT "[ %g %g %g %g ]\n", dim[i].page, pdf_to_num(dim[i].pageref), pdf_to_gen(dim[i].pageref), dim[i].u.dim.bbox->x0, dim[i].u.dim.bbox->y0, dim[i].u.dim.bbox->x1, dim[i].u.dim.bbox->y1); } printf("\n"); } if (show & FONTS && fonts > 0) { printf("Fonts (%d):\n", fonts); for (i = 0; i < fonts; i++) { printf(PAGE_FMT "%s '%s' (%d %d R)\n", font[i].page, pdf_to_num(font[i].pageref), pdf_to_gen(font[i].pageref), pdf_to_name(font[i].u.font.subtype), pdf_to_name(font[i].u.font.name), pdf_to_num(font[i].u.font.obj), pdf_to_gen(font[i].u.font.obj)); } printf("\n"); } if (show & IMAGES && images > 0) { printf("Images (%d):\n", images); for (i = 0; i < images; i++) { char *cs = NULL; char *altcs = NULL; printf(PAGE_FMT "[ ", image[i].page, pdf_to_num(image[i].pageref), pdf_to_gen(image[i].pageref)); if (pdf_is_array(image[i].u.image.filter)) { int n = pdf_array_len(image[i].u.image.filter); for (j = 0; j < n; j++) { pdf_obj *obj = pdf_array_get(image[i].u.image.filter, j); char *filter = fz_strdup(ctx, pdf_to_name(obj)); if (strstr(filter, "Decode")) *(strstr(filter, "Decode")) = '\0'; printf("%s%s", filter, j == pdf_array_len(image[i].u.image.filter) - 1 ? "" : " "); fz_free(ctx, filter); } } else if (image[i].u.image.filter) { pdf_obj *obj = image[i].u.image.filter; char *filter = fz_strdup(ctx, pdf_to_name(obj)); if (strstr(filter, "Decode")) *(strstr(filter, "Decode")) = '\0'; printf("%s", filter); fz_free(ctx, filter); } else printf("Raw"); if (image[i].u.image.cs) { cs = fz_strdup(ctx, pdf_to_name(image[i].u.image.cs)); if (!strncmp(cs, "Device", 6)) { int len = strlen(cs + 6); memmove(cs + 3, cs + 6, len + 1); cs[3 + len + 1] = '\0'; } if (strstr(cs, "ICC")) fz_strlcpy(cs, "ICC", 4); if (strstr(cs, "Indexed")) fz_strlcpy(cs, "Idx", 4); if (strstr(cs, "Pattern")) fz_strlcpy(cs, "Pat", 4); if (strstr(cs, "Separation")) fz_strlcpy(cs, "Sep", 4); } if (image[i].u.image.altcs) { altcs = fz_strdup(ctx, pdf_to_name(image[i].u.image.altcs)); if (!strncmp(altcs, "Device", 6)) { int len = strlen(altcs + 6); memmove(altcs + 3, altcs + 6, len + 1); altcs[3 + len + 1] = '\0'; } if (strstr(altcs, "ICC")) fz_strlcpy(altcs, "ICC", 4); if (strstr(altcs, "Indexed")) fz_strlcpy(altcs, "Idx", 4); if (strstr(altcs, "Pattern")) fz_strlcpy(altcs, "Pat", 4); if (strstr(altcs, "Separation")) fz_strlcpy(altcs, "Sep", 4); } printf(" ] %dx%d %dbpc %s%s%s (%d %d R)\n", pdf_to_int(image[i].u.image.width), pdf_to_int(image[i].u.image.height), image[i].u.image.bpc ? pdf_to_int(image[i].u.image.bpc) : 1, image[i].u.image.cs ? cs : "ImageMask", image[i].u.image.altcs ? " " : "", image[i].u.image.altcs ? altcs : "", pdf_to_num(image[i].u.image.obj), pdf_to_gen(image[i].u.image.obj)); fz_free(ctx, cs); fz_free(ctx, altcs); } printf("\n"); } if (show & SHADINGS && shadings > 0) { printf("Shading patterns (%d):\n", shadings); for (i = 0; i < shadings; i++) { char *shadingtype[] = { "", "Function", "Axial", "Radial", "Triangle mesh", "Lattice", "Coons patch", "Tensor patch", }; printf(PAGE_FMT "%s (%d %d R)\n", shading[i].page, pdf_to_num(shading[i].pageref), pdf_to_gen(shading[i].pageref), shadingtype[pdf_to_int(shading[i].u.shading.type)], pdf_to_num(shading[i].u.shading.obj), pdf_to_gen(shading[i].u.shading.obj)); } printf("\n"); } if (show & PATTERNS && patterns > 0) { printf("Patterns (%d):\n", patterns); for (i = 0; i < patterns; i++) { if (pdf_to_int(pattern[i].u.pattern.type) == 1) { char *painttype[] = { "", "Colored", "Uncolored", }; char *tilingtype[] = { "", "Constant", "No distortion", "Constant/fast tiling", }; printf(PAGE_FMT "Tiling %s %s (%d %d R)\n", pattern[i].page, pdf_to_num(pattern[i].pageref), pdf_to_gen(pattern[i].pageref), painttype[pdf_to_int(pattern[i].u.pattern.paint)], tilingtype[pdf_to_int(pattern[i].u.pattern.tiling)], pdf_to_num(pattern[i].u.pattern.obj), pdf_to_gen(pattern[i].u.pattern.obj)); } else { printf(PAGE_FMT "Shading %d %d R (%d %d R)\n", pattern[i].page, pdf_to_num(pattern[i].pageref), pdf_to_gen(pattern[i].pageref), pdf_to_num(pattern[i].u.pattern.shading), pdf_to_gen(pattern[i].u.pattern.shading), pdf_to_num(pattern[i].u.pattern.obj), pdf_to_gen(pattern[i].u.pattern.obj)); } } printf("\n"); } if (show & XOBJS && forms > 0) { printf("Form xobjects (%d):\n", forms); for (i = 0; i < forms; i++) { printf(PAGE_FMT "Form%s%s%s%s (%d %d R)\n", form[i].page, pdf_to_num(form[i].pageref), pdf_to_gen(form[i].pageref), form[i].u.form.groupsubtype ? " " : "", form[i].u.form.groupsubtype ? pdf_to_name(form[i].u.form.groupsubtype) : "", form[i].u.form.groupsubtype ? " Group" : "", form[i].u.form.reference ? " Reference" : "", pdf_to_num(form[i].u.form.obj), pdf_to_gen(form[i].u.form.obj)); } printf("\n"); } if (show & XOBJS && psobjs > 0) { printf("Postscript xobjects (%d):\n", psobjs); for (i = 0; i < psobjs; i++) { printf(PAGE_FMT "(%d %d R)\n", psobj[i].page, pdf_to_num(psobj[i].pageref), pdf_to_gen(psobj[i].pageref), pdf_to_num(psobj[i].u.form.obj), pdf_to_gen(psobj[i].u.form.obj)); } printf("\n"); } }
char * pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest) { pdf_obj *obj; char buf[256]; const char *ld; int page; int x, y; dest = resolve_dest(ctx, doc, dest); if (dest == NULL) { fz_warn(ctx, "undefined link destination"); return NULL; } if (pdf_is_name(ctx, dest)) { ld = pdf_to_name(ctx, dest); return fz_strdup(ctx, ld); } else if (pdf_is_string(ctx, dest)) { ld = pdf_to_str_buf(ctx, dest); return fz_strdup(ctx, ld); } obj = pdf_array_get(ctx, dest, 0); if (pdf_is_int(ctx, obj)) page = pdf_to_int(ctx, obj); else { fz_try(ctx) page = pdf_lookup_page_number(ctx, doc, obj); fz_catch(ctx) page = -1; } x = y = 0; obj = pdf_array_get(ctx, dest, 1); if (pdf_name_eq(ctx, obj, PDF_NAME_XYZ)) { x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 3)); } else if (pdf_name_eq(ctx, obj, PDF_NAME_FitR)) { x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 5)); } else if (pdf_name_eq(ctx, obj, PDF_NAME_FitH) || pdf_name_eq(ctx, obj, PDF_NAME_FitBH)) y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); else if (pdf_name_eq(ctx, obj, PDF_NAME_FitV) || pdf_name_eq(ctx, obj, PDF_NAME_FitBV)) x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); if (page >= 0) { if (x != 0 || y != 0) fz_snprintf(buf, sizeof buf, "#%d,%d,%d", page + 1, x, y); else fz_snprintf(buf, sizeof buf, "#%d", page + 1); return fz_strdup(ctx, buf); } return NULL; }
static void savefont(pdf_obj *dict, int num) { char namebuf[1024]; fz_buffer *buf; pdf_obj *stream = NULL; pdf_obj *obj; char *ext = ""; fz_output *out; char *fontname = "font"; size_t len; unsigned char *data; obj = pdf_dict_get(ctx, dict, PDF_NAME_FontName); if (obj) fontname = pdf_to_name(ctx, obj); obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile); if (obj) { stream = obj; ext = "pfa"; } obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile2); if (obj) { stream = obj; ext = "ttf"; } obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile3); if (obj) { stream = obj; obj = pdf_dict_get(ctx, obj, PDF_NAME_Subtype); if (obj && !pdf_is_name(ctx, obj)) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype"); if (pdf_name_eq(ctx, obj, PDF_NAME_Type1C)) ext = "cff"; else if (pdf_name_eq(ctx, obj, PDF_NAME_CIDFontType0C)) ext = "cid"; else if (pdf_name_eq(ctx, obj, PDF_NAME_OpenType)) ext = "otf"; else fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", pdf_to_name(ctx, obj)); } if (!stream) { fz_warn(ctx, "unhandled font type"); return; } buf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, stream)); len = fz_buffer_storage(ctx, buf, &data); fz_try(ctx) { snprintf(namebuf, sizeof(namebuf), "%s-%04d.%s", fontname, num, ext); printf("extracting font %s\n", namebuf); out = fz_new_output_with_path(ctx, namebuf, 0); fz_try(ctx) fz_write(ctx, out, data, len); fz_always(ctx) fz_drop_output(ctx, out); fz_catch(ctx) fz_rethrow(ctx); } fz_always(ctx) fz_drop_buffer(ctx, buf); fz_catch(ctx) fz_rethrow(ctx); }
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv) { pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; pdf_document *doc = glo->doc; int argidx = 0; pdf_obj *names_list = NULL; int pagecount; int i; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root); pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages); olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests); root = pdf_new_dict(ctx, doc, 2); pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type)); pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages)); pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root); pdf_drop_obj(ctx, root); /* Create a new kids array with only the pages we want to keep */ parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages)); kids = pdf_new_array(ctx, doc, 1); /* Retain pages specified */ while (argc - argidx) { int page, spage, epage; char *spec, *dash; char *pagelist = argv[argidx]; pagecount = pdf_count_pages(ctx, doc); spec = fz_strsep(&pagelist, ","); while (spec) { dash = strchr(spec, '-'); if (dash == spec) spage = epage = pagecount; else spage = epage = atoi(spec); if (dash) { if (strlen(dash) > 1) epage = atoi(dash + 1); else epage = pagecount; } spage = fz_clampi(spage, 1, pagecount); epage = fz_clampi(epage, 1, pagecount); if (spage < epage) for (page = spage; page <= epage; ++page) retainpage(ctx, doc, parent, kids, page); else for (page = spage; page >= epage; --page) retainpage(ctx, doc, parent, kids, page); spec = fz_strsep(&pagelist, ","); } argidx++; } pdf_drop_obj(ctx, parent); /* Update page count and kids array */ countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids)); pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj); pdf_drop_obj(ctx, countobj); pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids); pdf_drop_obj(ctx, kids); /* Also preserve the (partial) Dests name tree */ if (olddests) { pdf_obj *names = pdf_new_dict(ctx, doc, 1); pdf_obj *dests = pdf_new_dict(ctx, doc, 1); int len = pdf_dict_len(ctx, olddests); names_list = pdf_new_array(ctx, doc, 32); for (i = 0; i < len; i++) { pdf_obj *key = pdf_dict_get_key(ctx, olddests, i); pdf_obj *val = pdf_dict_get_val(ctx, olddests, i); pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D); dest = pdf_array_get(ctx, dest ? dest : val, 0); if (pdf_array_contains(ctx, pdf_dict_get(ctx, pages, PDF_NAME_Kids), dest)) { pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key))); pdf_array_push(ctx, names_list, key_str); pdf_array_push(ctx, names_list, val); pdf_drop_obj(ctx, key_str); } } root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root); pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list); pdf_dict_put(ctx, names, PDF_NAME_Dests, dests); pdf_dict_put(ctx, root, PDF_NAME_Names, names); pdf_drop_obj(ctx, names); pdf_drop_obj(ctx, dests); pdf_drop_obj(ctx, names_list); pdf_drop_obj(ctx, olddests); } /* Force the next call to pdf_count_pages to recount */ glo->doc->page_count = 0; /* Edit each pages /Annot list to remove any links that point to * nowhere. */ pagecount = pdf_count_pages(ctx, doc); for (i = 0; i < pagecount; i++) { pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i); pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref); pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots); int len = pdf_array_len(ctx, annots); int j; for (j = 0; j < len; j++) { pdf_obj *o = pdf_array_get(ctx, annots, j); pdf_obj *p; if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link)) continue; p = pdf_dict_get(ctx, o, PDF_NAME_A); if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo)) continue; if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list)) continue; /* FIXME: Should probably look at Next too */ /* Remove this annotation */ pdf_array_delete(ctx, annots, j); j--; } } }
static pdf_font_desc * load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding); } else { fz_throw(ctx, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); } } pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode); /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
fz_link_dest pdf_parse_link_dest(pdf_document *xref, pdf_obj *dest) { fz_link_dest ld; pdf_obj *obj; int l_from_2 = 0; int b_from_3 = 0; int r_from_4 = 0; int t_from_5 = 0; int t_from_3 = 0; int t_from_2 = 0; int z_from_4 = 0; dest = resolve_dest(xref, dest); if (dest == NULL || !pdf_is_array(dest)) { ld.kind = FZ_LINK_NONE; return ld; } obj = pdf_array_get(dest, 0); if (pdf_is_int(obj)) ld.ld.gotor.page = pdf_to_int(obj); else ld.ld.gotor.page = pdf_lookup_page_number(xref, obj); ld.kind = FZ_LINK_GOTO; ld.ld.gotor.flags = 0; ld.ld.gotor.lt.x = 0; ld.ld.gotor.lt.y = 0; ld.ld.gotor.rb.x = 0; ld.ld.gotor.rb.y = 0; ld.ld.gotor.file_spec = NULL; ld.ld.gotor.new_window = 0; obj = pdf_array_get(dest, 1); if (!pdf_is_name(obj)) return ld; if (!strcmp("XYZ", pdf_to_name(obj))) { l_from_2 = t_from_3 = z_from_4 = 1; ld.ld.gotor.flags |= fz_link_flag_r_is_zoom; } else if ((!strcmp("Fit", pdf_to_name(obj))) || (!strcmp("FitB", pdf_to_name(obj)))) { ld.ld.gotor.flags |= fz_link_flag_fit_h; ld.ld.gotor.flags |= fz_link_flag_fit_v; } else if ((!strcmp("FitH", pdf_to_name(obj))) || (!strcmp("FitBH", pdf_to_name(obj)))) { t_from_2 = 1; ld.ld.gotor.flags |= fz_link_flag_fit_h; } else if ((!strcmp("FitV", pdf_to_name(obj))) || (!strcmp("FitBV", pdf_to_name(obj)))) { l_from_2 = 1; ld.ld.gotor.flags |= fz_link_flag_fit_v; } else if (!strcmp("FitR", pdf_to_name(obj))) { l_from_2 = b_from_3 = r_from_4 = t_from_5 = 1; ld.ld.gotor.flags |= fz_link_flag_fit_h; ld.ld.gotor.flags |= fz_link_flag_fit_v; } if (l_from_2) { obj = pdf_array_get(dest, 2); if (pdf_is_int(obj)) { ld.ld.gotor.flags |= fz_link_flag_l_valid; ld.ld.gotor.lt.x = pdf_to_int(obj); } else if (pdf_is_real(obj)) { ld.ld.gotor.flags |= fz_link_flag_l_valid; ld.ld.gotor.lt.x = pdf_to_real(obj); } } if (b_from_3) { obj = pdf_array_get(dest, 3); if (pdf_is_int(obj)) { ld.ld.gotor.flags |= fz_link_flag_b_valid; ld.ld.gotor.rb.y = pdf_to_int(obj); } else if (pdf_is_real(obj)) { ld.ld.gotor.flags |= fz_link_flag_b_valid; ld.ld.gotor.rb.y = pdf_to_real(obj); } } if (r_from_4) { obj = pdf_array_get(dest, 4); if (pdf_is_int(obj)) { ld.ld.gotor.flags |= fz_link_flag_r_valid; ld.ld.gotor.rb.x = pdf_to_int(obj); } else if (pdf_is_real(obj)) { ld.ld.gotor.flags |= fz_link_flag_r_valid; ld.ld.gotor.rb.x = pdf_to_real(obj); } } if (t_from_5 || t_from_3 || t_from_2) { if (t_from_5) obj = pdf_array_get(dest, 5); else if (t_from_3) obj = pdf_array_get(dest, 3); else obj = pdf_array_get(dest, 2); if (pdf_is_int(obj)) { ld.ld.gotor.flags |= fz_link_flag_t_valid; ld.ld.gotor.lt.y = pdf_to_int(obj); } else if (pdf_is_real(obj)) { ld.ld.gotor.flags |= fz_link_flag_t_valid; ld.ld.gotor.lt.y = pdf_to_real(obj); } } if (z_from_4) { obj = pdf_array_get(dest, 4); if (pdf_is_int(obj)) { ld.ld.gotor.flags |= fz_link_flag_r_valid; ld.ld.gotor.rb.x = pdf_to_int(obj); } else if (pdf_is_real(obj)) { ld.ld.gotor.flags |= fz_link_flag_r_valid; ld.ld.gotor.rb.x = pdf_to_real(obj); } } /* Duplicate the values out for the sake of stupid clients */ if ((ld.ld.gotor.flags & (fz_link_flag_l_valid | fz_link_flag_r_valid)) == fz_link_flag_l_valid) ld.ld.gotor.rb.x = ld.ld.gotor.lt.x; if ((ld.ld.gotor.flags & (fz_link_flag_l_valid | fz_link_flag_r_valid | fz_link_flag_r_is_zoom)) == fz_link_flag_r_valid) ld.ld.gotor.lt.x = ld.ld.gotor.rb.x; if ((ld.ld.gotor.flags & (fz_link_flag_t_valid | fz_link_flag_b_valid)) == fz_link_flag_t_valid) ld.ld.gotor.rb.y = ld.ld.gotor.lt.y; if ((ld.ld.gotor.flags & (fz_link_flag_t_valid | fz_link_flag_b_valid)) == fz_link_flag_b_valid) ld.ld.gotor.lt.y = ld.ld.gotor.rb.y; return ld; }