static void pdf_load_embedded_font(pdf_document *doc, pdf_font_desc *fontdesc, char *fontname, pdf_obj *stmref) { fz_buffer *buf; fz_context *ctx = doc->ctx; fz_try(ctx) { buf = pdf_load_stream(doc, pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load font stream (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_try(ctx) { fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1); } fz_always(ctx) { fz_drop_buffer(ctx, buf); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load embedded font (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fontdesc->size += buf->len; fontdesc->is_embedded = 1; }
static void pdf_load_embedded_font(pdf_document *doc, pdf_font_desc *fontdesc, char *fontname, pdf_obj *stmref) { fz_buffer *buf; fz_context *ctx = doc->ctx; fz_try(ctx) { buf = pdf_load_stream(doc, pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load font stream (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_try(ctx) { fontdesc->font = fz_new_font_from_memory(ctx, fontname, buf->data, buf->len, 0, 1); } fz_catch(ctx) { fz_drop_buffer(ctx, buf); fz_rethrow_message(ctx, "cannot load embedded font (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fontdesc->size += buf->len; /* save the buffer so we can free it later */ fontdesc->font->ft_data = buf->data; fontdesc->font->ft_size = buf->len; fz_free(ctx, buf); /* only free the fz_buffer struct, not the contained data */ fontdesc->is_embedded = 1; }
static fz_colorspace * load_indexed(pdf_document *doc, pdf_obj *array) { fz_context *ctx = doc->ctx; pdf_obj *baseobj = pdf_array_get(array, 1); pdf_obj *highobj = pdf_array_get(array, 2); pdf_obj *lookupobj = pdf_array_get(array, 3); fz_colorspace *base = NULL; fz_colorspace *cs; int i, n, high; unsigned char *lookup = NULL; fz_var(base); fz_var(lookup); fz_try(ctx) { base = pdf_load_colorspace(doc, baseobj); high = pdf_to_int(highobj); high = fz_clampi(high, 0, 255); n = base->n * (high + 1); lookup = fz_malloc_array(ctx, 1, n); if (pdf_is_string(lookupobj) && pdf_to_str_len(lookupobj) >= n) { unsigned char *buf = (unsigned char *) pdf_to_str_buf(lookupobj); for (i = 0; i < n; i++) lookup[i] = buf[i]; } else if (pdf_is_indirect(lookupobj)) { fz_stream *file = NULL; fz_var(file); fz_try(ctx) { file = pdf_open_stream(doc, pdf_to_num(lookupobj), pdf_to_gen(lookupobj)); i = fz_read(file, lookup, n); if (i < n) memset(lookup+i, 0, n-i); } fz_always(ctx) { fz_close(file); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot open colorspace lookup table (%d 0 R)", pdf_to_num(lookupobj)); } } else { fz_rethrow_message(ctx, "cannot parse colorspace lookup table"); } cs = fz_new_indexed_colorspace(ctx, base, high, lookup); }
xps_document * xps_open_document(fz_context *ctx, const char *filename) { char buf[2048]; fz_stream *file; char *p; xps_document *doc; if (strstr(filename, "/_rels/.rels") || strstr(filename, "\\_rels\\.rels")) { fz_strlcpy(buf, filename, sizeof buf); p = strstr(buf, "/_rels/.rels"); if (!p) p = strstr(buf, "\\_rels\\.rels"); *p = 0; return xps_open_document_with_directory(ctx, buf); } file = fz_open_file(ctx, filename); fz_try(ctx) doc = xps_open_document_with_stream(ctx, file); fz_always(ctx) fz_drop_stream(ctx, file); fz_catch(ctx) fz_rethrow_message(ctx, "cannot load document '%s'", filename); return doc; }
pdf_page * pdf_create_page(pdf_document *doc, fz_rect mediabox, int res, int rotate) { pdf_page *page = NULL; pdf_obj *pageobj; float userunit = 1; fz_context *ctx = doc->ctx; fz_matrix ctm, tmp; fz_rect realbox; page = fz_malloc_struct(ctx, pdf_page); fz_try(ctx) { page->resources = NULL; page->contents = NULL; page->transparency = 0; page->links = NULL; page->annots = NULL; page->me = pageobj = pdf_new_dict(doc, 4); pdf_dict_puts_drop(pageobj, "Type", pdf_new_name(doc, "Page")); page->mediabox.x0 = fz_min(mediabox.x0, mediabox.x1) * userunit; page->mediabox.y0 = fz_min(mediabox.y0, mediabox.y1) * userunit; page->mediabox.x1 = fz_max(mediabox.x0, mediabox.x1) * userunit; page->mediabox.y1 = fz_max(mediabox.y0, mediabox.y1) * userunit; pdf_dict_puts_drop(pageobj, "MediaBox", pdf_new_rect(doc, &page->mediabox)); /* Snap page->rotate to 0, 90, 180 or 270 */ if (page->rotate < 0) page->rotate = 360 - ((-page->rotate) % 360); if (page->rotate >= 360) page->rotate = page->rotate % 360; page->rotate = 90*((page->rotate + 45)/90); if (page->rotate > 360) page->rotate = 0; pdf_dict_puts_drop(pageobj, "Rotate", pdf_new_int(doc, page->rotate)); fz_pre_rotate(fz_scale(&ctm, 1, -1), -page->rotate); realbox = page->mediabox; fz_transform_rect(&realbox, &ctm); fz_pre_scale(fz_translate(&tmp, -realbox.x0, -realbox.y0), userunit, userunit); fz_concat(&ctm, &ctm, &tmp); page->ctm = ctm; /* Do not create a Contents, as an empty Contents dict is not * valid. See Bug 694712 */ } fz_catch(ctx) { pdf_drop_obj(page->me); fz_free(ctx, page); fz_rethrow_message(ctx, "Failed to create page"); } return page; }
static void pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int own_res) { pdf_processor *proc_buffer = NULL; pdf_processor *proc_filter = NULL; pdf_obj *res = NULL; pdf_obj *ref = NULL; fz_buffer *buffer; if (!obj) return; fz_var(res); fz_var(ref); fz_var(proc_buffer); fz_var(proc_filter); buffer = fz_new_buffer(ctx, 1024); fz_try(ctx) { if (own_res) { pdf_obj *r = pdf_dict_get(ctx, obj, PDF_NAME_Resources); if (r) orig_res = r; } res = pdf_new_dict(ctx, doc, 1); proc_buffer = pdf_new_buffer_processor(ctx, buffer); proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res); pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie); pdf_update_stream(ctx, doc, obj, buffer, 0); if (own_res) { ref = pdf_new_ref(ctx, doc, res); pdf_dict_put(ctx, obj, PDF_NAME_Resources, ref); } } fz_always(ctx) { pdf_drop_processor(ctx, proc_filter); pdf_drop_processor(ctx, proc_buffer); fz_drop_buffer(ctx, buffer); pdf_drop_obj(ctx, res); pdf_drop_obj(ctx, ref); } fz_catch(ctx) { fz_rethrow_message(ctx, "Failed while cleaning xobject"); } }
static fz_buffer * pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params, int *truncated) { fz_stream *stm = NULL; pdf_obj *dict, *obj; int i, len, n; fz_buffer *buf; fz_var(buf); if (num > 0 && num < pdf_xref_len(ctx, doc)) { pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num); /* Return ref to existing buffer, but only if uncompressed, * or shortstoppable */ if (can_reuse_buffer(ctx, entry, params)) return fz_keep_buffer(ctx, entry->stm_buf); } dict = pdf_load_object(ctx, doc, num, gen); len = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Length)); obj = pdf_dict_get(ctx, dict, PDF_NAME_Filter); len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj)); n = pdf_array_len(ctx, obj); for (i = 0; i < n; i++) len = pdf_guess_filter_length(len, pdf_to_name(ctx, pdf_array_get(ctx, obj, i))); pdf_drop_obj(ctx, dict); stm = pdf_open_image_stream(ctx, doc, num, gen, orig_num, orig_gen, params); fz_try(ctx) { if (truncated) buf = fz_read_best(ctx, stm, len, truncated); else buf = fz_read_all(ctx, stm, len); } fz_always(ctx) { fz_drop_stream(ctx, stm); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot read raw stream (%d %d R)", num, gen); } return buf; }
pdf_pattern * pdf_load_pattern(fz_context *ctx, pdf_document *doc, pdf_obj *dict) { pdf_pattern *pat; pdf_obj *obj; if ((pat = pdf_find_item(ctx, pdf_drop_pattern_imp, dict)) != NULL) { return pat; } pat = fz_malloc_struct(ctx, pdf_pattern); FZ_INIT_STORABLE(pat, 1, pdf_drop_pattern_imp); pat->document = doc; pat->resources = NULL; pat->contents = NULL; fz_try(ctx) { /* Store pattern now, to avoid possible recursion if objects refer back to this one */ pdf_store_item(ctx, dict, pat, pdf_pattern_size(pat)); pat->ismask = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_PaintType)) == 2; pat->xstep = pdf_to_real(ctx, pdf_dict_get(ctx, dict, PDF_NAME_XStep)); pat->ystep = pdf_to_real(ctx, pdf_dict_get(ctx, dict, PDF_NAME_YStep)); obj = pdf_dict_gets(ctx, dict, "BBox"); pdf_to_rect(ctx, obj, &pat->bbox); obj = pdf_dict_gets(ctx, dict, "Matrix"); if (obj) pdf_to_matrix(ctx, obj, &pat->matrix); else pat->matrix = fz_identity; pat->resources = pdf_dict_get(ctx, dict, PDF_NAME_Resources); if (pat->resources) pdf_keep_obj(ctx, pat->resources); pat->contents = pdf_keep_obj(ctx, dict); } fz_catch(ctx) { pdf_remove_item(ctx, pdf_drop_pattern_imp, dict); pdf_drop_pattern(ctx, pat); fz_rethrow_message(ctx, "cannot load pattern (%d %d R)", pdf_to_num(ctx, dict), pdf_to_gen(ctx, dict)); } return pat; }
static pdf_font_desc * load_cid_font(pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = doc->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, doc, descriptor, collection, basefont, 1); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(doc, encoding); } else { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(doc, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); } } pdf_load_to_unicode(doc, fontdesc, NULL, collection, to_unicode); /* If we have an identity encoding, we're supposed to use the glyph ids directly. * If we only have a substitute font, that won't work. * Make a last ditch attempt by using * the ToUnicode table if it exists to map via the substitute font's cmap. */ if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute) { fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont); if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap) fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); } /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static pdf_font_desc * pdf_load_simple_font_by_name(pdf_document *doc, pdf_obj *dict, char *basefont) { pdf_obj *descriptor; pdf_obj *encoding; pdf_obj *widths; unsigned short *etable = NULL; pdf_font_desc *fontdesc = NULL; char *subtype; FT_Face face; FT_CharMap cmap; int symbolic; int kind; char *estrings[256]; char ebuffer[256][32]; int i, k, n; int fterr; int has_lock = 0; fz_context *ctx = doc->ctx; fz_var(fontdesc); fz_var(etable); fz_var(has_lock); /* Load font file */ fz_try(ctx) { fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (descriptor) pdf_load_font_descriptor(fontdesc, doc, descriptor, NULL, basefont, 0); else pdf_load_builtin_font(ctx, fontdesc, basefont); /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ if (descriptor && pdf_is_string(pdf_dict_gets(descriptor, "FontName")) && !pdf_dict_gets(dict, "ToUnicode") && !strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") && pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4) { char *cp936fonts[] = { "\xCB\xCE\xCC\xE5", "SimSun,Regular", "\xBA\xDA\xCC\xE5", "SimHei,Regular", "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", "\xC1\xA5\xCA\xE9", "SimLi,Regular", NULL }; for (i = 0; cp936fonts[i]; i += 2) if (!strcmp(basefont, cp936fonts[i])) break; if (cp936fonts[i]) { fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); pdf_drop_font(ctx, fontdesc); fontdesc = NULL; fontdesc = pdf_new_font_desc(ctx); pdf_load_font_descriptor(fontdesc, doc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0); fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); face = fontdesc->font->ft_face; /* kind = ft_kind(face); */ goto skip_encoding; } } face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ symbolic = fontdesc->flags & 4; if (face->num_charmaps > 0) cmap = face->charmaps[0]; else cmap = NULL; for (i = 0; i < face->num_charmaps; i++) { FT_CharMap test = face->charmaps[i]; if (kind == TYPE1) { if (test->platform_id == 7) cmap = test; } if (kind == TRUETYPE) { if (test->platform_id == 1 && test->encoding_id == 0) cmap = test; if (test->platform_id == 3 && test->encoding_id == 1) cmap = test; if (symbolic && test->platform_id == 3 && test->encoding_id == 0) cmap = test; } } if (cmap) { fterr = FT_Set_Charmap(face, cmap); if (fterr) fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); } else fz_warn(ctx, "freetype could not find any cmaps"); etable = fz_malloc_array(ctx, 256, sizeof(unsigned short)); fontdesc->size += 256 * sizeof(unsigned short); for (i = 0; i < 256; i++) { estrings[i] = NULL; etable[i] = 0; } encoding = pdf_dict_gets(dict, "Encoding"); if (encoding) { if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); else if (!fontdesc->is_embedded && !symbolic) pdf_load_encoding(estrings, "StandardEncoding"); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } } /* start with the builtin encoding */ for (i = 0; i < 256; i++) etable[i] = ft_char_index(face, i); fz_lock(ctx, FZ_LOCK_FREETYPE); has_lock = 1; /* built-in and substitute fonts may be a different type than what the document expects */ subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype")); if (!strcmp(subtype, "Type1")) kind = TYPE1; else if (!strcmp(subtype, "MMType1")) kind = TYPE1; else if (!strcmp(subtype, "TrueType")) kind = TRUETYPE; else if (!strcmp(subtype, "CIDFontType0")) kind = TYPE1; else if (!strcmp(subtype, "CIDFontType2")) kind = TRUETYPE; /* encode by glyph name where we can */ if (kind == TYPE1) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) { int aglcode = pdf_lookup_agl(estrings[i]); const char **dupnames = pdf_lookup_agl_duplicates(aglcode); while (*dupnames) { etable[i] = FT_Get_Name_Index(face, (char*)*dupnames); if (etable[i]) break; dupnames++; } if (etable[i] == 0) { char buf[10]; sprintf(buf, "uni%04X", aglcode); etable[i] = FT_Get_Name_Index(face, buf); } } } } } /* encode by glyph name where we can */ if (kind == TRUETYPE) { /* Unicode cmap */ if (!symbolic && face->charmap && face->charmap->platform_id == 3) { for (i = 0; i < 256; i++) { if (estrings[i]) { int aglcode = pdf_lookup_agl(estrings[i]); if (!aglcode) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, aglcode); } } } /* MacRoman cmap */ else if (!symbolic && face->charmap && face->charmap->platform_id == 1) { for (i = 0; i < 256; i++) { if (estrings[i]) { k = lookup_mre_code(estrings[i]); if (k <= 0) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, k); } } } /* Symbolic cmap */ else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) etable[i] = ft_char_index(face, i); } } } } /* try to reverse the glyph names from the builtin encoding */ for (i = 0; i < 256; i++) { if (etable[i] && !estrings[i]) { if (FT_HAS_GLYPH_NAMES(face)) { fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); if (fterr) fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); if (ebuffer[i][0]) estrings[i] = ebuffer[i]; } else { estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */ } } } /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ if (kind == TYPE1 && symbolic) { for (i = 0; i < 256; i++) if (etable[i] && estrings[i] && !pdf_lookup_agl(estrings[i])) estrings[i] = (char*) pdf_standard[i]; } fz_unlock(ctx, FZ_LOCK_FREETYPE); has_lock = 0; fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); fontdesc->cid_to_gid_len = 256; fontdesc->cid_to_gid = etable; fz_try(ctx) { pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); } fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); fz_warn(ctx, "cannot load ToUnicode CMap"); } skip_encoding: /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); widths = pdf_dict_gets(dict, "Widths"); if (widths) { int first, last; first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; for (i = 0; i < last - first + 1; i++) { int wid = pdf_to_int(pdf_array_get(widths, i)); pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); } } else { fz_lock(ctx, FZ_LOCK_FREETYPE); has_lock = 1; fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); if (fterr) fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr)); for (i = 0; i < 256; i++) { pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); } fz_unlock(ctx, FZ_LOCK_FREETYPE); has_lock = 0; } pdf_end_hmtx(ctx, fontdesc); } fz_catch(ctx) { if (has_lock) fz_unlock(ctx, FZ_LOCK_FREETYPE); if (fontdesc && etable != fontdesc->cid_to_gid) fz_free(ctx, etable); pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
pdf_xobject * pdf_load_xobject(pdf_document *doc, pdf_obj *dict) { pdf_xobject *form; pdf_obj *obj; fz_context *ctx = doc->ctx; if ((form = pdf_find_item(ctx, pdf_free_xobject_imp, dict))) { return form; } form = fz_malloc_struct(ctx, pdf_xobject); FZ_INIT_STORABLE(form, 1, pdf_free_xobject_imp); form->resources = NULL; form->contents = NULL; form->colorspace = NULL; form->me = NULL; form->iteration = 0; /* Store item immediately, to avoid possible recursion if objects refer back to this one */ pdf_store_item(ctx, dict, form, pdf_xobject_size(form)); fz_try(ctx) { obj = pdf_dict_gets(dict, "BBox"); pdf_to_rect(ctx, obj, &form->bbox); obj = pdf_dict_gets(dict, "Matrix"); if (obj) pdf_to_matrix(ctx, obj, &form->matrix); else form->matrix = fz_identity; form->isolated = 0; form->knockout = 0; form->transparency = 0; obj = pdf_dict_gets(dict, "Group"); if (obj) { pdf_obj *attrs = obj; form->isolated = pdf_to_bool(pdf_dict_gets(attrs, "I")); form->knockout = pdf_to_bool(pdf_dict_gets(attrs, "K")); obj = pdf_dict_gets(attrs, "S"); if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "Transparency")) form->transparency = 1; obj = pdf_dict_gets(attrs, "CS"); if (obj) { form->colorspace = pdf_load_colorspace(doc, obj); if (!form->colorspace) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load xobject colorspace"); } } form->resources = pdf_dict_gets(dict, "Resources"); if (form->resources) pdf_keep_obj(form->resources); form->contents = pdf_keep_obj(dict); } fz_catch(ctx) { pdf_remove_item(ctx, pdf_free_xobject_imp, dict); pdf_drop_xobject(ctx, form); fz_rethrow_message(ctx, "cannot load xobject content stream (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } form->me = pdf_keep_obj(dict); return form; }
pdf_obj * pdf_parse_ind_obj(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, int *ostmofs) { pdf_obj *obj = NULL; int num = 0, gen = 0, stm_ofs; pdf_token tok; int a, b; fz_context *ctx = file->ctx; fz_var(obj); tok = pdf_lex(file, buf); if (tok != PDF_TOK_INT) fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number"); num = buf->i; tok = pdf_lex(file, buf); if (tok != PDF_TOK_INT) fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num); gen = buf->i; tok = pdf_lex(file, buf); if (tok != PDF_TOK_OBJ) fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen); tok = pdf_lex(file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: obj = pdf_parse_array(doc, file, buf); break; case PDF_TOK_OPEN_DICT: obj = pdf_parse_dict(doc, file, buf); break; case PDF_TOK_NAME: obj = pdf_new_name(doc, buf->scratch); break; case PDF_TOK_REAL: obj = pdf_new_real(doc, buf->f); break; case PDF_TOK_STRING: obj = pdf_new_string(doc, buf->scratch, buf->len); break; case PDF_TOK_TRUE: obj = pdf_new_bool(doc, 1); break; case PDF_TOK_FALSE: obj = pdf_new_bool(doc, 0); break; case PDF_TOK_NULL: obj = pdf_new_null(doc); break; case PDF_TOK_INT: a = buf->i; tok = pdf_lex(file, buf); if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) { obj = pdf_new_int(doc, a); goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; tok = pdf_lex(file, buf); if (tok == PDF_TOK_R) { obj = pdf_new_indirect(doc, a, b); break; } } fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen); case PDF_TOK_ENDOBJ: obj = pdf_new_null(doc); goto skip; default: fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen); } fz_try(ctx) { tok = pdf_lex(file, buf); } fz_catch(ctx) { pdf_drop_obj(obj); fz_rethrow_message(ctx, "cannot parse indirect object (%d %d R)", num, gen); } skip: if (tok == PDF_TOK_STREAM) { int c = fz_read_byte(file); while (c == ' ') c = fz_read_byte(file); if (c == '\r') { c = fz_peek_byte(file); if (c != '\n') fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen); else fz_read_byte(file); } stm_ofs = fz_tell(file); } else if (tok == PDF_TOK_ENDOBJ) { stm_ofs = 0; } else { fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen); stm_ofs = 0; } if (onum) *onum = num; if (ogen) *ogen = gen; if (ostmofs) *ostmofs = stm_ofs; return obj; }
pdf_obj * pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) { pdf_obj *dict; pdf_obj *key = NULL; pdf_obj *val = NULL; pdf_token tok; int a, b; fz_context *ctx = file->ctx; dict = pdf_new_dict(doc, 8); fz_var(key); fz_var(val); fz_try(ctx) { while (1) { tok = pdf_lex(file, buf); skip: if (tok == PDF_TOK_CLOSE_DICT) break; /* for BI .. ID .. EI in content streams */ if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")) break; if (tok != PDF_TOK_NAME) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key in dict"); key = pdf_new_name(doc, buf->scratch); tok = pdf_lex(file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1643 */ fz_try(ctx) { val = pdf_parse_array(doc, file, buf); } fz_catch(ctx) { fz_warn(ctx, "ignoring broken array for '%s'", pdf_to_name(key)); pdf_drop_obj(key); val = key = NULL; do tok = pdf_lex(file, buf); while (tok != PDF_TOK_CLOSE_DICT && tok != PDF_TOK_CLOSE_ARRAY && tok != PDF_TOK_EOF && tok != PDF_TOK_OPEN_ARRAY && tok != PDF_TOK_OPEN_DICT); if (tok == PDF_TOK_CLOSE_DICT) goto skip; if (tok == PDF_TOK_CLOSE_ARRAY) continue; fz_throw(ctx, FZ_ERROR_GENERIC, "cannot make sense of broken array after all"); } break; case PDF_TOK_OPEN_DICT: val = pdf_parse_dict(doc, file, buf); break; case PDF_TOK_NAME: val = pdf_new_name(doc, buf->scratch); break; case PDF_TOK_REAL: val = pdf_new_real(doc, buf->f); break; case PDF_TOK_STRING: val = pdf_new_string(doc, buf->scratch, buf->len); break; case PDF_TOK_TRUE: val = pdf_new_bool(doc, 1); break; case PDF_TOK_FALSE: val = pdf_new_bool(doc, 0); break; case PDF_TOK_NULL: val = pdf_new_null(doc); break; case PDF_TOK_INT: /* 64-bit to allow for numbers > INT_MAX and overflow */ a = buf->i; tok = pdf_lex(file, buf); if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME || (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))) { val = pdf_new_int(doc, a); pdf_dict_put(dict, key, val); pdf_drop_obj(val); val = NULL; pdf_drop_obj(key); key = NULL; goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; tok = pdf_lex(file, buf); if (tok == PDF_TOK_R) { val = pdf_new_indirect(doc, a, b); break; } } fz_throw(ctx, FZ_ERROR_GENERIC, "invalid indirect reference in dict"); default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in dict"); } pdf_dict_put(dict, key, val); pdf_drop_obj(val); val = NULL; pdf_drop_obj(key); key = NULL; } } fz_catch(ctx) { pdf_drop_obj(dict); pdf_drop_obj(key); pdf_drop_obj(val); fz_rethrow_message(ctx, "cannot parse dict"); } return dict; }
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg) { pdf_processor *proc_buffer = NULL; pdf_processor *proc_filter = NULL; pdf_obj *new_obj = NULL; pdf_obj *new_ref = NULL; pdf_obj *res = NULL; pdf_obj *ref = NULL; pdf_obj *obj; pdf_obj *contents; fz_buffer *buffer; fz_var(new_obj); fz_var(new_ref); fz_var(res); fz_var(ref); fz_var(proc_buffer); fz_var(proc_filter); buffer = fz_new_buffer(ctx, 1024); fz_try(ctx) { res = pdf_new_dict(ctx, doc, 1); proc_buffer = pdf_new_buffer_processor(ctx, buffer); proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res); pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie); contents = page->contents; if (pdf_is_array(ctx, contents)) { /* create a new object to replace the array */ new_obj = pdf_new_dict(ctx, doc, 1); new_ref = pdf_new_ref(ctx, doc, new_obj); page->contents = contents = new_ref; } else { pdf_dict_del(ctx, contents, PDF_NAME_Filter); pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms); } /* Now deal with resources. The spec allows for Type3 fonts and form * XObjects to omit a resource dictionary and look in the parent. * Avoid that by flattening here as part of the cleaning. This could * conceivably cause changes in rendering, but we don't care. */ /* ExtGState */ obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask); if (!o) continue; o = pdf_dict_get(ctx, o, PDF_NAME_G); if (!o) continue; /* Transparency group XObject */ pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1); } } /* ColorSpace - no cleaning possible */ /* Pattern */ obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *pat = pdf_dict_get_val(ctx, obj, i); if (!pat) continue; if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1) pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0); } } /* Shading - no cleaning possible */ /* XObject */ obj = pdf_dict_get(ctx, res, PDF_NAME_XObject); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i); if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype))) continue; pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1); } } /* Font */ obj = pdf_dict_get(ctx, res, PDF_NAME_Font); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get_val(ctx, obj, i); if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype))) { pdf_clean_type3(ctx, doc, o, page->resources, cookie); } } } /* ProcSet - no cleaning possible. Inherit this from the old dict. */ obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet); if (obj) pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj); /* Properties - no cleaning possible. */ if (proc_fn) (*proc_fn)(ctx, buffer, res, proc_arg); pdf_update_stream(ctx, doc, contents, buffer, 0); pdf_drop_obj(ctx, page->resources); ref = pdf_new_ref(ctx, doc, res); page->resources = pdf_keep_obj(ctx, ref); pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref); } fz_always(ctx) { pdf_drop_processor(ctx, proc_filter); pdf_drop_processor(ctx, proc_buffer); fz_drop_buffer(ctx, buffer); pdf_drop_obj(ctx, new_obj); pdf_drop_obj(ctx, new_ref); pdf_drop_obj(ctx, res); pdf_drop_obj(ctx, ref); } fz_catch(ctx) { fz_rethrow_message(ctx, "Failed while cleaning page"); } }
static void processpage(fz_context *ctx, fz_document *doc, int pagenum) { fz_page *page; int needshot = 0; fz_try(ctx) { page = fz_load_page(ctx, doc, pagenum - 1); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load page %d in file '%s'", pagenum, filename); } pdf_document *inter = pdf_specifics(ctx, doc); pdf_widget *widget = NULL; if (inter) widget = pdf_first_widget(inter, (pdf_page *)page); if (widget) { fprintf(mujstest_file, "GOTO %d\n", pagenum); needshot = 1; } for (;widget; widget = pdf_next_widget(widget)) { fz_rect rect; int w, h, len; int type = pdf_widget_get_type(widget); pdf_bound_widget(widget, &rect); w = (rect.x1 - rect.x0); h = (rect.y1 - rect.y0); ++mujstest_count; switch (type) { default: fprintf(mujstest_file, "%% UNKNOWN %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_PUSHBUTTON: fprintf(mujstest_file, "%% PUSHBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_CHECKBOX: fprintf(mujstest_file, "%% CHECKBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_RADIOBUTTON: fprintf(mujstest_file, "%% RADIOBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_TEXT: { int maxlen = pdf_text_widget_max_len(inter, widget); int texttype = pdf_text_widget_content_type(inter, widget); /* If height is low, assume a single row, and base * the width off that. */ if (h < 10) { w = (w+h-1) / (h ? h : 1); h = 1; } /* Otherwise, if width is low, work off height */ else if (w < 10) { h = (w+h-1) / (w ? w : 1); w = 1; } else { w = (w+9)/10; h = (h+9)/10; } len = w*h; if (len < 2) len = 2; if (len > maxlen) len = maxlen; fprintf(mujstest_file, "%% TEXT %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); switch (texttype) { default: case PDF_WIDGET_CONTENT_UNRESTRAINED: fprintf(mujstest_file, "TEXT %d ", mujstest_count); escape_string(mujstest_file, len-3, lorem); fprintf(mujstest_file, "\n"); break; case PDF_WIDGET_CONTENT_NUMBER: fprintf(mujstest_file, "TEXT %d\n", mujstest_count); break; case PDF_WIDGET_CONTENT_SPECIAL: #ifdef __MINGW32__ fprintf(mujstest_file, "TEXT %I64d\n", 46702919800LL + mujstest_count); #else fprintf(mujstest_file, "TEXT %lld\n", 46702919800LL + mujstest_count); #endif break; case PDF_WIDGET_CONTENT_DATE: fprintf(mujstest_file, "TEXT Jun %d 1979\n", 1 + ((13 + mujstest_count) % 30)); break; case PDF_WIDGET_CONTENT_TIME: ++mujstest_count; fprintf(mujstest_file, "TEXT %02d:%02d\n", ((mujstest_count/60) % 24), mujstest_count % 60); break; } break; } case PDF_WIDGET_TYPE_LISTBOX: fprintf(mujstest_file, "%% LISTBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_COMBOBOX: fprintf(mujstest_file, "%% COMBOBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; } fprintf(mujstest_file, "CLICK %0.2f %0.2f\n", (rect.x0+rect.x1)/2, (rect.y0+rect.y1)/2); } fz_flush_warnings(ctx); if (mujstest_file && needshot) { fprintf(mujstest_file, "SCREENSHOT\n"); } }
int pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs, pdf_obj **root) { fz_stream *file = doc->file; pdf_token tok; int stm_len; *stmofsp = 0; if (stmlenp) *stmlenp = -1; stm_len = 0; /* On entry to this function, we know that we've just seen * '<int> <int> obj'. We expect the next thing we see to be a * pdf object. Regardless of the type of thing we meet next * we only need to fully parse it if it is a dictionary. */ tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_OPEN_DICT) { pdf_obj *dict, *obj; fz_try(ctx) { dict = pdf_parse_dict(ctx, doc, file, buf); } fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); /* Don't let a broken object at EOF overwrite a good one */ if (file->eof) fz_rethrow_message(ctx, "broken object at EOF ignored"); /* Silently swallow the error */ dict = pdf_new_dict(ctx, NULL, 2); } /* We must be careful not to try to resolve any indirections * here. We have just read dict, so we know it to be a non * indirected dictionary. Before we look at any values that * we get back from looking up in it, we need to check they * aren't indirected. */ if (encrypt || id || root) { obj = pdf_dict_get(ctx, dict, PDF_NAME_Type); if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_XRef)) { if (encrypt) { obj = pdf_dict_get(ctx, dict, PDF_NAME_Encrypt); if (obj) { pdf_drop_obj(ctx, *encrypt); *encrypt = pdf_keep_obj(ctx, obj); } } if (id) { obj = pdf_dict_get(ctx, dict, PDF_NAME_ID); if (obj) { pdf_drop_obj(ctx, *id); *id = pdf_keep_obj(ctx, obj); } } if (root) *root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Root)); } } obj = pdf_dict_get(ctx, dict, PDF_NAME_Length); if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj)) stm_len = pdf_to_int(ctx, obj); if (doc->file_reading_linearly && page) { obj = pdf_dict_get(ctx, dict, PDF_NAME_Type); if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_Page)) { pdf_drop_obj(ctx, *page); *page = pdf_keep_obj(ctx, dict); } } pdf_drop_obj(ctx, dict); }
pdf_obj * pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) { pdf_obj *dict; pdf_obj *key = NULL; pdf_obj *val = NULL; pdf_token tok; int a, b; dict = pdf_new_dict(ctx, doc, 8); fz_var(key); fz_var(val); fz_try(ctx) { while (1) { tok = pdf_lex(ctx, file, buf); skip: if (tok == PDF_TOK_CLOSE_DICT) break; /* for BI .. ID .. EI in content streams */ if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")) break; if (tok != PDF_TOK_NAME) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key in dict"); key = pdf_new_name(ctx, doc, buf->scratch); tok = pdf_lex(ctx, file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: val = pdf_parse_array(ctx, doc, file, buf); break; case PDF_TOK_OPEN_DICT: val = pdf_parse_dict(ctx, doc, file, buf); break; case PDF_TOK_NAME: val = pdf_new_name(ctx, doc, buf->scratch); break; case PDF_TOK_REAL: val = pdf_new_real(ctx, doc, buf->f); break; case PDF_TOK_STRING: val = pdf_new_string(ctx, doc, buf->scratch, buf->len); break; case PDF_TOK_TRUE: val = pdf_new_bool(ctx, doc, 1); break; case PDF_TOK_FALSE: val = pdf_new_bool(ctx, doc, 0); break; case PDF_TOK_NULL: val = pdf_new_null(ctx, doc); break; case PDF_TOK_INT: /* 64-bit to allow for numbers > INT_MAX and overflow */ a = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME || (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))) { val = pdf_new_int(ctx, doc, a); pdf_dict_put(ctx, dict, key, val); pdf_drop_obj(ctx, val); val = NULL; pdf_drop_obj(ctx, key); key = NULL; goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_R) { val = pdf_new_indirect(ctx, doc, a, b); break; } } fz_throw(ctx, FZ_ERROR_GENERIC, "invalid indirect reference in dict"); default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in dict"); } pdf_dict_put(ctx, dict, key, val); pdf_drop_obj(ctx, val); val = NULL; pdf_drop_obj(ctx, key); key = NULL; } } fz_catch(ctx) { pdf_drop_obj(ctx, dict); pdf_drop_obj(ctx, key); pdf_drop_obj(ctx, val); fz_rethrow_message(ctx, "cannot parse dict"); } return dict; }
static void pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie) { pdf_processor *proc_buffer = NULL; pdf_processor *proc_filter = NULL; pdf_obj *res = NULL; pdf_obj *ref = NULL; pdf_obj *charprocs; int i, l; fz_var(res); fz_var(ref); fz_var(proc_buffer); fz_var(proc_filter); fz_try(ctx) { res = pdf_dict_get(ctx, obj, PDF_NAME_Resources); if (res) orig_res = res; res = NULL; res = pdf_new_dict(ctx, doc, 1); charprocs = pdf_dict_get(ctx, obj, PDF_NAME_CharProcs); l = pdf_dict_len(ctx, charprocs); for (i = 0; i < l; i++) { pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i); fz_buffer *buffer = fz_new_buffer(ctx, 1024); fz_try(ctx) { proc_buffer = pdf_new_buffer_processor(ctx, buffer); proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res); pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie); pdf_update_stream(ctx, doc, val, buffer, 0); } fz_always(ctx) { pdf_drop_processor(ctx, proc_filter); pdf_drop_processor(ctx, proc_buffer); fz_drop_buffer(ctx, buffer); } fz_catch(ctx) { fz_rethrow(ctx); } } /* ProcSet - no cleaning possible. Inherit this from the old dict. */ pdf_dict_put(ctx, res, PDF_NAME_ProcSet, pdf_dict_get(ctx, orig_res, PDF_NAME_ProcSet)); ref = pdf_new_ref(ctx, doc, res); pdf_dict_put(ctx, obj, PDF_NAME_Resources, ref); } fz_always(ctx) { pdf_drop_obj(ctx, res); pdf_drop_obj(ctx, ref); } fz_catch(ctx) { fz_rethrow_message(ctx, "Failed while cleaning xobject"); } }
/* * Load CMap stream in PDF file */ pdf_cmap * pdf_load_embedded_cmap(pdf_document *doc, pdf_obj *stmobj) { fz_stream *file = NULL; pdf_cmap *cmap = NULL; pdf_cmap *usecmap; pdf_obj *wmode; pdf_obj *obj = NULL; fz_context *ctx = doc->ctx; int phase = 0; fz_var(phase); fz_var(obj); fz_var(file); fz_var(cmap); if (pdf_obj_marked(stmobj)) fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in embedded cmap"); if ((cmap = pdf_find_item(ctx, pdf_free_cmap_imp, stmobj)) != NULL) { return cmap; } fz_try(ctx) { file = pdf_open_stream(doc, pdf_to_num(stmobj), pdf_to_gen(stmobj)); phase = 1; cmap = pdf_load_cmap(ctx, file); phase = 2; fz_close(file); file = NULL; wmode = pdf_dict_gets(stmobj, "WMode"); if (pdf_is_int(wmode)) pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(wmode)); obj = pdf_dict_gets(stmobj, "UseCMap"); if (pdf_is_name(obj)) { usecmap = pdf_load_system_cmap(ctx, pdf_to_name(obj)); pdf_set_usecmap(ctx, cmap, usecmap); pdf_drop_cmap(ctx, usecmap); } else if (pdf_is_indirect(obj)) { phase = 3; pdf_mark_obj(obj); usecmap = pdf_load_embedded_cmap(doc, obj); pdf_unmark_obj(obj); phase = 4; pdf_set_usecmap(ctx, cmap, usecmap); pdf_drop_cmap(ctx, usecmap); } pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); } fz_catch(ctx) { if (file) fz_close(file); if (cmap) pdf_drop_cmap(ctx, cmap); if (phase < 1) fz_rethrow_message(ctx, "cannot open cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); else if (phase < 2) fz_rethrow_message(ctx, "cannot parse cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); else if (phase < 3) fz_rethrow_message(ctx, "cannot load system usecmap '%s'", pdf_to_name(obj)); else { if (phase == 3) pdf_unmark_obj(obj); fz_rethrow_message(ctx, "cannot load embedded usecmap (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); } } return cmap; }
pdf_page * pdf_load_page_by_obj(pdf_document *doc, int number, pdf_obj *pageref) { fz_context *ctx = doc->ctx; pdf_page *page; pdf_annot *annot; pdf_obj *pageobj, *obj; fz_rect mediabox, cropbox, realbox; float userunit; fz_matrix mat; /* SumatraPDF: allow replacing potentially slow pdf_lookup_page_obj */ pageobj = pdf_resolve_indirect(pageref); page = fz_malloc_struct(ctx, pdf_page); page->resources = NULL; page->contents = NULL; page->transparency = 0; page->links = NULL; page->annots = NULL; page->annot_tailp = &page->annots; page->deleted_annots = NULL; page->tmp_annots = NULL; page->me = pdf_keep_obj(pageobj); page->incomplete = 0; obj = pdf_dict_gets(pageobj, "UserUnit"); if (pdf_is_real(obj)) userunit = pdf_to_real(obj); else userunit = 1; pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "MediaBox"), &mediabox); if (fz_is_empty_rect(&mediabox)) { fz_warn(ctx, "cannot find page size for page %d", number + 1); mediabox.x0 = 0; mediabox.y0 = 0; mediabox.x1 = 612; mediabox.y1 = 792; } pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "CropBox"), &cropbox); if (!fz_is_empty_rect(&cropbox)) fz_intersect_rect(&mediabox, &cropbox); page->mediabox.x0 = fz_min(mediabox.x0, mediabox.x1) * userunit; page->mediabox.y0 = fz_min(mediabox.y0, mediabox.y1) * userunit; page->mediabox.x1 = fz_max(mediabox.x0, mediabox.x1) * userunit; page->mediabox.y1 = fz_max(mediabox.y0, mediabox.y1) * userunit; if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1) { fz_warn(ctx, "invalid page size in page %d", number + 1); page->mediabox = fz_unit_rect; } page->rotate = pdf_to_int(pdf_lookup_inherited_page_item(doc, pageobj, "Rotate")); /* Snap page->rotate to 0, 90, 180 or 270 */ if (page->rotate < 0) page->rotate = 360 - ((-page->rotate) % 360); if (page->rotate >= 360) page->rotate = page->rotate % 360; page->rotate = 90*((page->rotate + 45)/90); if (page->rotate > 360) page->rotate = 0; fz_pre_rotate(fz_scale(&page->ctm, 1, -1), -page->rotate); realbox = page->mediabox; fz_transform_rect(&realbox, &page->ctm); fz_pre_scale(fz_translate(&mat, -realbox.x0, -realbox.y0), userunit, userunit); fz_concat(&page->ctm, &page->ctm, &mat); fz_try(ctx) { obj = pdf_dict_gets(pageobj, "Annots"); if (obj) { page->links = pdf_load_link_annots(doc, obj, &page->ctm); pdf_load_annots(doc, page, obj); } } fz_catch(ctx) { if (fz_caught(ctx) != FZ_ERROR_TRYLATER) /* SumatraPDF: ignore annotations in case of unexpected errors */ fz_warn(ctx, "unexpectedly failed to load page annotations"); page->incomplete |= PDF_PAGE_INCOMPLETE_ANNOTS; } page->duration = pdf_to_real(pdf_dict_gets(pageobj, "Dur")); obj = pdf_dict_gets(pageobj, "Trans"); page->transition_present = (obj != NULL); if (obj) { pdf_load_transition(doc, page, obj); } // TODO: inherit page->resources = pdf_lookup_inherited_page_item(doc, pageobj, "Resources"); if (page->resources) pdf_keep_obj(page->resources); obj = pdf_dict_gets(pageobj, "Contents"); fz_try(ctx) { page->contents = pdf_keep_obj(obj); if (pdf_resources_use_blending(doc, page->resources)) page->transparency = 1; /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2107 */ else if (!strcmp(pdf_to_name(pdf_dict_getp(pageobj, "Group/S")), "Transparency")) page->transparency = 1; for (annot = page->annots; annot && !page->transparency; annot = annot->next) if (annot->ap && pdf_resources_use_blending(doc, annot->ap->resources)) page->transparency = 1; } fz_catch(ctx) { if (fz_caught(ctx) != FZ_ERROR_TRYLATER) { pdf_free_page(doc, page); fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref)); } page->incomplete |= PDF_PAGE_INCOMPLETE_CONTENTS; } return page; }
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) { fz_page *page; fz_display_list *list = NULL; fz_device *dev = NULL; int start; fz_cookie cookie = { 0 }; fz_var(list); fz_var(dev); if (showtime) { start = gettime(); } fz_try(ctx) { page = fz_load_page(doc, pagenum - 1); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load page %d in file '%s'", pagenum, filename); } if (uselist) { fz_try(ctx) { list = fz_new_display_list(ctx); dev = fz_new_list_device(ctx, list); fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow_message(ctx, "cannot draw page %d in file '%s'", pagenum, filename); } } if (showxml) { fz_try(ctx) { dev = fz_new_trace_device(ctx); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showtext) { fz_text_page *text = NULL; fz_var(text); fz_try(ctx) { text = fz_new_text_page(ctx); dev = fz_new_text_device(ctx, sheet, text); if (showtext == TEXT_HTML) fz_disable_device_hints(dev, FZ_IGNORE_IMAGE); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); fz_free_device(dev); dev = NULL; if (showtext == TEXT_XML) { fz_print_text_page_xml(ctx, out, text); } else if (showtext == TEXT_HTML) { fz_analyze_text(ctx, sheet, text); fz_print_text_page_html(ctx, out, text); } else if (showtext == TEXT_PLAIN) { fz_print_text_page(ctx, out, text); fz_printf(out, "\f\n"); } } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_free_text_page(ctx, text); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showmd5 || showtime || showfeatures) printf("page %s %d", filename, pagenum); if (showfeatures) { int iscolor; dev = fz_new_test_device(ctx, &iscolor, 0.02f); fz_try(ctx) { if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, NULL); else fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_rethrow(ctx); } printf(" %s", iscolor ? "color" : "grayscale"); } if (pdfout) { fz_matrix ctm; fz_rect bounds, tbounds; pdf_page *newpage; fz_bound_page(doc, page, &bounds); fz_rotate(&ctm, rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); newpage = pdf_create_page(pdfout, bounds, 72, 0); fz_try(ctx) { dev = pdf_page_write(pdfout, newpage); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } pdf_insert_page(pdfout, newpage, INT_MAX); pdf_free_page(pdfout, newpage); } if (output && output_format == OUT_SVG) { float zoom; fz_matrix ctm; fz_rect bounds, tbounds; char buf[512]; FILE *file; fz_output *out; if (!strcmp(output, "-")) file = stdout; else { sprintf(buf, output, pagenum); file = fopen(buf, "wb"); if (file == NULL) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", buf, strerror(errno)); } out = fz_new_output_with_file(ctx, file); fz_bound_page(doc, page, &bounds); zoom = resolution / 72; fz_pre_rotate(fz_scale(&ctm, zoom, zoom), rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); fz_try(ctx) { dev = fz_new_svg_device(ctx, out, tbounds.x1-tbounds.x0, tbounds.y1-tbounds.y0); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_close_output(out); if (file != stdout) fclose(file); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } }
pdf_font_desc * pdf_load_type3_font(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_context *ctx = doc->ctx; fz_var(fontdesc); /* Make a new type3 font entry in the document */ if (doc->num_type3_fonts == doc->max_type3_fonts) { int new_max = doc->max_type3_fonts * 2; if (new_max == 0) new_max = 4; doc->type3_fonts = fz_resize_array(doc->ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts)); doc->max_type3_fonts = new_max; } fz_try(ctx) { obj = pdf_dict_gets(dict, "Name"); if (pdf_is_name(obj)) fz_strlcpy(buf, pdf_to_name(obj), sizeof buf); else sprintf(buf, "Unnamed-T3"); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_gets(dict, "FontMatrix"); pdf_to_matrix(ctx, obj, &matrix); obj = pdf_dict_gets(dict, "FontBBox"); fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); fontdesc->font = fz_new_type3_font(ctx, buf, &matrix); fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* SumatraPDF: expose Type3 FontDescriptor flags */ fontdesc->flags = pdf_to_int(pdf_dict_gets(pdf_dict_gets(dict, "FontDescriptor"), "Flags")); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_gets(dict, "Encoding"); if (!encoding) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); /* SumatraPDF: trying to match Adobe Reader's behavior */ if (!(fontdesc->flags & PDF_FD_SYMBOLIC) && fontdesc->cid_to_ucs_len >= 128) for (i = 32; i < 128; i++) if (fontdesc->cid_to_ucs[i] == '?' || fontdesc->cid_to_ucs[i] == '\0') fontdesc->cid_to_ucs[i] = i; /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1966 */ if (first >= 256 && last - first < 256) { fz_warn(ctx, "ignoring out-of-bound values for FirstChar/LastChar: %d/%d", first, last); last -= first; first = 0; } if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_gets(dict, "Widths"); if (!widths) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(pdf_array_get(widths, i - first)); w = fontdesc->font->t3matrix.a * w * 1000; fontdesc->font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ fontdesc->font->t3freeres = pdf_t3_free_resources; fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources"); if (!fontdesc->font->t3resources) fontdesc->font->t3resources = rdb; if (fontdesc->font->t3resources) pdf_keep_obj(fontdesc->font->t3resources); if (!fontdesc->font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); fontdesc->font->t3doc = doc; fontdesc->font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_gets(dict, "CharProcs"); if (!charprocs) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { /* SumatraPDF: don't reject fonts with few broken glyphs */ fz_try(ctx) { obj = pdf_dict_gets(charprocs, estrings[i]); if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj))) { fontdesc->font->t3procs[i] = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)); fontdesc->size += fontdesc->font->t3procs[i]->cap; fontdesc->size += 0; // TODO: display list size calculation } } fz_catch(ctx) { fz_warn(ctx, "failed to get data for type 3 glyph '%s'", estrings[i]); } } } } fz_catch(ctx) { if (fontdesc) pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, fontdesc->font); return fontdesc; }
pdf_obj * pdf_new_xobject(pdf_document *doc, const fz_rect *bbox, const fz_matrix *mat) { int idict_num; pdf_obj *idict = NULL; pdf_obj *dict = NULL; pdf_xobject *form = NULL; pdf_obj *obj = NULL; pdf_obj *res = NULL; pdf_obj *procset = NULL; fz_context *ctx = doc->ctx; fz_var(idict); fz_var(dict); fz_var(form); fz_var(obj); fz_var(res); fz_var(procset); fz_try(ctx) { dict = pdf_new_dict(doc, 0); obj = pdf_new_rect(doc, bbox); pdf_print_obj(obj); pdf_dict_puts(dict, "BBox", obj); pdf_drop_obj(obj); obj = NULL; obj = pdf_new_int(doc, 1); pdf_dict_puts(dict, "FormType", obj); pdf_drop_obj(obj); obj = NULL; obj = pdf_new_int(doc, 0); pdf_dict_puts(dict, "Length", obj); pdf_drop_obj(obj); obj = NULL; obj = pdf_new_matrix(doc, mat); pdf_dict_puts(dict, "Matrix", obj); pdf_drop_obj(obj); obj = NULL; res = pdf_new_dict(doc, 0); procset = pdf_new_array(doc, 2); obj = pdf_new_name(doc, "PDF"); pdf_array_push(procset, obj); pdf_drop_obj(obj); obj = NULL; obj = pdf_new_name(doc, "Text"); pdf_array_push(procset, obj); pdf_drop_obj(obj); obj = NULL; pdf_dict_puts(res, "ProcSet", procset); pdf_drop_obj(procset); procset = NULL; pdf_dict_puts(dict, "Resources", res); obj = pdf_new_name(doc, "Form"); pdf_dict_puts(dict, "Subtype", obj); pdf_drop_obj(obj); obj = NULL; obj = pdf_new_name(doc, "XObject"); pdf_dict_puts(dict, "Type", obj); pdf_drop_obj(obj); obj = NULL; form = fz_malloc_struct(ctx, pdf_xobject); FZ_INIT_STORABLE(form, 1, pdf_free_xobject_imp); form->resources = NULL; form->contents = NULL; form->colorspace = NULL; form->me = NULL; form->iteration = 0; form->bbox = *bbox; form->matrix = *mat; form->isolated = 0; form->knockout = 0; form->transparency = 0; form->resources = res; res = NULL; idict_num = pdf_create_object(doc); pdf_update_object(doc, idict_num, dict); idict = pdf_new_indirect(doc, idict_num, 0); pdf_drop_obj(dict); dict = NULL; pdf_store_item(ctx, idict, form, pdf_xobject_size(form)); form->contents = pdf_keep_obj(idict); form->me = pdf_keep_obj(idict); pdf_drop_xobject(ctx, form); form = NULL; } fz_catch(ctx) { pdf_drop_obj(procset); pdf_drop_obj(res); pdf_drop_obj(obj); pdf_drop_obj(dict); pdf_drop_obj(idict); pdf_drop_xobject(ctx, form); fz_rethrow_message(ctx, "failed to create xobject)"); } return idict; }
static fz_shade * pdf_load_shading_dict(fz_context *ctx, pdf_document *doc, pdf_obj *dict, const fz_matrix *transform) { fz_shade *shade = NULL; fz_function *func[FZ_MAX_COLORS] = { NULL }; pdf_obj *obj; int funcs = 0; int type = 0; int i, in, out; fz_var(shade); fz_var(func); fz_var(funcs); fz_var(type); fz_try(ctx) { shade = fz_malloc_struct(ctx, fz_shade); FZ_INIT_STORABLE(shade, 1, fz_drop_shade_imp); shade->type = FZ_MESH_TYPE4; shade->use_background = 0; shade->use_function = 0; shade->matrix = *transform; shade->bbox = fz_infinite_rect; shade->colorspace = NULL; funcs = 0; obj = pdf_dict_gets(ctx, dict, "ShadingType"); type = pdf_to_int(ctx, obj); obj = pdf_dict_gets(ctx, dict, "ColorSpace"); if (!obj) fz_throw(ctx, FZ_ERROR_GENERIC, "shading colorspace is missing"); shade->colorspace = pdf_load_colorspace(ctx, doc, obj); obj = pdf_dict_gets(ctx, dict, "Background"); if (obj) { shade->use_background = 1; for (i = 0; i < shade->colorspace->n; i++) shade->background[i] = pdf_to_real(ctx, pdf_array_get(ctx, obj, i)); } obj = pdf_dict_gets(ctx, dict, "BBox"); if (pdf_is_array(ctx, obj)) pdf_to_rect(ctx, obj, &shade->bbox); obj = pdf_dict_gets(ctx, dict, "Function"); if (pdf_is_dict(ctx, obj)) { funcs = 1; if (type == 1) in = 2; else in = 1; out = shade->colorspace->n; func[0] = pdf_load_function(ctx, doc, obj, in, out); if (!func[0]) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load shading function (%d %d R)", pdf_to_num(ctx, obj), pdf_to_gen(ctx, obj)); } else if (pdf_is_array(ctx, obj)) { funcs = pdf_array_len(ctx, obj); if (funcs != 1 && funcs != shade->colorspace->n) { funcs = 0; fz_throw(ctx, FZ_ERROR_GENERIC, "incorrect number of shading functions"); } if (funcs > FZ_MAX_COLORS) { funcs = 0; fz_throw(ctx, FZ_ERROR_GENERIC, "too many shading functions"); } if (type == 1) in = 2; else in = 1; out = 1; for (i = 0; i < funcs; i++) { func[i] = pdf_load_function(ctx, doc, pdf_array_get(ctx, obj, i), in, out); if (!func[i]) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load shading function (%d %d R)", pdf_to_num(ctx, obj), pdf_to_gen(ctx, obj)); } } else if (type < 4) { /* Functions are compulsory for types 1,2,3 */ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load shading function (%d %d R)", pdf_to_num(ctx, obj), pdf_to_gen(ctx, obj)); } shade->type = type; switch (type) { case 1: pdf_load_function_based_shading(ctx, doc, shade, dict, func[0]); break; case 2: pdf_load_linear_shading(ctx, doc, shade, dict, funcs, func); break; case 3: pdf_load_radial_shading(ctx, doc, shade, dict, funcs, func); break; case 4: pdf_load_type4_shade(ctx, doc, shade, dict, funcs, func); break; case 5: pdf_load_type5_shade(ctx, doc, shade, dict, funcs, func); break; case 6: pdf_load_type6_shade(ctx, doc, shade, dict, funcs, func); break; case 7: pdf_load_type7_shade(ctx, doc, shade, dict, funcs, func); break; default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown shading type: %d", type); } } fz_always(ctx) { for (i = 0; i < funcs; i++) if (func[i]) fz_drop_function(ctx, func[i]); } fz_catch(ctx) { fz_drop_shade(ctx, shade); fz_rethrow_message(ctx, "cannot load shading type %d (%d %d R)", type, pdf_to_num(ctx, dict), pdf_to_gen(ctx, dict)); } return shade; }
pdf_obj * pdf_parse_array(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) { pdf_obj *ary = NULL; pdf_obj *obj = NULL; int a = 0, b = 0, n = 0; pdf_token tok; fz_context *ctx = file->ctx; pdf_obj *op = NULL; fz_var(obj); ary = pdf_new_array(doc, 4); fz_try(ctx) { while (1) { tok = pdf_lex(file, buf); if (tok != PDF_TOK_INT && tok != PDF_TOK_R) { if (n > 0) { obj = pdf_new_int(doc, a); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; } if (n > 1) { obj = pdf_new_int(doc, b); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; } n = 0; } if (tok == PDF_TOK_INT && n == 2) { obj = pdf_new_int(doc, a); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; a = b; n --; } switch (tok) { case PDF_TOK_CLOSE_ARRAY: op = ary; goto end; case PDF_TOK_INT: if (n == 0) a = buf->i; if (n == 1) b = buf->i; n ++; break; case PDF_TOK_R: if (n != 2) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse indirect reference in array"); obj = pdf_new_indirect(doc, a, b); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; n = 0; break; case PDF_TOK_OPEN_ARRAY: obj = pdf_parse_array(doc, file, buf); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; case PDF_TOK_OPEN_DICT: obj = pdf_parse_dict(doc, file, buf); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; case PDF_TOK_NAME: obj = pdf_new_name(doc, buf->scratch); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; case PDF_TOK_REAL: obj = pdf_new_real(doc, buf->f); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; case PDF_TOK_STRING: obj = pdf_new_string(doc, buf->scratch, buf->len); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; case PDF_TOK_TRUE: obj = pdf_new_bool(doc, 1); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; case PDF_TOK_FALSE: obj = pdf_new_bool(doc, 0); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; case PDF_TOK_NULL: obj = pdf_new_null(doc); pdf_array_push(ary, obj); pdf_drop_obj(obj); obj = NULL; break; default: fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse token in array"); } } end: {} } fz_catch(ctx) { pdf_drop_obj(obj); pdf_drop_obj(ary); fz_rethrow_message(ctx, "cannot parse array"); } return op; }
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) { fz_page *page; fz_display_list *list = NULL; fz_device *dev = NULL; int start; fz_cookie cookie = { 0 }; int needshot = 0; fz_var(list); fz_var(dev); if (showtime) { start = gettime(); } fz_try(ctx) { page = fz_load_page(doc, pagenum - 1); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load page %d in file '%s'", pagenum, filename); } if (mujstest_file) { pdf_document *inter = pdf_specifics(doc); pdf_widget *widget = NULL; if (inter) widget = pdf_first_widget(inter, (pdf_page *)page); if (widget) { fprintf(mujstest_file, "GOTO %d\n", pagenum); needshot = 1; } for (;widget; widget = pdf_next_widget(widget)) { fz_rect rect; int w, h, len; int type = pdf_widget_get_type(widget); pdf_bound_widget(widget, &rect); w = (rect.x1 - rect.x0); h = (rect.y1 - rect.y0); ++mujstest_count; switch (type) { default: fprintf(mujstest_file, "%% UNKNOWN %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_PUSHBUTTON: fprintf(mujstest_file, "%% PUSHBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_CHECKBOX: fprintf(mujstest_file, "%% CHECKBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_RADIOBUTTON: fprintf(mujstest_file, "%% RADIOBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_TEXT: { int maxlen = pdf_text_widget_max_len(inter, widget); int texttype = pdf_text_widget_content_type(inter, widget); /* If height is low, assume a single row, and base * the width off that. */ if (h < 10) { w = (w+h-1) / (h ? h : 1); h = 1; } /* Otherwise, if width is low, work off height */ else if (w < 10) { h = (w+h-1) / (w ? w : 1); w = 1; } else { w = (w+9)/10; h = (h+9)/10; } len = w*h; if (len < 2) len = 2; if (len > maxlen) len = maxlen; fprintf(mujstest_file, "%% TEXT %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); switch (texttype) { default: case PDF_WIDGET_CONTENT_UNRESTRAINED: fprintf(mujstest_file, "TEXT %d ", mujstest_count); escape_string(mujstest_file, len-3, lorem); fprintf(mujstest_file, "\n"); break; case PDF_WIDGET_CONTENT_NUMBER: fprintf(mujstest_file, "TEXT %d\n", mujstest_count); break; case PDF_WIDGET_CONTENT_SPECIAL: #ifdef __MINGW32__ fprintf(mujstest_file, "TEXT %I64d\n", 46702919800LL + mujstest_count); #else fprintf(mujstest_file, "TEXT %lld\n", 46702919800LL + mujstest_count); #endif break; case PDF_WIDGET_CONTENT_DATE: fprintf(mujstest_file, "TEXT Jun %d 1979\n", 1 + ((13 + mujstest_count) % 30)); break; case PDF_WIDGET_CONTENT_TIME: ++mujstest_count; fprintf(mujstest_file, "TEXT %02d:%02d\n", ((mujstest_count/60) % 24), mujstest_count % 60); break; } break; } case PDF_WIDGET_TYPE_LISTBOX: fprintf(mujstest_file, "%% LISTBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_COMBOBOX: fprintf(mujstest_file, "%% COMBOBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; } fprintf(mujstest_file, "CLICK %0.2f %0.2f\n", (rect.x0+rect.x1)/2, (rect.y0+rect.y1)/2); } } if (uselist) { fz_try(ctx) { list = fz_new_display_list(ctx); dev = fz_new_list_device(ctx, list); fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow_message(ctx, "cannot draw page %d in file '%s'", pagenum, filename); } } if (showxml) { fz_try(ctx) { dev = fz_new_trace_device(ctx); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showtext) { fz_text_page *text = NULL; fz_var(text); fz_try(ctx) { text = fz_new_text_page(ctx); dev = fz_new_text_device(ctx, sheet, text); if (showtext == TEXT_HTML) fz_disable_device_hints(dev, FZ_IGNORE_IMAGE); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); fz_free_device(dev); dev = NULL; if (showtext == TEXT_XML) { fz_print_text_page_xml(ctx, out, text); } else if (showtext == TEXT_HTML) { fz_analyze_text(ctx, sheet, text); fz_print_text_page_html(ctx, out, text); } else if (showtext == TEXT_PLAIN) { fz_print_text_page(ctx, out, text); fz_printf(out, "\f\n"); } } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_free_text_page(ctx, text); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showmd5 || showtime) printf("page %s %d", filename, pagenum); if (pdfout) { fz_matrix ctm; fz_rect bounds, tbounds; pdf_page *newpage; fz_bound_page(doc, page, &bounds); fz_rotate(&ctm, rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); newpage = pdf_create_page(pdfout, bounds, 72, 0); fz_try(ctx) { dev = pdf_page_write(pdfout, newpage); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } pdf_insert_page(pdfout, newpage, INT_MAX); pdf_free_page(pdfout, newpage); } if (output && output_format == OUT_SVG) { float zoom; fz_matrix ctm; fz_rect bounds, tbounds; char buf[512]; FILE *file; fz_output *out; if (!strcmp(output, "-")) file = stdout; else { sprintf(buf, output, pagenum); file = fopen(buf, "wb"); if (file == NULL) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", buf, strerror(errno)); } out = fz_new_output_with_file(ctx, file); fz_bound_page(doc, page, &bounds); zoom = resolution / 72; fz_pre_rotate(fz_scale(&ctm, zoom, zoom), rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); fz_try(ctx) { dev = fz_new_svg_device(ctx, out, tbounds.x1-tbounds.x0, tbounds.y1-tbounds.y0); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_close_output(out); if (file != stdout) fclose(file); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } }
fz_pixmap * fz_load_jpx(fz_context *ctx, unsigned char *data, int size, fz_colorspace *defcs, int indexed) { fz_pixmap *img; fz_colorspace *origcs; opj_dparameters_t params; opj_codec_t *codec; opj_image_t *jpx; opj_stream_t *stream; fz_colorspace *colorspace; unsigned char *p; OPJ_CODEC_FORMAT format; int a, n, w, h, depth, sgnd; int x, y, k, v; stream_block sb; if (size < 2) fz_throw(ctx, FZ_ERROR_GENERIC, "not enough data to determine image format"); /* Check for SOC marker -- if found we have a bare J2K stream */ if (data[0] == 0xFF && data[1] == 0x4F) format = OPJ_CODEC_J2K; else format = OPJ_CODEC_JP2; opj_set_default_decoder_parameters(¶ms); if (indexed) params.flags |= OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG; codec = opj_create_decompress(format); opj_set_info_handler(codec, fz_opj_info_callback, ctx); opj_set_warning_handler(codec, fz_opj_warning_callback, ctx); opj_set_error_handler(codec, fz_opj_error_callback, ctx); if (!opj_setup_decoder(codec, ¶ms)) { fz_throw(ctx, FZ_ERROR_GENERIC, "j2k decode failed"); } stream = opj_stream_default_create(OPJ_TRUE); sb.data = data; sb.pos = 0; sb.size = size; opj_stream_set_read_function(stream, fz_opj_stream_read); opj_stream_set_skip_function(stream, fz_opj_stream_skip); opj_stream_set_seek_function(stream, fz_opj_stream_seek); opj_stream_set_user_data(stream, &sb); /* Set the length to avoid an assert */ opj_stream_set_user_data_length(stream, size); if (!opj_read_header(stream, codec, &jpx)) { opj_stream_destroy(stream); opj_destroy_codec(codec); fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read JPX header"); } if (!opj_decode(codec, stream, jpx)) { opj_stream_destroy(stream); opj_destroy_codec(codec); opj_image_destroy(jpx); fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to decode JPX image"); } opj_stream_destroy(stream); opj_destroy_codec(codec); /* jpx should never be NULL here, but check anyway */ if (!jpx) fz_throw(ctx, FZ_ERROR_GENERIC, "opj_decode failed"); for (k = 1; k < (int)jpx->numcomps; k++) { if (!jpx->comps[k].data) { opj_image_destroy(jpx); fz_throw(ctx, FZ_ERROR_GENERIC, "image components are missing data"); } if (jpx->comps[k].w != jpx->comps[0].w) { opj_image_destroy(jpx); fz_throw(ctx, FZ_ERROR_GENERIC, "image components have different width"); } if (jpx->comps[k].h != jpx->comps[0].h) { opj_image_destroy(jpx); fz_throw(ctx, FZ_ERROR_GENERIC, "image components have different height"); } if (jpx->comps[k].prec != jpx->comps[0].prec) { opj_image_destroy(jpx); fz_throw(ctx, FZ_ERROR_GENERIC, "image components have different precision"); } } n = jpx->numcomps; w = jpx->comps[0].w; h = jpx->comps[0].h; depth = jpx->comps[0].prec; sgnd = jpx->comps[0].sgnd; if (jpx->color_space == OPJ_CLRSPC_SRGB && n == 4) { n = 3; a = 1; } else if (jpx->color_space == OPJ_CLRSPC_SYCC && n == 4) { n = 3; a = 1; } else if (n == 2) { n = 1; a = 1; } else if (n > 4) { n = 4; a = 1; } else { a = 0; } origcs = defcs; if (defcs) { if (defcs->n == n) { colorspace = defcs; } else { fz_warn(ctx, "jpx file and dict colorspaces do not match"); defcs = NULL; } } if (!defcs) { switch (n) { case 1: colorspace = fz_device_gray(ctx); break; case 3: colorspace = fz_device_rgb(ctx); break; case 4: colorspace = fz_device_cmyk(ctx); break; } } fz_try(ctx) { img = fz_new_pixmap(ctx, colorspace, w, h); } fz_catch(ctx) { opj_image_destroy(jpx); fz_rethrow_message(ctx, "out of memory loading jpx"); } p = img->samples; for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { for (k = 0; k < n + a; k++) { v = jpx->comps[k].data[y * w + x]; if (sgnd) v = v + (1 << (depth - 1)); if (depth > 8) v = v >> (depth - 8); *p++ = v; } if (!a) *p++ = 255; } } opj_image_destroy(jpx); if (a) { if (n == 4) { fz_pixmap *tmp = fz_new_pixmap(ctx, fz_device_rgb(ctx), w, h); fz_convert_pixmap(ctx, tmp, img); fz_drop_pixmap(ctx, img); img = tmp; } fz_premultiply_pixmap(ctx, img); } if (origcs != defcs) { fz_pixmap *tmp = fz_new_pixmap(ctx, origcs, w, h); fz_convert_pixmap(ctx, tmp, img); fz_drop_pixmap(ctx, img); img = tmp; } return img; }