static pdf_font_desc * pdf_load_simple_font(pdf_document *xref, pdf_obj *dict) { pdf_obj *descriptor; pdf_obj *encoding; pdf_obj *widths; unsigned short *etable = NULL; pdf_font_desc *fontdesc = NULL; FT_Face face; FT_CharMap cmap; int symbolic; int kind; char *basefont; char *fontname; char *estrings[256]; char ebuffer[256][32]; int i, k, n; int fterr; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_var(etable); basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); fontname = clean_font_name(basefont); /* Load font file */ fz_try(ctx) { fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (descriptor) pdf_load_font_descriptor(fontdesc, xref, descriptor, NULL, basefont); else pdf_load_builtin_font(ctx, fontdesc, fontname); /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ if (!*fontdesc->font->name && !pdf_dict_gets(dict, "ToUnicode") && !strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") && pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4) { /* note: without the comma, pdf_load_font_descriptor would prefer /FontName over /BaseFont */ char *cp936fonts[] = { "\xCB\xCE\xCC\xE5", "SimSun,Regular", "\xBA\xDA\xCC\xE5", "SimHei,Regular", "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", "\xC1\xA5\xCA\xE9", "SimLi,Regular", NULL }; for (i = 0; cp936fonts[i]; i += 2) if (!strcmp(basefont, cp936fonts[i])) break; if (cp936fonts[i]) { fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); pdf_drop_font(ctx, fontdesc); fontdesc = pdf_new_font_desc(ctx); pdf_load_font_descriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1]); fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); /* RJW: "cannot load font" */ face = fontdesc->font->ft_face; kind = ft_kind(face); goto skip_encoding; } } face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ symbolic = fontdesc->flags & 4; if (face->num_charmaps > 0) cmap = face->charmaps[0]; else cmap = NULL; for (i = 0; i < face->num_charmaps; i++) { FT_CharMap test = face->charmaps[i]; if (kind == TYPE1) { if (test->platform_id == 7) cmap = test; } if (kind == TRUETYPE) { if (test->platform_id == 1 && test->encoding_id == 0) cmap = test; if (test->platform_id == 3 && test->encoding_id == 1) cmap = test; } } if (cmap) { fterr = FT_Set_Charmap(face, cmap); if (fterr) fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); } else fz_warn(ctx, "freetype could not find any cmaps"); etable = fz_malloc_array(ctx, 256, sizeof(unsigned short)); fontdesc->size += 256 * sizeof(unsigned short); for (i = 0; i < 256; i++) { estrings[i] = NULL; etable[i] = 0; } encoding = pdf_dict_gets(dict, "Encoding"); if (encoding) { if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); else if (!fontdesc->is_embedded && !symbolic) pdf_load_encoding(estrings, "StandardEncoding"); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item)) estrings[k++] = pdf_to_name(item); if (k < 0) k = 0; if (k > 255) k = 255; } } } } /* start with the builtin encoding */ for (i = 0; i < 256; i++) etable[i] = ft_char_index(face, i); /* encode by glyph name where we can */ fz_lock(ctx, FZ_LOCK_FREETYPE); if (kind == TYPE1) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) { int aglcode = pdf_lookup_agl(estrings[i]); const char **dupnames = pdf_lookup_agl_duplicates(aglcode); while (*dupnames) { etable[i] = FT_Get_Name_Index(face, (char*)*dupnames); if (etable[i]) break; dupnames++; } } } } } /* encode by glyph name where we can */ if (kind == TRUETYPE) { /* Unicode cmap */ if (!symbolic && face->charmap && face->charmap->platform_id == 3) { for (i = 0; i < 256; i++) { if (estrings[i]) { int aglcode = pdf_lookup_agl(estrings[i]); if (!aglcode) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, aglcode); } } } /* MacRoman cmap */ else if (!symbolic && face->charmap && face->charmap->platform_id == 1) { for (i = 0; i < 256; i++) { if (estrings[i]) { k = lookup_mre_code(estrings[i]); if (k <= 0) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, k); } } } /* Symbolic cmap */ else { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) etable[i] = ft_char_index(face, i); } } } } /* try to reverse the glyph names from the builtin encoding */ for (i = 0; i < 256; i++) { if (etable[i] && !estrings[i]) { if (FT_HAS_GLYPH_NAMES(face)) { fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); if (fterr) fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); if (ebuffer[i][0]) estrings[i] = ebuffer[i]; } else { estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */ } } } fz_unlock(ctx, FZ_LOCK_FREETYPE); fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); fontdesc->cid_to_gid_len = 256; fontdesc->cid_to_gid = etable; pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); /* RJW: "cannot load to_unicode" */ skip_encoding: /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); widths = pdf_dict_gets(dict, "Widths"); if (widths) { int first, last; first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; for (i = 0; i < last - first + 1; i++) { int wid = pdf_to_int(pdf_array_get(widths, i)); pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); } } else { fz_lock(ctx, FZ_LOCK_FREETYPE); fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); if (fterr) fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr)); for (i = 0; i < 256; i++) { pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); } fz_unlock(ctx, FZ_LOCK_FREETYPE); } pdf_end_hmtx(ctx, fontdesc); } fz_catch(ctx) { if (fontdesc && etable != fontdesc->cid_to_gid) fz_free(ctx, etable); pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
char * pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_obj *dest) { pdf_obj *filename=NULL; char *path = NULL; char *uri = NULL; char buf[256]; size_t n; if (pdf_is_string(ctx, file_spec)) filename = file_spec; if (pdf_is_dict(ctx, file_spec)) { #if defined(_WIN32) || defined(_WIN64) filename = pdf_dict_get(ctx, file_spec, PDF_NAME_DOS); #else filename = pdf_dict_get(ctx, file_spec, PDF_NAME_Unix); #endif if (!filename) filename = pdf_dict_geta(ctx, file_spec, PDF_NAME_UF, PDF_NAME_F); } if (!pdf_is_string(ctx, filename)) { fz_warn(ctx, "cannot parse file specification"); return NULL; } path = pdf_to_utf8(ctx, filename); #if defined(_WIN32) || defined(_WIN64) if (strcmp(pdf_to_name(ctx, pdf_dict_gets(ctx, file_spec, "FS")), "URL") != 0) { /* move the file name into the expected place and use the expected path separator */ char *c; if (path[0] == '/' && (('A' <= path[1] && path[1] <= 'Z') || ('a' <= path[1] && path[1] <= 'z')) && path[2] == '/') { path[0] = path[1]; path[1] = ':'; } for (c = path; *c; c++) { if (*c == '/') *c = '\\'; } } #endif if (pdf_is_array(ctx, dest)) fz_snprintf(buf, sizeof buf, "#page=%d", pdf_to_int(ctx, pdf_array_get(ctx, dest, 0)) + 1); else if (pdf_is_name(ctx, dest)) fz_snprintf(buf, sizeof buf, "#%s", pdf_to_name(ctx, dest)); else if (pdf_is_string(ctx, dest)) fz_snprintf(buf, sizeof buf, "#%s", pdf_to_str_buf(ctx, dest)); else buf[0] = 0; n = 7 + strlen(path) + strlen(buf) + 1; uri = fz_malloc(ctx, n); fz_strlcpy(uri, "file://", n); fz_strlcat(uri, path, n); fz_strlcat(uri, buf, n); fz_free(ctx, path); return uri; }
void pdf_load_annots(fz_context *ctx, pdf_page *page, pdf_obj *annots) { pdf_document *doc = page->doc; pdf_annot *annot, **itr; pdf_obj *obj, *ap, *as, *n; int i, len, keep_annot; fz_var(annot); fz_var(itr); fz_var(keep_annot); itr = &page->annots; len = pdf_array_len(ctx, annots); /* Create an initial linked list of pdf_annot structures with only the obj field filled in. We do this because update_appearance has the potential to change the annot array, so we don't want to be iterating through the array while that happens. */ fz_try(ctx) { for (i = 0; i < len; i++) { obj = pdf_array_get(ctx, annots, i); annot = pdf_new_annot(ctx, page); *itr = annot; annot->obj = pdf_keep_obj(ctx, obj); itr = &annot->next; } } fz_catch(ctx) { pdf_drop_annots(ctx, page->annots); page->annots = NULL; fz_rethrow(ctx); } /* Iterate through the newly created annot linked list, using a double pointer to facilitate deleting broken annotations. */ itr = &page->annots; while (*itr) { annot = *itr; fz_try(ctx) { pdf_hotspot *hp = &doc->hotspot; n = NULL; if (doc->update_appearance) doc->update_appearance(ctx, doc, annot); obj = annot->obj; ap = pdf_dict_get(ctx, obj, PDF_NAME_AP); as = pdf_dict_get(ctx, obj, PDF_NAME_AS); /* We only collect annotations with an appearance * stream into this list, so remove any that don't * (such as links) and continue. */ keep_annot = pdf_is_dict(ctx, ap); if (!keep_annot) break; if (hp->num == pdf_to_num(ctx, obj) && (hp->state & HOTSPOT_POINTER_DOWN)) { n = pdf_dict_get(ctx, ap, PDF_NAME_D); /* down state */ } if (n == NULL) n = pdf_dict_get(ctx, ap, PDF_NAME_N); /* normal state */ /* lookup current state in sub-dictionary */ if (!pdf_is_stream(ctx, n)) n = pdf_dict_get(ctx, n, as); annot->ap = NULL; if (pdf_is_stream(ctx, n)) { annot->ap = pdf_load_xobject(ctx, doc, n); annot->ap_iteration = annot->ap->iteration; } else fz_warn(ctx, "no appearance stream for annotation %d 0 R", pdf_to_num(ctx, annot->obj)); if (obj == doc->focus_obj) doc->focus = annot; /* Move to next item in the linked list */ itr = &annot->next; } fz_catch(ctx) { if (fz_caught(ctx) == FZ_ERROR_TRYLATER) { pdf_drop_annots(ctx, page->annots); page->annots = NULL; fz_rethrow(ctx); } keep_annot = 0; fz_warn(ctx, "ignoring broken annotation"); } if (!keep_annot) { /* Move to next item in the linked list, dropping this one */ *itr = annot->next; annot->next = NULL; /* Required because pdf_drop_annots follows the "next" chain */ pdf_drop_annots(ctx, annot); } } page->annot_tailp = itr; }
static void gatherimages(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) { int i, n; n = pdf_dict_len(dict); for (i = 0; i < n; i++) { pdf_obj *imagedict; pdf_obj *type; pdf_obj *width; pdf_obj *height; pdf_obj *bpc = NULL; pdf_obj *filter = NULL; pdf_obj *cs = NULL; pdf_obj *altcs; int k; imagedict = pdf_dict_get_val(dict, i); if (!pdf_is_dict(imagedict)) { fz_warn(ctx, "not an image dict (%d %d R)", pdf_to_num(imagedict), pdf_to_gen(imagedict)); continue; } type = pdf_dict_gets(imagedict, "Subtype"); if (strcmp(pdf_to_name(type), "Image")) continue; filter = pdf_dict_gets(imagedict, "Filter"); altcs = NULL; cs = pdf_dict_gets(imagedict, "ColorSpace"); if (pdf_is_array(cs)) { pdf_obj *cses = cs; cs = pdf_array_get(cses, 0); if (pdf_is_name(cs) && (!strcmp(pdf_to_name(cs), "DeviceN") || !strcmp(pdf_to_name(cs), "Separation"))) { altcs = pdf_array_get(cses, 2); if (pdf_is_array(altcs)) altcs = pdf_array_get(altcs, 0); } } width = pdf_dict_gets(imagedict, "Width"); height = pdf_dict_gets(imagedict, "Height"); bpc = pdf_dict_gets(imagedict, "BitsPerComponent"); for (k = 0; k < images; k++) if (!pdf_objcmp(image[k].u.image.obj, imagedict)) break; if (k < images) continue; image = fz_resize_array(ctx, image, images+1, sizeof(struct info)); images++; image[images - 1].page = page; image[images - 1].pageref = pageref; image[images - 1].pageobj = pageobj; image[images - 1].u.image.obj = imagedict; image[images - 1].u.image.width = width; image[images - 1].u.image.height = height; image[images - 1].u.image.bpc = bpc; image[images - 1].u.image.filter = filter; image[images - 1].u.image.cs = cs; image[images - 1].u.image.altcs = altcs; } }
static void gatherpatterns(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) { int i, n; n = pdf_dict_len(dict); for (i = 0; i < n; i++) { pdf_obj *patterndict; pdf_obj *type; pdf_obj *paint = NULL; pdf_obj *tiling = NULL; pdf_obj *shading = NULL; int k; patterndict = pdf_dict_get_val(dict, i); if (!pdf_is_dict(patterndict)) { fz_warn(ctx, "not a pattern dict (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); continue; } type = pdf_dict_gets(patterndict, "PatternType"); if (!pdf_is_int(type) || pdf_to_int(type) < 1 || pdf_to_int(type) > 2) { fz_warn(ctx, "not a pattern type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); type = NULL; } if (pdf_to_int(type) == 1) { paint = pdf_dict_gets(patterndict, "PaintType"); if (!pdf_is_int(paint) || pdf_to_int(paint) < 1 || pdf_to_int(paint) > 2) { fz_warn(ctx, "not a pattern paint type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); paint = NULL; } tiling = pdf_dict_gets(patterndict, "TilingType"); if (!pdf_is_int(tiling) || pdf_to_int(tiling) < 1 || pdf_to_int(tiling) > 3) { fz_warn(ctx, "not a pattern tiling type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); tiling = NULL; } } else { shading = pdf_dict_gets(patterndict, "Shading"); } for (k = 0; k < patterns; k++) if (!pdf_objcmp(pattern[k].u.pattern.obj, patterndict)) break; if (k < patterns) continue; pattern = fz_resize_array(ctx, pattern, patterns+1, sizeof(struct info)); patterns++; pattern[patterns - 1].page = page; pattern[patterns - 1].pageref = pageref; pattern[patterns - 1].pageobj = pageobj; pattern[patterns - 1].u.pattern.obj = patterndict; pattern[patterns - 1].u.pattern.type = type; pattern[patterns - 1].u.pattern.paint = paint; pattern[patterns - 1].u.pattern.tiling = tiling; pattern[patterns - 1].u.pattern.shading = shading; } }
static fz_shade * pdf_load_shading_dict(fz_context *ctx, pdf_document *doc, pdf_obj *dict, const fz_matrix *transform) { fz_shade *shade = NULL; pdf_function *func[FZ_MAX_COLORS] = { NULL }; pdf_obj *obj; int funcs = 0; int type = 0; int i, in, out, n; fz_var(shade); fz_var(func); fz_var(funcs); fz_var(type); fz_try(ctx) { shade = fz_malloc_struct(ctx, fz_shade); FZ_INIT_STORABLE(shade, 1, fz_drop_shade_imp); shade->type = FZ_MESH_TYPE4; shade->use_background = 0; shade->use_function = 0; shade->matrix = *transform; shade->bbox = fz_infinite_rect; shade->colorspace = NULL; funcs = 0; obj = pdf_dict_get(ctx, dict, PDF_NAME(ShadingType)); type = pdf_to_int(ctx, obj); obj = pdf_dict_get(ctx, dict, PDF_NAME(ColorSpace)); if (!obj) fz_throw(ctx, FZ_ERROR_SYNTAX, "shading colorspace is missing"); shade->colorspace = pdf_load_colorspace(ctx, obj); n = fz_colorspace_n(ctx, shade->colorspace); obj = pdf_dict_get(ctx, dict, PDF_NAME(Background)); if (obj) { shade->use_background = 1; for (i = 0; i < n; i++) shade->background[i] = pdf_array_get_real(ctx, obj, i); } obj = pdf_dict_get(ctx, dict, PDF_NAME(BBox)); if (pdf_is_array(ctx, obj)) pdf_to_rect(ctx, obj, &shade->bbox); obj = pdf_dict_get(ctx, dict, PDF_NAME(Function)); if (pdf_is_dict(ctx, obj)) { funcs = 1; if (type == 1) in = 2; else in = 1; out = n; func[0] = pdf_load_function(ctx, obj, in, out); if (!func[0]) fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot load shading function (%d 0 R)", pdf_to_num(ctx, obj)); } else if (pdf_is_array(ctx, obj)) { funcs = pdf_array_len(ctx, obj); if (funcs != 1 && funcs != n) { funcs = 0; fz_throw(ctx, FZ_ERROR_SYNTAX, "incorrect number of shading functions"); } if (funcs > FZ_MAX_COLORS) { funcs = 0; fz_throw(ctx, FZ_ERROR_SYNTAX, "too many shading functions"); } if (type == 1) in = 2; else in = 1; out = 1; for (i = 0; i < funcs; i++) { func[i] = pdf_load_function(ctx, pdf_array_get(ctx, obj, i), in, out); if (!func[i]) fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot load shading function (%d 0 R)", pdf_to_num(ctx, obj)); } } else if (type < 4) { /* Functions are compulsory for types 1,2,3 */ fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot load shading function (%d 0 R)", pdf_to_num(ctx, obj)); } shade->type = type; switch (type) { case 1: pdf_load_function_based_shading(ctx, doc, shade, dict, func[0]); break; case 2: pdf_load_linear_shading(ctx, doc, shade, dict, funcs, func); break; case 3: pdf_load_radial_shading(ctx, doc, shade, dict, funcs, func); break; case 4: pdf_load_type4_shade(ctx, doc, shade, dict, funcs, func); break; case 5: pdf_load_type5_shade(ctx, doc, shade, dict, funcs, func); break; case 6: pdf_load_type6_shade(ctx, doc, shade, dict, funcs, func); break; case 7: pdf_load_type7_shade(ctx, doc, shade, dict, funcs, func); break; default: fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown shading type: %d", type); } } fz_always(ctx) { for (i = 0; i < funcs; i++) pdf_drop_function(ctx, func[i]); } fz_catch(ctx) { fz_drop_shade(ctx, shade); fz_rethrow(ctx); } return shade; }
pdf_font_desc * pdf_load_type3_font(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_context *ctx = doc->ctx; fz_var(fontdesc); /* Make a new type3 font entry in the document */ if (doc->num_type3_fonts == doc->max_type3_fonts) { int new_max = doc->max_type3_fonts * 2; if (new_max == 0) new_max = 4; doc->type3_fonts = fz_resize_array(doc->ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts)); doc->max_type3_fonts = new_max; } fz_try(ctx) { obj = pdf_dict_gets(dict, "Name"); if (pdf_is_name(obj)) fz_strlcpy(buf, pdf_to_name(obj), sizeof buf); else sprintf(buf, "Unnamed-T3"); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_gets(dict, "FontMatrix"); pdf_to_matrix(ctx, obj, &matrix); obj = pdf_dict_gets(dict, "FontBBox"); fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); fontdesc->font = fz_new_type3_font(ctx, buf, &matrix); fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_gets(dict, "Encoding"); if (!encoding) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_gets(dict, "Widths"); if (!widths) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(pdf_array_get(widths, i - first)); w = fontdesc->font->t3matrix.a * w * 1000; fontdesc->font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ fontdesc->font->t3freeres = pdf_t3_free_resources; fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources"); if (!fontdesc->font->t3resources) fontdesc->font->t3resources = rdb; if (fontdesc->font->t3resources) pdf_keep_obj(fontdesc->font->t3resources); if (!fontdesc->font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); fontdesc->font->t3doc = doc; fontdesc->font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_gets(dict, "CharProcs"); if (!charprocs) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { obj = pdf_dict_gets(charprocs, estrings[i]); if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj))) { fontdesc->font->t3procs[i] = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)); fontdesc->size += fontdesc->font->t3procs[i]->cap; fontdesc->size += 0; // TODO: display list size calculation } } } } fz_catch(ctx) { if (fontdesc) pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, fontdesc->font); return fontdesc; }
static fz_image * pdf_load_image_imp(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask) { fz_stream *stm = NULL; fz_image *image = NULL; pdf_obj *obj, *res; int w, h, bpc, n; int imagemask; int interpolate; int indexed; fz_image *mask = NULL; /* explicit mask/soft mask image */ int usecolorkey = 0; fz_colorspace *colorspace = NULL; float decode[FZ_MAX_COLORS * 2]; int colorkey[FZ_MAX_COLORS * 2]; int stride; int i; fz_context *ctx = doc->ctx; fz_compressed_buffer *buffer; fz_var(stm); fz_var(mask); fz_var(image); fz_var(colorspace); fz_try(ctx) { /* special case for JPEG2000 images */ if (pdf_is_jpx_image(ctx, dict)) { // image = pdf_load_jpx(doc, dict, forcemask); // // if (forcemask) // { // fz_pixmap *mask_pixmap; // if (image->n != 2) // { // fz_pixmap *gray; // fz_irect bbox; // fz_warn(ctx, "soft mask should be grayscale"); // gray = fz_new_pixmap_with_bbox(ctx, fz_device_gray(ctx), fz_pixmap_bbox(ctx, image->tile, &bbox)); // fz_convert_pixmap(ctx, gray, image->tile); // fz_drop_pixmap(ctx, image->tile); // image->tile = gray; // } // mask_pixmap = fz_alpha_from_gray(ctx, image->tile, 1); // fz_drop_pixmap(ctx, image->tile); // image->tile = mask_pixmap; // } break; /* Out of fz_try */ } w = pdf_to_int(pdf_dict_getsa(dict, "Width", "W")); h = pdf_to_int(pdf_dict_getsa(dict, "Height", "H")); bpc = pdf_to_int(pdf_dict_getsa(dict, "BitsPerComponent", "BPC")); if (bpc == 0) bpc = 8; imagemask = pdf_to_bool(pdf_dict_getsa(dict, "ImageMask", "IM")); interpolate = pdf_to_bool(pdf_dict_getsa(dict, "Interpolate", "I")); indexed = 0; usecolorkey = 0; if (imagemask) bpc = 1; if (w <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image width is zero (or less)"); if (h <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image height is zero (or less)"); if (bpc <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is zero (or less)"); if (bpc > 16) fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is too large: %d", bpc); if (w > (1 << 16)) fz_throw(ctx, FZ_ERROR_GENERIC, "image is too wide"); if (h > (1 << 16)) fz_throw(ctx, FZ_ERROR_GENERIC, "image is too high"); obj = pdf_dict_getsa(dict, "ColorSpace", "CS"); if (obj && !imagemask && !forcemask) { /* colorspace resource lookup is only done for inline images */ if (pdf_is_name(obj)) { res = pdf_dict_get(pdf_dict_gets(rdb, "ColorSpace"), obj); if (res) obj = res; } colorspace = pdf_load_colorspace(doc, obj); indexed = fz_colorspace_is_indexed(colorspace); n = colorspace->n; } else { n = 1; } obj = pdf_dict_getsa(dict, "Decode", "D"); if (obj) { for (i = 0; i < n * 2; i++) decode[i] = pdf_to_real(pdf_array_get(obj, i)); } else { float maxval = indexed ? (1 << bpc) - 1 : 1; for (i = 0; i < n * 2; i++) decode[i] = i & 1 ? maxval : 0; } obj = pdf_dict_getsa(dict, "SMask", "Mask"); if (pdf_is_dict(obj)) { /* Not allowed for inline images or soft masks */ if (cstm) fz_warn(ctx, "Ignoring invalid inline image soft mask"); else if (forcemask) fz_warn(ctx, "Ignoring recursive image soft mask"); else { mask = pdf_load_image_imp(doc, rdb, obj, NULL, 1); obj = pdf_dict_gets(obj, "Matte"); if (pdf_is_array(obj)) { usecolorkey = 1; for (i = 0; i < n; i++) colorkey[i] = pdf_to_real(pdf_array_get(obj, i)) * 255; } } } else if (pdf_is_array(obj)) { usecolorkey = 1; for (i = 0; i < n * 2; i++) { if (!pdf_is_int(pdf_array_get(obj, i))) { fz_warn(ctx, "invalid value in color key mask"); usecolorkey = 0; } colorkey[i] = pdf_to_int(pdf_array_get(obj, i)); } } /* Do we load from a ref, or do we load an inline stream? */ if (cstm == NULL) { /* Just load the compressed image data now and we can * decode it on demand. */ int num = pdf_to_num(dict); int gen = pdf_to_gen(dict); buffer = pdf_load_compressed_stream(doc, num, gen); image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, buffer, mask); } else { /* Inline stream */ stride = (w * n * bpc + 7) / 8; image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, NULL, mask); pdf_load_compressed_inline_image(doc, dict, stride * h, cstm, indexed, image); } } fz_catch(ctx) { fz_drop_colorspace(ctx, colorspace); fz_drop_image(ctx, mask); fz_drop_image(ctx, image); fz_rethrow(ctx); } return image; }
static fz_outline * pdf_load_outline_imp(fz_context *ctx, pdf_document *doc, pdf_obj *dict) { fz_outline *node, **prev, *first; pdf_obj *obj; pdf_obj *odict = dict; fz_var(dict); fz_var(first); fz_try(ctx) { first = NULL; prev = &first; while (dict && pdf_is_dict(ctx, dict)) { if (pdf_mark_obj(ctx, dict)) break; node = fz_new_outline(ctx); *prev = node; prev = &node->next; obj = pdf_dict_get(ctx, dict, PDF_NAME_Title); if (obj) node->title = pdf_to_utf8(ctx, obj); if ((obj = pdf_dict_get(ctx, dict, PDF_NAME_Dest)) != NULL) node->uri = pdf_parse_link_dest(ctx, doc, obj); else if ((obj = pdf_dict_get(ctx, dict, PDF_NAME_A)) != NULL) node->uri = pdf_parse_link_action(ctx, doc, obj); else node->uri = NULL; if (node->uri) node->page = pdf_resolve_link(ctx, doc, node->uri, NULL, NULL); else node->page = -1; obj = pdf_dict_get(ctx, dict, PDF_NAME_First); if (obj) { node->down = pdf_load_outline_imp(ctx, doc, obj); obj = pdf_dict_get(ctx, dict, PDF_NAME_Count); if (pdf_to_int(ctx, obj) > 0) node->is_open = 1; } dict = pdf_dict_get(ctx, dict, PDF_NAME_Next); } } fz_always(ctx) { for (dict = odict; dict && pdf_obj_marked(ctx, dict); dict = pdf_dict_get(ctx, dict, PDF_NAME_Next)) pdf_unmark_obj(ctx, dict); } fz_catch(ctx) { fz_drop_outline(ctx, first); fz_rethrow(ctx); } return first; }
pdf_font_desc * pdf_load_type3_font(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_try(ctx) { obj = pdf_dict_gets(dict, "Name"); if (pdf_is_name(obj)) fz_strlcpy(buf, pdf_to_name(obj), sizeof buf); else sprintf(buf, "Unnamed-T3"); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_gets(dict, "FontMatrix"); matrix = pdf_to_matrix(ctx, obj); obj = pdf_dict_gets(dict, "FontBBox"); bbox = pdf_to_rect(ctx, obj); bbox = fz_transform_rect(matrix, bbox); fontdesc->font = fz_new_type3_font(ctx, buf, matrix); fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* SumatraPDF: expose Type3 FontDescriptor flags */ fontdesc->flags = pdf_to_int(pdf_dict_gets(pdf_dict_gets(dict, "FontDescriptor"), "Flags")); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_gets(dict, "Encoding"); if (!encoding) { fz_throw(ctx, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); /* SumatraPDF: trying to match Adobe Reader's behavior */ if (!(fontdesc->flags & PDF_FD_SYMBOLIC) && fontdesc->cid_to_ucs_len >= 128) for (i = 32; i < 128; i++) if (fontdesc->cid_to_ucs[i] == '?' || fontdesc->cid_to_ucs[i] == '\0') fontdesc->cid_to_ucs[i] = i; /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1966 */ if (first >= 256 && last - first < 256) { fz_warn(ctx, "ignoring out-of-bound values for FirstChar/LastChar: %d/%d", first, last); last -= first; first = 0; } if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_gets(dict, "Widths"); if (!widths) { fz_throw(ctx, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(pdf_array_get(widths, i - first)); w = fontdesc->font->t3matrix.a * w * 1000; fontdesc->font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ fontdesc->font->t3freeres = pdf_t3_free_resources; fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources"); if (!fontdesc->font->t3resources) fontdesc->font->t3resources = rdb; if (fontdesc->font->t3resources) pdf_keep_obj(fontdesc->font->t3resources); if (!fontdesc->font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); fontdesc->font->t3doc = xref; fontdesc->font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_gets(dict, "CharProcs"); if (!charprocs) { fz_throw(ctx, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { /* SumatraPDF: don't reject fonts with few broken glyphs */ fz_try(ctx) { obj = pdf_dict_gets(charprocs, estrings[i]); if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))) { fontdesc->font->t3procs[i] = pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); fontdesc->size += fontdesc->font->t3procs[i]->cap; fontdesc->size += 0; // TODO: display list size calculation } } fz_catch(ctx) { fz_warn(ctx, "failed to get data for type 3 glyph '%s'", estrings[i]); } } } } fz_catch(ctx) { if (fontdesc) pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static fz_image * pdf_load_jpx(pdf_document *doc, pdf_obj *dict, int forcemask) { fz_buffer *buf = NULL; fz_colorspace *colorspace = NULL; fz_pixmap *img = NULL; pdf_obj *obj; fz_context *ctx = doc->ctx; int indexed = 0; fz_image *mask = NULL; fz_var(img); fz_var(buf); fz_var(colorspace); fz_var(mask); buf = pdf_load_stream(doc, pdf_to_num(dict), pdf_to_gen(dict)); /* FIXME: We can't handle decode arrays for indexed images currently */ fz_try(ctx) { obj = pdf_dict_gets(dict, "ColorSpace"); if (obj) { colorspace = pdf_load_colorspace(doc, obj); indexed = fz_colorspace_is_indexed(colorspace); } img = fz_load_jpx(ctx, buf->data, buf->len, colorspace, indexed); obj = pdf_dict_getsa(dict, "SMask", "Mask"); if (pdf_is_dict(obj)) { if (forcemask) fz_warn(ctx, "Ignoring recursive JPX soft mask"); else mask = pdf_load_image_imp(doc, NULL, obj, NULL, 1); } obj = pdf_dict_getsa(dict, "Decode", "D"); if (obj && !indexed) { float decode[FZ_MAX_COLORS * 2]; int i; for (i = 0; i < img->n * 2; i++) decode[i] = pdf_to_real(pdf_array_get(obj, i)); fz_decode_tile(img, decode); } } fz_always(ctx) { fz_drop_colorspace(ctx, colorspace); fz_drop_buffer(ctx, buf); } fz_catch(ctx) { fz_drop_pixmap(ctx, img); fz_rethrow(ctx); } return fz_new_image_from_pixmap(ctx, img, mask); }
static fz_image * pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask) { fz_stream *stm = NULL; fz_image *image = NULL; pdf_obj *obj, *res; int w, h, bpc, n; int imagemask; int interpolate; int indexed; fz_image *mask = NULL; /* explicit mask/soft mask image */ int usecolorkey = 0; fz_colorspace *colorspace = NULL; float decode[FZ_MAX_COLORS * 2]; int colorkey[FZ_MAX_COLORS * 2]; int i; fz_context *ctx = xref->ctx; fz_var(stm); fz_var(mask); fz_var(image); fz_var(colorspace); fz_try(ctx) { /* special case for JPEG2000 images */ if (pdf_is_jpx_image(ctx, dict)) { image = pdf_load_jpx(xref, dict, forcemask); if (forcemask) { fz_pixmap *mask_pixmap; if (image->n != 2) { /* SumatraPDF: ignore invalid JPX softmasks */ fz_warn(ctx, "soft mask must be grayscale"); mask_pixmap = fz_new_pixmap(ctx, NULL, image->tile->w, image->tile->h); fz_clear_pixmap_with_value(ctx, mask_pixmap, 255); } else mask_pixmap = fz_alpha_from_gray(ctx, image->tile, 1); fz_drop_pixmap(ctx, image->tile); image->tile = mask_pixmap; } break; /* Out of fz_try */ } w = pdf_to_int(pdf_dict_getsa(dict, "Width", "W")); h = pdf_to_int(pdf_dict_getsa(dict, "Height", "H")); bpc = pdf_to_int(pdf_dict_getsa(dict, "BitsPerComponent", "BPC")); if (bpc == 0) bpc = 8; imagemask = pdf_to_bool(pdf_dict_getsa(dict, "ImageMask", "IM")); interpolate = pdf_to_bool(pdf_dict_getsa(dict, "Interpolate", "I")); indexed = 0; usecolorkey = 0; mask = NULL; if (imagemask) bpc = 1; if (w <= 0) fz_throw(ctx, "image width is zero (or less)"); if (h <= 0) fz_throw(ctx, "image height is zero (or less)"); if (bpc <= 0) fz_throw(ctx, "image depth is zero (or less)"); if (bpc > 16) fz_throw(ctx, "image depth is too large: %d", bpc); if (w > (1 << 16)) fz_throw(ctx, "image is too wide"); if (h > (1 << 16)) fz_throw(ctx, "image is too high"); obj = pdf_dict_getsa(dict, "ColorSpace", "CS"); if (obj && !imagemask && !forcemask) { /* colorspace resource lookup is only done for inline images */ if (pdf_is_name(obj)) { res = pdf_dict_get(pdf_dict_gets(rdb, "ColorSpace"), obj); if (res) obj = res; } colorspace = pdf_load_colorspace(xref, obj); if (!strcmp(colorspace->name, "Indexed")) indexed = 1; n = colorspace->n; } else { n = 1; } obj = pdf_dict_getsa(dict, "Decode", "D"); if (obj) { for (i = 0; i < n * 2; i++) decode[i] = pdf_to_real(pdf_array_get(obj, i)); } else { float maxval = indexed ? (1 << bpc) - 1 : 1; for (i = 0; i < n * 2; i++) decode[i] = i & 1 ? maxval : 0; } obj = pdf_dict_getsa(dict, "SMask", "Mask"); if (pdf_is_dict(obj)) { /* Not allowed for inline images or soft masks */ if (cstm) fz_warn(ctx, "Ignoring invalid inline image soft mask"); else if (forcemask) fz_warn(ctx, "Ignoring recursive image soft mask"); else mask = pdf_load_image_imp(xref, rdb, obj, NULL, 1); } else if (pdf_is_array(obj)) { usecolorkey = 1; for (i = 0; i < n * 2; i++) { if (!pdf_is_int(pdf_array_get(obj, i))) { fz_warn(ctx, "invalid value in color key mask"); usecolorkey = 0; } colorkey[i] = pdf_to_int(pdf_array_get(obj, i)); } } /* Now, do we load a ref, or do we load the actual thing? */ if (!cstm) { /* Just load the compressed image data now and we can * decode it on demand. */ int num = pdf_to_num(dict); int gen = pdf_to_gen(dict); fz_compressed_buffer *buffer = pdf_load_compressed_stream(xref, num, gen); image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, buffer, mask); break; /* Out of fz_try */ } /* We need to decompress the image now */ if (cstm) { int stride = (w * n * bpc + 7) / 8; stm = pdf_open_inline_stream(xref, dict, stride * h, cstm, NULL); } else { stm = pdf_open_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); } image = fz_new_image(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, usecolorkey ? colorkey : NULL, NULL, mask); image->tile = fz_decomp_image_from_stream(ctx, stm, image, cstm != NULL, indexed, 0, 0); } fz_catch(ctx) { /* SumatraPDF: fix memory leak */ if (!image) fz_drop_colorspace(ctx, colorspace); else fz_drop_image(ctx, image); fz_rethrow(ctx); } /* cf. http://bugs.ghostscript.com/show_bug.cgi?id=693517 */ fz_try(ctx) { obj = pdf_dict_getp(dict, "SMask/Matte"); if (pdf_is_array(obj) && image->mask) { assert(!image->usecolorkey); image->usecolorkey = 2; for (i = 0; i < n; i++) image->colorkey[i] = pdf_to_int(pdf_array_get(obj, i)); } } fz_catch(ctx) { fz_drop_image(ctx, image); fz_rethrow(ctx); } return image; }
static pdf_font_desc * pdf_load_simple_font_by_name(pdf_document *doc, pdf_obj *dict, char *basefont) { pdf_obj *descriptor; pdf_obj *encoding; pdf_obj *widths; unsigned short *etable = NULL; pdf_font_desc *fontdesc = NULL; char *subtype; FT_Face face; FT_CharMap cmap; int symbolic; int kind; char *estrings[256]; char ebuffer[256][32]; int i, k, n; int fterr; int has_lock = 0; fz_context *ctx = doc->ctx; fz_var(fontdesc); fz_var(etable); fz_var(has_lock); /* Load font file */ fz_try(ctx) { fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); /* cf. http://bugs.ghostscript.com/show_bug.cgi?id=691690 */ fz_try(ctx) { if (descriptor) pdf_load_font_descriptor(fontdesc, doc, descriptor, NULL, basefont, 0, pdf_dict_gets(dict, "Encoding") != NULL); else pdf_load_builtin_font(ctx, fontdesc, basefont); /* cf. http://bugs.ghostscript.com/show_bug.cgi?id=691690 */ } fz_catch(ctx) { if (!(fontdesc->flags & PDF_FD_SYMBOLIC)) fz_rethrow(ctx); fz_warn(ctx, "using bullet-substitute font for '%s' (%d %d R)", basefont, pdf_to_num(dict), pdf_to_gen(dict)); pdf_drop_font(ctx, fontdesc); fontdesc = NULL; fontdesc = pdf_load_bullet_font(ctx); face = fontdesc->font->ft_face; kind = ft_kind(face); goto skip_encoding; } /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ if (descriptor && pdf_is_string(pdf_dict_gets(descriptor, "FontName")) && !pdf_dict_gets(dict, "ToUnicode") && !strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") && pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4) { char *cp936fonts[] = { "\xCB\xCE\xCC\xE5", "SimSun,Regular", "\xBA\xDA\xCC\xE5", "SimHei,Regular", "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", "\xC1\xA5\xCA\xE9", "SimLi,Regular", NULL }; for (i = 0; cp936fonts[i]; i += 2) if (!strcmp(basefont, cp936fonts[i])) break; if (cp936fonts[i]) { fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); pdf_drop_font(ctx, fontdesc); fontdesc = NULL; fontdesc = pdf_new_font_desc(ctx); pdf_load_font_descriptor(fontdesc, doc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0, 1); fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); face = fontdesc->font->ft_face; kind = ft_kind(face); goto skip_encoding; } } face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ symbolic = fontdesc->flags & 4; if (face->num_charmaps > 0) cmap = face->charmaps[0]; else cmap = NULL; for (i = 0; i < face->num_charmaps; i++) { FT_CharMap test = face->charmaps[i]; if (kind == TYPE1) { if (test->platform_id == 7) cmap = test; } if (kind == TRUETYPE) { if (test->platform_id == 1 && test->encoding_id == 0) cmap = test; if (test->platform_id == 3 && test->encoding_id == 1) cmap = test; if (symbolic && test->platform_id == 3 && test->encoding_id == 0) cmap = test; } } if (cmap) { fterr = FT_Set_Charmap(face, cmap); if (fterr) fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); } else fz_warn(ctx, "freetype could not find any cmaps"); etable = fz_malloc_array(ctx, 256, sizeof(unsigned short)); fontdesc->size += 256 * sizeof(unsigned short); for (i = 0; i < 256; i++) { estrings[i] = NULL; etable[i] = 0; } encoding = pdf_dict_gets(dict, "Encoding"); if (encoding) { if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); else if (!fontdesc->is_embedded && !symbolic) pdf_load_encoding(estrings, "StandardEncoding"); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } } /* start with the builtin encoding */ for (i = 0; i < 256; i++) etable[i] = ft_char_index(face, i); fz_lock(ctx, FZ_LOCK_FREETYPE); has_lock = 1; /* built-in and substitute fonts may be a different type than what the document expects */ subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype")); if (!strcmp(subtype, "Type1")) kind = TYPE1; else if (!strcmp(subtype, "MMType1")) kind = TYPE1; else if (!strcmp(subtype, "TrueType")) kind = TRUETYPE; else if (!strcmp(subtype, "CIDFontType0")) kind = TYPE1; else if (!strcmp(subtype, "CIDFontType2")) kind = TRUETYPE; /* encode by glyph name where we can */ if (kind == TYPE1) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) { int aglcode = pdf_lookup_agl(estrings[i]); const char **dupnames = pdf_lookup_agl_duplicates(aglcode); while (*dupnames) { etable[i] = FT_Get_Name_Index(face, (char*)*dupnames); if (etable[i]) break; dupnames++; } } } } } /* encode by glyph name where we can */ /* SumatraPDF: don't encode name-less TrueType fonts just by name (required for Windows 8 fonts) */ if (kind == TRUETYPE || (ft_kind(face) == TRUETYPE && !FT_HAS_GLYPH_NAMES(face))) { /* Unicode cmap */ if (!symbolic && face->charmap && face->charmap->platform_id == 3) { for (i = 0; i < 256; i++) { if (estrings[i]) { int aglcode = pdf_lookup_agl(estrings[i]); if (!aglcode) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, aglcode); /* SumatraPDF: prefer non-zero gids */ if (!etable[i]) etable[i] = ft_char_index(face, i); } } } /* MacRoman cmap */ else if (!symbolic && face->charmap && face->charmap->platform_id == 1) { /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2123 */ if (pdf_is_name(encoding) && !strcmp(pdf_to_name(encoding), "MacExpertEncoding")) { if (FT_HAS_GLYPH_NAMES(face)) for (i = 0; i < 256; i++) estrings[i] = NULL; } else for (i = 0; i < 256; i++) { if (estrings[i]) { k = lookup_mre_code(estrings[i]); if (k <= 0) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, k); /* SumatraPDF: prefer non-zero gids */ if (!etable[i]) etable[i] = ft_char_index(face, i); } } } /* Symbolic cmap */ else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) etable[i] = ft_char_index(face, i); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1872 */ if (etable[i] == 0 && symbolic) { int aglcode = pdf_lookup_agl(estrings[i]); if (aglcode) etable[i] = ft_char_index(face, aglcode); } } } } } /* try to reverse the glyph names from the builtin encoding */ for (i = 0; i < 256; i++) { if (etable[i] && !estrings[i]) { if (FT_HAS_GLYPH_NAMES(face)) { fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); if (fterr) fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); if (ebuffer[i][0]) estrings[i] = ebuffer[i]; } else { estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */ } } } /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ if (kind == TYPE1 && symbolic) { for (i = 0; i < 256; i++) if (etable[i] && estrings[i] && !pdf_lookup_agl(estrings[i])) estrings[i] = (char*) pdf_standard[i]; } fz_unlock(ctx, FZ_LOCK_FREETYPE); has_lock = 0; fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); fontdesc->cid_to_gid_len = 256; fontdesc->cid_to_gid = etable; fz_try(ctx) { pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); } fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); fz_warn(ctx, "cannot load ToUnicode CMap"); } skip_encoding: /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); widths = pdf_dict_gets(dict, "Widths"); if (widths) { int first, last; first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; for (i = 0; i < last - first + 1; i++) { int wid = pdf_to_int(pdf_array_get(widths, i)); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1616 */ if (!wid && i >= pdf_array_len(widths)) { fz_warn(ctx, "font width missing for glyph %d (%d %d R)", i + first, pdf_to_num(dict), pdf_to_gen(dict)); FT_Set_Char_Size(face, 1000, 1000, 72, 72); wid = ft_width(ctx, fontdesc, i + first); } pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); } } else { fz_lock(ctx, FZ_LOCK_FREETYPE); has_lock = 1; fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); if (fterr) fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr)); for (i = 0; i < 256; i++) { pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); } fz_unlock(ctx, FZ_LOCK_FREETYPE); has_lock = 0; } pdf_end_hmtx(ctx, fontdesc); } fz_catch(ctx) { if (has_lock) fz_unlock(ctx, FZ_LOCK_FREETYPE); if (fontdesc && etable != fontdesc->cid_to_gid) fz_free(ctx, etable); pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static pdf_image * pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask) { fz_stream *stm = NULL; pdf_image *image = NULL; pdf_obj *obj, *res; int w, h, bpc, n; int imagemask; int interpolate; int indexed; fz_image *mask = NULL; /* explicit mask/softmask image */ int usecolorkey; int i; fz_context *ctx = xref->ctx; fz_var(stm); fz_var(mask); image = fz_malloc_struct(ctx, pdf_image); fz_try(ctx) { /* special case for JPEG2000 images */ if (pdf_is_jpx_image(ctx, dict)) { // pdf_load_jpx(xref, dict, image); // // if (forcemask) // { // fz_pixmap *mask_pixmap; // if (image->n != 2) // fz_throw(ctx, "softmask must be grayscale"); // mask_pixmap = fz_alpha_from_gray(ctx, image->tile, 1); // fz_drop_pixmap(ctx, image->tile); // image->tile = mask_pixmap; // } break; /* Out of fz_try */ } w = pdf_to_int(pdf_dict_getsa(dict, "Width", "W")); h = pdf_to_int(pdf_dict_getsa(dict, "Height", "H")); bpc = pdf_to_int(pdf_dict_getsa(dict, "BitsPerComponent", "BPC")); imagemask = pdf_to_bool(pdf_dict_getsa(dict, "ImageMask", "IM")); interpolate = pdf_to_bool(pdf_dict_getsa(dict, "Interpolate", "I")); indexed = 0; usecolorkey = 0; mask = NULL; if (imagemask) bpc = 1; if (w <= 0) fz_throw(ctx, "image width is zero (or less)"); if (h <= 0) fz_throw(ctx, "image height is zero (or less)"); if (bpc <= 0) fz_throw(ctx, "image depth is zero (or less)"); if (bpc > 16) fz_throw(ctx, "image depth is too large: %d", bpc); if (w > (1 << 16)) fz_throw(ctx, "image is too wide"); if (h > (1 << 16)) fz_throw(ctx, "image is too high"); obj = pdf_dict_getsa(dict, "ColorSpace", "CS"); if (obj && !imagemask && !forcemask) { /* colorspace resource lookup is only done for inline images */ if (pdf_is_name(obj)) { res = pdf_dict_get(pdf_dict_gets(rdb, "ColorSpace"), obj); if (res) obj = res; } image->base.colorspace = pdf_load_colorspace(xref, obj); if (!strcmp(image->base.colorspace->name, "Indexed")) indexed = 1; n = image->base.colorspace->n; } else { n = 1; } obj = pdf_dict_getsa(dict, "Decode", "D"); if (obj) { for (i = 0; i < n * 2; i++) image->decode[i] = pdf_to_real(pdf_array_get(obj, i)); } else { float maxval = indexed ? (1 << bpc) - 1 : 1; for (i = 0; i < n * 2; i++) image->decode[i] = i & 1 ? maxval : 0; } obj = pdf_dict_getsa(dict, "SMask", "Mask"); if (pdf_is_dict(obj)) { /* Not allowed for inline images */ if (!cstm) { mask = (fz_image *)pdf_load_image_imp(xref, rdb, obj, NULL, 1); } } else if (pdf_is_array(obj)) { usecolorkey = 1; for (i = 0; i < n * 2; i++) { if (!pdf_is_int(pdf_array_get(obj, i))) { fz_warn(ctx, "invalid value in color key mask"); usecolorkey = 0; } image->colorkey[i] = pdf_to_int(pdf_array_get(obj, i)); } } /* Now, do we load a ref, or do we load the actual thing? */ image->params.type = PDF_IMAGE_RAW; FZ_INIT_STORABLE(&image->base, 1, pdf_free_image); image->base.get_pixmap = pdf_image_get_pixmap; image->base.w = w; image->base.h = h; image->n = n; image->bpc = bpc; image->interpolate = interpolate; image->imagemask = imagemask; image->usecolorkey = usecolorkey; image->base.mask = mask; image->params.colorspace = image->base.colorspace; /* Uses the same ref as for the base one */ if (!indexed && !cstm) { /* Just load the compressed image data now and we can * decode it on demand. */ int num = pdf_to_num(dict); int gen = pdf_to_gen(dict); image->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &image->params); break; /* Out of fz_try */ } /* We need to decompress the image now */ if (cstm) { int stride = (w * image->n * image->bpc + 7) / 8; stm = pdf_open_inline_stream(xref, dict, stride * h, cstm, NULL); } else { stm = pdf_open_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); } image->tile = decomp_image_from_stream(ctx, stm, image, cstm != NULL, indexed, 1, 0); } fz_catch(ctx) { pdf_free_image(ctx, (fz_storable *) image); fz_rethrow(ctx); } return image; }
static fz_image * pdf_load_image_imp(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask) { fz_image *image = NULL; pdf_obj *obj, *res; int w, h, bpc, n; int imagemask; int interpolate; int indexed; fz_image *mask = NULL; /* explicit mask/soft mask image */ int use_colorkey = 0; fz_colorspace *colorspace = NULL; float decode[FZ_MAX_COLORS * 2]; int colorkey[FZ_MAX_COLORS * 2]; int stride; int i; fz_compressed_buffer *buffer; /* special case for JPEG2000 images */ if (pdf_is_jpx_image(ctx, dict)) return pdf_load_jpx_imp(ctx, doc, rdb, dict, cstm, forcemask); w = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Width), PDF_NAME(W))); h = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Height), PDF_NAME(H))); bpc = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(BitsPerComponent), PDF_NAME(BPC))); if (bpc == 0) bpc = 8; imagemask = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(ImageMask), PDF_NAME(IM))); interpolate = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Interpolate), PDF_NAME(I))); indexed = 0; use_colorkey = 0; if (imagemask) bpc = 1; if (w <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image width is zero (or less)"); if (h <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image height is zero (or less)"); if (bpc <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is zero (or less)"); if (bpc > 16) fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is too large: %d", bpc); if (w > (1 << 16)) fz_throw(ctx, FZ_ERROR_GENERIC, "image is too wide"); if (h > (1 << 16)) fz_throw(ctx, FZ_ERROR_GENERIC, "image is too high"); fz_var(mask); fz_var(image); fz_var(colorspace); fz_try(ctx) { obj = pdf_dict_geta(ctx, dict, PDF_NAME(ColorSpace), PDF_NAME(CS)); if (obj && !imagemask && !forcemask) { /* colorspace resource lookup is only done for inline images */ if (pdf_is_name(ctx, obj)) { res = pdf_dict_get(ctx, pdf_dict_get(ctx, rdb, PDF_NAME(ColorSpace)), obj); if (res) obj = res; } colorspace = pdf_load_colorspace(ctx, obj); indexed = fz_colorspace_is_indexed(ctx, colorspace); n = fz_colorspace_n(ctx, colorspace); } else { n = 1; } obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D)); if (obj) { for (i = 0; i < n * 2; i++) decode[i] = pdf_array_get_real(ctx, obj, i); } else if (fz_colorspace_is_lab(ctx, colorspace) || fz_colorspace_is_lab_icc(ctx, colorspace)) { decode[0] = 0; decode[1] = 100; decode[2] = -128; decode[3] = 127; decode[4] = -128; decode[5] = 127; } else { float maxval = indexed ? (1 << bpc) - 1 : 1; for (i = 0; i < n * 2; i++) decode[i] = i & 1 ? maxval : 0; } obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask)); if (pdf_is_dict(ctx, obj)) { /* Not allowed for inline images or soft masks */ if (cstm) fz_warn(ctx, "Ignoring invalid inline image soft mask"); else if (forcemask) fz_warn(ctx, "Ignoring recursive image soft mask"); else { mask = pdf_load_image_imp(ctx, doc, rdb, obj, NULL, 1); obj = pdf_dict_get(ctx, obj, PDF_NAME(Matte)); if (pdf_is_array(ctx, obj)) { use_colorkey = 1; for (i = 0; i < n; i++) colorkey[i] = pdf_array_get_real(ctx, obj, i) * 255; } } } else if (pdf_is_array(ctx, obj)) { use_colorkey = 1; for (i = 0; i < n * 2; i++) { if (!pdf_is_int(ctx, pdf_array_get(ctx, obj, i))) { fz_warn(ctx, "invalid value in color key mask"); use_colorkey = 0; } colorkey[i] = pdf_array_get_int(ctx, obj, i); } } /* Do we load from a ref, or do we load an inline stream? */ if (cstm == NULL) { /* Just load the compressed image data now and we can decode it on demand. */ buffer = pdf_load_compressed_stream(ctx, doc, pdf_to_num(ctx, dict)); image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? colorkey : NULL, buffer, mask); image->invert_cmyk_jpeg = 0; } else { /* Inline stream */ stride = (w * n * bpc + 7) / 8; image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? colorkey : NULL, NULL, mask); image->invert_cmyk_jpeg = 0; pdf_load_compressed_inline_image(ctx, doc, dict, stride * h, cstm, indexed, (fz_compressed_image *)image); } } fz_always(ctx) { fz_drop_colorspace(ctx, colorspace); fz_drop_image(ctx, mask); } fz_catch(ctx) { fz_drop_image(ctx, image); fz_rethrow(ctx); } return image; }
static fz_image * pdf_load_jpx(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int forcemask) { fz_buffer *buf = NULL; fz_colorspace *colorspace = NULL; fz_pixmap *pix = NULL; pdf_obj *obj; fz_image *mask = NULL; fz_image *img = NULL; fz_var(pix); fz_var(buf); fz_var(colorspace); fz_var(mask); buf = pdf_load_stream(ctx, dict); /* FIXME: We can't handle decode arrays for indexed images currently */ fz_try(ctx) { unsigned char *data; size_t len; obj = pdf_dict_get(ctx, dict, PDF_NAME(ColorSpace)); if (obj) colorspace = pdf_load_colorspace(ctx, obj); len = fz_buffer_storage(ctx, buf, &data); pix = fz_load_jpx(ctx, data, len, colorspace); obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask)); if (pdf_is_dict(ctx, obj)) { if (forcemask) fz_warn(ctx, "Ignoring recursive JPX soft mask"); else mask = pdf_load_image_imp(ctx, doc, NULL, obj, NULL, 1); } obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D)); if (obj && !fz_colorspace_is_indexed(ctx, colorspace)) { float decode[FZ_MAX_COLORS * 2]; int i; for (i = 0; i < pix->n * 2; i++) decode[i] = pdf_array_get_real(ctx, obj, i); fz_decode_tile(ctx, pix, decode); } img = fz_new_image_from_pixmap(ctx, pix, mask); } fz_always(ctx) { fz_drop_image(ctx, mask); fz_drop_pixmap(ctx, pix); fz_drop_colorspace(ctx, colorspace); fz_drop_buffer(ctx, buf); } fz_catch(ctx) { fz_rethrow(ctx); } return img; }