/* * Load predefined CMap from system. */ pdf_cmap * pdf_load_system_cmap(fz_context *ctx, const char *cmap_name) { pdf_cmap *usecmap; pdf_cmap *cmap; cmap = pdf_load_builtin_cmap(ctx, cmap_name); if (!cmap) fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap_name); if (cmap->usecmap_name[0] && !cmap->usecmap) { usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name); if (!usecmap) fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s", cmap->usecmap_name); pdf_set_usecmap(ctx, cmap, usecmap); } return cmap; }
static pdf_font_desc * load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding); } else { fz_throw(ctx, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); } } pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode); /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static pdf_font_desc * pdf_load_simple_font(pdf_document *xref, pdf_obj *dict) { pdf_obj *descriptor; pdf_obj *encoding; pdf_obj *widths; unsigned short *etable = NULL; pdf_font_desc *fontdesc = NULL; char *subtype; FT_Face face; FT_CharMap cmap; int symbolic; int kind; char *basefont; char *estrings[256]; char ebuffer[256][32]; int i, k, n; int fterr; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_var(etable); basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); /* Load font file */ fz_try(ctx) { fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (descriptor) pdf_load_font_descriptor(fontdesc, xref, descriptor, NULL, basefont); else pdf_load_builtin_font(ctx, fontdesc, basefont); /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ if (descriptor && pdf_is_string(pdf_dict_gets(descriptor, "FontName")) && !pdf_dict_gets(dict, "ToUnicode") && !strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") && pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4) { char *cp936fonts[] = { "\xCB\xCE\xCC\xE5", "SimSun,Regular", "\xBA\xDA\xCC\xE5", "SimHei,Regular", "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", "\xC1\xA5\xCA\xE9", "SimLi,Regular", NULL }; for (i = 0; cp936fonts[i]; i += 2) if (!strcmp(basefont, cp936fonts[i])) break; if (cp936fonts[i]) { fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); pdf_drop_font(ctx, fontdesc); fontdesc = pdf_new_font_desc(ctx); pdf_load_font_descriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1]); fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); face = fontdesc->font->ft_face; kind = ft_kind(face); goto skip_encoding; } } face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ symbolic = fontdesc->flags & 4; if (face->num_charmaps > 0) cmap = face->charmaps[0]; else cmap = NULL; for (i = 0; i < face->num_charmaps; i++) { FT_CharMap test = face->charmaps[i]; if (kind == TYPE1) { if (test->platform_id == 7) cmap = test; } if (kind == TRUETYPE) { if (test->platform_id == 1 && test->encoding_id == 0) cmap = test; if (test->platform_id == 3 && test->encoding_id == 1) cmap = test; if (symbolic && test->platform_id == 3 && test->encoding_id == 0) cmap = test; } } if (cmap) { fterr = FT_Set_Charmap(face, cmap); if (fterr) fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); } else fz_warn(ctx, "freetype could not find any cmaps"); etable = fz_malloc_array(ctx, 256, sizeof(unsigned short)); fontdesc->size += 256 * sizeof(unsigned short); for (i = 0; i < 256; i++) { estrings[i] = NULL; etable[i] = 0; } encoding = pdf_dict_gets(dict, "Encoding"); if (encoding) { if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); else if (!fontdesc->is_embedded && !symbolic) pdf_load_encoding(estrings, "StandardEncoding"); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } } /* start with the builtin encoding */ for (i = 0; i < 256; i++) etable[i] = ft_char_index(face, i); fz_lock(ctx, FZ_LOCK_FREETYPE); /* built-in and substitute fonts may be a different type than what the document expects */ subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype")); if (!strcmp(subtype, "Type1")) kind = TYPE1; else if (!strcmp(subtype, "MMType1")) kind = TYPE1; else if (!strcmp(subtype, "TrueType")) kind = TRUETYPE; else if (!strcmp(subtype, "CIDFontType0")) kind = TYPE1; else if (!strcmp(subtype, "CIDFontType2")) kind = TRUETYPE; /* encode by glyph name where we can */ if (kind == TYPE1) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) { int aglcode = pdf_lookup_agl(estrings[i]); const char **dupnames = pdf_lookup_agl_duplicates(aglcode); while (*dupnames) { etable[i] = FT_Get_Name_Index(face, (char*)*dupnames); if (etable[i]) break; dupnames++; } } } } } /* encode by glyph name where we can */ if (kind == TRUETYPE) { /* Unicode cmap */ if (!symbolic && face->charmap && face->charmap->platform_id == 3) { for (i = 0; i < 256; i++) { if (estrings[i]) { int aglcode = pdf_lookup_agl(estrings[i]); if (!aglcode) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, aglcode); } } } /* MacRoman cmap */ else if (!symbolic && face->charmap && face->charmap->platform_id == 1) { for (i = 0; i < 256; i++) { if (estrings[i]) { k = lookup_mre_code(estrings[i]); if (k <= 0) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, k); } } } /* Symbolic cmap */ else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) etable[i] = ft_char_index(face, i); } } } } /* try to reverse the glyph names from the builtin encoding */ for (i = 0; i < 256; i++) { if (etable[i] && !estrings[i]) { if (FT_HAS_GLYPH_NAMES(face)) { fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); if (fterr) fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); if (ebuffer[i][0]) estrings[i] = ebuffer[i]; } else { estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */ } } } /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ if (kind == TYPE1 && symbolic) { for (i = 0; i < 256; i++) if (etable[i] && estrings[i] && !pdf_lookup_agl(estrings[i])) estrings[i] = (char*) pdf_standard[i]; } fz_unlock(ctx, FZ_LOCK_FREETYPE); fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); fontdesc->cid_to_gid_len = 256; fontdesc->cid_to_gid = etable; fz_try(ctx) { pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); } fz_catch(ctx) { fz_warn(ctx, "cannot load ToUnicode CMap"); } skip_encoding: /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); widths = pdf_dict_gets(dict, "Widths"); if (widths) { int first, last; first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; for (i = 0; i < last - first + 1; i++) { int wid = pdf_to_int(pdf_array_get(widths, i)); pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); } } else { fz_lock(ctx, FZ_LOCK_FREETYPE); fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); if (fterr) fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr)); for (i = 0; i < 256; i++) { pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); } fz_unlock(ctx, FZ_LOCK_FREETYPE); } pdf_end_hmtx(ctx, fontdesc); } fz_catch(ctx) { if (fontdesc && etable != fontdesc->cid_to_gid) fz_free(ctx, etable); pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
/* * Load CMap stream in PDF file */ fz_error pdf_load_embedded_cmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmobj) { fz_error error = fz_okay; fz_stream *file = NULL; pdf_cmap *cmap = NULL; pdf_cmap *usecmap; fz_obj *wmode; fz_obj *obj; if ((*cmapp = pdf_find_item(xref->store, pdf_drop_cmap, stmobj))) { pdf_keep_cmap(*cmapp); return fz_okay; } error = pdf_open_stream(&file, xref, fz_to_num(stmobj), fz_to_gen(stmobj)); if (error) { error = fz_rethrow(error, "cannot open cmap stream (%d %d R)", fz_to_num(stmobj), fz_to_gen(stmobj)); goto cleanup; } error = pdf_parse_cmap(&cmap, file); if (error) { error = fz_rethrow(error, "cannot parse cmap stream (%d %d R)", fz_to_num(stmobj), fz_to_gen(stmobj)); goto cleanup; } fz_close(file); wmode = fz_dict_gets(stmobj, "WMode"); if (fz_is_int(wmode)) pdf_set_wmode(cmap, fz_to_int(wmode)); obj = fz_dict_gets(stmobj, "UseCMap"); if (fz_is_name(obj)) { error = pdf_load_system_cmap(&usecmap, fz_to_name(obj)); if (error) { error = fz_rethrow(error, "cannot load system usecmap '%s'", fz_to_name(obj)); goto cleanup; } pdf_set_usecmap(cmap, usecmap); pdf_drop_cmap(usecmap); } else if (fz_is_indirect(obj)) { error = pdf_load_embedded_cmap(&usecmap, xref, obj); if (error) { error = fz_rethrow(error, "cannot load embedded usecmap (%d %d R)", fz_to_num(obj), fz_to_gen(obj)); goto cleanup; } pdf_set_usecmap(cmap, usecmap); pdf_drop_cmap(usecmap); } pdf_store_item(xref->store, pdf_keep_cmap, pdf_drop_cmap, stmobj, cmap); *cmapp = cmap; return fz_okay; cleanup: if (file) fz_close(file); if (cmap) pdf_drop_cmap(cmap); return error; /* already rethrown */ }
/* * Load CMap stream in PDF file */ pdf_cmap * pdf_load_embedded_cmap(pdf_document *doc, pdf_obj *stmobj) { fz_stream *file = NULL; pdf_cmap *cmap = NULL; pdf_cmap *usecmap; pdf_obj *wmode; pdf_obj *obj = NULL; fz_context *ctx = doc->ctx; int phase = 0; fz_var(phase); fz_var(obj); fz_var(file); fz_var(cmap); if (pdf_obj_marked(stmobj)) fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in embedded cmap"); if ((cmap = pdf_find_item(ctx, pdf_free_cmap_imp, stmobj)) != NULL) { return cmap; } fz_try(ctx) { file = pdf_open_stream(doc, pdf_to_num(stmobj), pdf_to_gen(stmobj)); phase = 1; cmap = pdf_load_cmap(ctx, file); phase = 2; fz_close(file); file = NULL; wmode = pdf_dict_gets(stmobj, "WMode"); if (pdf_is_int(wmode)) pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(wmode)); obj = pdf_dict_gets(stmobj, "UseCMap"); if (pdf_is_name(obj)) { usecmap = pdf_load_system_cmap(ctx, pdf_to_name(obj)); pdf_set_usecmap(ctx, cmap, usecmap); pdf_drop_cmap(ctx, usecmap); } else if (pdf_is_indirect(obj)) { phase = 3; pdf_mark_obj(obj); usecmap = pdf_load_embedded_cmap(doc, obj); pdf_unmark_obj(obj); phase = 4; pdf_set_usecmap(ctx, cmap, usecmap); pdf_drop_cmap(ctx, usecmap); } pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); } fz_catch(ctx) { if (file) fz_close(file); if (cmap) pdf_drop_cmap(ctx, cmap); if (phase < 1) fz_rethrow_message(ctx, "cannot open cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); else if (phase < 2) fz_rethrow_message(ctx, "cannot parse cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj)); else if (phase < 3) fz_rethrow_message(ctx, "cannot load system usecmap '%s'", pdf_to_name(obj)); else { if (phase == 3) pdf_unmark_obj(obj); fz_rethrow_message(ctx, "cannot load embedded usecmap (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj)); } } return cmap; }
void pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font, char **strings, char *collection, pdf_obj *cmapstm) { pdf_cmap *cmap; int cid; int ucsbuf[8]; int ucslen; int i; fz_context *ctx = doc->ctx; if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm))) { cmap = pdf_load_embedded_cmap(doc, cmapstm); font->to_unicode = pdf_new_cmap(ctx); for (i = 0; i < (strings ? 256 : 65536); i++) { cid = pdf_lookup_cmap(font->encoding, i); if (cid >= 0) { ucslen = pdf_lookup_cmap_full(cmap, i, ucsbuf); if (ucslen == 1) pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, ucsbuf[0]); if (ucslen > 1) pdf_map_one_to_many(ctx, font->to_unicode, cid, ucsbuf, ucslen); } } pdf_sort_cmap(ctx, font->to_unicode); pdf_drop_cmap(ctx, cmap); font->size += pdf_cmap_size(ctx, font->to_unicode); } else if (collection) { if (!strcmp(collection, "Adobe-CNS1")) font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); return; } if (strings) { /* TODO one-to-many mappings */ font->cid_to_ucs_len = 256; font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short)); font->size += 256 * sizeof(unsigned short); for (i = 0; i < 256; i++) { if (strings[i]) font->cid_to_ucs[i] = pdf_lookup_agl(strings[i]); else font->cid_to_ucs[i] = '?'; } } if (!font->to_unicode && !font->cid_to_ucs) { /* TODO: synthesize a ToUnicode if it's a freetype font with * cmap and/or post tables or if it has glyph names. */ } }
fz_error pdf_load_to_unicode(pdf_font_desc *font, pdf_xref *xref, char **strings, char *collection, fz_obj *cmapstm) { fz_error error = fz_okay; pdf_cmap *cmap; int cid; int ucsbuf[8]; int ucslen; int i; if (pdf_is_stream(xref, fz_to_num(cmapstm), fz_to_gen(cmapstm))) { error = pdf_load_embedded_cmap(&cmap, xref, cmapstm); if (error) return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_to_num(cmapstm), fz_to_gen(cmapstm)); font->to_unicode = pdf_new_cmap(); for (i = 0; i < (strings ? 256 : 65536); i++) { cid = pdf_lookup_cmap(font->encoding, i); if (cid >= 0) { ucslen = pdf_lookup_cmap_full(cmap, i, ucsbuf); if (ucslen == 1) pdf_map_range_to_range(font->to_unicode, cid, cid, ucsbuf[0]); if (ucslen > 1) pdf_map_one_to_many(font->to_unicode, cid, ucsbuf, ucslen); } } pdf_sort_cmap(font->to_unicode); pdf_drop_cmap(cmap); } else if (collection) { error = fz_okay; if (!strcmp(collection, "Adobe-CNS1")) error = pdf_load_system_cmap(&font->to_unicode, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) error = pdf_load_system_cmap(&font->to_unicode, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Korea1-UCS2"); if (error) return fz_rethrow(error, "cannot load ToUnicode system cmap %s-UCS2", collection); } if (strings) { /* TODO one-to-many mappings */ font->cid_to_ucs_len = 256; font->cid_to_ucs = fz_calloc(256, sizeof(unsigned short)); for (i = 0; i < 256; i++) { if (strings[i]) font->cid_to_ucs[i] = pdf_lookup_agl(strings[i]); else font->cid_to_ucs[i] = '?'; } } if (!font->to_unicode && !font->cid_to_ucs) { /* TODO: synthesize a ToUnicode if it's a freetype font with * cmap and/or post tables or if it has glyph names. */ } return fz_okay; }
/* * Load CMap stream in PDF file */ pdf_cmap * pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj) { fz_stream *file = NULL; pdf_cmap *cmap = NULL; pdf_cmap *usecmap = NULL; pdf_obj *obj; fz_var(file); fz_var(cmap); fz_var(usecmap); if (pdf_obj_marked(ctx, stmobj)) fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in embedded cmap"); if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL) return cmap; fz_try(ctx) { file = pdf_open_stream(ctx, stmobj); cmap = pdf_load_cmap(ctx, file); obj = pdf_dict_get(ctx, stmobj, PDF_NAME_WMode); if (pdf_is_int(ctx, obj)) pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj)); obj = pdf_dict_get(ctx, stmobj, PDF_NAME_UseCMap); if (pdf_is_name(ctx, obj)) { usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj)); pdf_set_usecmap(ctx, cmap, usecmap); } else if (pdf_is_indirect(ctx, obj)) { if (pdf_mark_obj(ctx, obj)) fz_throw(ctx, FZ_ERROR_GENERIC, "recursive CMap"); fz_try(ctx) usecmap = pdf_load_embedded_cmap(ctx, doc, obj); fz_always(ctx) pdf_unmark_obj(ctx, obj); fz_catch(ctx) fz_rethrow(ctx); pdf_set_usecmap(ctx, cmap, usecmap); } pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); } fz_always(ctx) { fz_drop_stream(ctx, file); pdf_drop_cmap(ctx, usecmap); } fz_catch(ctx) { pdf_drop_cmap(ctx, cmap); fz_rethrow(ctx); } return cmap; }
static pdf_font_desc * load_cid_font(pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = doc->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, doc, descriptor, collection, basefont, 1, 1); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(doc, encoding); } else { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE || /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1565 */ !strcmp(pdf_to_name(pdf_dict_gets(dict, "Subtype")), "CIDFontType2") || /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1997 */ pdf_is_indirect(pdf_dict_gets(dict, "CIDToGIDMap"))) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(doc, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2318 */ else if (!strcmp(collection, "Adobe-Identity") && fontdesc->font->ft_file) fontdesc->font->ft_substitute = 0; } } /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1961 */ fz_try(ctx) { pdf_load_to_unicode(doc, fontdesc, NULL, collection, to_unicode); } fz_catch(ctx) { fz_warn(ctx, "cannot load ToUnicode CMap"); } /* If we have an identity encoding, we're supposed to use the glyph ids directly. * If we only have a substitute font, that won't work. * Make a last ditch attempt by using * the ToUnicode table if it exists to map via the substitute font's cmap. */ if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute) { fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont); if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap) fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); } /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }