/* * Load predefined CMap from system. */ fz_error pdf_loadsystemcmap(pdf_cmap **cmapp, char *cmapname) { fz_error error; pdf_cmap *usecmap; pdf_cmap *cmap; int i; pdf_logfont("loading system cmap %s\n", cmapname); for (i = 0; pdf_cmaptable[i]; i++) { if (!strcmp(cmapname, pdf_cmaptable[i]->cmapname)) { cmap = pdf_cmaptable[i]; if (cmap->usecmapname[0] && !cmap->usecmap) { error = pdf_loadsystemcmap(&usecmap, cmap->usecmapname); if (error) return fz_rethrow(error, "could not load usecmap: %s", cmap->usecmapname); pdf_setusecmap(cmap, usecmap); } *cmapp = cmap; return fz_okay; } } return fz_throw("no builtin cmap file: %s", cmapname); }
/* * Load predefined CMap from system. */ fz_error pdf_loadsystemcmap(pdf_cmap **cmapp, char *cmapname) { fz_error error; fz_stream *stream; pdf_cmap *usecmap; pdf_cmap *cmap; unsigned char *data; unsigned int len; int i; pdf_logfont("loading system cmap %s\n", cmapname); error = pdf_getcmapbuffer(cmapname, &data, &len); if(error) return fz_rethrow(error, "no builtin cmap file: %s", cmapname); stream = fz_openrmemory(data, len); error = pdf_parsecmap(&cmap, stream); fz_dropstream(stream); if(error) return fz_rethrow(error, "cannot parse cmap data"); if (cmap->usecmapname[0] && !cmap->usecmap) { pdf_logfont("charmap depends on other charmap (%s), loading it\n", cmap->usecmapname); error = pdf_loadsystemcmap(&usecmap, cmap->usecmapname); if (error) return fz_rethrow(error, "could not load usecmap: %s", cmap->usecmapname); pdf_setusecmap(cmap, usecmap); } *cmapp = cmap; return fz_okay; }
fz_error pdf_loadtounicode(pdf_fontdesc *font, pdf_xref *xref, char **strings, char *collection, fz_obj *cmapstm) { fz_error error = fz_okay; pdf_cmap *cmap; int cid; int ucsbuf[8]; int ucslen; int i; if (pdf_isstream(xref, fz_tonum(cmapstm), fz_togen(cmapstm))) { pdf_logfont("tounicode embedded cmap\n"); error = pdf_loadembeddedcmap(&cmap, xref, cmapstm); if (error) return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_tonum(cmapstm), fz_togen(cmapstm)); font->tounicode = pdf_newcmap(); for (i = 0; i < (strings ? 256 : 65536); i++) { cid = pdf_lookupcmap(font->encoding, i); if (cid >= 0) { ucslen = pdf_lookupcmapfull(cmap, i, ucsbuf); if (ucslen == 1) pdf_maprangetorange(font->tounicode, cid, cid, ucsbuf[0]); if (ucslen > 1) pdf_maponetomany(font->tounicode, cid, ucsbuf, ucslen); } } pdf_sortcmap(font->tounicode); pdf_dropcmap(cmap); } else if (collection) { pdf_logfont("tounicode cid collection (%s)\n", collection); error = fz_okay; if (!strcmp(collection, "Adobe-CNS1")) error = pdf_loadsystemcmap(&font->tounicode, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) error = pdf_loadsystemcmap(&font->tounicode, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan2-UCS2"); /* where's this? */ else if (!strcmp(collection, "Adobe-Korea1")) error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Korea1-UCS2"); if (error) return fz_rethrow(error, "cannot load tounicode system cmap %s-UCS2", collection); } if (strings) { pdf_logfont("tounicode strings\n"); /* TODO one-to-many mappings */ font->ncidtoucs = 256; font->cidtoucs = fz_calloc(256, sizeof(unsigned short)); for (i = 0; i < 256; i++) { if (strings[i]) font->cidtoucs[i] = pdf_lookupagl(strings[i]); else font->cidtoucs[i] = '?'; } } if (!font->tounicode && !font->cidtoucs) { pdf_logfont("tounicode could not be loaded\n"); /* TODO: synthesize a ToUnicode if it's a freetype font with * cmap and/or post tables or if it has glyph names. */ } return fz_okay; }
static fz_error loadcidfont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict, fz_obj *encoding, fz_obj *tounicode) { fz_error error; fz_obj *widths; fz_obj *descriptor; pdf_fontdesc *fontdesc; FT_Face face; fz_bbox bbox; int kind; char collection[256]; char *basefont; int i, k, fterr; fz_obj *obj; int dw; /* Get font name and CID collection */ basefont = fz_toname(fz_dictgets(dict, "BaseFont")); { fz_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = fz_dictgets(dict, "CIDSystemInfo"); if (!cidinfo) return fz_throw("cid font is missing info"); obj = fz_dictgets(cidinfo, "Registry"); tmplen = MIN(sizeof tmpstr - 1, fz_tostrlen(obj)); memcpy(tmpstr, fz_tostrbuf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = fz_dictgets(cidinfo, "Ordering"); tmplen = MIN(sizeof tmpstr - 1, fz_tostrlen(obj)); memcpy(tmpstr, fz_tostrbuf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_newfontdesc(); pdf_logfont("load cid font (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), fontdesc); pdf_logfont("basefont %s\n", basefont); pdf_logfont("collection %s\n", collection); descriptor = fz_dictgets(dict, "FontDescriptor"); if (descriptor) error = pdf_loadfontdescriptor(fontdesc, xref, descriptor, collection, basefont); else error = fz_throw("syntaxerror: missing font descriptor"); if (error) goto cleanup; face = fontdesc->font->ftface; kind = ftkind(face); bbox.x0 = (face->bbox.xMin * 1000) / face->units_per_EM; bbox.y0 = (face->bbox.yMin * 1000) / face->units_per_EM; bbox.x1 = (face->bbox.xMax * 1000) / face->units_per_EM; bbox.y1 = (face->bbox.yMax * 1000) / face->units_per_EM; pdf_logfont("ft bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1); if (bbox.x0 == bbox.x1) fz_setfontbbox(fontdesc->font, -1000, -1000, 2000, 2000); else fz_setfontbbox(fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* Encoding */ error = fz_okay; if (fz_isname(encoding)) { pdf_logfont("encoding /%s\n", fz_toname(encoding)); if (!strcmp(fz_toname(encoding), "Identity-H")) fontdesc->encoding = pdf_newidentitycmap(0, 2); else if (!strcmp(fz_toname(encoding), "Identity-V")) fontdesc->encoding = pdf_newidentitycmap(1, 2); else error = pdf_loadsystemcmap(&fontdesc->encoding, fz_toname(encoding)); } else if (fz_isindirect(encoding)) { pdf_logfont("encoding %d %d R\n", fz_tonum(encoding), fz_togen(encoding)); error = pdf_loadembeddedcmap(&fontdesc->encoding, xref, encoding); } else { error = fz_throw("syntaxerror: font missing encoding"); } if (error) goto cleanup; pdf_setfontwmode(fontdesc, pdf_getwmode(fontdesc->encoding)); pdf_logfont("wmode %d\n", pdf_getwmode(fontdesc->encoding)); if (kind == TRUETYPE) { fz_obj *cidtogidmap; cidtogidmap = fz_dictgets(dict, "CIDToGIDMap"); if (fz_isindirect(cidtogidmap)) { fz_buffer *buf; pdf_logfont("cidtogidmap stream\n"); error = pdf_loadstream(&buf, xref, fz_tonum(cidtogidmap), fz_togen(cidtogidmap)); if (error) goto cleanup; fontdesc->ncidtogid = (buf->len) / 2; fontdesc->cidtogid = fz_malloc(fontdesc->ncidtogid * sizeof(unsigned short)); for (i = 0; i < fontdesc->ncidtogid; i++) fontdesc->cidtogid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_dropbuffer(buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ftsubstitute) { pdf_logfont("emulate ttf cidfont\n"); fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { error = fz_throw("fonterror: no unicode cmap when emulating CID font: %s", ft_errorstring(fterr)); goto cleanup; } if (!strcmp(collection, "Adobe-CNS1")) error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Korea1-UCS2"); else error = fz_okay; if (error) { error = fz_rethrow(error, "cannot load system cmap %s", collection); goto cleanup; } } } error = pdf_loadtounicode(fontdesc, xref, nil, collection, tounicode); if (error) goto cleanup; /* Horizontal */ dw = 1000; obj = fz_dictgets(dict, "DW"); if (obj) dw = fz_toint(obj); pdf_setdefaulthmtx(fontdesc, dw); widths = fz_dictgets(dict, "W"); if (widths) { int c0, c1, w; for (i = 0; i < fz_arraylen(widths); ) { c0 = fz_toint(fz_arrayget(widths, i)); obj = fz_arrayget(widths, i + 1); if (fz_isarray(obj)) { for (k = 0; k < fz_arraylen(obj); k++) { w = fz_toint(fz_arrayget(obj, k)); pdf_addhmtx(fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = fz_toint(obj); w = fz_toint(fz_arrayget(widths, i + 2)); pdf_addhmtx(fontdesc, c0, c1, w); i += 3; } } } pdf_endhmtx(fontdesc); /* Vertical */ if (pdf_getwmode(fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = fz_dictgets(dict, "DW2"); if (obj) { dw2y = fz_toint(fz_arrayget(obj, 0)); dw2w = fz_toint(fz_arrayget(obj, 1)); } pdf_setdefaultvmtx(fontdesc, dw2y, dw2w); widths = fz_dictgets(dict, "W2"); if (widths) { int c0, c1, w, x, y; for (i = 0; i < fz_arraylen(widths); ) { c0 = fz_toint(fz_arrayget(widths, i)); obj = fz_arrayget(widths, i + 1); if (fz_isarray(obj)) { for (k = 0; k < fz_arraylen(obj); k += 3) { w = fz_toint(fz_arrayget(obj, k + 0)); x = fz_toint(fz_arrayget(obj, k + 1)); y = fz_toint(fz_arrayget(obj, k + 2)); pdf_addvmtx(fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = fz_toint(obj); w = fz_toint(fz_arrayget(widths, i + 2)); x = fz_toint(fz_arrayget(widths, i + 3)); y = fz_toint(fz_arrayget(widths, i + 4)); pdf_addvmtx(fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_endvmtx(fontdesc); } pdf_logfont("}\n"); *fontdescp = fontdesc; return fz_okay; cleanup: pdf_dropfont(fontdesc); return fz_rethrow(error, "cannot load cid font (%d %d R)", fz_tonum(dict), fz_togen(dict)); }
/* * Load CMap stream in PDF file */ fz_error pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) { fz_error error = fz_okay; fz_obj *stmobj; fz_stream *file = nil; pdf_cmap *cmap = nil; pdf_cmap *usecmap; fz_obj *wmode; fz_obj *obj; if ((*cmapp = pdf_finditem(xref->store, PDF_KCMAP, stmref))) { pdf_keepcmap(*cmapp); return fz_okay; } pdf_logfont("load embedded cmap (%d %d R) {\n", fz_tonum(stmref), fz_togen(stmref)); stmobj = fz_resolveindirect(stmref); error = pdf_openstream(&file, xref, fz_tonum(stmref), fz_togen(stmref)); if (error) { error = fz_rethrow(error, "cannot open cmap stream"); goto cleanup; } error = pdf_parsecmap(&cmap, file); if (error) { error = fz_rethrow(error, "cannot parse cmap stream"); goto cleanup; } fz_dropstream(file); wmode = fz_dictgets(stmobj, "WMode"); if (fz_isint(wmode)) { pdf_logfont("wmode %d\n", wmode); pdf_setwmode(cmap, fz_toint(wmode)); } obj = fz_dictgets(stmobj, "UseCMap"); if (fz_isname(obj)) { pdf_logfont("usecmap /%s\n", fz_toname(obj)); error = pdf_loadsystemcmap(&usecmap, fz_toname(obj)); if (error) { error = fz_rethrow(error, "cannot load system usecmap '%s'", fz_toname(obj)); goto cleanup; } pdf_setusecmap(cmap, usecmap); pdf_dropcmap(usecmap); } else if (fz_isindirect(obj)) { pdf_logfont("usecmap (%d %d R)\n", fz_tonum(obj), fz_togen(obj)); error = pdf_loadembeddedcmap(&usecmap, xref, obj); if (error) { error = fz_rethrow(error, "cannot load embedded usecmap"); goto cleanup; } pdf_setusecmap(cmap, usecmap); pdf_dropcmap(usecmap); } pdf_logfont("}\n"); error = pdf_storeitem(xref->store, PDF_KCMAP, stmref, cmap); if (error) { error = fz_rethrow(error, "cannot store cmap resource"); goto cleanup; } *cmapp = cmap; return fz_okay; cleanup: if (file) fz_dropstream(file); if (cmap) pdf_dropcmap(cmap); return error; /* already rethrown */ }