/*
 * Parse the array form of a bfrange destination.
 *
 * Reads tokens until the closing ']' is seen. Every string token is cut
 * into 2-byte pieces, each piece is converted with codefromstring, and the
 * resulting sequence is registered as a one-to-many mapping for the
 * current source code. The source code starts at 'lo' and advances by one
 * per string token (including strings too short to yield a piece).
 *
 * 'hi' is accepted for symmetry with the caller but is not consulted here;
 * the number of array entries alone decides how many codes are mapped.
 *
 * Returns fz_okay when ']' is reached, or an error if lexing fails or a
 * token other than a string or ']' is encountered.
 */
static fz_error
parsebfrangearray(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
	char scratch[256];
	int ucs[256];
	pdf_token_e token;
	fz_error error;
	int nbytes;
	int nunits;
	int k;

	for (;;)
	{
		error = lexcmap(&token, file, scratch, sizeof scratch, &nbytes);
		if (error)
			return fz_rethrow(error, "syntaxerror in cmap");

		if (token == PDF_TCARRAY)
			return fz_okay;

		/* Note: does not handle [ /Name /Name ... ] */
		if (token != PDF_TSTRING)
			return fz_throw("expected string or ]");

		/* Number of complete 2-byte pieces in the string. */
		nunits = nbytes / 2;
		if (nunits != 0)
		{
			for (k = 0; k < nunits; k++)
				ucs[k] = codefromstring(scratch + k * 2, 2);

			pdf_maponetomany(cmap, lo, ucs, nunits);
		}

		/* Each array entry belongs to the next source code. */
		lo++;
	}
}
/*
 * Parse the body of a bfchar section.
 *
 * Repeatedly reads a pair of string tokens: the first is converted as a
 * whole into the source code, the second is cut into 2-byte pieces that
 * form the destination sequence. Each pair is registered with
 * pdf_maponetomany. Destination strings shorter than two bytes produce no
 * mapping.
 *
 * Returns fz_okay when the TENDBFCHAR token is read, or an error if lexing
 * fails or an unexpected token type is found.
 */
static fz_error
parsebfchar(pdf_cmap *cmap, fz_stream *file)
{
	char scratch[256];
	int ucs[256];
	pdf_token_e token;
	fz_error error;
	int nbytes;
	int code;
	int k;

	for (;;)
	{
		/* First token: source code string, or the end keyword. */
		error = lexcmap(&token, file, scratch, sizeof scratch, &nbytes);
		if (error)
			return fz_rethrow(error, "syntaxerror in cmap");

		if (token == TENDBFCHAR)
			return fz_okay;
		if (token != PDF_TSTRING)
			return fz_throw("expected string or endbfchar");

		code = codefromstring(scratch, nbytes);

		/* Second token: destination string. */
		error = lexcmap(&token, file, scratch, sizeof scratch, &nbytes);
		if (error)
			return fz_rethrow(error, "syntaxerror in cmap");

		/* Note: does not handle /dstName */
		if (token != PDF_TSTRING)
			return fz_throw("expected string");

		if (nbytes / 2 == 0)
			continue;

		for (k = 0; k < nbytes / 2; k++)
			ucs[k] = codefromstring(scratch + k * 2, 2);

		/* k now holds the number of pieces converted above. */
		pdf_maponetomany(cmap, code, ucs, k);
	}
}
/*
 * Populate the unicode lookup data of a font descriptor.
 *
 * Three sources are considered:
 *
 * 1. If 'cmapstm' refers to a stream, the embedded cmap is loaded and a new
 *    font->tounicode cmap is built: every code (0..255 when 'strings' is
 *    given, otherwise 0..65535) that font->encoding maps to a cid >= 0 is
 *    looked up in the embedded cmap and its result is stored under the cid.
 * 2. Otherwise, if 'collection' is given and names one of the known
 *    character collections, the matching "<collection>-UCS2" system cmap
 *    is loaded into font->tounicode.
 * 3. Independently of the above, if 'strings' is given, a 256 entry
 *    font->cidtoucs table is filled via pdf_lookupagl, using '?' for
 *    entries without a string.
 *
 * Returns fz_okay, or a rethrown error when the embedded or system cmap
 * could not be loaded. Ending up with neither table is only logged.
 */
fz_error
pdf_loadtounicode(pdf_fontdesc *font, pdf_xref *xref,
	char **strings, char *collection, fz_obj *cmapstm)
{
	/* Known character collections and their UCS2 system cmaps. */
	static const struct
	{
		char *collection;
		char *cmapname;
	} ucs2cmaps[] = {
		{ "Adobe-CNS1", "Adobe-CNS1-UCS2" },
		{ "Adobe-GB1", "Adobe-GB1-UCS2" },
		{ "Adobe-Japan1", "Adobe-Japan1-UCS2" },
		{ "Adobe-Japan2", "Adobe-Japan2-UCS2" }, /* where's this? */
		{ "Adobe-Korea1", "Adobe-Korea1-UCS2" },
	};

	fz_error error = fz_okay;
	pdf_cmap *embedded;
	int ucs[8];
	int nucs;
	int ncodes;
	int code;
	int cid;
	int k;

	if (pdf_isstream(xref, fz_tonum(cmapstm), fz_togen(cmapstm)))
	{
		pdf_logfont("tounicode embedded cmap\n");

		error = pdf_loadembeddedcmap(&embedded, xref, cmapstm);
		if (error)
			return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_tonum(cmapstm), fz_togen(cmapstm));

		font->tounicode = pdf_newcmap();

		/* Simple fonts (with glyph name strings) only have 256 codes. */
		ncodes = strings ? 256 : 65536;
		for (code = 0; code < ncodes; code++)
		{
			cid = pdf_lookupcmap(font->encoding, code);
			if (cid < 0)
				continue;

			nucs = pdf_lookupcmapfull(embedded, code, ucs);
			if (nucs == 1)
				pdf_maprangetorange(font->tounicode, cid, cid, ucs[0]);
			else if (nucs > 1)
				pdf_maponetomany(font->tounicode, cid, ucs, nucs);
		}

		pdf_sortcmap(font->tounicode);
		pdf_dropcmap(embedded);
	}
	else if (collection)
	{
		pdf_logfont("tounicode cid collection (%s)\n", collection);

		error = fz_okay;
		for (k = 0; k < (int)(sizeof ucs2cmaps / sizeof ucs2cmaps[0]); k++)
		{
			if (strcmp(collection, ucs2cmaps[k].collection) == 0)
			{
				error = pdf_loadsystemcmap(&font->tounicode, ucs2cmaps[k].cmapname);
				break;
			}
		}

		if (error)
			return fz_rethrow(error, "cannot load tounicode system cmap %s-UCS2", collection);
	}

	if (strings)
	{
		pdf_logfont("tounicode strings\n");

		/* TODO one-to-many mappings */

		font->ncidtoucs = 256;
		font->cidtoucs = fz_calloc(256, sizeof(unsigned short));

		for (k = 0; k < 256; k++)
			font->cidtoucs[k] = strings[k] ? pdf_lookupagl(strings[k]) : '?';
	}

	if (!font->tounicode && !font->cidtoucs)
	{
		pdf_logfont("tounicode could not be loaded\n");
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}

	return fz_okay;
}
/*
 * Parse the body of a bfrange section.
 *
 * Each entry consists of a low source code string, a high source code
 * string and a destination, which is either:
 *
 *  - a string of exactly two bytes: the whole range lo..hi is mapped with
 *    pdf_maprangetorange, starting at the decoded destination value;
 *  - a longer string: it is cut into 2-byte pieces and every code in
 *    lo..hi is mapped one-to-many to that sequence, where the last piece
 *    is incremented by one for each successive code;
 *  - an array: handed over to parsebfrangearray.
 *
 * Returns fz_okay when the TENDBFRANGE token is read, or an error if
 * lexing fails or an unexpected token type is found.
 */
static fz_error
parsebfrange(pdf_cmap *cmap, fz_stream *file)
{
	fz_error error;
	char buf[256];
	pdf_token_e tok;
	int len;
	int lo, hi, dst;

	while (1)
	{
		/* Low end of the source range, or the end keyword. */
		error = lexcmap(&tok, file, buf, sizeof buf, &len);
		if (error)
			return fz_rethrow(error, "syntaxerror in cmap");

		if (tok == TENDBFRANGE)
			return fz_okay;

		else if (tok != PDF_TSTRING)
			return fz_throw("expected string or endbfrange");

		lo = codefromstring(buf, len);

		/* High end of the source range. */
		error = lexcmap(&tok, file, buf, sizeof buf, &len);
		if (error)
			return fz_rethrow(error, "syntaxerror in cmap");
		if (tok != PDF_TSTRING)
			return fz_throw("expected string");

		hi = codefromstring(buf, len);

		/* Destination: string or array. */
		error = lexcmap(&tok, file, buf, sizeof buf, &len);
		if (error)
			return fz_rethrow(error, "syntaxerror in cmap");

		if (tok == PDF_TSTRING)
		{
			if (len == 2)
			{
				dst = codefromstring(buf, len);
				pdf_maprangetorange(cmap, lo, hi, dst);
			}
			else
			{
				int dststr[256];
				int n = len / 2;
				int i;

				/* n > 0 also keeps dststr[n-1] below in bounds. */
				if (n > 0)
				{
					for (i = 0; i < n; i++)
						dststr[i] = codefromstring(buf + i * 2, 2);

					while (lo <= hi)
					{
						/*
						 * Map the current code before bumping the
						 * last piece, so that 'lo' itself receives
						 * the destination exactly as written (the
						 * same way the two-byte case starts at dst).
						 */
						pdf_maponetomany(cmap, lo, dststr, n);
						dststr[n-1] ++;
						lo ++;
					}
				}
			}
		}

		else if (tok == PDF_TOARRAY)
		{
			error = parsebfrangearray(cmap, file, lo, hi);
			if (error)
				return fz_rethrow(error, "cannot map bfrange");
		}

		else
		{
			return fz_throw("expected string or array or endbfrange");
		}
	}
}