/*
 * Build an Identity-H or Identity-V CMap (used for both 1-byte and
 * 2-byte encodings).
 *
 * wmode: non-zero names the CMap "Identity-V", zero names it
 *	"Identity-H"; the value is also stored with pdf_set_wmode.
 * bytes: byte count handed to pdf_add_codespace.
 *
 * The codespace and the identity range always span 0x0000-0xffff,
 * whatever value bytes has.
 */
pdf_cmap *
pdf_new_identity_cmap(int wmode, int bytes)
{
	pdf_cmap *identity = pdf_new_cmap();
	char direction = wmode ? 'V' : 'H';

	sprintf(identity->cmap_name, "Identity-%c", direction);

	/* One codespace, one range mapping every code onto itself. */
	pdf_add_codespace(identity, 0x0000, 0xffff, bytes);
	pdf_map_range_to_range(identity, 0x0000, 0xffff, 0);
	pdf_sort_cmap(identity);

	pdf_set_wmode(identity, wmode);

	return identity;
}
/*
 * Build an Identity-H or Identity-V CMap (used for both 1-byte and
 * 2-byte encodings).
 *
 * wmode: non-zero names the CMap "Identity-V", zero names it
 *	"Identity-H"; the value is also stored with pdf_set_cmap_wmode.
 * bytes: byte count handed to pdf_add_codespace.
 *
 * The codespace and the identity range always span 0x0000-0xffff,
 * whatever value bytes has. If any step after allocation throws, the
 * partially built CMap is dropped and the exception is rethrown.
 */
pdf_cmap *
pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
{
	pdf_cmap *identity = pdf_new_cmap(ctx);

	fz_try(ctx)
	{
		char direction = wmode ? 'V' : 'H';

		sprintf(identity->cmap_name, "Identity-%c", direction);

		/* One codespace, one range mapping every code onto itself. */
		pdf_add_codespace(ctx, identity, 0x0000, 0xffff, bytes);
		pdf_map_range_to_range(ctx, identity, 0x0000, 0xffff, 0);
		pdf_sort_cmap(ctx, identity);

		pdf_set_cmap_wmode(ctx, identity, wmode);
	}
	fz_catch(ctx)
	{
		/* Do not leak the half-initialised CMap. */
		pdf_drop_cmap(ctx, identity);
		fz_rethrow(ctx);
	}

	return identity;
}
/*
 * Create an Identity-* CMap (for both 1 and 2-byte encodings)
 *
 * wmode: non-zero names the CMap "Identity-V", zero names it
 *	"Identity-H"; the value is also stored with pdf_set_cmap_wmode.
 * bytes: number of bytes per code. The codespace and the identity range
 *	run from 0 to the largest value representable in that many bytes.
 *
 * If any step after allocation throws, the partially built CMap is
 * dropped and the exception is rethrown.
 */
pdf_cmap *
pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
{
	pdf_cmap *cmap = pdf_new_cmap(ctx);
	fz_try(ctx)
	{
		/*
		 * Compute the top code with unsigned arithmetic. Shifting a
		 * signed 1 by 31 or more bits (e.g. bytes == 4), or by a
		 * negative count, is undefined behaviour, so saturate to all
		 * ones whenever the shift count would not fit. A negative
		 * bytes wraps to a huge unsigned count and saturates as well.
		 * NOTE(review): how the callees treat a saturated value is not
		 * visible here; results for 1 to 3 bytes are unchanged.
		 */
		unsigned int bits = (unsigned int)bytes * 8u;
		unsigned int high;

		if (bits >= sizeof(high) * 8u)
			high = ~0u;
		else
			high = (1u << bits) - 1u;

		sprintf(cmap->cmap_name, "Identity-%c", wmode ? 'V' : 'H');
		pdf_add_codespace(ctx, cmap, 0, high, bytes);
		pdf_map_range_to_range(ctx, cmap, 0, high, 0);
		pdf_sort_cmap(ctx, cmap);
		pdf_set_cmap_wmode(ctx, cmap, wmode);
	}
	fz_catch(ctx)
	{
		/* Do not leak the half-initialised CMap. */
		pdf_drop_cmap(ctx, cmap);
		fz_rethrow(ctx);
	}
	return cmap;
}
/*
 * Fill in the Unicode lookup data of a font descriptor.
 *
 * doc: document the font belongs to; its context is used for all
 *	allocations and exceptions.
 * font: descriptor whose to_unicode, cid_to_ucs, cid_to_ucs_len and
 *	size fields are updated.
 * strings: optional table of 256 names, indexed by code, each looked up
 *	with pdf_lookup_agl (presumably glyph names -- confirm against
 *	the caller). May be NULL.
 * collection: optional character collection name; used to choose a
 *	system "<collection>-UCS2" CMap when cmapstm is not a stream.
 * cmapstm: object that may refer to an embedded ToUnicode CMap stream.
 *
 * Sources, in order:
 *  1. If cmapstm is a stream, the embedded CMap is loaded and re-keyed:
 *     for every code that font->encoding maps to a CID, the embedded
 *     CMap's result for that code is stored in font->to_unicode under
 *     the CID.
 *  2. Otherwise, if a collection is given, a matching system CMap (if
 *     any) is loaded and the function returns immediately.
 *  3. If strings is given, a 256-entry cid_to_ucs table is built from
 *     it, with '?' for missing entries.
 */
void
pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font,
	char **strings, char *collection, pdf_obj *cmapstm)
{
	pdf_cmap *cmap;
	int cid;
	int ucsbuf[8];
	int ucslen;
	int i;
	fz_context *ctx = doc->ctx;

	if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm)))
	{
		/* Simple fonts (with a strings table) only have 256 codes. */
		int ncodes = strings ? 256 : 65536;

		cmap = pdf_load_embedded_cmap(doc, cmapstm);

		/*
		 * The embedded CMap is only needed while re-keying. Guard the
		 * build so it is released even if an allocation inside
		 * throws; previously it leaked on that path.
		 */
		fz_try(ctx)
		{
			font->to_unicode = pdf_new_cmap(ctx);

			for (i = 0; i < ncodes; i++)
			{
				cid = pdf_lookup_cmap(font->encoding, i);
				if (cid >= 0)
				{
					ucslen = pdf_lookup_cmap_full(cmap, i, ucsbuf);
					if (ucslen == 1)
						pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, ucsbuf[0]);
					if (ucslen > 1)
						pdf_map_one_to_many(ctx, font->to_unicode, cid, ucsbuf, ucslen);
				}
			}

			pdf_sort_cmap(ctx, font->to_unicode);
		}
		fz_catch(ctx)
		{
			pdf_drop_cmap(ctx, cmap);
			fz_rethrow(ctx);
		}

		pdf_drop_cmap(ctx, cmap);
		font->size += pdf_cmap_size(ctx, font->to_unicode);
	}

	else if (collection)
	{
		if (!strcmp(collection, "Adobe-CNS1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
		else if (!strcmp(collection, "Adobe-GB1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
		else if (!strcmp(collection, "Adobe-Japan1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
		else if (!strcmp(collection, "Adobe-Korea1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");

		/* The strings table is not consulted on this path. */
		return;
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short));
		font->size += 256 * sizeof(unsigned short);

		/* Every slot is written, so the array needs no pre-zeroing. */
		for (i = 0; i < 256; i++)
		{
			if (strings[i])
				font->cid_to_ucs[i] = pdf_lookup_agl(strings[i]);
			else
				font->cid_to_ucs[i] = '?';
		}
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}
}
/*
 * Fill in the Unicode lookup data of a font descriptor (error-code
 * variant).
 *
 * font: descriptor whose to_unicode, cid_to_ucs and cid_to_ucs_len
 *	fields are updated.
 * xref: cross-reference table used to test for and load the stream.
 * strings: optional table of 256 names, indexed by code, each looked up
 *	with pdf_lookup_agl (presumably glyph names -- confirm against
 *	the caller). May be NULL.
 * collection: optional character collection name; used to choose a
 *	system "<collection>-UCS2" CMap when cmapstm is not a stream.
 * cmapstm: object that may refer to an embedded ToUnicode CMap stream.
 *
 * Returns fz_okay on success, or a rethrown error if the embedded or
 * system CMap cannot be loaded.
 */
fz_error
pdf_load_to_unicode(pdf_font_desc *font, pdf_xref *xref,
	char **strings, char *collection, fz_obj *cmapstm)
{
	fz_error error = fz_okay;
	pdf_cmap *embedded;
	int ucs[8];
	int code;

	if (pdf_is_stream(xref, fz_to_num(cmapstm), fz_to_gen(cmapstm)))
	{
		/* Only 256 codes are scanned when a strings table is given. */
		int ncodes = strings ? 256 : 65536;

		error = pdf_load_embedded_cmap(&embedded, xref, cmapstm);
		if (error)
			return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_to_num(cmapstm), fz_to_gen(cmapstm));

		font->to_unicode = pdf_new_cmap();

		/* Re-key the embedded CMap from character code to CID. */
		for (code = 0; code < ncodes; code++)
		{
			int count;
			int cid = pdf_lookup_cmap(font->encoding, code);

			if (cid < 0)
				continue;

			count = pdf_lookup_cmap_full(embedded, code, ucs);
			if (count == 1)
				pdf_map_range_to_range(font->to_unicode, cid, cid, ucs[0]);
			else if (count > 1)
				pdf_map_one_to_many(font->to_unicode, cid, ucs, count);
		}

		pdf_sort_cmap(font->to_unicode);
		pdf_drop_cmap(embedded);
	}
	else if (collection)
	{
		/* Pick the system CMap matching a known collection, if any. */
		char *ucs2_name = NULL;

		if (strcmp(collection, "Adobe-CNS1") == 0)
			ucs2_name = "Adobe-CNS1-UCS2";
		else if (strcmp(collection, "Adobe-GB1") == 0)
			ucs2_name = "Adobe-GB1-UCS2";
		else if (strcmp(collection, "Adobe-Japan1") == 0)
			ucs2_name = "Adobe-Japan1-UCS2";
		else if (strcmp(collection, "Adobe-Korea1") == 0)
			ucs2_name = "Adobe-Korea1-UCS2";

		if (ucs2_name)
		{
			error = pdf_load_system_cmap(&font->to_unicode, ucs2_name);
			if (error)
				return fz_rethrow(error, "cannot load ToUnicode system cmap %s-UCS2", collection);
		}
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_calloc(256, sizeof(unsigned short));

		/* Entries without a name fall back to '?'. */
		for (code = 0; code < 256; code++)
			font->cid_to_ucs[code] = strings[code] ? pdf_lookup_agl(strings[code]) : '?';
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}

	return fz_okay;
}
/*
 * Parse a CMap from a stream.
 *
 * Tokens are read with pdf_lex_cmap until end of file or the end-cmap
 * token. CMapName and WMode entries, codespace ranges, bfchar/cidchar
 * and bfrange/cidrange sections are handed to their parsers; any other
 * name token is remembered so that a following usecmap token can record
 * it as the CMap's usecmap_name. Everything else is ignored.
 *
 * Returns the new, sorted CMap. If anything inside throws, the CMap is
 * dropped and a "syntaxerror in cmap..." exception is thrown instead,
 * tagged with the section that was being parsed.
 */
pdf_cmap *
pdf_load_cmap(fz_context *ctx, fz_stream *file)
{
	pdf_cmap *cmap;
	char key[64];
	pdf_lexbuf buf;
	int tok;
	/* Section tag appended to the error message; set before each parse. */
	const char *where = "";

	buf.size = PDF_LEXBUF_SMALL;
	cmap = pdf_new_cmap(ctx);

	strcpy(key, ".notdef");

	/* where is modified inside the try and read in the catch. */
	fz_var(where);

	fz_try(ctx)
	{
		while (1)
		{
			where = "";
			tok = pdf_lex_cmap(file, &buf);

			if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
				break;

			else if (tok == PDF_TOK_NAME)
			{
				if (!strcmp(buf.scratch, "CMapName"))
				{
					where = " after CMapName";
					pdf_parse_cmap_name(ctx, cmap, file);
				}
				else if (!strcmp(buf.scratch, "WMode"))
				{
					where = " after WMode";
					pdf_parse_wmode(ctx, cmap, file);
				}
				else
					/* Remember the name in case usecmap follows. */
					fz_strlcpy(key, buf.scratch, sizeof key);
			}

			else if (tok == TOK_USECMAP)
			{
				fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name));
			}

			else if (tok == TOK_BEGIN_CODESPACE_RANGE)
			{
				where = " codespacerange";
				pdf_parse_codespace_range(ctx, cmap, file);
			}

			else if (tok == TOK_BEGIN_BF_CHAR)
			{
				where = " bfchar";
				pdf_parse_bf_char(ctx, cmap, file);
			}

			else if (tok == TOK_BEGIN_CID_CHAR)
			{
				where = " cidchar";
				pdf_parse_cid_char(ctx, cmap, file);
			}

			else if (tok == TOK_BEGIN_BF_RANGE)
			{
				where = " bfrange";
				pdf_parse_bf_range(ctx, cmap, file);
			}

			else if (tok == TOK_BEGIN_CID_RANGE)
			{
				/* Leading space keeps the message consistent with
				 * the other sections ("syntaxerror in cmap cidrange"). */
				where = " cidrange";
				pdf_parse_cid_range(ctx, cmap, file);
			}

			/* ignore everything else */
		}

		pdf_sort_cmap(ctx, cmap);
	}
	fz_catch(ctx)
	{
		pdf_drop_cmap(ctx, cmap);
		fz_throw(ctx, "syntaxerror in cmap%s", where);
	}

	return cmap;
}