/*
 * Parse the array form of a bfrange destination, "<str> <str> ... ]".
 * pdf_parse_bf_range calls this once it has read the opening array token.
 *
 * Every string token is cut into consecutive 2-byte slices, each slice is
 * converted with pdf_code_from_string, and the resulting list is mapped
 * (one-to-many) onto the current source code.  The source code starts at
 * 'lo' and advances by one per string token, including tokens that are
 * too short to yield a slice.
 *
 * Returns when the closing array token is read; throws "expected string
 * or ]" for any other non-string token.  'hi' is accepted but not
 * consulted.
 */
static void
pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
	pdf_lexbuf lexbuf;
	int units[256];
	int code;

	lexbuf.size = PDF_LEXBUF_SMALL;

	/* One source code per string token; the for-increment also runs
	 * after 'continue', so short strings still consume a code. */
	for (code = lo; ; code++)
	{
		int count;
		int k;
		int tok = pdf_lex_cmap(file, &lexbuf);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_CLOSE_ARRAY)
			break;

		/* Note: does not handle [ /Name /Name ... ] */
		if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or ]");

		count = lexbuf.len / 2;
		if (count == 0)
			continue;

		for (k = 0; k < count; k++)
			units[k] = pdf_code_from_string(&lexbuf.scratch[k * 2], 2);

		pdf_map_one_to_many(ctx, cmap, code, units, count);
	}
}
/*
 * Parse the array form of a bfrange destination, "<str> <str> ... ]".
 * pdf_parse_bf_range calls this once it has read the opening array token.
 *
 * Each string token is read into a 256-byte buffer, cut into consecutive
 * 2-byte slices, and each slice is converted with pdf_code_from_string.
 * The resulting list is mapped (one-to-many) onto the current source
 * code, which starts at 'lo' and advances by one per string token, even
 * when the token is too short to yield a slice.
 *
 * Returns on the closing array token; throws "expected string or ]" on
 * any other non-string token.  'hi' is accepted but not consulted.
 */
static void
pdf_parse_bf_range_array(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
	char text[256];
	int units[256];
	int text_len;
	int code;

	/* The for-increment also runs after 'continue', so every string
	 * token consumes exactly one source code. */
	for (code = lo; ; code++)
	{
		int count;
		int k;
		int tok = pdf_lex_cmap(file, text, sizeof text, &text_len);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_CLOSE_ARRAY)
			break;

		/* Note: does not handle [ /Name /Name ... ] */
		if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or ]");

		count = text_len / 2;
		if (count == 0)
			continue;

		for (k = 0; k < count; k++)
			units[k] = pdf_code_from_string(text + k * 2, 2);

		pdf_map_one_to_many(file->ctx, cmap, code, units, count);
	}
}
/*
 * Parse the entries of a bfchar section up to its end token.
 *
 * Each entry is a pair of string tokens "<src> <dst>".  The whole source
 * string is converted to a code with pdf_code_from_string; the
 * destination string is cut into consecutive 2-byte slices, each
 * converted the same way, and the list is mapped (one-to-many) onto the
 * source code.  A destination shorter than two bytes produces no mapping.
 *
 * Returns on TOK_END_BF_CHAR.  Throws "expected string or endbfchar" if
 * an entry does not start with a string, and "expected string" if the
 * destination is not a string.
 */
static void
pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
	pdf_lexbuf lexbuf;
	int units[256];

	lexbuf.size = PDF_LEXBUF_SMALL;

	for (;;)
	{
		int code;
		int count;
		int k;
		int tok = pdf_lex_cmap(file, &lexbuf);
		/* RJW: "syntaxerror in cmap" */

		if (tok == TOK_END_BF_CHAR)
			return;
		if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or endbfchar");

		/* The source code must be extracted before the buffer is
		 * reused for the destination token. */
		code = pdf_code_from_string(lexbuf.scratch, lexbuf.len);

		tok = pdf_lex_cmap(file, &lexbuf);
		/* RJW: "syntaxerror in cmap" */

		/* Note: does not handle /dstName */
		if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string");

		count = lexbuf.len / 2;
		if (count != 0)
		{
			for (k = 0; k < count; k++)
				units[k] = pdf_code_from_string(&lexbuf.scratch[k * 2], 2);
			pdf_map_one_to_many(ctx, cmap, code, units, count);
		}
	}
}
/*
 * Parse the entries of a bfchar section up to its end token.
 *
 * Each entry is a pair of string tokens "<src> <dst>", both read into the
 * same 256-byte buffer.  The whole source string is converted to a code
 * with pdf_code_from_string; the destination string is cut into
 * consecutive 2-byte slices, each converted the same way, and the list is
 * mapped (one-to-many) onto the source code.  A destination shorter than
 * two bytes produces no mapping.
 *
 * Returns on TOK_END_BF_CHAR.  Throws "expected string or endbfchar" if
 * an entry does not start with a string, and "expected string" if the
 * destination is not a string.
 */
static void
pdf_parse_bf_char(pdf_cmap *cmap, fz_stream *file)
{
	char text[256];
	int units[256];
	int text_len;

	for (;;)
	{
		int code;
		int count;
		int k;
		int tok = pdf_lex_cmap(file, text, sizeof text, &text_len);
		/* RJW: "syntaxerror in cmap" */

		if (tok == TOK_END_BF_CHAR)
			return;
		if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or endbfchar");

		/* The source code must be extracted before the buffer is
		 * overwritten by the destination token. */
		code = pdf_code_from_string(text, text_len);

		tok = pdf_lex_cmap(file, text, sizeof text, &text_len);
		/* RJW: "syntaxerror in cmap" */

		/* Note: does not handle /dstName */
		if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string");

		count = text_len / 2;
		if (count != 0)
		{
			for (k = 0; k < count; k++)
				units[k] = pdf_code_from_string(text + k * 2, 2);
			pdf_map_one_to_many(file->ctx, cmap, code, units, count);
		}
	}
}
/*
 * Set up the code-to-Unicode data of 'font'.
 *
 * - If 'cmapstm' refers to a stream, the embedded cmap is loaded and a
 *   new font->to_unicode cmap is built: every code (0..255 when 'strings'
 *   is given, 0..65535 otherwise) that font->encoding maps to a cid >= 0
 *   is looked up in the embedded cmap, and the result is stored under
 *   that cid.  The new cmap is sorted, the embedded one dropped, and
 *   font->size grows by pdf_cmap_size() of the new cmap.
 * - Otherwise, if 'collection' is given, font->to_unicode is loaded from
 *   the matching "<collection>-UCS2" system cmap (nothing is loaded for
 *   an unrecognised collection) and the function returns immediately.
 * - If 'strings' is given (and the collection branch was not taken), a
 *   256-entry font->cid_to_ucs table is filled from the glyph names via
 *   pdf_lookup_agl, using '?' where a name is missing.
 */
void
pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font, char **strings, char *collection, pdf_obj *cmapstm)
{
	/* Character collections paired with their UCS2 system cmap. */
	static char *const ucs2_cmaps[][2] = {
		{ "Adobe-CNS1", "Adobe-CNS1-UCS2" },
		{ "Adobe-GB1", "Adobe-GB1-UCS2" },
		{ "Adobe-Japan1", "Adobe-Japan1-UCS2" },
		{ "Adobe-Korea1", "Adobe-Korea1-UCS2" }
	};
	fz_context *ctx = doc->ctx;
	int code;

	if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm)))
	{
		int limit = strings ? 256 : 65536;
		int ucs[8];
		pdf_cmap *embedded = pdf_load_embedded_cmap(doc, cmapstm);

		font->to_unicode = pdf_new_cmap(ctx);

		for (code = 0; code < limit; code++)
		{
			int n;
			int cid = pdf_lookup_cmap(font->encoding, code);

			if (cid < 0)
				continue;

			n = pdf_lookup_cmap_full(embedded, code, ucs);
			if (n == 1)
				pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, ucs[0]);
			else if (n > 1)
				pdf_map_one_to_many(ctx, font->to_unicode, cid, ucs, n);
		}

		pdf_sort_cmap(ctx, font->to_unicode);
		pdf_drop_cmap(ctx, embedded);

		font->size += pdf_cmap_size(ctx, font->to_unicode);
	}
	else if (collection)
	{
		int k;

		for (k = 0; k < (int)(sizeof ucs2_cmaps / sizeof ucs2_cmaps[0]); k++)
		{
			if (strcmp(collection, ucs2_cmaps[k][0]) == 0)
			{
				font->to_unicode = pdf_load_system_cmap(ctx, ucs2_cmaps[k][1]);
				break;
			}
		}

		/* This branch never builds the glyph-name table below. */
		return;
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short));
		font->size += 256 * sizeof(unsigned short);

		for (code = 0; code < 256; code++)
			font->cid_to_ucs[code] = strings[code] ? pdf_lookup_agl(strings[code]) : '?';
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}
}
/*
 * Set up the code-to-Unicode data of 'font'.
 *
 * - If 'cmapstm' refers to a stream, the embedded cmap is loaded and a
 *   new font->to_unicode cmap is built: every code (0..255 when 'strings'
 *   is given, 0..65535 otherwise) that font->encoding maps to a cid >= 0
 *   is looked up in the embedded cmap, and the result is stored under
 *   that cid.  The new cmap is then sorted and the embedded one dropped.
 * - Otherwise, if 'collection' names one of the four known Adobe
 *   collections, the matching "<collection>-UCS2" system cmap is loaded
 *   into font->to_unicode.
 * - In either case, if 'strings' is given, a 256-entry font->cid_to_ucs
 *   table is filled from the glyph names via pdf_lookup_agl, using '?'
 *   where a name is missing.
 *
 * Returns fz_okay on success, or a rethrown error if loading the
 * embedded cmap or the system cmap fails.
 */
fz_error
pdf_load_to_unicode(pdf_font_desc *font, pdf_xref *xref, char **strings, char *collection, fz_obj *cmapstm)
{
	fz_error error = fz_okay;
	int code;

	if (pdf_is_stream(xref, fz_to_num(cmapstm), fz_to_gen(cmapstm)))
	{
		pdf_cmap *embedded;
		int ucs[8];
		int limit = strings ? 256 : 65536;

		error = pdf_load_embedded_cmap(&embedded, xref, cmapstm);
		if (error)
			return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_to_num(cmapstm), fz_to_gen(cmapstm));

		font->to_unicode = pdf_new_cmap();

		for (code = 0; code < limit; code++)
		{
			int n;
			int cid = pdf_lookup_cmap(font->encoding, code);

			if (cid < 0)
				continue;

			n = pdf_lookup_cmap_full(embedded, code, ucs);
			if (n == 1)
				pdf_map_range_to_range(font->to_unicode, cid, cid, ucs[0]);
			else if (n > 1)
				pdf_map_one_to_many(font->to_unicode, cid, ucs, n);
		}

		pdf_sort_cmap(font->to_unicode);
		pdf_drop_cmap(embedded);
	}
	else if (collection)
	{
		/* Pick the system cmap name first; unknown collections load nothing. */
		char *sysname = NULL;

		if (strcmp(collection, "Adobe-CNS1") == 0)
			sysname = "Adobe-CNS1-UCS2";
		else if (strcmp(collection, "Adobe-GB1") == 0)
			sysname = "Adobe-GB1-UCS2";
		else if (strcmp(collection, "Adobe-Japan1") == 0)
			sysname = "Adobe-Japan1-UCS2";
		else if (strcmp(collection, "Adobe-Korea1") == 0)
			sysname = "Adobe-Korea1-UCS2";

		if (sysname)
		{
			error = pdf_load_system_cmap(&font->to_unicode, sysname);
			if (error)
				return fz_rethrow(error, "cannot load ToUnicode system cmap %s-UCS2", collection);
		}
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_calloc(256, sizeof(unsigned short));

		for (code = 0; code < 256; code++)
			font->cid_to_ucs[code] = strings[code] ? pdf_lookup_agl(strings[code]) : '?';
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}

	return fz_okay;
}
/*
 * Parse the entries of a bfrange section up to its end token.
 *
 * Each entry starts with two string tokens "<lo> <hi>", each converted
 * to a code with pdf_code_from_string, followed by a destination:
 *
 * - a 2-byte string: the range lo..hi is mapped range-to-range starting
 *   at the destination code;
 * - a longer string: it is cut into consecutive 2-byte slices and mapped
 *   one-to-many.  'lo' maps to the string exactly as written, and for
 *   each following code the last unit is incremented by one;
 * - an array: handed to pdf_parse_bf_range_array.
 *
 * A destination string shorter than two bytes produces no mapping.
 *
 * Returns on TOK_END_BF_RANGE.  Throws "expected string or endbfrange",
 * "expected string" or "expected string or array or endbfrange" when the
 * corresponding token is of the wrong kind.
 */
static void
pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
	pdf_lexbuf buf;
	int tok;
	int lo, hi, dst;

	buf.size = PDF_LEXBUF_SMALL;

	while (1)
	{
		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */
		if (tok == TOK_END_BF_RANGE)
			return;
		else if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or endbfrange");

		lo = pdf_code_from_string(buf.scratch, buf.len);

		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */
		if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string");

		hi = pdf_code_from_string(buf.scratch, buf.len);

		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_STRING)
		{
			if (buf.len == 2)
			{
				dst = pdf_code_from_string(buf.scratch, buf.len);
				pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
			}
			else
			{
				int dststr[256];
				int i;

				if (buf.len / 2)
				{
					for (i = 0; i < buf.len / 2; i++)
						dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);

					/* NOTE(review): lo and hi come straight from the
					 * file and are not range-checked here -- confirm
					 * whether very large ranges need to be rejected. */
					while (lo <= hi)
					{
						/* Map first, then bump the last unit: 'lo' must
						 * map to the destination exactly as written.
						 * Incrementing before mapping shifted every
						 * entry of the range by one. */
						pdf_map_one_to_many(ctx, cmap, lo, dststr, i);
						dststr[i-1] ++;
						lo ++;
					}
				}
			}
		}

		else if (tok == PDF_TOK_OPEN_ARRAY)
		{
			pdf_parse_bf_range_array(ctx, cmap, file, lo, hi);
			/* RJW: "cannot map bfrange" */
		}

		else
		{
			fz_throw(ctx, "expected string or array or endbfrange");
		}
	}
}
/*
 * Parse the entries of a bfrange section up to its end token.
 *
 * Each entry starts with two string tokens "<lo> <hi>", each converted
 * to a code with pdf_code_from_string, followed by a destination:
 *
 * - a 2-byte string: the range lo..hi is mapped range-to-range starting
 *   at the destination code;
 * - a longer string: it is cut into consecutive 2-byte slices and mapped
 *   one-to-many.  'lo' maps to the string exactly as written, and for
 *   each following code the last unit is incremented by one;
 * - an array: handed to pdf_parse_bf_range_array.
 *
 * A destination string shorter than two bytes produces no mapping.
 *
 * Returns on TOK_END_BF_RANGE.  Throws "expected string or endbfrange",
 * "expected string" or "expected string or array or endbfrange" when the
 * corresponding token is of the wrong kind.
 */
static void
pdf_parse_bf_range(pdf_cmap *cmap, fz_stream *file)
{
	char buf[256];
	int tok;
	int len;
	int lo, hi, dst;

	while (1)
	{
		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */
		if (tok == TOK_END_BF_RANGE)
			return;
		else if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or endbfrange");

		lo = pdf_code_from_string(buf, len);

		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */
		if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string");

		hi = pdf_code_from_string(buf, len);

		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_STRING)
		{
			if (len == 2)
			{
				dst = pdf_code_from_string(buf, len);
				pdf_map_range_to_range(file->ctx, cmap, lo, hi, dst);
			}
			else
			{
				int dststr[256];
				int i;

				if (len / 2)
				{
					for (i = 0; i < len / 2; i++)
						dststr[i] = pdf_code_from_string(buf + i * 2, 2);

					/* NOTE(review): lo and hi come straight from the
					 * file and are not range-checked here -- confirm
					 * whether very large ranges need to be rejected. */
					while (lo <= hi)
					{
						/* Map first, then bump the last unit: 'lo' must
						 * map to the destination exactly as written.
						 * Incrementing before mapping shifted every
						 * entry of the range by one. */
						pdf_map_one_to_many(file->ctx, cmap, lo, dststr, i);
						dststr[i-1] ++;
						lo ++;
					}
				}
			}
		}

		else if (tok == PDF_TOK_OPEN_ARRAY)
		{
			pdf_parse_bf_range_array(cmap, file, lo, hi);
			/* RJW: "cannot map bfrange" */
		}

		else
		{
			fz_throw(file->ctx, "expected string or array or endbfrange");
		}
	}
}