/* Convert Unicode/PdfDocEncoding string into ucs-2 */ unsigned short * pdf_to_ucs2(fz_context *ctx, pdf_obj *src) { unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); unsigned short *dstptr, *dst; int srclen = pdf_to_str_len(src); int i; if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { dstptr = dst = fz_malloc_array(ctx, (srclen - 2) / 2 + 1, sizeof(short)); for (i = 2; i + 1 < srclen; i += 2) *dstptr++ = srcptr[i] << 8 | srcptr[i+1]; } else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) { dstptr = dst = fz_malloc_array(ctx, (srclen - 2) / 2 + 1, sizeof(short)); for (i = 2; i + 1 < srclen; i += 2) *dstptr++ = srcptr[i] | srcptr[i+1] << 8; } else { dstptr = dst = fz_malloc_array(ctx, srclen + 1, sizeof(short)); for (i = 0; i < srclen; i++) *dstptr++ = pdf_doc_encoding[srcptr[i]]; } *dstptr = '\0'; return dst; }
/* (buffer must be at least (fz_to_str_len(src) + 1) * 2 bytes in size) */ void pdf_to_ucs2_buf(unsigned short *buffer, pdf_obj *src) { unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); unsigned short *dstptr = buffer; int srclen = pdf_to_str_len(src); int i; if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { for (i = 2; i + 1 < srclen; i += 2) *dstptr++ = srcptr[i] << 8 | srcptr[i+1]; } else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) { for (i = 2; i + 1 < srclen; i += 2) *dstptr++ = srcptr[i] | srcptr[i+1] << 8; } else { for (i = 0; i < srclen; i++) *dstptr++ = pdf_doc_encoding[srcptr[i]]; } *dstptr = '\0'; }
/* Convert Unicode/PdfDocEncoding string into utf-8 */ char * pdf_to_utf8(fz_context *ctx, pdf_obj *src) { unsigned char *srcptr = (unsigned char *) pdf_to_str_buf(src); char *dstptr, *dst; int srclen = pdf_to_str_len(src); int dstlen = 0; int ucs; int i; if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstptr += fz_runetochar(dstptr, ucs); } } else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstptr += fz_runetochar(dstptr, ucs); } } else { for (i = 0; i < srclen; i++) dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 0; i < srclen; i++) { ucs = pdf_doc_encoding[srcptr[i]]; dstptr += fz_runetochar(dstptr, ucs); } } *dstptr = '\0'; return dst; }
static fz_colorspace * load_indexed(pdf_document *doc, pdf_obj *array) { fz_context *ctx = doc->ctx; pdf_obj *baseobj = pdf_array_get(array, 1); pdf_obj *highobj = pdf_array_get(array, 2); pdf_obj *lookupobj = pdf_array_get(array, 3); fz_colorspace *base = NULL; fz_colorspace *cs; int i, n, high; unsigned char *lookup = NULL; fz_var(base); fz_var(lookup); fz_try(ctx) { base = pdf_load_colorspace(doc, baseobj); high = pdf_to_int(highobj); high = fz_clampi(high, 0, 255); n = base->n * (high + 1); lookup = fz_malloc_array(ctx, 1, n); if (pdf_is_string(lookupobj) && pdf_to_str_len(lookupobj) >= n) { unsigned char *buf = (unsigned char *) pdf_to_str_buf(lookupobj); for (i = 0; i < n; i++) lookup[i] = buf[i]; } else if (pdf_is_indirect(lookupobj)) { fz_stream *file = NULL; fz_var(file); fz_try(ctx) { file = pdf_open_stream(doc, pdf_to_num(lookupobj), pdf_to_gen(lookupobj)); i = fz_read(file, lookup, n); if (i < n) memset(lookup+i, 0, n-i); } fz_always(ctx) { fz_close(file); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot open colorspace lookup table (%d 0 R)", pdf_to_num(lookupobj)); } } else { fz_rethrow_message(ctx, "cannot parse colorspace lookup table"); } cs = fz_new_indexed_colorspace(ctx, base, high, lookup); }
static void safe_print_pdf_obj(fz_context *ctx, pdf_obj *obj, const char *dflt) { if (obj == NULL) fprintf(stderr, "%s", dflt); else if (pdf_is_string(ctx, obj)) safe_print_pdf_string(ctx, (unsigned char *)pdf_to_str_buf(ctx, obj), pdf_to_str_len(ctx, obj)); else pdf_print_obj(ctx, fz_stderr(ctx), obj, 1); }
static wchar_t *get_prop_str(fz_context* ctx, pdf_obj *obj, const char *name) { pdf_obj* p_obj = pdf_dict_getp(ctx, obj, name); int len = pdf_to_str_len(ctx, p_obj) + 1; wchar_t *buf = AllocArray<wchar_t>(len); pdf_to_ucs2_buf(ctx, (unsigned short *)buf, p_obj); return p_obj ? pdf_clean_string(buf) : nullptr; }
static char *get_string_or_stream(pdf_document *doc, pdf_obj *obj) { fz_context *ctx = doc->ctx; int len = 0; char *buf = NULL; fz_buffer *strmbuf = NULL; char *text = NULL; fz_var(strmbuf); fz_var(text); fz_try(ctx) { if (pdf_is_string(obj)) { len = pdf_to_str_len(obj); buf = pdf_to_str_buf(obj); } else if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj))) { strmbuf = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)); len = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&buf); } if (buf) { text = fz_malloc(ctx, len+1); memcpy(text, buf, len); text[len] = 0; } } fz_always(ctx) { fz_drop_buffer(ctx, strmbuf); } fz_catch(ctx) { fz_free(ctx, text); fz_rethrow(ctx); } return text; }
static pdf_font_desc * load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding); } else { fz_throw(ctx, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); } } pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode); /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static fz_colorspace * load_indexed(pdf_document *xref, pdf_obj *array) { struct indexed *idx = NULL; fz_context *ctx = xref->ctx; pdf_obj *baseobj = pdf_array_get(array, 1); pdf_obj *highobj = pdf_array_get(array, 2); pdf_obj *lookup = pdf_array_get(array, 3); fz_colorspace *base = NULL; fz_colorspace *cs = NULL; int i, n; fz_var(idx); fz_var(base); fz_var(cs); fz_try(ctx) { base = pdf_load_colorspace(xref, baseobj); /* "cannot load base colorspace (%d %d R)", pdf_to_num(baseobj), pdf_to_gen(baseobj) */ idx = fz_malloc_struct(ctx, struct indexed); idx->lookup = NULL; idx->base = base; idx->high = pdf_to_int(highobj); idx->high = CLAMP(idx->high, 0, 255); n = base->n * (idx->high + 1); idx->lookup = fz_malloc_array(ctx, 1, n); cs = fz_new_colorspace(ctx, "Indexed", 1); cs->to_rgb = indexed_to_rgb; cs->free_data = free_indexed; cs->data = idx; cs->size += sizeof(*idx) + n + (base ? base->size : 0); if (pdf_is_string(lookup) && pdf_to_str_len(lookup) == n) { unsigned char *buf = (unsigned char *) pdf_to_str_buf(lookup); for (i = 0; i < n; i++) idx->lookup[i] = buf[i]; } else if (pdf_is_indirect(lookup)) { fz_stream *file = NULL; fz_try(ctx) { file = pdf_open_stream(xref, pdf_to_num(lookup), pdf_to_gen(lookup)); } fz_catch(ctx) { fz_throw(ctx, "cannot open colorspace lookup table (%d 0 R)", pdf_to_num(lookup)); } i = fz_read(file, idx->lookup, n); if (i < 0) { fz_close(file); fz_throw(ctx, "cannot read colorspace lookup table (%d 0 R)", pdf_to_num(lookup)); } fz_close(file); } else { fz_throw(ctx, "cannot parse colorspace lookup table"); } }
/* Convert Unicode/PdfDocEncoding string into utf-8 */ char * pdf_to_utf8(pdf_document *doc, pdf_obj *src) { fz_context *ctx = doc->ctx; fz_buffer *strmbuf = NULL; unsigned char *srcptr; char *dstptr, *dst; int srclen; int dstlen = 0; int ucs; int i; fz_var(strmbuf); fz_try(ctx) { if (pdf_is_string(src)) { srcptr = (unsigned char *) pdf_to_str_buf(src); srclen = pdf_to_str_len(src); } else if (pdf_is_stream(doc, pdf_to_num(src), pdf_to_gen(src))) { strmbuf = pdf_load_stream(doc, pdf_to_num(src), pdf_to_gen(src)); srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr); } else { srclen = 0; } if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstptr += fz_runetochar(dstptr, ucs); } } else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstptr += fz_runetochar(dstptr, ucs); } } else { for (i = 0; i < srclen; i++) dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 0; i < srclen; i++) { ucs = pdf_replace_undefined(pdf_doc_encoding[srcptr[i]]); dstptr += fz_runetochar(dstptr, ucs); } } } fz_always(ctx) { fz_drop_buffer(ctx, strmbuf); } fz_catch(ctx) { fz_rethrow(ctx); } *dstptr = '\0'; return dst; }
/* Convert Unicode/PdfDocEncoding string into utf-8 */ char * pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src) { fz_buffer *stmbuf = NULL; unsigned char *srcptr; char *dstptr, *dst; size_t srclen; size_t dstlen = 0; int ucs; size_t i; fz_var(stmbuf); fz_try(ctx) { if (pdf_is_string(ctx, src)) { srcptr = (unsigned char *) pdf_to_str_buf(ctx, src); srclen = pdf_to_str_len(ctx, src); } else if (pdf_is_stream(ctx, src)) { stmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src)); srclen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&srcptr); } else { srclen = 0; } /* UTF-16BE */ if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { i = 2; while (i + 2 <= srclen) { /* skip language escape codes */ if (i + 6 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+4] == 0 && srcptr[i+5] == 27) { i += 6; } else if (i + 8 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+6] == 0 && srcptr[i+7] == 27) { i += 8; } else { i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen); dstlen += fz_runelen(ucs); } } dstptr = dst = fz_malloc(ctx, dstlen + 1); i = 2; while (i + 2 <= srclen) { /* skip language escape codes */ if (i + 6 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+4] == 0 && srcptr[i+5] == 27) { i += 6; } else if (i + 8 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+6] == 0 && srcptr[i+7] == 27) { i += 8; } else { i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen); dstptr += fz_runetochar(dstptr, ucs); } } } /* PDFDocEncoding */ else { for (i = 0; i < srclen; i++) dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 0; i < srclen; i++) { ucs = pdf_doc_encoding[srcptr[i]]; dstptr += fz_runetochar(dstptr, ucs); } } } fz_always(ctx) { fz_drop_buffer(ctx, stmbuf); } fz_catch(ctx) { fz_rethrow(ctx); } *dstptr = '\0'; return dst; }
static pdf_font_desc * load_cid_font(pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = doc->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, doc, descriptor, collection, basefont, 1, 1); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(doc, encoding); } else { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE || /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1565 */ !strcmp(pdf_to_name(pdf_dict_gets(dict, "Subtype")), "CIDFontType2") || /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1997 */ pdf_is_indirect(pdf_dict_gets(dict, "CIDToGIDMap"))) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(doc, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2318 */ else if (!strcmp(collection, "Adobe-Identity") && fontdesc->font->ft_file) fontdesc->font->ft_substitute = 0; } } /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1961 */ fz_try(ctx) { pdf_load_to_unicode(doc, fontdesc, NULL, collection, to_unicode); } fz_catch(ctx) { fz_warn(ctx, "cannot load ToUnicode CMap"); } /* If we have an identity encoding, we're supposed to use the glyph ids directly. * If we only have a substitute font, that won't work. * Make a last ditch attempt by using * the ToUnicode table if it exists to map via the substitute font's cmap. */ if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute) { fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont); if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap) fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); } /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }