static void expandstream(fz_obj *obj, int num, int gen) { fz_buffer *buf, *tmp; fz_obj *newlen; buf = pdf_load_stream(xref, num, gen); fz_dict_dels(obj, "Filter"); fz_dict_dels(obj, "DecodeParms"); if (doascii && isbinarystream(buf)) { tmp = hexbuf(buf->data, buf->len); fz_drop_buffer(ctx, buf); buf = tmp; addhexfilter(obj); } newlen = fz_new_int(ctx, buf->len); fz_dict_puts(obj, "Length", newlen); fz_drop_obj(newlen); fprintf(out, "%d %d obj\n", num, gen); fz_fprint_obj(out, obj, doexpand == 0); fprintf(out, "stream\n"); fwrite(buf->data, 1, buf->len, out); fprintf(out, "endstream\nendobj\n\n"); fz_drop_buffer(ctx, buf); }
static fz_jbig2_globals * pdf_load_jbig2_globals(fz_context *ctx, pdf_document *doc, pdf_obj *dict) { fz_jbig2_globals *globals; fz_buffer *buf = NULL; fz_var(buf); if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL) { return globals; } fz_try(ctx) { buf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, dict), pdf_to_gen(ctx, dict)); globals = fz_load_jbig2_globals(ctx, buf->data, buf->len); pdf_store_item(ctx, dict, globals, buf->len); } fz_always(ctx) { fz_drop_buffer(ctx, buf); } fz_catch(ctx) { fz_rethrow(ctx); } return globals; }
static void pdf_load_embedded_font(pdf_document *doc, pdf_font_desc *fontdesc, char *fontname, pdf_obj *stmref) { fz_buffer *buf; fz_context *ctx = doc->ctx; fz_try(ctx) { buf = pdf_load_stream(doc, pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load font stream (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_try(ctx) { fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1); } fz_always(ctx) { fz_drop_buffer(ctx, buf); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load embedded font (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fontdesc->size += buf->len; fontdesc->is_embedded = 1; }
static void pdf_load_embedded_font(pdf_document *xref, pdf_font_desc *fontdesc, char *fontname, pdf_obj *stmref) { fz_buffer *buf; fz_context *ctx = xref->ctx; fz_try(ctx) { buf = pdf_load_stream(xref, pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_catch(ctx) { fz_throw(ctx, "cannot load font stream (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fz_try(ctx) { fontdesc->font = fz_new_font_from_memory(ctx, fontname, buf->data, buf->len, 0, 1); } fz_catch(ctx) { fz_drop_buffer(ctx, buf); fz_throw(ctx, "cannot load embedded font (%d %d R)", pdf_to_num(stmref), pdf_to_gen(stmref)); } fontdesc->size += buf->len; /* save the buffer so we can free it later */ fontdesc->font->ft_data = buf->data; fontdesc->font->ft_size = buf->len; fz_free(ctx, buf); /* only free the fz_buffer struct, not the contained data */ fontdesc->is_embedded = 1; }
static int add_to_srcpage_stream(pdf_document *xref,fz_context *ctx,int pageref,int pagegen, pdf_obj *srcdict) { fz_buffer *srcbuf; fz_buffer *dstbuf; int dstlen; // printf("@add_to_srcpage_stream()...pageref=%d\n",pageref); srcbuf=pdf_load_stream(xref,pdf_to_num(srcdict),pdf_to_gen(srcdict)); if (srcbuf==NULL) { dstbuf=pdf_load_stream(xref,pageref,pagegen); if (dstbuf==NULL) return(0); dstlen=fz_buffer_storage(ctx,dstbuf,NULL); fz_drop_buffer(ctx,dstbuf); return(dstlen); } if (!pdf_is_stream(xref,pageref,pagegen)) dstbuf=fz_new_buffer(ctx,16); else { dstbuf=pdf_load_stream(xref,pageref,pagegen); if (dstbuf==NULL) dstbuf=fz_new_buffer(ctx,16); } /* Concatenate srcbuf to dstbuf: (Will srcbuf->data be allowed?) */ dstlen=fz_buffer_storage(ctx,dstbuf,NULL); /* printf(" dstlen before = %d\n",dstlen); printf(" srclen = %d\n",fz_buffer_storage(ctx,srcbuf,NULL)); printf(" srcptr = %p\n",srcbuf->data); */ fz_write_buffer(ctx,dstbuf,srcbuf->data,fz_buffer_storage(ctx,srcbuf,NULL)); dstlen=fz_buffer_storage(ctx,dstbuf,NULL); // printf(" dstlen after = %d\n",dstlen); fz_drop_buffer(ctx,srcbuf); pdf_update_stream(xref,pageref,dstbuf); fz_drop_buffer(ctx,dstbuf); return(dstlen); }
static fz_error pdf_load_jpx_image(fz_pixmap **imgp, pdf_xref *xref, fz_obj *dict) { fz_error error; fz_buffer *buf; fz_colorspace *colorspace; fz_pixmap *img; fz_obj *obj; colorspace = NULL; error = pdf_load_stream(&buf, xref, fz_to_num(dict), fz_to_gen(dict)); if (error) return fz_rethrow(error, "cannot load jpx image data"); obj = fz_dict_gets(dict, "ColorSpace"); if (obj) { error = pdf_load_colorspace(&colorspace, xref, obj); if (error) fz_catch(error, "cannot load image colorspace"); } error = fz_load_jpx_image(&img, buf->data, buf->len, colorspace); if (error) { if (colorspace) fz_drop_colorspace(colorspace); fz_drop_buffer(buf); return fz_rethrow(error, "cannot load jpx image"); } if (colorspace) fz_drop_colorspace(colorspace); fz_drop_buffer(buf); obj = fz_dict_getsa(dict, "SMask", "Mask"); if (fz_is_dict(obj)) { error = pdf_load_image_imp(&img->mask, xref, NULL, obj, NULL, 1); if (error) { fz_drop_pixmap(img); return fz_rethrow(error, "cannot load image mask/softmask"); } } *imgp = img; return fz_okay; }
fz_error pdf_load_pattern(pdf_pattern **patp, pdf_xref *xref, fz_obj *dict) { fz_error error; pdf_pattern *pat; fz_obj *obj; if ((*patp = pdf_find_item(xref->store, pdf_drop_pattern, dict))) { pdf_keep_pattern(*patp); return fz_okay; } pat = fz_malloc(sizeof(pdf_pattern)); pat->refs = 1; pat->resources = NULL; pat->contents = NULL; /* Store pattern now, to avoid possible recursion if objects refer back to this one */ pdf_store_item(xref->store, pdf_keep_pattern, pdf_drop_pattern, dict, pat); pat->ismask = fz_to_int(fz_dict_gets(dict, "PaintType")) == 2; pat->xstep = fz_to_real(fz_dict_gets(dict, "XStep")); pat->ystep = fz_to_real(fz_dict_gets(dict, "YStep")); obj = fz_dict_gets(dict, "BBox"); pat->bbox = pdf_to_rect(obj); obj = fz_dict_gets(dict, "Matrix"); if (obj) pat->matrix = pdf_to_matrix(obj); else pat->matrix = fz_identity; pat->resources = fz_dict_gets(dict, "Resources"); if (pat->resources) fz_keep_obj(pat->resources); error = pdf_load_stream(&pat->contents, xref, fz_to_num(dict), fz_to_gen(dict)); if (error) { pdf_remove_item(xref->store, pdf_drop_pattern, dict); pdf_drop_pattern(pat); return fz_rethrow(error, "cannot load pattern stream (%d %d R)", fz_to_num(dict), fz_to_gen(dict)); } *patp = pat; return fz_okay; }
static int stream_deflate(pdf_document *xref,fz_context *ctx,int pageref,int pagegen, int *length) { fz_buffer *strbuf; int n; unsigned char *p; #ifdef HAVE_Z_LIB char tempfile[512]; gzFile gz; FILE *f; int nw; #endif strbuf=pdf_load_stream(xref,pageref,pagegen); n=fz_buffer_storage(ctx,strbuf,&p); #ifdef HAVE_Z_LIB wfile_abstmpnam(tempfile); gz=gzopen(tempfile,"sab7"); gzwrite(gz,p,n); gzclose(gz); nw=wfile_size(tempfile); fz_resize_buffer(ctx,strbuf,nw+1); fz_buffer_storage(ctx,strbuf,&p); f=fopen(tempfile,"rb"); if (f==NULL || fread(p,1,nw,f)<nw) aprintf(ANSI_RED "** wmupdf: Error writing compressed stream to PDF file! **\n" ANSI_NORMAL); if (f!=NULL) fclose(f); remove(tempfile); p[nw]='\n'; strbuf->len=nw+1; pdf_update_stream(xref,pageref,strbuf); fz_drop_buffer(ctx,strbuf); /* printf(" After drop, xref->table[%d].stm_buf=%p, refs=%d\n",pageref,xref->table[pageref].stm_buf, xref->table[pageref].stm_buf->refs); */ (*length)=nw; return(1); #else fz_drop_buffer(ctx,strbuf); (*length)=n; return(0); #endif }
static char *get_string_or_stream(pdf_document *doc, pdf_obj *obj) { fz_context *ctx = doc->ctx; int len = 0; char *buf = NULL; fz_buffer *strmbuf = NULL; char *text = NULL; fz_var(strmbuf); fz_var(text); fz_try(ctx) { if (pdf_is_string(obj)) { len = pdf_to_str_len(obj); buf = pdf_to_str_buf(obj); } else if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj))) { strmbuf = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)); len = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&buf); } if (buf) { text = fz_malloc(ctx, len+1); memcpy(text, buf, len); text[len] = 0; } } fz_always(ctx) { fz_drop_buffer(ctx, strmbuf); } fz_catch(ctx) { fz_free(ctx, text); fz_rethrow(ctx); } return text; }
static pdf_font_desc * load_cid_font(pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = doc->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, doc, descriptor, collection, basefont, 1, 1); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(doc, encoding); } else { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE || /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1565 */ !strcmp(pdf_to_name(pdf_dict_gets(dict, "Subtype")), "CIDFontType2") || /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1997 */ pdf_is_indirect(pdf_dict_gets(dict, "CIDToGIDMap"))) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(doc, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2318 */ else if (!strcmp(collection, "Adobe-Identity") && fontdesc->font->ft_file) fontdesc->font->ft_substitute = 0; } } /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1961 */ fz_try(ctx) { pdf_load_to_unicode(doc, fontdesc, NULL, collection, to_unicode); } fz_catch(ctx) { fz_warn(ctx, "cannot load ToUnicode CMap"); } /* If we have an identity encoding, we're supposed to use the glyph ids directly. * If we only have a substitute font, that won't work. * Make a last ditch attempt by using * the ToUnicode table if it exists to map via the substitute font's cmap. */ if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute) { fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont); if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap) fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); } /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static fz_image * pdf_load_jpx(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int forcemask) { fz_buffer *buf = NULL; fz_colorspace *colorspace = NULL; fz_pixmap *pix = NULL; pdf_obj *obj; fz_image *mask = NULL; fz_image *img = NULL; fz_var(pix); fz_var(buf); fz_var(colorspace); fz_var(mask); buf = pdf_load_stream(ctx, dict); /* FIXME: We can't handle decode arrays for indexed images currently */ fz_try(ctx) { unsigned char *data; size_t len; obj = pdf_dict_get(ctx, dict, PDF_NAME(ColorSpace)); if (obj) colorspace = pdf_load_colorspace(ctx, obj); len = fz_buffer_storage(ctx, buf, &data); pix = fz_load_jpx(ctx, data, len, colorspace); obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask)); if (pdf_is_dict(ctx, obj)) { if (forcemask) fz_warn(ctx, "Ignoring recursive JPX soft mask"); else mask = pdf_load_image_imp(ctx, doc, NULL, obj, NULL, 1); } obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D)); if (obj && !fz_colorspace_is_indexed(ctx, colorspace)) { float decode[FZ_MAX_COLORS * 2]; int i; for (i = 0; i < pix->n * 2; i++) decode[i] = pdf_array_get_real(ctx, obj, i); fz_decode_tile(ctx, pix, decode); } img = fz_new_image_from_pixmap(ctx, pix, mask); } fz_always(ctx) { fz_drop_image(ctx, mask); fz_drop_pixmap(ctx, pix); fz_drop_colorspace(ctx, colorspace); fz_drop_buffer(ctx, buf); } fz_catch(ctx) { fz_rethrow(ctx); } return img; }
/* Convert Unicode/PdfDocEncoding string into utf-8 */ char * pdf_to_utf8(pdf_document *doc, pdf_obj *src) { fz_context *ctx = doc->ctx; fz_buffer *strmbuf = NULL; unsigned char *srcptr; char *dstptr, *dst; int srclen; int dstlen = 0; int ucs; int i; fz_var(strmbuf); fz_try(ctx) { if (pdf_is_string(src)) { srcptr = (unsigned char *) pdf_to_str_buf(src); srclen = pdf_to_str_len(src); } else if (pdf_is_stream(doc, pdf_to_num(src), pdf_to_gen(src))) { strmbuf = pdf_load_stream(doc, pdf_to_num(src), pdf_to_gen(src)); srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr); } else { srclen = 0; } if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstptr += fz_runetochar(dstptr, ucs); } } else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstptr += fz_runetochar(dstptr, ucs); } } else { for (i = 0; i < srclen; i++) dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 0; i < srclen; i++) { ucs = pdf_replace_undefined(pdf_doc_encoding[srcptr[i]]); dstptr += fz_runetochar(dstptr, ucs); } } } fz_always(ctx) { fz_drop_buffer(ctx, strmbuf); } fz_catch(ctx) { fz_rethrow(ctx); } *dstptr = '\0'; return dst; }
pdf_font_desc * pdf_load_type3_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_font *font; fz_var(fontdesc); /* Make a new type3 font entry in the document */ if (doc->num_type3_fonts == doc->max_type3_fonts) { int new_max = doc->max_type3_fonts * 2; if (new_max == 0) new_max = 4; doc->type3_fonts = fz_resize_array(ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts)); doc->max_type3_fonts = new_max; } fz_try(ctx) { obj = pdf_dict_get(ctx, dict, PDF_NAME_Name); if (pdf_is_name(ctx, obj)) fz_strlcpy(buf, pdf_to_name(ctx, obj), sizeof buf); else fz_strlcpy(buf, "Unnamed-T3", sizeof buf); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_get(ctx, dict, PDF_NAME_FontMatrix); pdf_to_matrix(ctx, obj, &matrix); obj = pdf_dict_get(ctx, dict, PDF_NAME_FontBBox); fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); font = fz_new_type3_font(ctx, buf, &matrix); fontdesc->font = font; fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_get(ctx, dict, PDF_NAME_Encoding); if (!encoding) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(ctx, encoding)) pdf_load_encoding(estrings, pdf_to_name(ctx, encoding)); if (pdf_is_dict(ctx, encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_get(ctx, encoding, PDF_NAME_BaseEncoding); if (pdf_is_name(ctx, base)) pdf_load_encoding(estrings, pdf_to_name(ctx, base)); diff = pdf_dict_get(ctx, encoding, PDF_NAME_Differences); if (pdf_is_array(ctx, diff)) { n = pdf_array_len(ctx, diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(ctx, diff, i); if (pdf_is_int(ctx, item)) k = pdf_to_int(ctx, item); if (pdf_is_name(ctx, item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(ctx, item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME_ToUnicode)); /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_FirstChar)); last = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_LastChar)); if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_get(ctx, dict, PDF_NAME_Widths); if (!widths) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(ctx, pdf_array_get(ctx, widths, i - first)); w = font->t3matrix.a * w * 1000; font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ font->t3freeres = pdf_t3_free_resources; font->t3resources = pdf_dict_get(ctx, dict, PDF_NAME_Resources); if (!font->t3resources) font->t3resources = rdb; if (font->t3resources) pdf_keep_obj(ctx, font->t3resources); if (!font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); font->t3doc = doc; font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_get(ctx, dict, PDF_NAME_CharProcs); if (!charprocs) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { obj = pdf_dict_gets(ctx, charprocs, estrings[i]); if (pdf_is_stream(ctx, obj)) { font->t3procs[i] = pdf_load_stream(ctx, obj); fz_trim_buffer(ctx, font->t3procs[i]); fontdesc->size += fz_buffer_storage(ctx, font->t3procs[i], NULL); fontdesc->size += 0; // TODO: display list size calculation } } } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_rethrow(ctx); } doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, font); return fontdesc; }
/* * Create a filter given a name and param dictionary. */ static fz_stream * build_filter(fz_stream *chain, pdf_document * xref, pdf_obj * f, pdf_obj * p, int num, int gen, pdf_image_params *params) { fz_context *ctx = chain->ctx; char *s = pdf_to_name(f); int predictor = pdf_to_int(pdf_dict_gets(p, "Predictor")); int columns = pdf_to_int(pdf_dict_gets(p, "Columns")); int colors = pdf_to_int(pdf_dict_gets(p, "Colors")); int bpc = pdf_to_int(pdf_dict_gets(p, "BitsPerComponent")); if (predictor == 0) predictor = 1; if (columns == 0) columns = 1; if (colors == 0) colors = 1; if (bpc == 0) bpc = 8; if (!strcmp(s, "ASCIIHexDecode") || !strcmp(s, "AHx")) return fz_open_ahxd(chain); else if (!strcmp(s, "ASCII85Decode") || !strcmp(s, "A85")) return fz_open_a85d(chain); else if (!strcmp(s, "CCITTFaxDecode") || !strcmp(s, "CCF")) { pdf_obj *k = pdf_dict_gets(p, "K"); pdf_obj *eol = pdf_dict_gets(p, "EndOfLine"); pdf_obj *eba = pdf_dict_gets(p, "EncodedByteAlign"); pdf_obj *columns = pdf_dict_gets(p, "Columns"); pdf_obj *rows = pdf_dict_gets(p, "Rows"); pdf_obj *eob = pdf_dict_gets(p, "EndOfBlock"); pdf_obj *bi1 = pdf_dict_gets(p, "BlackIs1"); if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_FAX; params->u.fax.k = (k ? pdf_to_int(k) : 0); params->u.fax.eol = (eol ? pdf_to_bool(eol) : 0); params->u.fax.eba = (eba ? pdf_to_bool(eba) : 0); params->u.fax.columns = (columns ? pdf_to_int(columns) : 1728); params->u.fax.rows = (rows ? pdf_to_int(rows) : 0); params->u.fax.eob = (eob ? pdf_to_bool(eob) : 1); params->u.fax.bi1 = (bi1 ? pdf_to_bool(bi1) : 0); return chain; } return fz_open_faxd(chain, k ? pdf_to_int(k) : 0, eol ? pdf_to_bool(eol) : 0, eba ? pdf_to_bool(eba) : 0, columns ? pdf_to_int(columns) : 1728, rows ? pdf_to_int(rows) : 0, eob ? pdf_to_bool(eob) : 1, bi1 ? pdf_to_bool(bi1) : 0); } else if (!strcmp(s, "DCTDecode") || !strcmp(s, "DCT")) { pdf_obj *ct = pdf_dict_gets(p, "ColorTransform"); if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_JPEG; params->u.jpeg.ct = (ct ? pdf_to_int(ct) : -1); return chain; } return fz_open_dctd(chain, ct ? pdf_to_int(ct) : -1); } else if (!strcmp(s, "RunLengthDecode") || !strcmp(s, "RL")) { if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_RLD; return chain; } return fz_open_rld(chain); } else if (!strcmp(s, "FlateDecode") || !strcmp(s, "Fl")) { if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_FLATE; params->u.flate.predictor = predictor; params->u.flate.columns = columns; params->u.flate.colors = colors; params->u.flate.bpc = bpc; return chain; } chain = fz_open_flated(chain); if (predictor > 1) chain = fz_open_predict(chain, predictor, columns, colors, bpc); return chain; } else if (!strcmp(s, "LZWDecode") || !strcmp(s, "LZW")) { pdf_obj *ec = pdf_dict_gets(p, "EarlyChange"); if (params) { /* We will shortstop here */ params->type = PDF_IMAGE_LZW; params->u.lzw.predictor = predictor; params->u.lzw.columns = columns; params->u.lzw.colors = colors; params->u.lzw.bpc = bpc; params->u.lzw.ec = (ec ? pdf_to_int(ec) : 1); return chain; } chain = fz_open_lzwd(chain, ec ? pdf_to_int(ec) : 1); if (predictor > 1) chain = fz_open_predict(chain, predictor, columns, colors, bpc); return chain; } else if (!strcmp(s, "JBIG2Decode")) { fz_buffer *globals = NULL; pdf_obj *obj = pdf_dict_gets(p, "JBIG2Globals"); if (obj) globals = pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); /* fz_open_jbig2d takes possession of globals */ return fz_open_jbig2d(chain, globals); } else if (!strcmp(s, "JPXDecode")) return chain; /* JPX decoding is special cased in the image loading code */ else if (!strcmp(s, "Crypt")) { pdf_obj *name; if (!xref->crypt) { fz_warn(ctx, "crypt filter in unencrypted document"); return chain; } name = pdf_dict_gets(p, "Name"); if (pdf_is_name(name)) return pdf_open_crypt_with_filter(chain, xref->crypt, pdf_to_name(name), num, gen); return chain; } fz_warn(ctx, "unknown filter name (%s)", s); return chain; }
/* Convert Unicode/PdfDocEncoding string into utf-8 */ char * pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src) { fz_buffer *stmbuf = NULL; unsigned char *srcptr; char *dstptr, *dst; size_t srclen; size_t dstlen = 0; int ucs; size_t i; fz_var(stmbuf); fz_try(ctx) { if (pdf_is_string(ctx, src)) { srcptr = (unsigned char *) pdf_to_str_buf(ctx, src); srclen = pdf_to_str_len(ctx, src); } else if (pdf_is_stream(ctx, src)) { stmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src)); srclen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&srcptr); } else { srclen = 0; } /* UTF-16BE */ if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { i = 2; while (i + 2 <= srclen) { /* skip language escape codes */ if (i + 6 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+4] == 0 && srcptr[i+5] == 27) { i += 6; } else if (i + 8 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+6] == 0 && srcptr[i+7] == 27) { i += 8; } else { i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen); dstlen += fz_runelen(ucs); } } dstptr = dst = fz_malloc(ctx, dstlen + 1); i = 2; while (i + 2 <= srclen) { /* skip language escape codes */ if (i + 6 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+4] == 0 && srcptr[i+5] == 27) { i += 6; } else if (i + 8 <= srclen && srcptr[i+0] == 0 && srcptr[i+1] == 27 && srcptr[i+6] == 0 && srcptr[i+7] == 27) { i += 8; } else { i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen); dstptr += fz_runetochar(dstptr, ucs); } } } /* PDFDocEncoding */ else { for (i = 0; i < srclen; i++) dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 0; i < srclen; i++) { ucs = pdf_doc_encoding[srcptr[i]]; dstptr += fz_runetochar(dstptr, ucs); } } } fz_always(ctx) { fz_drop_buffer(ctx, stmbuf); } fz_catch(ctx) { fz_rethrow(ctx); } *dstptr = '\0'; return dst; }
fz_buffer *pdf_portfolio_entry(fz_context *ctx, pdf_document *doc, int entry) { pdf_obj *obj = pdf_portfolio_entry_obj(ctx, doc, entry); return pdf_load_stream(ctx, pdf_dict_getl(ctx, obj, PDF_NAME_EF, PDF_NAME_F, NULL)); }
pdf_font_desc * pdf_load_type3_font(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_context *ctx = doc->ctx; fz_var(fontdesc); /* Make a new type3 font entry in the document */ if (doc->num_type3_fonts == doc->max_type3_fonts) { int new_max = doc->max_type3_fonts * 2; if (new_max == 0) new_max = 4; doc->type3_fonts = fz_resize_array(doc->ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts)); doc->max_type3_fonts = new_max; } fz_try(ctx) { obj = pdf_dict_gets(dict, "Name"); if (pdf_is_name(obj)) fz_strlcpy(buf, pdf_to_name(obj), sizeof buf); else sprintf(buf, "Unnamed-T3"); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_gets(dict, "FontMatrix"); pdf_to_matrix(ctx, obj, &matrix); obj = pdf_dict_gets(dict, "FontBBox"); fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); fontdesc->font = fz_new_type3_font(ctx, buf, &matrix); fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* SumatraPDF: expose Type3 FontDescriptor flags */ fontdesc->flags = pdf_to_int(pdf_dict_gets(pdf_dict_gets(dict, "FontDescriptor"), "Flags")); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_gets(dict, "Encoding"); if (!encoding) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); /* SumatraPDF: trying to match Adobe Reader's behavior */ if (!(fontdesc->flags & PDF_FD_SYMBOLIC) && fontdesc->cid_to_ucs_len >= 128) for (i = 32; i < 128; i++) if (fontdesc->cid_to_ucs[i] == '?' || fontdesc->cid_to_ucs[i] == '\0') fontdesc->cid_to_ucs[i] = i; /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1966 */ if (first >= 256 && last - first < 256) { fz_warn(ctx, "ignoring out-of-bound values for FirstChar/LastChar: %d/%d", first, last); last -= first; first = 0; } if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_gets(dict, "Widths"); if (!widths) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(pdf_array_get(widths, i - first)); w = fontdesc->font->t3matrix.a * w * 1000; fontdesc->font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ fontdesc->font->t3freeres = pdf_t3_free_resources; fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources"); if (!fontdesc->font->t3resources) fontdesc->font->t3resources = rdb; if (fontdesc->font->t3resources) pdf_keep_obj(fontdesc->font->t3resources); if (!fontdesc->font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); fontdesc->font->t3doc = doc; fontdesc->font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_gets(dict, "CharProcs"); if (!charprocs) { fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { /* SumatraPDF: don't reject fonts with few broken glyphs */ fz_try(ctx) { obj = pdf_dict_gets(charprocs, estrings[i]); if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj))) { fontdesc->font->t3procs[i] = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)); fontdesc->size += fontdesc->font->t3procs[i]->cap; fontdesc->size += 0; // TODO: display list size calculation } } fz_catch(ctx) { fz_warn(ctx, "failed to get data for type 3 glyph '%s'", estrings[i]); } } } } fz_catch(ctx) { if (fontdesc) pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, fontdesc->font); return fontdesc; }
/* * Create a filter given a name and param dictionary. */ static fz_stream * build_filter(fz_stream *chain, pdf_xref * xref, fz_obj * f, fz_obj * p, int num, int gen) { fz_error error; char *s; s = fz_to_name(f); if (!strcmp(s, "ASCIIHexDecode") || !strcmp(s, "AHx")) return fz_open_ahxd(chain); else if (!strcmp(s, "ASCII85Decode") || !strcmp(s, "A85")) return fz_open_a85d(chain); else if (!strcmp(s, "CCITTFaxDecode") || !strcmp(s, "CCF")) return fz_open_faxd(chain, p); else if (!strcmp(s, "DCTDecode") || !strcmp(s, "DCT")) return fz_open_dctd(chain, p); else if (!strcmp(s, "RunLengthDecode") || !strcmp(s, "RL")) return fz_open_rld(chain); else if (!strcmp(s, "FlateDecode") || !strcmp(s, "Fl")) { fz_obj *obj = fz_dict_gets(p, "Predictor"); if (fz_to_int(obj) > 1) return fz_open_predict(fz_open_flated(chain), p); return fz_open_flated(chain); } else if (!strcmp(s, "LZWDecode") || !strcmp(s, "LZW")) { fz_obj *obj = fz_dict_gets(p, "Predictor"); if (fz_to_int(obj) > 1) return fz_open_predict(fz_open_lzwd(chain, p), p); return fz_open_lzwd(chain, p); } else if (!strcmp(s, "JBIG2Decode")) { fz_obj *obj = fz_dict_gets(p, "JBIG2Globals"); if (obj) { fz_buffer *globals; error = pdf_load_stream(&globals, xref, fz_to_num(obj), fz_to_gen(obj)); if (error) fz_catch(error, "cannot load jbig2 global segments"); chain = fz_open_jbig2d(chain, globals); fz_drop_buffer(globals); return chain; } return fz_open_jbig2d(chain, NULL); } else if (!strcmp(s, "JPXDecode")) return chain; /* JPX decoding is special cased in the image loading code */ else if (!strcmp(s, "Crypt")) { fz_obj *name; if (!xref->crypt) { fz_warn("crypt filter in unencrypted document"); return chain; } name = fz_dict_gets(p, "Name"); if (fz_is_name(name)) return pdf_open_crypt_with_filter(chain, xref->crypt, fz_to_name(name), num, gen); return chain; } fz_warn("unknown filter name (%s)", s); return chain; }
pdf_font_desc * pdf_load_type3_font(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict) { char buf[256]; char *estrings[256]; pdf_font_desc *fontdesc = NULL; pdf_obj *encoding; pdf_obj *widths; pdf_obj *charprocs; pdf_obj *obj; int first, last; int i, k, n; fz_rect bbox; fz_matrix matrix; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_try(ctx) { obj = pdf_dict_gets(dict, "Name"); if (pdf_is_name(obj)) fz_strlcpy(buf, pdf_to_name(obj), sizeof buf); else sprintf(buf, "Unnamed-T3"); fontdesc = pdf_new_font_desc(ctx); obj = pdf_dict_gets(dict, "FontMatrix"); pdf_to_matrix(ctx, obj, &matrix); obj = pdf_dict_gets(dict, "FontBBox"); fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix); fontdesc->font = fz_new_type3_font(ctx, buf, &matrix); fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float)); fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1); /* Encoding */ for (i = 0; i < 256; i++) estrings[i] = NULL; encoding = pdf_dict_gets(dict, "Encoding"); if (!encoding) { fz_throw(ctx, "syntaxerror: Type3 font missing Encoding"); } if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, 0); first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; widths = pdf_dict_gets(dict, "Widths"); if (!widths) { fz_throw(ctx, "syntaxerror: Type3 font missing Widths"); } for (i = first; i <= last; i++) { float w = pdf_to_real(pdf_array_get(widths, i - first)); w = fontdesc->font->t3matrix.a * w * 1000; fontdesc->font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); } pdf_end_hmtx(ctx, fontdesc); /* Resources -- inherit page resources if the font doesn't have its own */ fontdesc->font->t3freeres = pdf_t3_free_resources; fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources"); if (!fontdesc->font->t3resources) fontdesc->font->t3resources = rdb; if (fontdesc->font->t3resources) pdf_keep_obj(fontdesc->font->t3resources); if (!fontdesc->font->t3resources) fz_warn(ctx, "no resource dictionary for type 3 font!"); fontdesc->font->t3doc = xref; fontdesc->font->t3run = pdf_run_glyph_func; /* CharProcs */ charprocs = pdf_dict_gets(dict, "CharProcs"); if (!charprocs) { fz_throw(ctx, "syntaxerror: Type3 font missing CharProcs"); } for (i = 0; i < 256; i++) { if (estrings[i]) { obj = pdf_dict_gets(charprocs, estrings[i]); if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj))) { fontdesc->font->t3procs[i] = pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)); fontdesc->size += fontdesc->font->t3procs[i]->cap; fontdesc->size += 0; // TODO: display list size calculation } } } } fz_catch(ctx) { if (fontdesc) pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static void savefont(pdf_obj *dict, int num) { char namebuf[1024]; fz_buffer *buf; pdf_obj *stream = NULL; pdf_obj *obj; char *ext = ""; fz_output *out; char *fontname = "font"; size_t len; unsigned char *data; obj = pdf_dict_get(ctx, dict, PDF_NAME_FontName); if (obj) fontname = pdf_to_name(ctx, obj); obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile); if (obj) { stream = obj; ext = "pfa"; } obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile2); if (obj) { stream = obj; ext = "ttf"; } obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile3); if (obj) { stream = obj; obj = pdf_dict_get(ctx, obj, PDF_NAME_Subtype); if (obj && !pdf_is_name(ctx, obj)) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype"); if (pdf_name_eq(ctx, obj, PDF_NAME_Type1C)) ext = "cff"; else if (pdf_name_eq(ctx, obj, PDF_NAME_CIDFontType0C)) ext = "cid"; else if (pdf_name_eq(ctx, obj, PDF_NAME_OpenType)) ext = "otf"; else fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", pdf_to_name(ctx, obj)); } if (!stream) { fz_warn(ctx, "unhandled font type"); return; } buf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, stream)); len = fz_buffer_storage(ctx, buf, &data); fz_try(ctx) { snprintf(namebuf, sizeof(namebuf), "%s-%04d.%s", fontname, num, ext); printf("extracting font %s\n", namebuf); out = fz_new_output_with_path(ctx, namebuf, 0); fz_try(ctx) fz_write(ctx, out, data, len); fz_always(ctx) fz_drop_output(ctx, out); fz_catch(ctx) fz_rethrow(ctx); } fz_always(ctx) fz_drop_buffer(ctx, buf); fz_catch(ctx) fz_rethrow(ctx); }
static void savefont(pdf_obj *dict, int num) { char namebuf[1024]; char *subtype; fz_buffer *buf; pdf_obj *stream = NULL; pdf_obj *obj; char *ext = ""; FILE *f; char *fontname = "font"; int n, len; unsigned char *data; obj = pdf_dict_gets(dict, "FontName"); if (obj) fontname = pdf_to_name(obj); obj = pdf_dict_gets(dict, "FontFile"); if (obj) { stream = obj; ext = "pfa"; } obj = pdf_dict_gets(dict, "FontFile2"); if (obj) { stream = obj; ext = "ttf"; } obj = pdf_dict_gets(dict, "FontFile3"); if (obj) { stream = obj; obj = pdf_dict_gets(obj, "Subtype"); if (obj && !pdf_is_name(obj)) fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype"); subtype = pdf_to_name(obj); if (!strcmp(subtype, "Type1C")) ext = "cff"; else if (!strcmp(subtype, "CIDFontType0C")) ext = "cid"; else if (!strcmp(subtype, "OpenType")) ext = "otf"; else fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", subtype); } if (!stream) { fz_warn(ctx, "unhandled font type"); return; } buf = pdf_load_stream(doc, pdf_to_num(stream), pdf_to_gen(stream)); snprintf(namebuf, sizeof(namebuf), "%s-%04d.%s", fontname, num, ext); printf("extracting font %s\n", namebuf); f = fopen(namebuf, "wb"); if (!f) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create font file"); len = fz_buffer_storage(ctx, buf, &data); n = fwrite(data, 1, len, f); if (n < len) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot write font file"); if (fclose(f) < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot close font file"); fz_drop_buffer(ctx, buf); }
static fz_image * pdf_load_jpx(pdf_document *doc, pdf_obj *dict, int forcemask) { fz_buffer *buf = NULL; fz_colorspace *colorspace = NULL; fz_pixmap *img = NULL; pdf_obj *obj; fz_context *ctx = doc->ctx; int indexed = 0; fz_image *mask = NULL; fz_var(img); fz_var(buf); fz_var(colorspace); fz_var(mask); buf = pdf_load_stream(doc, pdf_to_num(dict), pdf_to_gen(dict)); /* FIXME: We can't handle decode arrays for indexed images currently */ fz_try(ctx) { obj = pdf_dict_gets(dict, "ColorSpace"); if (obj) { colorspace = pdf_load_colorspace(doc, obj); indexed = fz_colorspace_is_indexed(colorspace); } img = fz_load_jpx(ctx, buf->data, buf->len, colorspace, indexed); obj = pdf_dict_getsa(dict, "SMask", "Mask"); if (pdf_is_dict(obj)) { if (forcemask) fz_warn(ctx, "Ignoring recursive JPX soft mask"); else mask = pdf_load_image_imp(doc, NULL, obj, NULL, 1); } obj = pdf_dict_getsa(dict, "Decode", "D"); if (obj && !indexed) { float decode[FZ_MAX_COLORS * 2]; int i; for (i = 0; i < img->n * 2; i++) decode[i] = pdf_to_real(pdf_array_get(obj, i)); fz_decode_tile(img, decode); } } fz_always(ctx) { fz_drop_colorspace(ctx, colorspace); fz_drop_buffer(ctx, buf); } fz_catch(ctx) { fz_drop_pixmap(ctx, img); fz_rethrow(ctx); } return fz_new_image_from_pixmap(ctx, img, mask); }
static void pdf_load_jpx(pdf_document *xref, pdf_obj *dict, pdf_image *image, int forcemask) { fz_buffer *buf = NULL; fz_colorspace *colorspace = NULL; fz_pixmap *img = NULL; pdf_obj *obj; fz_context *ctx = xref->ctx; int indexed = 0; fz_var(img); fz_var(buf); fz_var(colorspace); buf = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); /* FIXME: We can't handle decode arrays for indexed images currently */ fz_try(ctx) { obj = pdf_dict_gets(dict, "ColorSpace"); if (obj) { colorspace = pdf_load_colorspace(xref, obj); indexed = !strcmp(colorspace->name, "Indexed"); } img = fz_load_jpx(ctx, buf->data, buf->len, colorspace, indexed); if (img && colorspace == NULL) colorspace = fz_keep_colorspace(ctx, img->colorspace); fz_drop_buffer(ctx, buf); buf = NULL; obj = pdf_dict_getsa(dict, "SMask", "Mask"); if (pdf_is_dict(obj)) { if (forcemask) fz_warn(ctx, "Ignoring recursive JPX soft mask"); else image->base.mask = (fz_image *)pdf_load_image_imp(xref, NULL, obj, NULL, 1); } obj = pdf_dict_getsa(dict, "Decode", "D"); if (obj && !indexed) { float decode[FZ_MAX_COLORS * 2]; int i; for (i = 0; i < img->n * 2; i++) decode[i] = pdf_to_real(pdf_array_get(obj, i)); fz_decode_tile(img, decode); } } fz_catch(ctx) { if (colorspace) fz_drop_colorspace(ctx, colorspace); fz_drop_buffer(ctx, buf); fz_drop_pixmap(ctx, img); fz_rethrow(ctx); } FZ_INIT_STORABLE(&image->base, 1, pdf_free_image); image->base.get_pixmap = pdf_image_get_pixmap; image->base.w = img->w; image->base.h = img->h; image->base.bpc = 8; image->base.colorspace = colorspace; image->buffer = NULL; image->tile = img; image->n = img->n; image->interpolate = 0; image->imagemask = 0; image->usecolorkey = 0; }
pdf_xobject * pdf_load_xobject(pdf_document *xref, pdf_obj *dict) { pdf_xobject *form; pdf_obj *obj; fz_context *ctx = xref->ctx; if ((form = pdf_find_item(ctx, pdf_free_xobject_imp, dict))) { return form; } form = fz_malloc_struct(ctx, pdf_xobject); FZ_INIT_STORABLE(form, 1, pdf_free_xobject_imp); form->resources = NULL; form->contents = NULL; form->colorspace = NULL; form->me = NULL; /* Store item immediately, to avoid possible recursion if objects refer back to this one */ pdf_store_item(ctx, dict, form, pdf_xobject_size(form)); obj = pdf_dict_gets(dict, "BBox"); form->bbox = pdf_to_rect(ctx, obj); obj = pdf_dict_gets(dict, "Matrix"); if (obj) form->matrix = pdf_to_matrix(ctx, obj); else form->matrix = fz_identity; form->isolated = 0; form->knockout = 0; form->transparency = 0; obj = pdf_dict_gets(dict, "Group"); if (obj) { pdf_obj *attrs = obj; form->isolated = pdf_to_bool(pdf_dict_gets(attrs, "I")); form->knockout = pdf_to_bool(pdf_dict_gets(attrs, "K")); obj = pdf_dict_gets(attrs, "S"); if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "Transparency")) form->transparency = 1; obj = pdf_dict_gets(attrs, "CS"); if (obj) { form->colorspace = pdf_load_colorspace(xref, obj); if (!form->colorspace) fz_throw(ctx, "cannot load xobject colorspace"); } } form->resources = pdf_dict_gets(dict, "Resources"); if (form->resources) pdf_keep_obj(form->resources); fz_try(ctx) { form->contents = pdf_load_stream(xref, pdf_to_num(dict), pdf_to_gen(dict)); } fz_catch(ctx) { pdf_remove_item(ctx, pdf_free_xobject_imp, dict); pdf_drop_xobject(ctx, form); fz_throw(ctx, "cannot load xobject content stream (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } form->me = pdf_keep_obj(dict); return form; }
static pdf_font_desc * load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) { pdf_obj *widths; pdf_obj *descriptor; pdf_font_desc *fontdesc = NULL; FT_Face face; int kind; char collection[256]; char *basefont; int i, k, fterr; pdf_obj *obj; int dw; fz_context *ctx = xref->ctx; fz_var(fontdesc); fz_try(ctx) { /* Get font name and CID collection */ basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); { pdf_obj *cidinfo; char tmpstr[64]; int tmplen; cidinfo = pdf_dict_gets(dict, "CIDSystemInfo"); if (!cidinfo) fz_throw(ctx, "cid font is missing info"); obj = pdf_dict_gets(cidinfo, "Registry"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcpy(collection, tmpstr, sizeof collection); fz_strlcat(collection, "-", sizeof collection); obj = pdf_dict_gets(cidinfo, "Ordering"); tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj)); memcpy(tmpstr, pdf_to_str_buf(obj), tmplen); tmpstr[tmplen] = '\0'; fz_strlcat(collection, tmpstr, sizeof collection); } /* Load font file */ fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (!descriptor) fz_throw(ctx, "syntaxerror: missing font descriptor"); pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont); face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ if (pdf_is_name(encoding)) { if (!strcmp(pdf_to_name(encoding), "Identity-H")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2); else if (!strcmp(pdf_to_name(encoding), "Identity-V")) fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2); else fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding)); } else if (pdf_is_indirect(encoding)) { fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding); } else { fz_throw(ctx, "syntaxerror: font missing encoding"); } fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); if (kind == TRUETYPE) { pdf_obj *cidtogidmap; cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap"); if (pdf_is_indirect(cidtogidmap)) { fz_buffer *buf; buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap)); fontdesc->cid_to_gid_len = (buf->len) / 2; fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short)); fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); for (i = 0; i < fontdesc->cid_to_gid_len; i++) fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1]; fz_drop_buffer(ctx, buf); } /* if truetype font is external, cidtogidmap should not be identity */ /* so we map from cid to unicode and then map that through the (3 1) */ /* unicode cmap to get a glyph id */ else if (fontdesc->font->ft_substitute) { fterr = FT_Select_Charmap(face, ft_encoding_unicode); if (fterr) { fz_throw(ctx, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); } if (!strcmp(collection, "Adobe-CNS1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); else if (!strcmp(collection, "Adobe-GB1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); else if (!strcmp(collection, "Adobe-Japan2")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); else if (!strcmp(collection, "Adobe-Korea1")) fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); } } pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode); /* Horizontal */ dw = 1000; obj = pdf_dict_gets(dict, "DW"); if (obj) dw = pdf_to_int(obj); pdf_set_default_hmtx(ctx, fontdesc, dw); widths = pdf_dict_gets(dict, "W"); if (widths) { int c0, c1, w, n, m; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { m = pdf_array_len(obj); for (k = 0; k < m; k++) { w = pdf_to_int(pdf_array_get(obj, k)); pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); pdf_add_hmtx(ctx, fontdesc, c0, c1, w); i += 3; } } } pdf_end_hmtx(ctx, fontdesc); /* Vertical */ if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) { int dw2y = 880; int dw2w = -1000; obj = pdf_dict_gets(dict, "DW2"); if (obj) { dw2y = pdf_to_int(pdf_array_get(obj, 0)); dw2w = pdf_to_int(pdf_array_get(obj, 1)); } pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); widths = pdf_dict_gets(dict, "W2"); if (widths) { int c0, c1, w, x, y, n; n = pdf_array_len(widths); for (i = 0; i < n; ) { c0 = pdf_to_int(pdf_array_get(widths, i)); obj = pdf_array_get(widths, i + 1); if (pdf_is_array(obj)) { int m = pdf_array_len(obj); for (k = 0; k * 3 < m; k ++) { w = pdf_to_int(pdf_array_get(obj, k * 3 + 0)); x = pdf_to_int(pdf_array_get(obj, k * 3 + 1)); y = pdf_to_int(pdf_array_get(obj, k * 3 + 2)); pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); } i += 2; } else { c1 = pdf_to_int(obj); w = pdf_to_int(pdf_array_get(widths, i + 2)); x = pdf_to_int(pdf_array_get(widths, i + 3)); y = pdf_to_int(pdf_array_get(widths, i + 4)); pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); i += 5; } } } pdf_end_vmtx(ctx, fontdesc); } } fz_catch(ctx) { pdf_drop_font(ctx, fontdesc); fz_throw(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
static void savefont(fz_obj *dict, int num) { fz_error error; char name[1024]; char *subtype; fz_buffer *buf; fz_obj *stream = NULL; fz_obj *obj; char *ext = ""; FILE *f; char *fontname = "font"; int n; obj = fz_dict_gets(ctx, dict, "FontName"); if (obj) fontname = fz_to_name(ctx, obj); obj = fz_dict_gets(ctx, dict, "FontFile"); if (obj) { stream = obj; ext = "pfa"; } obj = fz_dict_gets(ctx, dict, "FontFile2"); if (obj) { stream = obj; ext = "ttf"; } obj = fz_dict_gets(ctx, dict, "FontFile3"); if (obj) { stream = obj; obj = fz_dict_gets(ctx, obj, "Subtype"); if (obj && !fz_is_name(ctx, obj)) die(fz_error_make(ctx, "Invalid font descriptor subtype")); subtype = fz_to_name(ctx, obj); if (!strcmp(subtype, "Type1C")) ext = "cff"; else if (!strcmp(subtype, "CIDFontType0C")) ext = "cid"; else die(fz_error_make(ctx, "Unhandled font type '%s'", subtype)); } if (!stream) { fz_warn(ctx, "Unhandled font type"); return; } buf = fz_new_buffer(ctx, 0); error = pdf_load_stream(&buf, xref, fz_to_num(stream), fz_to_gen(stream)); if (error) die(error); sprintf(name, "%s-%04d.%s", fontname, num, ext); printf("extracting font %s\n", name); f = fopen(name, "wb"); if (f == NULL) die(fz_error_make(ctx, "Error creating font file")); n = fwrite(buf->data, 1, buf->len, f); if (n < buf->len) die(fz_error_make(ctx, "Error writing font file")); if (fclose(f) < 0) die(fz_error_make(ctx, "Error closing font file")); fz_drop_buffer(ctx, buf); }