/*
 * Read the parameters shared by the mesh shading types from a shading
 * dictionary into *p.
 *
 * Defaults are installed first (coordinate range 0..1 and every colour
 * component range 0..1), then VerticesPerRow, BitsPerFlag,
 * BitsPerCoordinate, BitsPerComponent and the Decode array are read
 * from dict. Values outside the accepted sets are replaced by
 * fallbacks (vprow >= 2, all bit depths default to 8).
 *
 * xref is not used by this function.
 */
static void
pdf_load_mesh_params(pdf_xref *xref, fz_obj *dict, struct mesh_params *p)
{
	fz_obj *obj;
	int i, n;

	p->x0 = p->y0 = 0;
	p->x1 = p->y1 = 1;
	for (i = 0; i < FZ_MAX_COLORS; i++)
	{
		p->c0[i] = 0;
		p->c1[i] = 1;
	}

	p->vprow = fz_to_int(fz_dict_gets(dict, "VerticesPerRow"));
	p->bpflag = fz_to_int(fz_dict_gets(dict, "BitsPerFlag"));
	p->bpcoord = fz_to_int(fz_dict_gets(dict, "BitsPerCoordinate"));
	p->bpcomp = fz_to_int(fz_dict_gets(dict, "BitsPerComponent"));

	/* Decode is [x0 x1 y0 y1 c0_0 c1_0 c0_1 c1_1 ...] */
	obj = fz_dict_gets(dict, "Decode");
	if (fz_array_len(obj) >= 6)
	{
		n = (fz_array_len(obj) - 4) / 2;
		/*
		 * The array length comes from the file. Only the first
		 * FZ_MAX_COLORS entries of c0/c1 are known to exist (see the
		 * default loop above), so never write past them.
		 */
		if (n > FZ_MAX_COLORS)
			n = FZ_MAX_COLORS;

		p->x0 = fz_to_real(fz_array_get(obj, 0));
		p->x1 = fz_to_real(fz_array_get(obj, 1));
		p->y0 = fz_to_real(fz_array_get(obj, 2));
		p->y1 = fz_to_real(fz_array_get(obj, 3));
		for (i = 0; i < n; i++)
		{
			p->c0[i] = fz_to_real(fz_array_get(obj, 4 + i * 2));
			p->c1[i] = fz_to_real(fz_array_get(obj, 5 + i * 2));
		}
	}

	/* Replace missing or unsupported values with fallbacks */
	if (p->vprow < 2)
		p->vprow = 2;

	if (p->bpflag != 2 && p->bpflag != 4 && p->bpflag != 8)
		p->bpflag = 8;

	if (p->bpcoord != 1 && p->bpcoord != 2 && p->bpcoord != 4 &&
		p->bpcoord != 8 && p->bpcoord != 12 && p->bpcoord != 16 &&
		p->bpcoord != 24 && p->bpcoord != 32)
		p->bpcoord = 8;

	if (p->bpcomp != 1 && p->bpcomp != 2 && p->bpcomp != 4 &&
		p->bpcomp != 8 && p->bpcomp != 12 && p->bpcomp != 16)
		p->bpcomp = 8;
}
/*
 * Load raw (compressed but decrypted) contents of a stream into buf.
 *
 * The stream dictionary of object (num gen R) is loaded only to fetch
 * its Length entry, which is handed to fz_read_all. Returns fz_okay on
 * success, otherwise a rethrown error naming the failing step.
 */
fz_error
pdf_load_raw_stream(fz_buffer **bufp, pdf_xref *xref, int num, int gen)
{
	fz_error error;
	fz_stream *raw;
	fz_obj *stmdict;
	int length;

	error = pdf_load_object(&stmdict, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load stream dictionary (%d %d R)", num, gen);

	/* The dictionary is not needed once Length has been read */
	length = fz_to_int(fz_dict_gets(stmdict, "Length"));
	fz_drop_obj(stmdict);

	error = pdf_open_raw_stream(&raw, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot open raw stream (%d %d R)", num, gen);

	/* The stream is closed whether or not the read succeeded */
	error = fz_read_all(bufp, raw, length);
	fz_close(raw);
	if (error)
		return fz_rethrow(error, "cannot read raw stream (%d %d R)", num, gen);

	return fz_okay;
}
/* SumatraPDF: synthesize appearance streams for a few more annotations */

/*
 * Wrap a generated content buffer in a form XObject and attach it to a
 * newly allocated annotation.
 *
 * The form's matrix is the rotation read from base_obj's /MK /R entry;
 * its bbox starts at the origin and spans rect's width and height,
 * swapped when the rotation is not a multiple of 180. base_obj, content
 * and resources are stored as passed in (no extra reference is taken
 * here).
 */
static pdf_annot *
pdf_create_annot(fz_context *ctx, fz_rect rect, fz_obj *base_obj, fz_buffer *content, fz_obj *resources, int transparency)
{
	pdf_annot *result;
	pdf_xobject *xobj;
	int rotate = fz_to_int(ctx, fz_dict_gets(ctx, fz_dict_gets(ctx, base_obj, "MK"), "R"));

	xobj = fz_malloc(ctx, sizeof(pdf_xobject));
	memset(xobj, 0, sizeof(pdf_xobject));
	xobj->refs = 1;
	xobj->matrix = fz_rotate(rotate);
	if (rotate % 180 == 0)
	{
		xobj->bbox.x1 = rect.x1 - rect.x0;
		xobj->bbox.y1 = rect.y1 - rect.y0;
	}
	else
	{
		/* quarter turns exchange width and height */
		xobj->bbox.x1 = rect.y1 - rect.y0;
		xobj->bbox.y1 = rect.x1 - rect.x0;
	}
	xobj->transparency = transparency;
	xobj->isolated = !transparency;
	xobj->contents = content;
	xobj->resources = resources;

	result = fz_malloc(ctx, sizeof(pdf_annot));
	result->obj = base_obj;
	result->rect = rect;
	result->ap = xobj;
	result->next = NULL;

	pdf_transform_annot(result);

	return result;
}
/*
 * Load uncompressed contents of a stream into buf.
 *
 * Opens the decoded stream for object (num gen R), then loads the
 * stream dictionary to derive a length estimate: the Length entry is
 * passed through pdf_guess_filter_length once for a single Filter name
 * and once per entry of a Filter array. The estimate is handed to
 * fz_read_all.
 *
 * Returns fz_okay on success, otherwise a rethrown error. The stream
 * is closed on every path after it has been opened.
 */
fz_error
pdf_load_stream(fz_buffer **bufp, pdf_xref *xref, int num, int gen)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *dict, *obj;
	int i, len;

	error = pdf_open_stream(&stm, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot open stream (%d %d R)", num, gen);

	error = pdf_load_object(&dict, xref, num, gen);
	if (error)
	{
		/* stm is already open at this point; do not leak it */
		fz_close(stm);
		return fz_rethrow(error, "cannot load stream dictionary (%d %d R)", num, gen);
	}

	len = fz_to_int(fz_dict_gets(dict, "Length"));
	obj = fz_dict_gets(dict, "Filter");
	/* Filter may be a single name or an array of names; handle both */
	len = pdf_guess_filter_length(len, fz_to_name(obj));
	for (i = 0; i < fz_array_len(obj); i++)
		len = pdf_guess_filter_length(len, fz_to_name(fz_array_get(obj, i)));

	fz_drop_obj(dict);

	error = fz_read_all(bufp, stm, len);
	if (error)
	{
		fz_close(stm);
		return fz_rethrow(error, "cannot read raw stream (%d %d R)", num, gen);
	}

	fz_close(stm);
	return fz_okay;
}
/*
 * Build an appearance for a FreeText annotation from its /DA (default
 * appearance string), /Contents (text), /Rect and /Q (alignment)
 * entries.
 *
 * Two buffers are filled in parallel: 'content' is the stream that
 * ends up in the returned annotation, 'base_ap' is a scratch copy of
 * the text state that is passed to pdf_append_line and dropped before
 * returning. Characters above 0xFF are replaced by '?'.
 */
static pdf_annot *
pdf_create_freetext_annot(pdf_xref *xref, fz_obj *obj)
{
	fz_context *ctx;
	fz_buffer *content, *base_ap;
	fz_obj *da, *text, *resources;
	fz_rect rect;
	int align;
	unsigned short *chars, *cursor;
	float line_x;
	char *font_name = NULL;
	float font_size;
	char *bracket;

	ctx = xref->ctx;
	content = fz_new_buffer(ctx, 256);
	base_ap = fz_new_buffer(ctx, 256);
	da = fz_dict_gets(ctx, obj, "DA");
	text = fz_dict_gets(ctx, obj, "Contents");
	rect = pdf_to_rect(ctx, fz_dict_gets(ctx, obj, "Rect"));
	align = fz_to_int(ctx, fz_dict_gets(ctx, obj, "Q"));
	resources = pdf_dict_from_string(xref, ANNOT_FREETEXT_AP_RESOURCES);

	font_size = pdf_extract_font_size(xref, fz_to_str_buf(ctx, da), &font_name);
	if (!font_size)
		font_size = 10;

	/* TODO: what resource dictionary does this font name refer to? */
	if (font_name)
	{
		/* alias the requested font name to the "Default" font entry */
		fz_obj *fonts = fz_dict_gets(ctx, resources, "Font");
		fz_dict_puts(ctx, fonts, font_name, fz_dict_gets(ctx, fonts, "Default"));
		fz_free(ctx, font_name);
	}

	fz_buffer_printf(ctx, content, "q 1 1 %.4f %.4f re W n BT %s ",
		rect.x1 - rect.x0 - 2.0f, rect.y1 - rect.y0 - 2.0f, fz_to_str_buf(ctx, da));
	fz_buffer_printf(ctx, base_ap, "q BT %s ", fz_to_str_buf(ctx, da));
	fz_buffer_printf(ctx, content, "/Default %.4f Tf ", font_size);
	fz_buffer_printf(ctx, base_ap, "/Default %.4f Tf ", font_size);
	fz_buffer_printf(ctx, content, "1 0 0 1 2 %.4f Tm ", rect.y1 - rect.y0 - 2);

	/* Adobe Reader seems to consider "[1 0 0] r" and "1 0 0 rg" to mean the same(?) */
	bracket = strchr(base_ap->data, '[');
	if (bracket)
	{
		float r, g, b;
		if (sscanf(bracket, "[%f %f %f] r", &r, &g, &b) == 3)
			fz_buffer_printf(ctx, content, "%.4f %.4f %.4f rg ", r, g, b);
	}

	chars = pdf_to_ucs2(ctx, text);
	for (cursor = chars; *cursor; cursor++)
	{
		if (*cursor > 0xFF)
			*cursor = '?';
	}

	/* emit the text; each call returns the position where the next one resumes */
	line_x = 0;
	for (cursor = chars; *cursor; )
		cursor = pdf_append_line(xref, resources, content, base_ap, cursor, font_size, align, rect.x1 - rect.x0 - 4.0f, 1, &line_x);

	fz_free(ctx, chars);
	fz_buffer_printf(ctx, content, "ET Q");
	fz_drop_buffer(ctx, base_ap);

	return pdf_create_annot(ctx, rect, fz_keep_obj(obj), content, resources, 0);
}
/*
 * Read the xref section at file offset ofs, then recurse into the
 * sections its trailer points at: first /XRefStm, then /Prev.
 *
 * The trailer returned by pdf_read_xref is dropped before returning,
 * on both the success and the error path.
 *
 * NOTE(review): offsets already visited are not tracked, so a file
 * whose /Prev or /XRefStm chain refers back to itself would recurse
 * without bound - confirm whether a caller guards against this.
 */
static fz_error
pdf_read_xref_sections(pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error error;
	fz_obj *trailer;
	fz_obj *link;

	error = pdf_read_xref(&trailer, xref, ofs, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot read xref section");

	/* FIXME: do we overwrite free entries properly? */
	link = fz_dict_gets(trailer, "XRefStm");
	if (link)
	{
		error = pdf_read_xref_sections(xref, fz_to_int(link), buf, cap);
		if (error)
		{
			error = fz_rethrow(error, "cannot read /XRefStm xref section");
			goto done;
		}
	}

	link = fz_dict_gets(trailer, "Prev");
	if (link)
	{
		error = pdf_read_xref_sections(xref, fz_to_int(link), buf, cap);
		if (error)
		{
			error = fz_rethrow(error, "cannot read /Prev xref section");
			goto done;
		}
	}

	error = fz_okay;

done:
	fz_drop_obj(trailer);
	return error;
}
/*
 * Load a tiling pattern from its dictionary, going through the
 * resource store.
 *
 * If the store already holds a pattern for dict, that pattern is kept
 * and returned. Otherwise a new pdf_pattern is filled from PaintType,
 * XStep, YStep, BBox, Matrix (identity when absent) and Resources, and
 * its content stream is loaded. On a stream error the half-built
 * pattern is removed from the store and dropped.
 */
fz_error
pdf_load_pattern(pdf_pattern **patp, pdf_xref *xref, fz_obj *dict)
{
	fz_error error;
	pdf_pattern *pat;
	fz_obj *matrix;

	*patp = pdf_find_item(xref->store, pdf_drop_pattern, dict);
	if (*patp)
	{
		pdf_keep_pattern(*patp);
		return fz_okay;
	}

	pat = fz_malloc(sizeof(pdf_pattern));
	pat->refs = 1;
	pat->resources = NULL;
	pat->contents = NULL;

	/* Store pattern now, to avoid possible recursion if objects refer back to this one */
	pdf_store_item(xref->store, pdf_keep_pattern, pdf_drop_pattern, dict, pat);

	pat->ismask = fz_to_int(fz_dict_gets(dict, "PaintType")) == 2;
	pat->xstep = fz_to_real(fz_dict_gets(dict, "XStep"));
	pat->ystep = fz_to_real(fz_dict_gets(dict, "YStep"));
	pat->bbox = pdf_to_rect(fz_dict_gets(dict, "BBox"));

	matrix = fz_dict_gets(dict, "Matrix");
	pat->matrix = matrix ? pdf_to_matrix(matrix) : fz_identity;

	pat->resources = fz_dict_gets(dict, "Resources");
	if (pat->resources)
		fz_keep_obj(pat->resources);

	error = pdf_load_stream(&pat->contents, xref, fz_to_num(dict), fz_to_gen(dict));
	if (!error)
	{
		*patp = pat;
		return fz_okay;
	}

	/* undo the early store insertion before giving up */
	pdf_remove_item(xref->store, pdf_drop_pattern, dict);
	pdf_drop_pattern(pat);
	return fz_rethrow(error, "cannot load pattern stream (%d %d R)", fz_to_num(dict), fz_to_gen(dict));
}
/*
 * Map an ICCBased colorspace onto a device colorspace, chosen purely
 * by the component count /N in dict: 1 -> gray, 3 -> rgb, 4 -> cmyk.
 * Any other count is a syntax error. xref is not used.
 */
static fz_error
load_icc_based(fz_colorspace **csp, pdf_xref *xref, fz_obj *dict)
{
	int ncomp = fz_to_int(fz_dict_gets(dict, "N"));

	if (ncomp == 1)
		*csp = fz_device_gray;
	else if (ncomp == 3)
		*csp = fz_device_rgb;
	else if (ncomp == 4)
		*csp = fz_device_cmyk;
	else
		return fz_throw("syntaxerror: ICCBased must have 1, 3 or 4 components");

	return fz_okay;
}
/*
 * Load the cross reference table of a document.
 *
 * Steps: read the version marker, locate startxref, read the trailer,
 * size the table from the trailer's Size entry, read every xref
 * section starting at xref->startxref, then sanity-check the result.
 * Any failing step is reported as an error.
 */
static fz_error
pdf_load_xref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *size;
	int i;

	error = pdf_load_version(xref);
	if (error)
		return fz_rethrow(error, "cannot read version marker");

	error = pdf_read_start_xref(xref);
	if (error)
		return fz_rethrow(error, "cannot read startxref");

	error = pdf_read_trailer(xref, buf, bufsize);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	size = fz_dict_gets(xref->trailer, "Size");
	if (!size)
		return fz_throw("trailer missing Size entry");

	pdf_resize_xref(xref, fz_to_int(size));

	error = pdf_read_xref_sections(xref, xref->startxref, buf, bufsize);
	if (error)
		return fz_rethrow(error, "cannot read xref");

	/* broken pdfs where first object is not free */
	if (xref->table[0].type != 'f')
		return fz_throw("first object in xref is not free");

	/* broken pdfs where object offsets are out of range */
	for (i = 0; i < xref->len; i++)
	{
		switch (xref->table[i].type)
		{
		case 'n':
			/* ordinary object: ofs must lie inside the file */
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->file_size)
				return fz_throw("object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i);
			break;
		case 'o':
			/* object in an object stream: ofs must name an existing 'n' entry */
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->len || xref->table[xref->table[i].ofs].type != 'n')
				return fz_throw("invalid reference to an objstm that does not exist: %d (%d 0 R)", xref->table[i].ofs, i);
			break;
		default:
			break;
		}
	}

	return fz_okay;
}
/*
 * Pick the device colorspace that stands in for an ICCBased
 * colorspace, using only the component count /N in dict:
 * 1 -> gray, 3 -> rgb, 4 -> cmyk. Any other count is reported through
 * fz_throw on xref->ctx.
 */
static fz_colorspace *
load_icc_based(pdf_document *xref, fz_obj *dict)
{
	int ncomp = fz_to_int(fz_dict_gets(dict, "N"));

	if (ncomp == 1)
		return fz_device_gray;
	if (ncomp == 3)
		return fz_device_rgb;
	if (ncomp == 4)
		return fz_device_cmyk;

	fz_throw(xref->ctx, "syntaxerror: ICCBased must have 1, 3 or 4 components");
	return NULL; /* Stupid MSVC */
}
/*
 * Resolve a link destination to a page number.
 *
 * dest may be a dictionary whose "D" entry is the destination array,
 * a destination array whose first element is the target, an integer
 * (returned as is), or anything else, which is handed to
 * pdf_find_page_number.
 */
static int _pdf_doc_find_page_no(
    struct _pdf_doc *self, fz_obj *dest)
{
    fz_obj *target = dest;

    if (fz_is_dict(target)) {
        /* The destination is linked from a Go-To action's D array. */
        fz_obj *inner = fz_dict_gets(target, "D");
        if (inner && fz_is_array(inner))
            target = inner;
    }

    /* the first array element identifies the page */
    if (fz_is_array(target))
        target = fz_array_get(target, 0);

    return fz_is_int(target)
        ? fz_to_int(target)
        : pdf_find_page_number(self->xref, target);
}
/*
 * Build a filter for reading raw stream data.
 * This is a null filter to constrain reading to the
 * stream length, followed by a decryption filter.
 *
 * The decryption filter is added only when the document has a crypt
 * handler and pdf_stream_has_crypt reports false for stmobj.
 */
static fz_stream *
pdf_open_raw_filter(fz_stream *chain, pdf_xref *xref, fz_obj *stmobj, int num, int gen)
{
	fz_stream *limited;
	int length;
	int has_own_crypt;

	/* don't close chain when we close this filter */
	fz_keep_stream(chain);

	length = fz_to_int(fz_dict_gets(stmobj, "Length"));
	limited = fz_open_null(chain, length);

	has_own_crypt = pdf_stream_has_crypt(stmobj);
	if (!xref->crypt || has_own_crypt)
		return limited;

	return pdf_open_crypt(limited, xref->crypt, num, gen);
}
/*
 * Create a DCT decoding stream on top of chain.
 *
 * The decoder state starts zeroed. color_transform is taken from the
 * "ColorTransform" entry of params when present, and is -1 ("unset")
 * otherwise.
 */
fz_stream *
fz_open_dctd(fz_stream *chain, fz_obj *params)
{
	fz_dctd *state;
	fz_obj *transform;

	state = fz_malloc(chain->ctx, sizeof(fz_dctd));
	memset(state, 0, sizeof(fz_dctd));
	state->chain = chain;
	state->init = 0;

	transform = fz_dict_gets(chain->ctx, params, "ColorTransform");
	state->color_transform = transform ? fz_to_int(chain->ctx, transform) : -1; /* -1: unset */

	return fz_new_stream(chain->ctx, state, read_dctd, close_dctd);
}
/*
 * Load CMap stream in PDF file
 *
 * Returns the cached cmap when the store already has one for stmobj.
 * Otherwise the stream is opened and parsed, /WMode is applied when it
 * is an integer, and /UseCMap is resolved either as a system cmap
 * (name) or as another embedded cmap (indirect reference). The result
 * is put in the store and returned through cmapp.
 *
 * On any error the partially built cmap and the open stream (if still
 * open) are released and a rethrown error is returned.
 */
fz_error
pdf_load_embedded_cmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmobj)
{
	fz_error error = fz_okay;
	fz_stream *file = NULL;
	pdf_cmap *cmap = NULL;
	pdf_cmap *usecmap;
	fz_obj *wmode;
	fz_obj *obj;

	if ((*cmapp = pdf_find_item(xref->store, pdf_drop_cmap, stmobj)))
	{
		pdf_keep_cmap(*cmapp);
		return fz_okay;
	}

	error = pdf_open_stream(&file, xref, fz_to_num(stmobj), fz_to_gen(stmobj));
	if (error)
	{
		error = fz_rethrow(error, "cannot open cmap stream (%d %d R)", fz_to_num(stmobj), fz_to_gen(stmobj));
		goto cleanup;
	}

	error = pdf_parse_cmap(&cmap, file);
	if (error)
	{
		error = fz_rethrow(error, "cannot parse cmap stream (%d %d R)", fz_to_num(stmobj), fz_to_gen(stmobj));
		goto cleanup;
	}

	fz_close(file);
	/*
	 * The cleanup label closes 'file' whenever it is non-NULL. Clear it
	 * here so that a later failure while loading the usecmap does not
	 * close the same stream a second time.
	 */
	file = NULL;

	wmode = fz_dict_gets(stmobj, "WMode");
	if (fz_is_int(wmode))
		pdf_set_wmode(cmap, fz_to_int(wmode));

	obj = fz_dict_gets(stmobj, "UseCMap");
	if (fz_is_name(obj))
	{
		error = pdf_load_system_cmap(&usecmap, fz_to_name(obj));
		if (error)
		{
			error = fz_rethrow(error, "cannot load system usecmap '%s'", fz_to_name(obj));
			goto cleanup;
		}
		pdf_set_usecmap(cmap, usecmap);
		pdf_drop_cmap(usecmap);
	}
	else if (fz_is_indirect(obj))
	{
		error = pdf_load_embedded_cmap(&usecmap, xref, obj);
		if (error)
		{
			error = fz_rethrow(error, "cannot load embedded usecmap (%d %d R)", fz_to_num(obj), fz_to_gen(obj));
			goto cleanup;
		}
		pdf_set_usecmap(cmap, usecmap);
		pdf_drop_cmap(usecmap);
	}

	pdf_store_item(xref->store, pdf_keep_cmap, pdf_drop_cmap, stmobj, cmap);

	*cmapp = cmap;
	return fz_okay;

cleanup:
	if (file)
		fz_close(file);
	if (cmap)
		pdf_drop_cmap(cmap);
	return error; /* already rethrown */
}
/*
 * Create a filter given a name and param dictionary.
 *
 * f is the filter name object and p its parameter dictionary. The
 * returned stream wraps chain with the matching decoder. Both the long
 * and the abbreviated filter names are accepted where the code lists
 * them. Unknown filters and JPXDecode return chain unchanged (with a
 * warning for unknown names).
 *
 * num and gen are only used for the Crypt filter.
 */
static fz_stream *
build_filter(fz_stream *chain, pdf_xref * xref, fz_obj * f, fz_obj * p, int num, int gen)
{
	fz_error error;
	char *s;

	s = fz_to_name(f);

	if (!strcmp(s, "ASCIIHexDecode") || !strcmp(s, "AHx"))
		return fz_open_ahxd(chain);

	else if (!strcmp(s, "ASCII85Decode") || !strcmp(s, "A85"))
		return fz_open_a85d(chain);

	else if (!strcmp(s, "CCITTFaxDecode") || !strcmp(s, "CCF"))
		return fz_open_faxd(chain, p);

	else if (!strcmp(s, "DCTDecode") || !strcmp(s, "DCT"))
		return fz_open_dctd(chain, p);

	else if (!strcmp(s, "RunLengthDecode") || !strcmp(s, "RL"))
		return fz_open_rld(chain);

	else if (!strcmp(s, "FlateDecode") || !strcmp(s, "Fl"))
	{
		/* a Predictor above 1 adds a predictor stage after inflating */
		fz_obj *obj = fz_dict_gets(p, "Predictor");
		if (fz_to_int(obj) > 1)
			return fz_open_predict(fz_open_flated(chain), p);
		return fz_open_flated(chain);
	}

	else if (!strcmp(s, "LZWDecode") || !strcmp(s, "LZW"))
	{
		fz_obj *obj = fz_dict_gets(p, "Predictor");
		if (fz_to_int(obj) > 1)
			return fz_open_predict(fz_open_lzwd(chain, p), p);
		return fz_open_lzwd(chain, p);
	}

	else if (!strcmp(s, "JBIG2Decode"))
	{
		fz_obj *obj = fz_dict_gets(p, "JBIG2Globals");
		if (obj)
		{
			fz_buffer *globals = NULL;
			error = pdf_load_stream(&globals, xref, fz_to_num(obj), fz_to_gen(obj));
			if (error)
			{
				/*
				 * The failure is reported but not fatal. 'globals'
				 * was never filled in, so decode without global
				 * segments instead of using an unset pointer.
				 */
				fz_catch(error, "cannot load jbig2 global segments");
				return fz_open_jbig2d(chain, NULL);
			}
			chain = fz_open_jbig2d(chain, globals);
			fz_drop_buffer(globals);
			return chain;
		}
		return fz_open_jbig2d(chain, NULL);
	}

	else if (!strcmp(s, "JPXDecode"))
		return chain; /* JPX decoding is special cased in the image loading code */

	else if (!strcmp(s, "Crypt"))
	{
		fz_obj *name;

		if (!xref->crypt)
		{
			fz_warn("crypt filter in unencrypted document");
			return chain;
		}

		name = fz_dict_gets(p, "Name");
		if (fz_is_name(name))
			return pdf_open_crypt_with_filter(chain, xref->crypt, fz_to_name(name), num, gen);

		return chain;
	}

	fz_warn("unknown filter name (%s)", s);
	return chain;
}
/*
 * Synthesize an appearance for a text form field (Widget annotation
 * with field type "Tx").
 *
 * Returns NULL when obj is not such a widget, when it already has an
 * appearance stream and NeedAppearances is not set, or when it lacks
 * a default appearance (/DA) or a value (/V).
 *
 * Field flags (/Ff) looked at here: bit 12 multiline, bit 13 password
 * (text replaced by '*'), bit 24 comb, bit 25 richtext (unsupported,
 * warned about). Characters above 0xFF are replaced by '?'.
 *
 * The layout is computed with the rectangle rotated by /MK /R and the
 * rectangle is rotated back before the annotation is created.
 */
static pdf_annot *
pdf_update_tx_widget_annot(pdf_xref *xref, fz_obj *obj)
{
	fz_obj *ap, *res, *value;
	fz_rect rect;
	fz_buffer *content, *base_ap;
	int flags, align, rotate, is_multiline;
	float font_size, x, y;
	/*
	 * Start from NULL (as pdf_create_freetext_annot does) so the
	 * !font_name test below never reads an indeterminate pointer if
	 * pdf_extract_font_size leaves it untouched.
	 */
	char *font_name = NULL;
	unsigned short *ucs2, *rest;
	fz_context *ctx = xref->ctx;

	if (strcmp(fz_to_name(ctx, fz_dict_gets(ctx, obj, "Subtype")), "Widget") != 0)
		return NULL;
	if (!fz_to_bool(ctx, pdf_dict_get_inheritable(xref, NULL, "NeedAppearances")) && pdf_get_ap_stream(xref, obj))
		return NULL;
	value = pdf_dict_get_inheritable(xref, obj, "FT");
	if (strcmp(fz_to_name(ctx, value), "Tx") != 0)
		return NULL;

	ap = pdf_dict_get_inheritable(xref, obj, "DA");
	value = pdf_dict_get_inheritable(xref, obj, "V");
	if (!ap || !value)
		return NULL;

	res = pdf_dict_get_inheritable(xref, obj, "DR");
	rect = pdf_to_rect(ctx, fz_dict_gets(ctx, obj, "Rect"));
	rotate = fz_to_int(ctx, fz_dict_gets(ctx, fz_dict_gets(ctx, obj, "MK"), "R"));
	rect = fz_transform_rect(fz_rotate(rotate), rect);

	flags = fz_to_int(ctx, fz_dict_gets(ctx, obj, "Ff"));
	is_multiline = (flags & (1 << 12)) != 0;
	if ((flags & (1 << 25) /* richtext */))
		fz_warn(ctx, "missing support for richtext fields");
	align = fz_to_int(ctx, fz_dict_gets(ctx, obj, "Q"));

	font_size = pdf_extract_font_size(xref, fz_to_str_buf(ctx, ap), &font_name);
	if (!font_size || !font_name)
		font_size = is_multiline ? 10 /* FIXME */ : floor(rect.y1 - rect.y0 - 2);

	content = fz_new_buffer(ctx, 256);
	base_ap = fz_new_buffer(ctx, 256);
	pdf_prepend_ap_background(content, xref, obj);
	fz_buffer_printf(ctx, content, "/Tx BMC q 1 1 %.4f %.4f re W n BT %s ",
		rect.x1 - rect.x0 - 2.0f, rect.y1 - rect.y0 - 2.0f, fz_to_str_buf(ctx, ap));
	fz_buffer_printf(ctx, base_ap, "/Tx BMC q BT %s ", fz_to_str_buf(ctx, ap));
	if (font_name)
	{
		fz_buffer_printf(ctx, content, "/%s %.4f Tf ", font_name, font_size);
		fz_buffer_printf(ctx, base_ap, "/%s %.4f Tf ", font_name, font_size);
		fz_free(ctx, font_name);
	}
	/* single-line text sits around the vertical middle, multiline text starts at the top */
	y = 0.5f * (rect.y1 - rect.y0) + 0.6f * font_size;
	if (is_multiline)
		y = rect.y1 - rect.y0 - 2;
	fz_buffer_printf(ctx, content, "1 0 0 1 2 %.4f Tm ", y);

	ucs2 = pdf_to_ucs2(ctx, value);
	for (rest = ucs2; *rest; rest++)
		if (*rest > 0xFF)
			*rest = '?';
	if ((flags & (1 << 13) /* password */))
		for (rest = ucs2; *rest; rest++)
			*rest = '*';

	x = 0;
	rest = ucs2;
	if ((flags & (1 << 24) /* comb */))
	{
		/*
		 * A comb field is laid out in one call. The line loop is
		 * skipped outright rather than by pointing 'rest' at a wide
		 * string literal, whose element type need not match
		 * unsigned short.
		 */
		pdf_append_combed_line(xref, res, content, base_ap, ucs2, font_size, rect.x1 - rect.x0, fz_to_int(ctx, pdf_dict_get_inheritable(xref, obj, "MaxLen")));
	}
	else
	{
		while (*rest)
			rest = pdf_append_line(xref, res, content, base_ap, rest, font_size, align, rect.x1 - rect.x0 - 4.0f, is_multiline, &x);
	}

	fz_free(ctx, ucs2);
	fz_buffer_printf(ctx, content, "ET Q EMC");
	fz_drop_buffer(ctx, base_ap);

	/* undo the rotation applied above before handing the rectangle on */
	rect = fz_transform_rect(fz_rotate(-rotate), rect);
	return pdf_create_annot(ctx, rect, fz_keep_obj(obj), content, res ? fz_keep_obj(res) : NULL, 0);
}
/*
 * Load an Indexed colorspace from its defining array:
 * element 1 is the base colorspace, element 2 the highest index
 * (clamped to 0..255) and element 3 the lookup table.
 *
 * The lookup table holds base->n * (high + 1) bytes and starts zeroed.
 * It is filled either from a string of exactly that length or from
 * the stream an indirect reference points at; anything else is an
 * error. On error the partially built colorspace is dropped.
 */
static fz_error
load_indexed(fz_colorspace **csp, pdf_xref *xref, fz_obj *array)
{
	fz_error error;
	fz_colorspace *cs;
	struct indexed *idx;
	fz_obj *baseobj = fz_array_get(array, 1);
	fz_obj *highobj = fz_array_get(array, 2);
	fz_obj *lookup = fz_array_get(array, 3);
	fz_colorspace *base;
	int i, n;

	error = pdf_load_colorspace(&base, xref, baseobj);
	if (error)
		return fz_rethrow(error, "cannot load base colorspace (%d %d R)", fz_to_num(baseobj), fz_to_gen(baseobj));

	idx = fz_malloc(sizeof(struct indexed));
	idx->base = base;
	idx->high = fz_to_int(highobj);
	idx->high = CLAMP(idx->high, 0, 255);
	n = base->n * (idx->high + 1);
	idx->lookup = fz_malloc(n);
	memset(idx->lookup, 0, n);

	cs = fz_new_colorspace("Indexed", 1);
	cs->to_rgb = indexed_to_rgb;
	cs->free_data = free_indexed;
	cs->data = idx;

	if (fz_is_string(lookup) && fz_to_str_len(lookup) == n)
	{
		unsigned char *buf = (unsigned char *) fz_to_str_buf(lookup);
		for (i = 0; i < n; i++)
			idx->lookup[i] = buf[i];
	}
	else if (fz_is_indirect(lookup))
	{
		fz_stream *file;

		error = pdf_open_stream(&file, xref, fz_to_num(lookup), fz_to_gen(lookup));
		if (error)
		{
			fz_drop_colorspace(cs);
			return fz_rethrow(error, "cannot open colorspace lookup table (%d 0 R)", fz_to_num(lookup));
		}

		/* a short read leaves the remaining entries at zero */
		i = fz_read(file, idx->lookup, n);
		if (i < 0)
		{
			/* the stream was opened successfully; close it before bailing out */
			fz_close(file);
			fz_drop_colorspace(cs);
			return fz_throw("cannot read colorspace lookup table (%d 0 R)", fz_to_num(lookup));
		}

		fz_close(file);
	}
	else
	{
		fz_drop_colorspace(cs);
		return fz_throw("cannot parse colorspace lookup table");
	}

	*csp = cs;
	return fz_okay;
}
/*
 * Read a cross reference stream at the current position of
 * xref->file.
 *
 * The stream object is parsed, the xref table is grown to its Size
 * entry if that is larger than the current table, and the three field
 * widths are taken from the W array. Without an Index array a single
 * section (0, Size) is read; otherwise one section is read for each
 * pair of Index values.
 *
 * On success the stream dictionary is returned through trailerp; on
 * failure it is dropped.
 */
static fz_error
pdf_read_new_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *dict;
	fz_obj *sections;
	fz_obj *entry;
	int num, gen, stm_ofs;
	int size, w0, w1, w2;
	int k;

	error = pdf_parse_ind_obj(&dict, xref, xref->file, buf, cap, &num, &gen, &stm_ofs);
	if (error)
		return fz_rethrow(error, "cannot parse compressed xref stream object");

	entry = fz_dict_gets(dict, "Size");
	if (!entry)
	{
		error = fz_throw("xref stream missing Size entry (%d %d R)", num, gen);
		goto fail;
	}
	size = fz_to_int(entry);

	if (size > xref->len)
		pdf_resize_xref(xref, size);

	if (num < 0 || num >= xref->len)
	{
		error = fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1);
		goto fail;
	}

	entry = fz_dict_gets(dict, "W");
	if (!entry)
	{
		error = fz_throw("xref stream missing W entry (%d %d R)", num, gen);
		goto fail;
	}
	w0 = fz_to_int(fz_array_get(entry, 0));
	w1 = fz_to_int(fz_array_get(entry, 1));
	w2 = fz_to_int(fz_array_get(entry, 2));

	sections = fz_dict_gets(dict, "Index");

	error = pdf_open_stream_at(&stm, xref, num, gen, dict, stm_ofs);
	if (error)
	{
		error = fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen);
		goto fail;
	}

	if (sections)
	{
		for (k = 0; k < fz_array_len(sections); k += 2)
		{
			int i0 = fz_to_int(fz_array_get(sections, k + 0));
			int i1 = fz_to_int(fz_array_get(sections, k + 1));
			error = pdf_read_new_xref_section(xref, stm, i0, i1, w0, w1, w2);
			if (error)
			{
				error = fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen);
				goto fail_close;
			}
		}
	}
	else
	{
		error = pdf_read_new_xref_section(xref, stm, 0, size, w0, w1, w2);
		if (error)
		{
			error = fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen);
			goto fail_close;
		}
	}

	fz_close(stm);

	*trailerp = dict;
	return fz_okay;

fail_close:
	fz_close(stm);
fail:
	fz_drop_obj(dict);
	return error; /* already thrown or rethrown */
}
/*
 * Load an Indexed colorspace from its defining array (context/exception
 * based variant).
 *
 * Array element 1 is the base colorspace, element 2 the highest index
 * (clamped to 0..255) and element 3 the lookup table. The table holds
 * n = base->n * (high + 1) bytes. cs->size is increased by the size of
 * the index data, n, and the base colorspace's size.
 *
 * The table is filled either from a string of exactly n bytes, or by
 * reading up to n bytes from the stream an indirect reference points
 * at; any other lookup object throws "cannot parse colorspace lookup
 * table". A failure to open or read the stream is reported with
 * fz_throw (the stream is closed first on a read failure).
 *
 * idx, base and cs are registered with fz_var before the fz_try block.
 *
 * NOTE(review): as it stands this definition ends inside the fz_try
 * block; the matching catch/cleanup and the return value are therefore
 * not described here.
 */
static fz_colorspace * load_indexed(pdf_document *xref, fz_obj *array) { struct indexed *idx = NULL; fz_context *ctx = xref->ctx; fz_obj *baseobj = fz_array_get(array, 1); fz_obj *highobj = fz_array_get(array, 2); fz_obj *lookup = fz_array_get(array, 3); fz_colorspace *base = NULL; fz_colorspace *cs = NULL; int i, n; fz_var(idx); fz_var(base); fz_var(cs); fz_try(ctx) { base = pdf_load_colorspace(xref, baseobj); /* "cannot load base colorspace (%d %d R)", fz_to_num(baseobj), fz_to_gen(baseobj) */ idx = fz_malloc_struct(ctx, struct indexed); idx->lookup = NULL; idx->base = base; idx->high = fz_to_int(highobj); idx->high = CLAMP(idx->high, 0, 255); n = base->n * (idx->high + 1); idx->lookup = fz_malloc_array(ctx, 1, n); cs = fz_new_colorspace(ctx, "Indexed", 1); cs->to_rgb = indexed_to_rgb; cs->free_data = free_indexed; cs->data = idx; cs->size += sizeof(*idx) + n + (base ? base->size : 0); if (fz_is_string(lookup) && fz_to_str_len(lookup) == n) { unsigned char *buf = (unsigned char *) fz_to_str_buf(lookup); for (i = 0; i < n; i++) idx->lookup[i] = buf[i]; } else if (fz_is_indirect(lookup)) { fz_stream *file = NULL; fz_try(ctx) { file = pdf_open_stream(xref, fz_to_num(lookup), fz_to_gen(lookup)); } fz_catch(ctx) { fz_throw(ctx, "cannot open colorspace lookup table (%d 0 R)", fz_to_num(lookup)); } i = fz_read(file, idx->lookup, n); if (i < 0) { fz_close(file); fz_throw(ctx, "cannot read colorspace lookup table (%d 0 R)", fz_to_num(lookup)); } fz_close(file); } else { fz_throw(ctx, "cannot parse colorspace lookup table"); } }
/*
 * Load one level of the document outline, starting at dict and
 * following /Next, and recurse into /First for each item's children.
 *
 * Siblings are walked in a loop rather than by recursion. Every
 * visited dictionary is tagged with a temporary ".seen" key so that a
 * /Next chain that loops back stops the walk; the tags are removed
 * again before returning.
 *
 * Returns the first node of the sibling list, or NULL when dict is
 * null or nothing was loaded.
 */
static fz_outline *
pdf_load_outline_imp(pdf_xref *xref, fz_obj *dict)
{
	fz_context *ctx = xref->ctx;
	fz_obj *first_dict = dict;
	/* SumatraPDF: prevent potential stack overflow */
	fz_outline *head = NULL;
	fz_outline *tail = NULL;
	fz_outline *item;
	pdf_link *link;
	fz_obj *obj;

	if (fz_is_null(ctx, dict))
		return NULL;

	for (;;)
	{
		/* SumatraPDF: prevent cyclic outlines */
		if (fz_dict_gets(ctx, dict, ".seen"))
			break;
		obj = fz_new_null(ctx);
		fz_dict_puts(ctx, dict, ".seen", obj);
		fz_drop_obj(ctx, obj);

		item = fz_malloc(ctx, sizeof(fz_outline));
		item->title = NULL;
		item->page = -1;
		item->down = NULL;
		item->next = NULL;

		obj = fz_dict_gets(ctx, dict, "Title");
		if (obj)
			item->title = pdf_to_utf8(ctx, obj);

		/* SumatraPDF: support expansion states */
		item->is_open = fz_to_int(ctx, fz_dict_gets(ctx, dict, "Count")) >= 0;
		/* SumatraPDF: extended outline actions */
		item->free_data = NULL;
		item->data = NULL;

		if (fz_dict_gets(ctx, dict, "Dest") || fz_dict_gets(ctx, dict, "A"))
		{
			link = pdf_load_link(xref, dict);
			if (link) /* SumatraPDF: don't crash if it's no link after all */
			{
				if (link->kind == PDF_LINK_GOTO)
					item->page = pdf_find_page_number(xref, fz_array_get(ctx, link->dest, 0));
				/* SumatraPDF: extended outline actions */
				item->data = link;
				item->free_data = pdf_free_link;
			}
		}

		obj = fz_dict_gets(ctx, dict, "First");
		if (obj)
			item->down = pdf_load_outline_imp(xref, obj);

		/* SumatraPDF: prevent potential stack overflow */
		if (head)
			tail->next = item;
		else
			head = item;
		tail = item;

		dict = fz_dict_gets(ctx, dict, "Next");
		if (!dict || fz_is_null(ctx, dict))
			break;
	}

	/* SumatraPDF: prevent cyclic outlines */
	dict = first_dict;
	while (dict && fz_dict_gets(ctx, dict, ".seen"))
	{
		fz_dict_dels(ctx, dict, ".seen");
		dict = fz_dict_gets(ctx, dict, "Next");
	}

	return head;
}
static fz_error pdf_load_image_imp(fz_pixmap **imgp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict, fz_stream *cstm, int forcemask) { fz_stream *stm; fz_pixmap *tile; fz_obj *obj, *res; fz_error error; int w, h, bpc, n; int imagemask; int interpolate; int indexed; fz_colorspace *colorspace; fz_pixmap *mask; /* explicit mask/softmask image */ int usecolorkey; int colorkey[FZ_MAX_COLORS * 2]; float decode[FZ_MAX_COLORS * 2]; int stride; unsigned char *samples; int i, len; /* special case for JPEG2000 images */ if (pdf_is_jpx_image(dict)) { tile = NULL; error = pdf_load_jpx_image(&tile, xref, dict); if (error) return fz_rethrow(error, "cannot load jpx image"); if (forcemask) { if (tile->n != 2) { fz_drop_pixmap(tile); return fz_throw("softmask must be grayscale"); } mask = fz_alpha_from_gray(tile, 1); fz_drop_pixmap(tile); *imgp = mask; return fz_okay; } *imgp = tile; return fz_okay; } w = fz_to_int(fz_dict_getsa(dict, "Width", "W")); h = fz_to_int(fz_dict_getsa(dict, "Height", "H")); bpc = fz_to_int(fz_dict_getsa(dict, "BitsPerComponent", "BPC")); imagemask = fz_to_bool(fz_dict_getsa(dict, "ImageMask", "IM")); interpolate = fz_to_bool(fz_dict_getsa(dict, "Interpolate", "I")); indexed = 0; usecolorkey = 0; colorspace = NULL; mask = NULL; if (imagemask) bpc = 1; if (w == 0) return fz_throw("image width is zero"); if (h == 0) return fz_throw("image height is zero"); if (bpc == 0) return fz_throw("image depth is zero"); if (w > (1 << 16)) return fz_throw("image is too wide"); if (h > (1 << 16)) return fz_throw("image is too high"); obj = fz_dict_getsa(dict, "ColorSpace", "CS"); if (obj && !imagemask && !forcemask) { /* colorspace resource lookup is only done for inline images */ if (fz_is_name(obj)) { res = fz_dict_get(fz_dict_gets(rdb, "ColorSpace"), obj); if (res) obj = res; } error = pdf_load_colorspace(&colorspace, xref, obj); if (error) return fz_rethrow(error, "cannot load image colorspace"); if (!strcmp(colorspace->name, "Indexed")) indexed = 1; n = colorspace->n; } 
else { n = 1; } obj = fz_dict_getsa(dict, "Decode", "D"); if (obj) { for (i = 0; i < n * 2; i++) decode[i] = fz_to_real(fz_array_get(obj, i)); } else { float maxval = indexed ? (1 << bpc) - 1 : 1; for (i = 0; i < n * 2; i++) decode[i] = i & 1 ? maxval : 0; } obj = fz_dict_getsa(dict, "SMask", "Mask"); if (fz_is_dict(obj)) { /* Not allowed for inline images */ if (!cstm) { error = pdf_load_image_imp(&mask, xref, rdb, obj, NULL, 1); if (error) { if (colorspace) fz_drop_colorspace(colorspace); return fz_rethrow(error, "cannot load image mask/softmask"); } } } else if (fz_is_array(obj)) { usecolorkey = 1; for (i = 0; i < n * 2; i++) colorkey[i] = fz_to_int(fz_array_get(obj, i)); } /* Allocate now, to fail early if we run out of memory */ tile = fz_new_pixmap_with_limit(colorspace, w, h); if (!tile) { if (colorspace) fz_drop_colorspace(colorspace); if (mask) fz_drop_pixmap(mask); return fz_throw("out of memory"); } if (colorspace) fz_drop_colorspace(colorspace); tile->mask = mask; tile->interpolate = interpolate; stride = (w * n * bpc + 7) / 8; if (cstm) { stm = pdf_open_inline_stream(cstm, xref, dict, stride * h); } else { error = pdf_open_stream(&stm, xref, fz_to_num(dict), fz_to_gen(dict)); if (error) { fz_drop_pixmap(tile); return fz_rethrow(error, "cannot open image data stream (%d 0 R)", fz_to_num(dict)); } } samples = fz_calloc(h, stride); len = fz_read(stm, samples, h * stride); if (len < 0) { fz_close(stm); fz_free(samples); fz_drop_pixmap(tile); return fz_rethrow(len, "cannot read image data"); } /* Make sure we read the EOF marker (for inline images only) */ if (cstm) { unsigned char tbuf[512]; int tlen = fz_read(stm, tbuf, sizeof tbuf); if (tlen < 0) fz_catch(tlen, "ignoring error at end of image"); if (tlen > 0) fz_warn("ignoring garbage at end of image"); } fz_close(stm); /* Pad truncated images */ if (len < stride * h) { fz_warn("padding truncated image (%d 0 R)", fz_to_num(dict)); memset(samples + len, 0, stride * h - len); } /* Invert 1-bit image masks 
*/ if (imagemask) { /* 0=opaque and 1=transparent so we need to invert */ unsigned char *p = samples; len = h * stride; for (i = 0; i < len; i++) p[i] = ~p[i]; } fz_unpack_tile(tile, samples, n, bpc, stride, indexed); fz_free(samples); if (usecolorkey) pdf_mask_color_key(tile, n, colorkey); if (indexed) { fz_pixmap *conv; fz_decode_indexed_tile(tile, decode, (1 << bpc) - 1); conv = pdf_expand_indexed_pixmap(tile); fz_drop_pixmap(tile); tile = conv; } else { fz_decode_tile(tile, decode); } *imgp = tile; return fz_okay; }
/*
 * Load the objects held in object stream (num gen R) into the xref
 * table.
 *
 * The stream starts with N pairs of integers (object number, offset).
 * Each object is then parsed at First + offset. A parsed object is
 * stored in the table only when its entry is of type 'o' and refers
 * back to this stream (ofs == num); otherwise it is dropped.
 *
 * buf/cap is the scratch buffer used by the lexer and parser.
 */
static fz_error
pdf_load_obj_stm(pdf_xref *xref, int num, int gen, char *buf, int cap)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *objstm;
	int *ids;
	int *offsets;
	fz_obj *parsed;
	int first;
	int count;
	int i, n;
	int tok;

	error = pdf_load_object(&objstm, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen);

	count = fz_to_int(fz_dict_gets(objstm, "N"));
	first = fz_to_int(fz_dict_gets(objstm, "First"));

	ids = fz_calloc(count, sizeof(int));
	offsets = fz_calloc(count, sizeof(int));

	error = pdf_open_stream(&stm, xref, num, gen);
	if (error)
	{
		error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen);
		goto cleanupbuf;
	}

	/* header: one (object number, offset) pair per contained object */
	for (i = 0; i < count; i++)
	{
		error = pdf_lex(&tok, stm, buf, cap, &n);
		if (error || tok != PDF_TOK_INT)
		{
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
			goto cleanupstm;
		}
		ids[i] = atoi(buf);

		error = pdf_lex(&tok, stm, buf, cap, &n);
		if (error || tok != PDF_TOK_INT)
		{
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
			goto cleanupstm;
		}
		offsets[i] = atoi(buf);
	}

	fz_seek(stm, first, 0);

	for (i = 0; i < count; i++)
	{
		int id;

		fz_seek(stm, first + offsets[i], 0);

		error = pdf_parse_stm_obj(&parsed, xref, stm, buf, cap);
		if (error)
		{
			error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen);
			goto cleanupstm;
		}

		id = ids[i];
		if (id < 1 || id >= xref->len)
		{
			fz_drop_obj(parsed);
			error = fz_throw("object id (%d 0 R) out of range (0..%d)", id, xref->len - 1);
			goto cleanupstm;
		}

		if (xref->table[id].type != 'o' || xref->table[id].ofs != num)
		{
			/* the table does not place this object in this stream */
			fz_drop_obj(parsed);
			continue;
		}

		/* replace any object cached earlier for this entry */
		if (xref->table[id].obj)
			fz_drop_obj(xref->table[id].obj);
		xref->table[id].obj = parsed;
	}

	error = fz_okay;

cleanupstm:
	fz_close(stm);
cleanupbuf:
	fz_free(offsets);
	fz_free(ids);
	fz_drop_obj(objstm);
	return error; /* fz_okay, or already rethrown */
}
/*
 * Load a PDF document from stm into self.
 *
 * Any previously loaded state is cleared first. The xref is opened
 * through an fz_stream wrapper around stm, the page tree is loaded,
 * and per-page arrays are allocated. For every page the media box
 * (intersected with the crop box when one is present) and the
 * rotation are recorded.
 *
 * Returns 1 on success and 0 when the xref or the page tree cannot be
 * read.
 */
static int _pdf_doc_load(struct _pdf_doc *self, mume_stream_t *stm)
{
    fz_error error;
    fz_stream *fzstm;
    fz_rect *mbox;
    fz_obj *page_obj, *box_obj;
    int i, c;

    _pdf_doc_clear(self);

    fzstm = fz_new_stream(stm, _pdf_stream_read, _pdf_stream_close);
    mume_stream_reference(stm);
    fzstm->seek = _pdf_stream_seek;
    error = pdf_open_xref_with_stream(&self->xref, fzstm, NULL);
    fz_close(fzstm);

    if (error) {
        mume_error(("Read xref failed\n", error));
        return 0;
    }

    assert(!pdf_needs_password(self->xref));

    /* Load meta information. */
    error = pdf_load_page_tree(self->xref);
    if (error) {
        mume_error(("Cannot load page tree\n"));
        return 0;
    }

    c = pdf_count_pages(self->xref);
    self->glyph_cache = fz_new_glyph_cache();
    self->pages = calloc_abort(c, sizeof(pdf_page*));
    self->disps = calloc_abort(c, sizeof(fz_display_list*));
    self->media_boxes = malloc_abort(c * sizeof(fz_rect));
    self->page_rotates = malloc_abort(c * sizeof(int));

    /* Extract each pages' media box and rotation. */
    for (i = 0; i < c; ++i) {
        mbox = self->media_boxes + i;
        page_obj = self->xref->page_objs[i];

        if (!page_obj) {
            *mbox = fz_empty_rect;
            /*
             * page_rotates is allocated with malloc_abort (unlike the
             * calloc_abort arrays above), so it is presumably not
             * zeroed; give a missing page a defined rotation too.
             */
            self->page_rotates[i] = 0;
            continue;
        }

        box_obj = fz_dict_gets(page_obj, "MediaBox");
        *mbox = pdf_to_rect(box_obj);
        if (fz_is_empty_rect(*mbox)) {
            fz_warn("Cannot find page bounds, guessing page bounds.");
            mbox->x1 = 612;
            mbox->y1 = 792;
        }

        box_obj = fz_dict_gets(page_obj, "CropBox");
        if (fz_is_array(box_obj))
            *mbox = fz_intersect_rect(*mbox, pdf_to_rect(box_obj));

        self->page_rotates[i] = fz_to_int(
            fz_dict_gets(page_obj, "Rotate"));

        /* rotations that are not a multiple of 90 are ignored */
        if (self->page_rotates[i] % 90)
            self->page_rotates[i] = 0;
    }

    return 1;
}
static fz_error pdf_load_shading_dict(fz_shade **shadep, pdf_xref *xref, fz_obj *dict, fz_matrix transform) { fz_error error; fz_shade *shade; pdf_function *func[FZ_MAX_COLORS] = { NULL }; fz_stream *stream = NULL; fz_obj *obj; int funcs; int type; int i; shade = fz_malloc(sizeof(fz_shade)); shade->refs = 1; shade->type = FZ_MESH; shade->use_background = 0; shade->use_function = 0; shade->matrix = transform; shade->bbox = fz_infinite_rect; shade->extend[0] = 0; shade->extend[1] = 0; shade->mesh_len = 0; shade->mesh_cap = 0; shade->mesh = NULL; shade->colorspace = NULL; funcs = 0; obj = fz_dict_gets(dict, "ShadingType"); type = fz_to_int(obj); obj = fz_dict_gets(dict, "ColorSpace"); if (!obj) { fz_drop_shade(shade); return fz_throw("shading colorspace is missing"); } error = pdf_load_colorspace(&shade->colorspace, xref, obj); if (error) { fz_drop_shade(shade); return fz_rethrow(error, "cannot load colorspace (%d %d R)", fz_to_num(obj), fz_to_gen(obj)); } obj = fz_dict_gets(dict, "Background"); if (obj) { shade->use_background = 1; for (i = 0; i < shade->colorspace->n; i++) shade->background[i] = fz_to_real(fz_array_get(obj, i)); } obj = fz_dict_gets(dict, "BBox"); if (fz_is_array(obj)) { shade->bbox = pdf_to_rect(obj); } obj = fz_dict_gets(dict, "Function"); if (fz_is_dict(obj)) { funcs = 1; error = pdf_load_function(&func[0], xref, obj); if (error) { error = fz_rethrow(error, "cannot load shading function (%d %d R)", fz_to_num(obj), fz_to_gen(obj)); goto cleanup; } } else if (fz_is_array(obj)) { funcs = fz_array_len(obj); if (funcs != 1 && funcs != shade->colorspace->n) { error = fz_throw("incorrect number of shading functions"); goto cleanup; } for (i = 0; i < funcs; i++) { error = pdf_load_function(&func[i], xref, fz_array_get(obj, i)); if (error) { error = fz_rethrow(error, "cannot load shading function (%d %d R)", fz_to_num(obj), fz_to_gen(obj)); goto cleanup; } } } if (type >= 4 && type <= 7) { error = pdf_open_stream(&stream, xref, fz_to_num(dict), 
fz_to_gen(dict)); if (error) { error = fz_rethrow(error, "cannot open shading stream (%d %d R)", fz_to_num(dict), fz_to_gen(dict)); goto cleanup; } } switch (type) { case 1: pdf_load_function_based_shading(shade, xref, dict, func[0]); break; case 2: pdf_load_axial_shading(shade, xref, dict, funcs, func); break; case 3: pdf_load_radial_shading(shade, xref, dict, funcs, func); break; case 4: pdf_load_type4_shade(shade, xref, dict, funcs, func, stream); break; case 5: pdf_load_type5_shade(shade, xref, dict, funcs, func, stream); break; case 6: pdf_load_type6_shade(shade, xref, dict, funcs, func, stream); break; case 7: pdf_load_type7_shade(shade, xref, dict, funcs, func, stream); break; default: error = fz_throw("unknown shading type: %d", type); goto cleanup; } if (stream) fz_close(stream); for (i = 0; i < funcs; i++) if (func[i]) pdf_drop_function(func[i]); *shadep = shade; return fz_okay; cleanup: if (stream) fz_close(stream); for (i = 0; i < funcs; i++) if (func[i]) pdf_drop_function(func[i]); fz_drop_shade(shade); return fz_rethrow(error, "cannot load shading type %d (%d %d R)", type, fz_to_num(dict), fz_to_gen(dict)); }