/*
 * Report whether a stream dictionary explicitly names the /Crypt filter.
 *
 * The filter entry is fetched with pdf_dict_geta under the Filter and F
 * keys. It is accepted either as a single name or as an array of names.
 *
 * Returns 1 if /Crypt is found, 0 otherwise (including when there is no
 * filter entry at all).
 */
static int
pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm)
{
	pdf_obj *filter_obj = pdf_dict_geta(ctx, stm, PDF_NAME_Filter, PDF_NAME_F);
	int count;
	int idx;

	if (!filter_obj)
		return 0;

	/* A lone filter given directly as a name. */
	if (pdf_name_eq(ctx, filter_obj, PDF_NAME_Crypt))
		return 1;

	if (!pdf_is_array(ctx, filter_obj))
		return 0;

	/* Otherwise look at every element of the filter array. */
	count = pdf_array_len(ctx, filter_obj);
	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *item = pdf_array_get(ctx, filter_obj, idx);
		if (pdf_name_eq(ctx, item, PDF_NAME_Crypt))
			return 1;
	}

	return 0;
}
/*
 * Check if an entry has a cached stream and return whether it is directly
 * reusable. A buffer is directly reusable only if the stream is
 * uncompressed, or if it is compressed purely with a compression method
 * whose details we can return in fz_compression_params.
 *
 * entry: xref entry to inspect. A NULL entry, or one without an object or
 * without a cached stream buffer, is never reusable.
 * params: optional. When non-NULL it is first reset to FZ_IMAGE_RAW and,
 * for a single filter, filled in by build_compression_params.
 *
 * If the stream is reusable return 1, and set params as required,
 * otherwise return 0.
 */
static int
can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params)
{
	pdf_obj *f;
	pdf_obj *p;

	if (!entry || !entry->obj || !entry->stm_buf)
		return 0;

	if (params)
		params->type = FZ_IMAGE_RAW;

	f = pdf_dict_geta(ctx, entry->obj, PDF_NAME_Filter, PDF_NAME_F);
	/* If there are no filters, it's uncompressed, and we can use it */
	if (!f)
		return 1;

	p = pdf_dict_geta(ctx, entry->obj, PDF_NAME_DecodeParms, PDF_NAME_DP);
	if (pdf_is_array(ctx, f))
	{
		int len = pdf_array_len(ctx, f);

		/* Empty array of filters. It's uncompressed. We can cope. */
		if (len == 0)
			return 1;
		/* 1 filter is the most we can hope to cope with - if more, give up. */
		if (len != 1)
			return 0;

		/*
		 * Unwrap the single filter together with its matching parameters.
		 * Without unwrapping f, the name check below would always see the
		 * array itself and reject every one-element filter array.
		 */
		f = pdf_array_get(ctx, f, 0);
		p = pdf_array_get(ctx, p, 0);
	}
	if (pdf_is_null(ctx, f))
		return 1; /* Null filter is uncompressed */
	if (!pdf_is_name(ctx, f))
		return 0;

	/* There are filters, so unless we have the option of shortstopping,
	 * we can't use the existing buffer. */
	if (!params)
		return 0;

	build_compression_params(ctx, f, p, params);

	/* Still RAW means the filter was not one we can describe in params. */
	return (params->type == FZ_IMAGE_RAW) ? 0 : 1;
}
/*
 * Build a decoding filter for an inline stream. The result is not
 * constrained to the stream length by a raw filter and performs no
 * decryption.
 *
 * stmobj: dictionary supplying the Filter/F and DecodeParms/DP entries.
 * length: byte count handed to fz_open_null when no filter is present.
 * chain: underlying stream; an extra reference is taken so that closing
 * the returned filter does not close it.
 * imparams: optional; set to FZ_IMAGE_RAW when the stream has no filter,
 * otherwise passed on to the filter builders.
 */
fz_stream *
pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, fz_compression_params *imparams)
{
	pdf_obj *filter_obj = pdf_dict_geta(ctx, stmobj, PDF_NAME_Filter, PDF_NAME_F);
	pdf_obj *decode_parms = pdf_dict_geta(ctx, stmobj, PDF_NAME_DecodeParms, PDF_NAME_DP);

	/* don't close chain when we close this filter */
	fz_keep_stream(ctx, chain);

	/* Single filter given as a name. */
	if (pdf_is_name(ctx, filter_obj))
		return build_filter(ctx, chain, doc, filter_obj, decode_parms, 0, 0, imparams);

	/* No usable filter list: pass the data through untouched. */
	if (pdf_array_len(ctx, filter_obj) <= 0)
	{
		if (imparams)
			imparams->type = FZ_IMAGE_RAW;
		return fz_open_null(ctx, chain, length, fz_tell(ctx, chain));
	}

	/* Non-empty array of filters. */
	return build_filter_chain(ctx, chain, doc, filter_obj, decode_parms, 0, 0, imparams);
}
/*
 * Convert a PDF file specification and an optional destination into a
 * "file://" URI string produced by fz_asprintf.
 *
 * file_spec: either a string, or a dictionary from which the platform
 * entry (DOS under _WIN32, Unix otherwise) is tried first, then UF/F.
 * dest: optional destination. An array yields "#page=N" (first element
 * plus one), a name or string yields "#<text>", anything else yields no
 * fragment.
 *
 * Returns the URI, or NULL (after a warning) when no string file name
 * can be found.
 */
char *
pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_obj *dest)
{
	char fragment[256];
	pdf_obj *name_obj = NULL;
	const char *file_path;
	char *result = NULL;

	if (pdf_is_string(ctx, file_spec))
		name_obj = file_spec;

	if (pdf_is_dict(ctx, file_spec))
	{
#ifdef _WIN32
		name_obj = pdf_dict_get(ctx, file_spec, PDF_NAME(DOS));
#else
		name_obj = pdf_dict_get(ctx, file_spec, PDF_NAME(Unix));
#endif
		if (name_obj == NULL)
			name_obj = pdf_dict_geta(ctx, file_spec, PDF_NAME(UF), PDF_NAME(F));
	}

	if (!pdf_is_string(ctx, name_obj))
	{
		fz_warn(ctx, "cannot parse file specification");
		return NULL;
	}

	/* Start with an empty fragment and overwrite it if dest is usable. */
	fragment[0] = 0;
	if (pdf_is_array(ctx, dest))
		fz_snprintf(fragment, sizeof fragment, "#page=%d", pdf_array_get_int(ctx, dest, 0) + 1);
	else if (pdf_is_name(ctx, dest))
		fz_snprintf(fragment, sizeof fragment, "#%s", pdf_to_name(ctx, dest));
	else if (pdf_is_string(ctx, dest))
		fz_snprintf(fragment, sizeof fragment, "#%s", pdf_to_str_buf(ctx, dest));

	file_path = pdf_to_text_string(ctx, name_obj);

#ifdef _WIN32
	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, file_spec, PDF_NAME(FS)), PDF_NAME(URL)))
	{
		/* Fix up the drive letter (change "/C/Documents/Foo" to "C:/Documents/Foo") */
		if (file_path[0] == '/' &&
			(('A' <= file_path[1] && file_path[1] <= 'Z') || ('a' <= file_path[1] && file_path[1] <= 'z')) &&
			file_path[2] == '/')
		{
			result = fz_asprintf(ctx, "file://%c:%s%s", file_path[1], file_path+2, fragment);
		}
	}
#endif

	/* Default form when no drive-letter rewrite took place. */
	if (!result)
		result = fz_asprintf(ctx, "file://%s%s", file_path, fragment);

	return result;
}
/*
 * Construct a filter to decode a stream, constraining
 * to stream length and decrypting.
 *
 * chain: underlying stream, wrapped first by pdf_open_raw_filter.
 * stmobj: stream dictionary supplying Filter/F and DecodeParms/DP.
 * num, offset: passed through to pdf_open_raw_filter.
 * imparams: optional; handed to the filter builders, or set to
 * FZ_IMAGE_RAW when the stream has no filters at all, so callers never
 * see a stale value (matching pdf_open_inline_stream).
 *
 * Returns the outermost stream. If building a filter throws, the error
 * is rethrown.
 */
static fz_stream *
pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *chain, pdf_obj *stmobj, int num, fz_off_t offset, fz_compression_params *imparams)
{
	pdf_obj *filters;
	pdf_obj *params;
	int orig_num, orig_gen;

	filters = pdf_dict_geta(ctx, stmobj, PDF_NAME_Filter, PDF_NAME_F);
	params = pdf_dict_geta(ctx, stmobj, PDF_NAME_DecodeParms, PDF_NAME_DP);

	chain = pdf_open_raw_filter(ctx, chain, doc, stmobj, num, &orig_num, &orig_gen, offset);

	fz_var(chain);

	fz_try(ctx)
	{
		if (pdf_is_name(ctx, filters))
		{
			/*
			 * Clear chain before the call so the catch block below does
			 * not drop the stream that was handed to the builder
			 * (presumably the builder disposes of its input on failure).
			 */
			fz_stream *chain2 = chain;
			chain = NULL;
			chain = build_filter(ctx, chain2, doc, filters, params, orig_num, orig_gen, imparams);
		}
		else if (pdf_array_len(ctx, filters) > 0)
		{
			fz_stream *chain2 = chain;
			chain = NULL;
			chain = build_filter_chain(ctx, chain2, doc, filters, params, orig_num, orig_gen, imparams);
		}
		else if (imparams)
		{
			/* No filters: the data is uncompressed, so say so explicitly. */
			imparams->type = FZ_IMAGE_RAW;
		}
	}
	fz_catch(ctx)
	{
		fz_drop_stream(ctx, chain);
		fz_rethrow(ctx);
	}

	return chain;
}
/*
 * Create an fz_link from an annotation dictionary.
 *
 * dict: annotation dictionary; anything whose Subtype is not Link, or
 * that has no Rect, produces NULL.
 * pagenum: passed to pdf_parse_link_action.
 * page_ctm: matrix applied to the annotation rectangle.
 *
 * The target comes from the Dest entry when present; otherwise from the
 * A action, falling back to the U/D entries of the AA dictionary.
 * Returns NULL when no URI can be derived.
 */
static fz_link *
pdf_load_link(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int pagenum, const fz_matrix *page_ctm)
{
	fz_link *result = NULL;
	fz_rect area;
	pdf_obj *rect_obj;
	pdf_obj *dest_obj;
	char *target;

	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Subtype), PDF_NAME_Link))
		return NULL;

	rect_obj = pdf_dict_get(ctx, dict, PDF_NAME_Rect);
	if (!rect_obj)
		return NULL;

	pdf_to_rect(ctx, rect_obj, &area);
	fz_transform_rect(&area, page_ctm);

	dest_obj = pdf_dict_get(ctx, dict, PDF_NAME_Dest);
	if (dest_obj)
	{
		target = pdf_parse_link_dest(ctx, doc, dest_obj);
	}
	else
	{
		pdf_obj *act = pdf_dict_get(ctx, dict, PDF_NAME_A);

		/* fall back to additional action button's down/up action */
		if (!act)
			act = pdf_dict_geta(ctx, pdf_dict_get(ctx, dict, PDF_NAME_AA), PDF_NAME_U, PDF_NAME_D);

		target = pdf_parse_link_action(ctx, doc, act, pagenum);
	}

	if (!target)
		return NULL;

	/* The URI string is released here whether or not link creation succeeds. */
	fz_try(ctx)
		result = fz_new_link(ctx, &area, doc, target);
	fz_always(ctx)
		fz_free(ctx, target);
	fz_catch(ctx)
		fz_rethrow(ctx);

	return result;
}
/*
 * Convert a PDF file specification and an optional destination into a
 * newly allocated "file://" URI string.
 *
 * file_spec: either a string, or a dictionary from which the platform
 * entry (DOS on Windows builds, Unix otherwise) is tried first, then
 * UF/F.
 * dest: optional destination. An array yields "#page=N" (first element
 * plus one), a name or string yields "#<text>", anything else yields no
 * fragment.
 *
 * Returns the URI (allocated with fz_malloc), or NULL after a warning
 * when no string file name can be found. The temporary UTF-8 path is
 * freed on every exit, including when an allocation or object lookup
 * throws.
 */
char *
pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_obj *dest)
{
	pdf_obj *filename = NULL;
	char *path = NULL;
	char *uri = NULL;
	char buf[256];
	size_t n;

	if (pdf_is_string(ctx, file_spec))
		filename = file_spec;

	if (pdf_is_dict(ctx, file_spec))
	{
#if defined(_WIN32) || defined(_WIN64)
		filename = pdf_dict_get(ctx, file_spec, PDF_NAME_DOS);
#else
		filename = pdf_dict_get(ctx, file_spec, PDF_NAME_Unix);
#endif
		if (!filename)
			filename = pdf_dict_geta(ctx, file_spec, PDF_NAME_UF, PDF_NAME_F);
	}

	if (!pdf_is_string(ctx, filename))
	{
		fz_warn(ctx, "cannot parse file specification");
		return NULL;
	}

	path = pdf_to_utf8(ctx, filename);

	fz_var(uri);

	/*
	 * Everything from here on may throw (fz_malloc, and possibly the pdf
	 * object accessors), so path must be released via fz_always rather
	 * than by a plain fz_free at the end.
	 */
	fz_try(ctx)
	{
#if defined(_WIN32) || defined(_WIN64)
		if (strcmp(pdf_to_name(ctx, pdf_dict_gets(ctx, file_spec, "FS")), "URL") != 0)
		{
			/* move the file name into the expected place and use the expected path separator */
			char *c;
			if (path[0] == '/' && (('A' <= path[1] && path[1] <= 'Z') || ('a' <= path[1] && path[1] <= 'z')) && path[2] == '/')
			{
				path[0] = path[1];
				path[1] = ':';
			}
			for (c = path; *c; c++)
			{
				if (*c == '/')
					*c = '\\';
			}
		}
#endif

		if (pdf_is_array(ctx, dest))
			fz_snprintf(buf, sizeof buf, "#page=%d", pdf_to_int(ctx, pdf_array_get(ctx, dest, 0)) + 1);
		else if (pdf_is_name(ctx, dest))
			fz_snprintf(buf, sizeof buf, "#%s", pdf_to_name(ctx, dest));
		else if (pdf_is_string(ctx, dest))
			fz_snprintf(buf, sizeof buf, "#%s", pdf_to_str_buf(ctx, dest));
		else
			buf[0] = 0;

		/* 7 is strlen("file://"); the final +1 is the terminating NUL. */
		n = 7 + strlen(path) + strlen(buf) + 1;
		uri = fz_malloc(ctx, n);
		fz_strlcpy(uri, "file://", n);
		fz_strlcat(uri, path, n);
		fz_strlcat(uri, buf, n);
	}
	fz_always(ctx)
	{
		fz_free(ctx, path);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	return uri;
}
static fz_image * pdf_load_image_imp(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask) { fz_image *image = NULL; pdf_obj *obj, *res; int w, h, bpc, n; int imagemask; int interpolate; int indexed; fz_image *mask = NULL; /* explicit mask/soft mask image */ int use_colorkey = 0; fz_colorspace *colorspace = NULL; float decode[FZ_MAX_COLORS * 2]; int colorkey[FZ_MAX_COLORS * 2]; int stride; int i; fz_compressed_buffer *buffer; /* special case for JPEG2000 images */ if (pdf_is_jpx_image(ctx, dict)) return pdf_load_jpx_imp(ctx, doc, rdb, dict, cstm, forcemask); w = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Width), PDF_NAME(W))); h = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Height), PDF_NAME(H))); bpc = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(BitsPerComponent), PDF_NAME(BPC))); if (bpc == 0) bpc = 8; imagemask = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(ImageMask), PDF_NAME(IM))); interpolate = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Interpolate), PDF_NAME(I))); indexed = 0; use_colorkey = 0; if (imagemask) bpc = 1; if (w <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image width is zero (or less)"); if (h <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image height is zero (or less)"); if (bpc <= 0) fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is zero (or less)"); if (bpc > 16) fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is too large: %d", bpc); if (w > (1 << 16)) fz_throw(ctx, FZ_ERROR_GENERIC, "image is too wide"); if (h > (1 << 16)) fz_throw(ctx, FZ_ERROR_GENERIC, "image is too high"); fz_var(mask); fz_var(image); fz_var(colorspace); fz_try(ctx) { obj = pdf_dict_geta(ctx, dict, PDF_NAME(ColorSpace), PDF_NAME(CS)); if (obj && !imagemask && !forcemask) { /* colorspace resource lookup is only done for inline images */ if (pdf_is_name(ctx, obj)) { res = pdf_dict_get(ctx, pdf_dict_get(ctx, rdb, PDF_NAME(ColorSpace)), obj); if (res) obj = res; } colorspace = pdf_load_colorspace(ctx, obj); indexed = 
fz_colorspace_is_indexed(ctx, colorspace); n = fz_colorspace_n(ctx, colorspace); } else { n = 1; } obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D)); if (obj) { for (i = 0; i < n * 2; i++) decode[i] = pdf_array_get_real(ctx, obj, i); } else if (fz_colorspace_is_lab(ctx, colorspace) || fz_colorspace_is_lab_icc(ctx, colorspace)) { decode[0] = 0; decode[1] = 100; decode[2] = -128; decode[3] = 127; decode[4] = -128; decode[5] = 127; } else { float maxval = indexed ? (1 << bpc) - 1 : 1; for (i = 0; i < n * 2; i++) decode[i] = i & 1 ? maxval : 0; } obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask)); if (pdf_is_dict(ctx, obj)) { /* Not allowed for inline images or soft masks */ if (cstm) fz_warn(ctx, "Ignoring invalid inline image soft mask"); else if (forcemask) fz_warn(ctx, "Ignoring recursive image soft mask"); else { mask = pdf_load_image_imp(ctx, doc, rdb, obj, NULL, 1); obj = pdf_dict_get(ctx, obj, PDF_NAME(Matte)); if (pdf_is_array(ctx, obj)) { use_colorkey = 1; for (i = 0; i < n; i++) colorkey[i] = pdf_array_get_real(ctx, obj, i) * 255; } } } else if (pdf_is_array(ctx, obj)) { use_colorkey = 1; for (i = 0; i < n * 2; i++) { if (!pdf_is_int(ctx, pdf_array_get(ctx, obj, i))) { fz_warn(ctx, "invalid value in color key mask"); use_colorkey = 0; } colorkey[i] = pdf_array_get_int(ctx, obj, i); } } /* Do we load from a ref, or do we load an inline stream? */ if (cstm == NULL) { /* Just load the compressed image data now and we can decode it on demand. */ buffer = pdf_load_compressed_stream(ctx, doc, pdf_to_num(ctx, dict)); image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? colorkey : NULL, buffer, mask); image->invert_cmyk_jpeg = 0; } else { /* Inline stream */ stride = (w * n * bpc + 7) / 8; image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? 
colorkey : NULL, NULL, mask); image->invert_cmyk_jpeg = 0; pdf_load_compressed_inline_image(ctx, doc, dict, stride * h, cstm, indexed, (fz_compressed_image *)image); } } fz_always(ctx) { fz_drop_colorspace(ctx, colorspace); fz_drop_image(ctx, mask); } fz_catch(ctx) { fz_drop_image(ctx, image); fz_rethrow(ctx); } return image; }
/*
 * Load a JPX image: the whole stream is read and decoded to a pixmap,
 * which is then wrapped in an fz_image.
 *
 * dict: image dictionary. Its ColorSpace, SMask/Mask and Decode/D
 * entries are consulted.
 * forcemask: nonzero when this image is itself being loaded as a soft
 * mask; a nested soft mask is then ignored with a warning.
 *
 * The intermediate buffer, pixmap, colorspace and mask references are
 * dropped before returning, on both success and failure.
 */
static fz_image *
pdf_load_jpx(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int forcemask)
{
	fz_buffer *raw = NULL;
	fz_colorspace *cs = NULL;
	fz_pixmap *pixmap = NULL;
	fz_image *softmask = NULL;
	fz_image *result = NULL;

	fz_var(pixmap);
	fz_var(raw);
	fz_var(cs);
	fz_var(softmask);

	raw = pdf_load_stream(ctx, dict);

	/* FIXME: We can't handle decode arrays for indexed images currently */
	fz_try(ctx)
	{
		unsigned char *bytes;
		size_t nbytes;
		pdf_obj *cs_obj;
		pdf_obj *mask_obj;
		pdf_obj *decode_obj;

		cs_obj = pdf_dict_get(ctx, dict, PDF_NAME(ColorSpace));
		if (cs_obj)
			cs = pdf_load_colorspace(ctx, cs_obj);

		nbytes = fz_buffer_storage(ctx, raw, &bytes);
		pixmap = fz_load_jpx(ctx, bytes, nbytes, cs);

		mask_obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask));
		if (pdf_is_dict(ctx, mask_obj))
		{
			if (!forcemask)
				softmask = pdf_load_image_imp(ctx, doc, NULL, mask_obj, NULL, 1);
			else
				fz_warn(ctx, "Ignoring recursive JPX soft mask");
		}

		decode_obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D));
		if (decode_obj && !fz_colorspace_is_indexed(ctx, cs))
		{
			float decode[FZ_MAX_COLORS * 2];
			int count = pixmap->n * 2;
			int k;

			/* Two decode values per pixmap component. */
			for (k = 0; k < count; k++)
				decode[k] = pdf_array_get_real(ctx, decode_obj, k);

			fz_decode_tile(ctx, pixmap, decode);
		}

		result = fz_new_image_from_pixmap(ctx, pixmap, softmask);
	}
	fz_always(ctx)
	{
		fz_drop_image(ctx, softmask);
		fz_drop_pixmap(ctx, pixmap);
		fz_drop_colorspace(ctx, cs);
		fz_drop_buffer(ctx, raw);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	return result;
}