/* * Load uncompressed contents of a stream into buf. */ fz_error pdf_load_stream(fz_buffer **bufp, pdf_xref *xref, int num, int gen) { fz_error error; fz_stream *stm; fz_obj *dict, *obj; int i, len; error = pdf_open_stream(&stm, xref, num, gen); if (error) return fz_rethrow(error, "cannot open stream (%d %d R)", num, gen); error = pdf_load_object(&dict, xref, num, gen); if (error) return fz_rethrow(error, "cannot load stream dictionary (%d %d R)", num, gen); len = fz_to_int(fz_dict_gets(dict, "Length")); obj = fz_dict_gets(dict, "Filter"); len = pdf_guess_filter_length(len, fz_to_name(obj)); for (i = 0; i < fz_array_len(obj); i++) len = pdf_guess_filter_length(len, fz_to_name(fz_array_get(obj, i))); fz_drop_obj(dict); error = fz_read_all(bufp, stm, len); if (error) { fz_close(stm); return fz_rethrow(error, "cannot read raw stream (%d %d R)", num, gen); } fz_close(stm); return fz_okay; }
fz_buffer * pdf_load_raw_renumbered_stream(fz_context *ctx, pdf_document *doc, int num, int gen, int orig_num, int orig_gen) { fz_stream *stm; pdf_obj *dict; int len; fz_buffer *buf; if (num > 0 && num < pdf_xref_len(ctx, doc)) { pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num); if (entry->stm_buf) return fz_keep_buffer(ctx, entry->stm_buf); } dict = pdf_load_object(ctx, doc, num, gen); len = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Length)); pdf_drop_obj(ctx, dict); stm = pdf_open_raw_renumbered_stream(ctx, doc, num, gen, orig_num, orig_gen); buf = fz_read_all(ctx, stm, len); fz_drop_stream(ctx, stm); return buf; }
/* * Load raw (compressed but decrypted) contents of a stream into buf. */ fz_error pdf_load_raw_stream(fz_buffer **bufp, pdf_xref *xref, int num, int gen) { fz_error error; fz_stream *stm; fz_obj *dict; int len; error = pdf_load_object(&dict, xref, num, gen); if (error) return fz_rethrow(error, "cannot load stream dictionary (%d %d R)", num, gen); len = fz_to_int(fz_dict_gets(dict, "Length")); fz_drop_obj(dict); error = pdf_open_raw_stream(&stm, xref, num, gen); if (error) return fz_rethrow(error, "cannot open raw stream (%d %d R)", num, gen); error = fz_read_all(bufp, stm, len); if (error) { fz_close(stm); return fz_rethrow(error, "cannot read raw stream (%d %d R)", num, gen); } fz_close(stm); return fz_okay; }
fz_buffer * pdf_load_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen) { fz_stream *stm; pdf_obj *dict; int len; fz_buffer *buf; if (num > 0 && num < xref->len && xref->table[num].stm_buf) return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf); dict = pdf_load_object(xref, num, gen); /* RJW: "cannot load stream dictionary (%d %d R)", num, gen */ len = pdf_to_int(pdf_dict_gets(dict, "Length")); pdf_drop_obj(dict); stm = pdf_open_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen); /* RJW: "cannot open raw stream (%d %d R)", num, gen */ buf = fz_read_all(stm, len); /* RJW: "cannot read raw stream (%d %d R)", num, gen */ fz_close(stm); return buf; }
/* Image specific methods */ static void res_image_init(fz_context *ctx, pdf_document *doc, pdf_res_table *table) { int len, k; pdf_obj *obj; pdf_obj *type; pdf_obj *res = NULL; fz_image *image = NULL; unsigned char digest[16]; fz_var(obj); fz_var(image); fz_var(res); fz_try(ctx) { table->hash = fz_new_hash_table(ctx, 4096, 16, -1); len = pdf_count_objects(ctx, doc); for (k = 1; k < len; k++) { obj = pdf_load_object(ctx, doc, k, 0); type = pdf_dict_get(ctx, obj, PDF_NAME_Subtype); if (pdf_name_eq(ctx, type, PDF_NAME_Image)) { image = pdf_load_image(ctx, doc, obj); res_image_get_md5(ctx, image, digest); fz_drop_image(ctx, image); image = NULL; /* Don't allow overwrites. */ if (fz_hash_find(ctx, table->hash, digest) == NULL) fz_hash_insert(ctx, table->hash, digest, obj); } else { pdf_drop_obj(ctx, obj); } obj = NULL; } } fz_always(ctx) { fz_drop_image(ctx, image); pdf_drop_obj(ctx, obj); } fz_catch(ctx) { res_table_free(ctx, table); fz_rethrow(ctx); } }
static fz_buffer * pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params, int *truncated) { fz_stream *stm = NULL; pdf_obj *dict, *obj; int i, len, n; fz_buffer *buf; fz_var(buf); if (num > 0 && num < pdf_xref_len(ctx, doc)) { pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num); /* Return ref to existing buffer, but only if uncompressed, * or shortstoppable */ if (can_reuse_buffer(ctx, entry, params)) return fz_keep_buffer(ctx, entry->stm_buf); } dict = pdf_load_object(ctx, doc, num, gen); len = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Length)); obj = pdf_dict_get(ctx, dict, PDF_NAME_Filter); len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj)); n = pdf_array_len(ctx, obj); for (i = 0; i < n; i++) len = pdf_guess_filter_length(len, pdf_to_name(ctx, pdf_array_get(ctx, obj, i))); pdf_drop_obj(ctx, dict); stm = pdf_open_image_stream(ctx, doc, num, gen, orig_num, orig_gen, params); fz_try(ctx) { if (truncated) buf = fz_read_best(ctx, stm, len, truncated); else buf = fz_read_all(ctx, stm, len); } fz_always(ctx) { fz_drop_stream(ctx, stm); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot read raw stream (%d %d R)", num, gen); } return buf; }
static void preloadobjstms(void) { fz_obj *obj; int num; for (num = 0; num < xref->len; num++) { if (xref->table[num].type == 'o') { obj = pdf_load_object(xref, num, 0); fz_drop_obj(obj); } } }
static void preloadobjstms(void) { fz_error error; fz_obj *obj; int num; for (num = 0; num < xref->len; num++) { if (xref->table[num].type == 'o') { error = pdf_load_object(&obj, xref, num, 0); if (error) die(error); fz_drop_obj(ctx, obj); } } }
static void writeobject(int num, int gen) { fz_error error; fz_obj *obj; fz_obj *type; error = pdf_load_object(&obj, xref, num, gen); if (error) die(error); /* skip ObjStm and XRef objects */ if (fz_is_dict(ctx, obj)) { type = fz_dict_gets(ctx, obj, "Type"); if (fz_is_name(ctx, type) && !strcmp(fz_to_name(ctx, type), "ObjStm")) { uselist[num] = 0; fz_drop_obj(ctx, obj); return; } if (fz_is_name(ctx, type) && !strcmp(fz_to_name(ctx, type), "XRef")) { uselist[num] = 0; fz_drop_obj(ctx, obj); return; } } if (!pdf_is_stream(xref, num, gen)) { fprintf(out, "%d %d obj\n", num, gen); fz_fprint_obj(ctx, out, obj, !doexpand); fprintf(out, "endobj\n\n"); } else { if (doexpand && !pdf_is_jpx_image(ctx, obj)) expandstream(obj, num, gen); else copystream(obj, num, gen); } fz_drop_obj(ctx, obj); }
fz_buffer * pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params) { fz_context *ctx = xref->ctx; fz_stream *stm = NULL; pdf_obj *dict, *obj; int i, len, n; fz_buffer *buf; fz_var(buf); if (num > 0 && num < xref->len && xref->table[num].stm_buf) return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf); dict = pdf_load_object(xref, num, gen); /* RJW: "cannot load stream dictionary (%d %d R)", num, gen */ len = pdf_to_int(pdf_dict_gets(dict, "Length")); obj = pdf_dict_gets(dict, "Filter"); len = pdf_guess_filter_length(len, pdf_to_name(obj)); n = pdf_array_len(obj); for (i = 0; i < n; i++) len = pdf_guess_filter_length(len, pdf_to_name(pdf_array_get(obj, i))); pdf_drop_obj(dict); stm = pdf_open_image_stream(xref, num, gen, orig_num, orig_gen, params); /* RJW: "cannot open stream (%d %d R)", num, gen */ fz_try(ctx) { buf = fz_read_all(stm, len); } fz_always(ctx) { fz_close(stm); } fz_catch(ctx) { fz_throw(ctx, "cannot read raw stream (%d %d R)", num, gen); } return buf; }
static void showobject(int num) { fz_error error; fz_obj *obj; if (!xref) die(fz_error_make(ctx, "no file specified")); error = pdf_load_object(&obj, xref, num, 0); if (error) die(error); if (isimage(obj)) saveimage(num); else if (isfontdesc(obj)) savefont(obj, num); fz_drop_obj(ctx, obj); }
static void showobject(int num) { pdf_obj *obj; if (!doc) fz_throw(ctx, FZ_ERROR_GENERIC, "no file specified"); fz_try(ctx) { obj = pdf_load_object(ctx, doc, num); if (isimage(obj)) saveimage(num); else if (isfontdesc(obj)) savefont(obj, num); pdf_drop_obj(ctx, obj); } fz_catch(ctx) { fz_warn(ctx, "ignoring object %d", num); } }
/* * Load raw (compressed but decrypted) contents of a stream into buf. */ fz_buffer * pdf_load_raw_stream(pdf_document *xref, int num, int gen) { fz_stream *stm; pdf_obj *dict; int len; fz_buffer *buf; dict = pdf_load_object(xref, num, gen); /* RJW: "cannot load stream dictionary (%d %d R)", num, gen */ len = pdf_to_int(pdf_dict_gets(dict, "Length")); pdf_drop_obj(dict); stm = pdf_open_raw_stream(xref, num, gen); /* RJW: "cannot open raw stream (%d %d R)", num, gen */ buf = fz_read_all(stm, len); /* RJW: "cannot read raw stream (%d %d R)", num, gen */ fz_close(stm); return buf; }
static void writeobject(int num, int gen) { fz_obj *obj; fz_obj *type; obj = pdf_load_object(xref, num, gen); /* skip ObjStm and XRef objects */ if (fz_is_dict(obj)) { type = fz_dict_gets(obj, "Type"); if (fz_is_name(type) && !strcmp(fz_to_name(type), "ObjStm")) { uselist[num] = 0; fz_drop_obj(obj); return; } if (fz_is_name(type) && !strcmp(fz_to_name(type), "XRef")) { uselist[num] = 0; fz_drop_obj(obj); return; } } if (!pdf_is_stream(xref, num, gen)) { fprintf(out, "%d %d obj\n", num, gen); fz_fprint_obj(out, obj, doexpand == 0); fprintf(out, "endobj\n\n"); } else { int dontexpand = 0; if (doexpand != 0 && doexpand != expand_all) { fz_obj *o; if ((o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "XObject")) && (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Image"))) dontexpand = !(doexpand & expand_images); if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "Font")) dontexpand = !(doexpand & expand_fonts); if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "FontDescriptor")) dontexpand = !(doexpand & expand_fonts); if ((o = fz_dict_gets(obj, "Length1")) != NULL) dontexpand = !(doexpand & expand_fonts); if ((o = fz_dict_gets(obj, "Length2")) != NULL) dontexpand = !(doexpand & expand_fonts); if ((o = fz_dict_gets(obj, "Length3")) != NULL) dontexpand = !(doexpand & expand_fonts); if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Type1C")) dontexpand = !(doexpand & expand_fonts); if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "CIDFontType0C")) dontexpand = !(doexpand & expand_fonts); } if (doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj)) expandstream(obj, num, gen); else copystream(obj, num, gen); } fz_drop_obj(obj); }
static fz_error pdf_load_obj_stm(pdf_xref *xref, int num, int gen, char *buf, int cap) { fz_error error; fz_stream *stm; fz_obj *objstm; int *numbuf; int *ofsbuf; fz_obj *obj; int first; int count; int i, n; int tok; error = pdf_load_object(&objstm, xref, num, gen); if (error) return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen); count = fz_to_int(fz_dict_gets(objstm, "N")); first = fz_to_int(fz_dict_gets(objstm, "First")); numbuf = fz_calloc(count, sizeof(int)); ofsbuf = fz_calloc(count, sizeof(int)); error = pdf_open_stream(&stm, xref, num, gen); if (error) { error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen); goto cleanupbuf; } for (i = 0; i < count; i++) { error = pdf_lex(&tok, stm, buf, cap, &n); if (error || tok != PDF_TOK_INT) { error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); goto cleanupstm; } numbuf[i] = atoi(buf); error = pdf_lex(&tok, stm, buf, cap, &n); if (error || tok != PDF_TOK_INT) { error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); goto cleanupstm; } ofsbuf[i] = atoi(buf); } fz_seek(stm, first, 0); for (i = 0; i < count; i++) { fz_seek(stm, first + ofsbuf[i], 0); error = pdf_parse_stm_obj(&obj, xref, stm, buf, cap); if (error) { error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen); goto cleanupstm; } if (numbuf[i] < 1 || numbuf[i] >= xref->len) { fz_drop_obj(obj); error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1); goto cleanupstm; } if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num) { if (xref->table[numbuf[i]].obj) fz_drop_obj(xref->table[numbuf[i]].obj); xref->table[numbuf[i]].obj = obj; } else { fz_drop_obj(obj); } } fz_close(stm); fz_free(ofsbuf); fz_free(numbuf); fz_drop_obj(objstm); return fz_okay; cleanupstm: fz_close(stm); cleanupbuf: fz_free(ofsbuf); fz_free(numbuf); fz_drop_obj(objstm); return error; /* already rethrown */ }
fz_error pdf_open_xref_with_stream(pdf_xref **xrefp, fz_stream *file, char *password) { pdf_xref *xref; fz_error error; fz_obj *encrypt, *id; fz_obj *dict, *obj; int i, repaired = 0; /* install pdf specific callback */ fz_resolve_indirect = pdf_resolve_indirect; xref = fz_malloc(sizeof(pdf_xref)); memset(xref, 0, sizeof(pdf_xref)); xref->file = fz_keep_stream(file); error = pdf_load_xref(xref, xref->scratch, sizeof xref->scratch); if (error) { fz_catch(error, "trying to repair"); if (xref->table) { fz_free(xref->table); xref->table = NULL; xref->len = 0; } if (xref->trailer) { fz_drop_obj(xref->trailer); xref->trailer = NULL; } error = pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch); if (error) { pdf_free_xref(xref); return fz_rethrow(error, "cannot repair document"); } repaired = 1; } encrypt = fz_dict_gets(xref->trailer, "Encrypt"); id = fz_dict_gets(xref->trailer, "ID"); if (fz_is_dict(encrypt)) { error = pdf_new_crypt(&xref->crypt, encrypt, id); if (error) { pdf_free_xref(xref); return fz_rethrow(error, "cannot decrypt document"); } } if (pdf_needs_password(xref)) { /* Only care if we have a password */ if (password) { int okay = pdf_authenticate_password(xref, password); if (!okay) { pdf_free_xref(xref); return fz_throw("invalid password"); } } } if (repaired) { int hasroot, hasinfo; error = pdf_repair_obj_stms(xref); if (error) { pdf_free_xref(xref); return fz_rethrow(error, "cannot repair document"); } hasroot = fz_dict_gets(xref->trailer, "Root") != NULL; hasinfo = fz_dict_gets(xref->trailer, "Info") != NULL; for (i = 1; i < xref->len; i++) { if (xref->table[i].type == 0 || xref->table[i].type == 'f') continue; error = pdf_load_object(&dict, xref, i, 0); if (error) { fz_catch(error, "ignoring broken object (%d 0 R)", i); continue; } if (!hasroot) { obj = fz_dict_gets(dict, "Type"); if (fz_is_name(obj) && !strcmp(fz_to_name(obj), "Catalog")) { obj = fz_new_indirect(i, 0, xref); fz_dict_puts(xref->trailer, "Root", obj); fz_drop_obj(obj); } } if (!hasinfo) { if (fz_dict_gets(dict, "Creator") || fz_dict_gets(dict, "Producer")) { obj = fz_new_indirect(i, 0, xref); fz_dict_puts(xref->trailer, "Info", obj); fz_drop_obj(obj); } } fz_drop_obj(dict); } } error = pdf_read_ocg(xref); if (error) { pdf_free_xref(xref); return fz_rethrow(error, "Broken Optional Content"); } *xrefp = xref; return fz_okay; }