/*
 * Mark the object referenced by `obj` as used and sweep everything it
 * refers to.
 *
 * References whose object number lies outside [0, xref->len) and objects
 * that are already marked in `uselist` are ignored.  For stream objects an
 * indirect /Length is replaced in the dictionary by the resolved value, and
 * the use mark of the object that held the length is cleared.
 */
static void sweepref(fz_obj *obj)
{
	int num = fz_to_num(obj);
	int gen = fz_to_gen(obj);

	if (num < 0 || num >= xref->len)
		return;
	if (uselist[num])
		return;

	uselist[num] = 1;

	/* Bake in /Length in stream objects */
	fz_try(ctx)
	{
		if (pdf_is_stream(xref, num, gen))
		{
			fz_obj *len = fz_dict_gets(obj, "Length");
			if (fz_is_indirect(len))
			{
				int lennum = fz_to_num(len);

				/*
				 * The object number comes straight from the file, so
				 * apply the same range check as for `num` above before
				 * touching uselist (assumed to hold xref->len entries).
				 */
				if (lennum >= 0 && lennum < xref->len)
					uselist[lennum] = 0;

				len = fz_resolve_indirect(len);
				fz_dict_puts(obj, "Length", len);
			}
		}
	}
	fz_catch(ctx)
	{
		/* Leave broken */
	}

	sweepobj(fz_resolve_indirect(obj));
}
/*
 * Write object `num gen` to `out`, using the stream data returned by
 * pdf_load_stream.
 *
 * The Filter and DecodeParms entries are removed from `obj` before it is
 * printed.  When `doascii` is set and isbinarystream() reports the data as
 * binary, the data is replaced by its hexbuf() form and addhexfilter() is
 * applied to `obj`.  /Length is always rewritten to the number of bytes
 * actually emitted.
 */
static void expandstream(fz_obj *obj, int num, int gen)
{
	fz_buffer *data;
	fz_obj *length;

	data = pdf_load_stream(xref, num, gen);

	fz_dict_dels(obj, "Filter");
	fz_dict_dels(obj, "DecodeParms");

	if (doascii && isbinarystream(data))
	{
		fz_buffer *hex = hexbuf(data->data, data->len);

		fz_drop_buffer(ctx, data);
		data = hex;

		addhexfilter(obj);
	}

	/* The dictionary must describe the bytes written below. */
	length = fz_new_int(ctx, data->len);
	fz_dict_puts(obj, "Length", length);
	fz_drop_obj(length);

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(out, obj, doexpand == 0);
	fprintf(out, "stream\n");
	fwrite(data->data, 1, data->len, out);
	fprintf(out, "endstream\nendobj\n\n");

	fz_drop_buffer(ctx, data);
}
/*
 * Write object `num gen` to `out`, using the stream data returned by
 * pdf_load_raw_stream.
 *
 * A load failure is passed to die().  When `doascii` is set and
 * isbinarystream() reports the data as binary, the data is replaced by its
 * hexbuf() form, addhexfilter() is applied to `obj` and /Length is updated
 * to the new size; otherwise the dictionary is printed as it is.
 */
static void copystream(fz_obj *obj, int num, int gen)
{
	fz_error error;
	fz_buffer *raw;

	error = pdf_load_raw_stream(&raw, xref, num, gen);
	if (error)
		die(error);

	if (doascii && isbinarystream(raw))
	{
		fz_buffer *hex;
		fz_obj *length;

		hex = hexbuf(raw->data, raw->len);
		fz_drop_buffer(ctx, raw);
		raw = hex;

		addhexfilter(obj);

		/* Only the hex-encoded form changes the stream size. */
		length = fz_new_int(ctx, raw->len);
		fz_dict_puts(ctx, obj, "Length", length);
		fz_drop_obj(ctx, length);
	}

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(ctx, out, obj, !doexpand);
	fprintf(out, "stream\n");
	fwrite(raw->data, 1, raw->len, out);
	fprintf(out, "endstream\nendobj\n\n");

	fz_drop_buffer(ctx, raw);
}
/*
 * Add an ASCIIHexDecode entry to the /Filter of stream dictionary `dict`.
 *
 *  - /Filter is a name:   it becomes the array [ASCIIHexDecode, old name];
 *                         a dictionary /DecodeParms becomes [null, old dict].
 *  - /Filter is an array: ASCIIHexDecode is added with fz_array_insert, and
 *                         a null is added to an array /DecodeParms likewise.
 *  - otherwise:           /Filter is set to the name ASCIIHexDecode.
 *
 * /DecodeParms is written back only when a value for it exists.
 */
static void addhexfilter(fz_obj *dict)
{
	fz_obj *hexname = fz_new_name(ctx, "ASCIIHexDecode");
	fz_obj *nullparm = fz_new_null(ctx);
	fz_obj *filter_array = NULL;
	fz_obj *parms_array = NULL;
	fz_obj *filter = fz_dict_gets(dict, "Filter");
	fz_obj *parms = fz_dict_gets(dict, "DecodeParms");

	if (fz_is_name(filter))
	{
		filter_array = fz_new_array(ctx, 2);
		fz_array_push(filter_array, hexname);
		fz_array_push(filter_array, filter);
		filter = filter_array;

		if (fz_is_dict(parms))
		{
			/* Keep the parameter list aligned with the filter list. */
			parms_array = fz_new_array(ctx, 2);
			fz_array_push(parms_array, nullparm);
			fz_array_push(parms_array, parms);
			parms = parms_array;
		}
	}
	else if (fz_is_array(filter))
	{
		fz_array_insert(filter, hexname);
		if (fz_is_array(parms))
			fz_array_insert(parms, nullparm);
	}
	else
	{
		filter = hexname;
	}

	fz_dict_puts(dict, "Filter", filter);
	if (parms)
		fz_dict_puts(dict, "DecodeParms", parms);

	/* Release the local references created above. */
	fz_drop_obj(hexname);
	fz_drop_obj(nullparm);
	if (filter_array)
		fz_drop_obj(filter_array);
	if (parms_array)
		fz_drop_obj(parms_array);
}
static void writexref(void) { fz_obj *trailer; fz_obj *obj; int startxref; int num; startxref = ftell(out); fprintf(out, "xref\n0 %d\n", xref->len); for (num = 0; num < xref->len; num++) { if (uselist[num]) fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]); else fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]); } fprintf(out, "\n"); trailer = fz_new_dict(ctx, 5); obj = fz_new_int(ctx, xref->len); fz_dict_puts(trailer, "Size", obj); fz_drop_obj(obj); obj = fz_dict_gets(xref->trailer, "Info"); if (obj) fz_dict_puts(trailer, "Info", obj); obj = fz_dict_gets(xref->trailer, "Root"); if (obj) fz_dict_puts(trailer, "Root", obj); obj = fz_dict_gets(xref->trailer, "ID"); if (obj) fz_dict_puts(trailer, "ID", obj); fprintf(out, "trailer\n"); fz_fprint_obj(out, trailer, doexpand == 0); fprintf(out, "\n"); fz_drop_obj(trailer); fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref); }
/*
 * Build an appearance stream for the FreeText annotation dictionary `obj`
 * and wrap it with pdf_create_annot.
 *
 * Reads /DA (default appearance string), /Contents (the text), /Rect and
 * /Q (alignment) from `obj`.  The font size comes from
 * pdf_extract_font_size and falls back to 10 when that yields zero.
 * Characters above 0xFF in the text are replaced by '?'.
 *
 * `base_ap` only holds the appearance prefix handed to pdf_append_line and
 * is dropped before returning; `content` and the resource dictionary are
 * passed on to pdf_create_annot.
 */
static pdf_annot *
pdf_create_freetext_annot(pdf_xref *xref, fz_obj *obj)
{
	fz_context *ctx = xref->ctx;
	fz_buffer *content = fz_new_buffer(ctx, 256);
	fz_buffer *base_ap = fz_new_buffer(ctx, 256);
	fz_obj *da = fz_dict_gets(ctx, obj, "DA");
	fz_obj *text = fz_dict_gets(ctx, obj, "Contents");
	fz_rect rect = pdf_to_rect(ctx, fz_dict_gets(ctx, obj, "Rect"));
	int align = fz_to_int(ctx, fz_dict_gets(ctx, obj, "Q"));
	fz_obj *resources = pdf_dict_from_string(xref, ANNOT_FREETEXT_AP_RESOURCES);
	unsigned short *ucs2, *cursor;
	float line_x;
	char *bracket;
	char *font_name = NULL;
	float font_size = pdf_extract_font_size(xref, fz_to_str_buf(ctx, da), &font_name);

	if (!font_size)
		font_size = 10;

	/* TODO: what resource dictionary does this font name refer to? */
	if (font_name)
	{
		/* Alias the requested font name to the "Default" font entry. */
		fz_obj *fonts = fz_dict_gets(ctx, resources, "Font");
		fz_dict_puts(ctx, fonts, font_name, fz_dict_gets(ctx, fonts, "Default"));
		fz_free(ctx, font_name);
	}

	fz_buffer_printf(ctx, content, "q 1 1 %.4f %.4f re W n BT %s ",
		rect.x1 - rect.x0 - 2.0f, rect.y1 - rect.y0 - 2.0f, fz_to_str_buf(ctx, da));
	fz_buffer_printf(ctx, base_ap, "q BT %s ", fz_to_str_buf(ctx, da));
	fz_buffer_printf(ctx, content, "/Default %.4f Tf ", font_size);
	fz_buffer_printf(ctx, base_ap, "/Default %.4f Tf ", font_size);
	fz_buffer_printf(ctx, content, "1 0 0 1 2 %.4f Tm ", rect.y1 - rect.y0 - 2);

	/* Adobe Reader seems to consider "[1 0 0] r" and "1 0 0 rg" to mean the same(?) */
	/* NOTE(review): relies on base_ap->data being NUL-terminated - confirm. */
	bracket = strchr(base_ap->data, '[');
	if (bracket)
	{
		float r, g, b;
		if (sscanf(bracket, "[%f %f %f] r", &r, &g, &b) == 3)
			fz_buffer_printf(ctx, content, "%.4f %.4f %.4f rg ", r, g, b);
	}

	ucs2 = pdf_to_ucs2(ctx, text);

	/* Replace every character above 0xFF. */
	for (cursor = ucs2; *cursor; cursor++)
	{
		if (*cursor > 0xFF)
			*cursor = '?';
	}

	/* Emit the text line by line until the whole string is consumed. */
	line_x = 0;
	cursor = ucs2;
	while (*cursor)
	{
		cursor = pdf_append_line(xref, resources, content, base_ap, cursor,
			font_size, align, rect.x1 - rect.x0 - 4.0f, 1, &line_x);
	}

	fz_free(ctx, ucs2);

	fz_buffer_printf(ctx, content, "ET Q");

	fz_drop_buffer(ctx, base_ap);

	return pdf_create_annot(ctx, rect, fz_keep_obj(obj), content, resources, 0);
}
/*
 * Return a copy of the (resolved) dictionary `obj` whose /OC entry is set
 * to the dictionary built from ANNOT_OC_VIEW_ONLY.
 *
 * The copy is made with fz_copy_dict; `obj` itself is not modified here.
 */
static fz_obj *
pdf_clone_for_view_only(pdf_xref *xref, fz_obj *obj)
{
	fz_obj *view_only = pdf_dict_from_string(xref, ANNOT_OC_VIEW_ONLY);
	fz_obj *clone = fz_copy_dict(xref->ctx, pdf_resolve_indirect(obj));

	fz_dict_puts(xref->ctx, clone, "OC", view_only);
	fz_drop_obj(xref->ctx, view_only);

	return clone;
}
/*
 * Decode a fax-compressed TIFF strip from `chain` into `wp` (at most
 * `wlen` bytes).
 *
 * The decoder parameters are derived from the TIFF header fields and the
 * compression code `comp`:
 *   Columns          = imagewidth
 *   Rows             = imagelength
 *   BlackIs1         = (photometric == 0)
 *   K                = -1 when comp == 4, otherwise 0
 *   EncodedByteAlign = (comp == 2)
 *
 * Returns fz_okay, or an annotated error when the read reports a negative
 * result.
 */
static int
xps_decode_tiff_fax(struct tiff *tiff, int comp, fz_stream *chain, byte *wp, int wlen)
{
	fz_context *ctx = tiff->ctx;
	fz_obj *columns, *rows, *black_is_1, *k, *encoded_byte_align;
	fz_obj *params;
	fz_stream *faxd;
	int nread;

	columns = fz_new_int(ctx, tiff->imagewidth);
	rows = fz_new_int(ctx, tiff->imagelength);
	black_is_1 = fz_new_bool(ctx, tiff->photometric == 0);
	k = fz_new_int(ctx, comp == 4 ? -1 : 0);
	encoded_byte_align = fz_new_bool(ctx, comp == 2);

	params = fz_new_dict(ctx, 5);

	fz_dict_puts(ctx, params, "Columns", columns);
	fz_dict_puts(ctx, params, "Rows", rows);
	fz_dict_puts(ctx, params, "BlackIs1", black_is_1);
	fz_dict_puts(ctx, params, "K", k);
	fz_dict_puts(ctx, params, "EncodedByteAlign", encoded_byte_align);

	/* Release the local references to the parameter values. */
	fz_drop_obj(ctx, columns);
	fz_drop_obj(ctx, rows);
	fz_drop_obj(ctx, black_is_1);
	fz_drop_obj(ctx, k);
	fz_drop_obj(ctx, encoded_byte_align);

	faxd = fz_open_faxd(chain, params);
	nread = fz_read(faxd, wp, wlen);
	fz_close(faxd);
	fz_drop_obj(ctx, params);

	if (nread < 0)
		return fz_error_note(ctx, nread, "cannot read fax strip");

	return fz_okay;
}
/*
 * Extract the text between "<tag_name ...>" and "</tag_name" in `data`
 * using plain string searches, and store it in `dict` under key `name`.
 *
 * The first occurrence of `tag_name` must directly follow a '<', and the
 * next occurrence must directly follow "</"; otherwise nothing is stored.
 * Leading and trailing characters for which iswhite() is true are stripped
 * from the stored value.
 */
static void
xps_hacky_get_prop(fz_context *ctx, char *data, fz_obj *dict, char *name, char *tag_name)
{
	char *open_tag, *close_tag;
	char *first, *last;
	fz_obj *value;

	open_tag = strstr(data, tag_name);
	if (!open_tag || open_tag == data || open_tag[-1] != '<')
		return;

	close_tag = strstr(open_tag + 1, tag_name);
	first = strchr(open_tag, '>');
	if (!first || !close_tag || first >= close_tag)
		return;
	if (close_tag[-2] != '<' || close_tag[-1] != '/')
		return;

	/* Step past the '>' and any leading whitespace. */
	first++;
	while (iswhite(*first))
		first++;

	/* Start just before the "</" and back up over trailing whitespace. */
	last = close_tag - 3;
	while (iswhite(*last) && last > first)
		last--;

	value = fz_new_string(ctx, first, last - first + 1);
	fz_dict_puts(ctx, dict, name, value);
	fz_drop_obj(ctx, value);
}
static void retainpages(int argc, char **argv) { fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ oldroot = fz_dict_gets(xref->trailer, "Root"); pages = fz_dict_gets(oldroot, "Pages"); olddests = pdf_load_name_tree(xref, "Dests"); root = fz_new_dict(ctx, 2); fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type")); fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages")); pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root); fz_drop_obj(root); /* Create a new kids array with only the pages we want to keep */ parent = fz_new_indirect(ctx, fz_to_num(pages), fz_to_gen(pages), xref); kids = fz_new_array(ctx, 1); /* Retain pages specified */ while (argc - fz_optind) { int page, spage, epage; char *spec, *dash; char *pagelist = argv[fz_optind]; spec = fz_strsep(&pagelist, ","); while (spec) { dash = strchr(spec, '-'); if (dash == spec) spage = epage = pdf_count_pages(xref); else spage = epage = atoi(spec); if (dash) { if (strlen(dash) > 1) epage = atoi(dash + 1); else epage = pdf_count_pages(xref); } if (spage > epage) page = spage, spage = epage, epage = page; if (spage < 1) spage = 1; if (epage > pdf_count_pages(xref)) epage = pdf_count_pages(xref); for (page = spage; page <= epage; page++) { fz_obj *pageobj = xref->page_objs[page-1]; fz_obj *pageref = xref->page_refs[page-1]; fz_dict_puts(pageobj, "Parent", parent); /* Store page object in new kids array */ fz_array_push(kids, pageref); } spec = fz_strsep(&pagelist, ","); } fz_optind++; } fz_drop_obj(parent); /* Update page count and kids array */ countobj = fz_new_int(ctx, fz_array_len(kids)); fz_dict_puts(pages, "Count", countobj); fz_drop_obj(countobj); fz_dict_puts(pages, "Kids", kids); fz_drop_obj(kids); /* Also preserve the (partial) Dests name tree */ if (olddests) { int i; fz_obj *names = fz_new_dict(ctx, 1); fz_obj *dests = fz_new_dict(ctx, 1); fz_obj *names_list = 
fz_new_array(ctx, 32); for (i = 0; i < fz_dict_len(olddests); i++) { fz_obj *key = fz_dict_get_key(olddests, i); fz_obj *val = fz_dict_get_val(olddests, i); fz_obj *key_str = fz_new_string(ctx, fz_to_name(key), strlen(fz_to_name(key))); fz_obj *dest = fz_dict_gets(val, "D"); dest = fz_array_get(dest ? dest : val, 0); if (fz_array_contains(fz_dict_gets(pages, "Kids"), dest)) { fz_array_push(names_list, key_str); fz_array_push(names_list, val); } fz_drop_obj(key_str); } root = fz_dict_gets(xref->trailer, "Root"); fz_dict_puts(dests, "Names", names_list); fz_dict_puts(names, "Dests", dests); fz_dict_puts(root, "Names", names); fz_drop_obj(names); fz_drop_obj(dests); fz_drop_obj(names_list); fz_drop_obj(olddests); } }
/*
 * Load the outline items chained through /Next starting at `dict`, loading
 * each item's children (via /First) recursively.
 *
 * Returns the first node of the resulting sibling list, or NULL when `dict`
 * is null or the first item has already been visited.
 *
 * Cycle protection: every visited dictionary is temporarily tagged with a
 * ".seen" key, and the walk stops when it reaches a tagged dictionary.
 * Each invocation removes only the tags it placed itself.  Walking the
 * chain "while tagged" would also strip tags owned by an enclosing
 * invocation whenever a /First or /Next link leads back into a list that is
 * still being traversed, which disables the guard for that outer walk.
 */
static fz_outline *
pdf_load_outline_imp(pdf_xref *xref, fz_obj *dict)
{
	pdf_link *link;
	fz_outline *node;
	fz_obj *obj;
	/* SumatraPDF: prevent potential stack overflow */
	fz_outline *prev = NULL, *root = NULL;
	fz_obj *origDict = dict;
	int marked = 0; /* number of dictionaries tagged by this invocation */
	fz_context *ctx = xref->ctx;

	if (fz_is_null(ctx, dict))
		return NULL;

	/* SumatraPDF: prevent cyclic outlines */
	do
	{
		if (fz_dict_gets(ctx, dict, ".seen"))
			break;

		obj = fz_new_null(ctx);
		fz_dict_puts(ctx, dict, ".seen", obj);
		fz_drop_obj(ctx, obj);
		marked++;

		node = fz_malloc(ctx, sizeof(fz_outline));
		node->title = NULL;
		node->page = -1;
		node->down = NULL;
		node->next = NULL;

		obj = fz_dict_gets(ctx, dict, "Title");
		if (obj)
			node->title = pdf_to_utf8(ctx, obj);

		/* SumatraPDF: support expansion states */
		node->is_open = fz_to_int(ctx, fz_dict_gets(ctx, dict, "Count")) >= 0;
		/* SumatraPDF: extended outline actions */
		node->data = node->free_data = NULL;

		if (fz_dict_gets(ctx, dict, "Dest") || fz_dict_gets(ctx, dict, "A"))
		{
			link = pdf_load_link(xref, dict);
			if (link) /* SumatraPDF: don't crash if it's no link after all */
			{
				if (link->kind == PDF_LINK_GOTO)
					node->page = pdf_find_page_number(xref, fz_array_get(ctx, link->dest, 0));
				/* SumatraPDF: extended outline actions */
				node->data = link;
				node->free_data = pdf_free_link;
			}
		}

		obj = fz_dict_gets(ctx, dict, "First");
		if (obj)
			node->down = pdf_load_outline_imp(xref, obj);

		/* SumatraPDF: prevent potential stack overflow */
		if (!root)
			prev = root = node;
		else
			prev = prev->next = node;

		dict = fz_dict_gets(ctx, dict, "Next");
	} while (dict && !fz_is_null(ctx, dict));

	/*
	 * SumatraPDF: prevent cyclic outlines
	 * Retrace the same /Next chain, but never further than the number of
	 * dictionaries tagged above, so tags of outer invocations survive.
	 */
	for (dict = origDict;
		marked > 0 && dict && fz_dict_gets(ctx, dict, ".seen");
		marked--, dict = fz_dict_gets(ctx, dict, "Next"))
	{
		fz_dict_dels(ctx, dict, ".seen");
	}

	return root;
}
/*
 * Open a PDF document from `file` and return its xref in *xrefp.
 *
 * Steps, in order:
 *  1. load the xref; on failure discard the partial state and repair it;
 *  2. set up decryption when the trailer has an /Encrypt dictionary;
 *  3. when the document needs a password and one was supplied, verify it
 *     (a missing password is not an error here);
 *  4. after a repair, repair object streams and scan all objects to fill
 *     in a missing /Root (an object with /Type /Catalog) or /Info (an
 *     object with /Creator or /Producer) in the trailer;
 *  5. read the optional content configuration.
 *
 * On any fatal error the xref is freed and the error is returned; *xrefp
 * is written only on success.
 */
fz_error
pdf_open_xref_with_stream(pdf_xref **xrefp, fz_stream *file, char *password)
{
	pdf_xref *xref;
	fz_error error;
	fz_obj *encrypt, *id;
	fz_obj *dict, *obj;
	int i, repaired = 0;

	/* install pdf specific callback */
	fz_resolve_indirect = pdf_resolve_indirect;

	xref = fz_malloc(sizeof(pdf_xref));
	memset(xref, 0, sizeof(pdf_xref));

	xref->file = fz_keep_stream(file);

	error = pdf_load_xref(xref, xref->scratch, sizeof xref->scratch);
	if (error)
	{
		fz_catch(error, "trying to repair");

		/* Throw away whatever the failed load left behind. */
		if (xref->table)
		{
			fz_free(xref->table);
			xref->table = NULL;
			xref->len = 0;
		}
		if (xref->trailer)
		{
			fz_drop_obj(xref->trailer);
			xref->trailer = NULL;
		}

		error = pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot repair document");
		}
		repaired = 1;
	}

	encrypt = fz_dict_gets(xref->trailer, "Encrypt");
	id = fz_dict_gets(xref->trailer, "ID");
	if (fz_is_dict(encrypt))
	{
		error = pdf_new_crypt(&xref->crypt, encrypt, id);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot decrypt document");
		}
	}

	/* Only care if we have a password */
	if (pdf_needs_password(xref) && password &&
		!pdf_authenticate_password(xref, password))
	{
		pdf_free_xref(xref);
		return fz_throw("invalid password");
	}

	if (repaired)
	{
		int hasroot, hasinfo;

		error = pdf_repair_obj_stms(xref);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot repair document");
		}

		hasroot = fz_dict_gets(xref->trailer, "Root") != NULL;
		hasinfo = fz_dict_gets(xref->trailer, "Info") != NULL;

		for (i = 1; i < xref->len; i++)
		{
			/* Skip unused and free entries. */
			if (xref->table[i].type == 0 || xref->table[i].type == 'f')
				continue;

			error = pdf_load_object(&dict, xref, i, 0);
			if (error)
			{
				fz_catch(error, "ignoring broken object (%d 0 R)", i);
				continue;
			}

			if (!hasroot)
			{
				obj = fz_dict_gets(dict, "Type");
				if (fz_is_name(obj) && !strcmp(fz_to_name(obj), "Catalog"))
				{
					obj = fz_new_indirect(i, 0, xref);
					fz_dict_puts(xref->trailer, "Root", obj);
					fz_drop_obj(obj);
				}
			}

			if (!hasinfo)
			{
				if (fz_dict_gets(dict, "Creator") || fz_dict_gets(dict, "Producer"))
				{
					obj = fz_new_indirect(i, 0, xref);
					fz_dict_puts(xref->trailer, "Info", obj);
					fz_drop_obj(obj);
				}
			}

			fz_drop_obj(dict);
		}
	}

	error = pdf_read_ocg(xref);
	if (error)
	{
		pdf_free_xref(xref);
		return fz_rethrow(error, "Broken Optional Content");
	}

	*xrefp = xref;
	return fz_okay;
}