/*
 * Write object (num, gen) to 'out' followed by its stream data as returned
 * by pdf_load_raw_stream(). A load error is handed to die().
 *
 * When 'doascii' is set and isbinarystream() flags the data, the buffer is
 * replaced by the result of hexbuf(), addhexfilter() is applied to the
 * dictionary and its "Length" entry is rewritten to the new buffer length.
 */
static void copystream(fz_obj *obj, int num, int gen)
{
	fz_error error;
	fz_buffer *raw;

	error = pdf_load_raw_stream(&raw, xref, num, gen);
	if (error)
		die(error);

	if (doascii && isbinarystream(raw))
	{
		fz_buffer *hex;
		fz_obj *length;

		/* Swap the raw data for its hexbuf() form. */
		hex = hexbuf(raw->data, raw->len);
		fz_drop_buffer(ctx, raw);
		raw = hex;

		addhexfilter(obj);

		/* The dictionary keeps the value; our reference is released. */
		length = fz_new_int(ctx, raw->len);
		fz_dict_puts(ctx, obj, "Length", length);
		fz_drop_obj(ctx, length);
	}

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(ctx, out, obj, !doexpand);
	fprintf(out, "stream\n");
	fwrite(raw->data, 1, raw->len, out);
	fprintf(out, "endstream\nendobj\n\n");

	fz_drop_buffer(ctx, raw);
}
/*
 * Write object (num, gen) to 'out' followed by the stream data returned by
 * pdf_load_stream().
 *
 * The "Filter" and "DecodeParms" entries are deleted from the dictionary and
 * "Length" is always rewritten to the length of the buffer that is written.
 * When 'doascii' is set and isbinarystream() flags the data, the buffer is
 * replaced by the result of hexbuf() and addhexfilter() is applied to the
 * dictionary.
 */
static void expandstream(fz_obj *obj, int num, int gen)
{
	fz_buffer *data;
	fz_obj *length;

	data = pdf_load_stream(xref, num, gen);

	fz_dict_dels(obj, "Filter");
	fz_dict_dels(obj, "DecodeParms");

	if (doascii && isbinarystream(data))
	{
		fz_buffer *hex = hexbuf(data->data, data->len);

		fz_drop_buffer(ctx, data);
		data = hex;

		addhexfilter(obj);
	}

	/* Length must describe whichever buffer ends up being written. */
	length = fz_new_int(ctx, data->len);
	fz_dict_puts(obj, "Length", length);
	fz_drop_obj(length);

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(out, obj, !doexpand);
	fprintf(out, "stream\n");
	fwrite(data->data, 1, data->len, out);
	fprintf(out, "endstream\nendobj\n\n");

	fz_drop_buffer(ctx, data);
}
/*
 * Parse a single object from 'file' and store it in *op.
 *
 * One token is lexed into buf (capacity cap). Arrays and dictionaries are
 * delegated to pdf_parse_array() / pdf_parse_dict(); names, reals, strings,
 * booleans, null and integers are built directly from the token. Any other
 * token is an error.
 *
 * Returns fz_okay on success, otherwise an error from fz_rethrow()/fz_throw().
 */
fz_error pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error;
	int tok;
	int len;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse token in object stream");

	switch (tok)
	{
	/* Compound objects: only these two can change 'error' below. */
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(op, xref, file, buf, cap);
		break;
	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(op, xref, file, buf, cap);
		break;

	/* Scalars are built straight from the lexed token. */
	case PDF_TOK_NAME:
		*op = fz_new_name(buf);
		break;
	case PDF_TOK_REAL:
		*op = fz_new_real(fz_atof(buf));
		break;
	case PDF_TOK_STRING:
		*op = fz_new_string(buf, len);
		break;
	case PDF_TOK_TRUE:
		*op = fz_new_bool(1);
		break;
	case PDF_TOK_FALSE:
		*op = fz_new_bool(0);
		break;
	case PDF_TOK_NULL:
		*op = fz_new_null();
		break;
	case PDF_TOK_INT:
		*op = fz_new_int(atoi(buf));
		break;

	default:
		return fz_throw("unknown token in object stream");
	}

	/* Still clear from the successful pdf_lex() unless a nested parse failed. */
	if (error)
		return fz_rethrow(error, "cannot parse object stream");

	return fz_okay;
}
/*
 * Read one fax strip through fz_open_faxd() on 'chain'.
 *
 * A five-entry parameter dictionary is built from the tiff header:
 *   Columns          = tiff->imagewidth
 *   Rows             = tiff->imagelength
 *   BlackIs1         = (tiff->photometric == 0)
 *   K                = -1 when comp == 4, otherwise 0
 *   EncodedByteAlign = (comp == 2)
 *
 * Up to wlen bytes are read into wp. The stream and the dictionary are
 * released before returning. Returns fz_okay, or the result of
 * fz_error_note() when the read count is negative.
 */
static int xps_decode_tiff_fax(struct tiff *tiff, int comp, fz_stream *chain, byte *wp, int wlen)
{
	fz_context *ctx = tiff->ctx;
	fz_obj *columns, *rows, *black_is_1, *k, *encoded_byte_align;
	fz_obj *params;
	fz_stream *faxd;
	int nread;

	/* Values first ... */
	columns = fz_new_int(ctx, tiff->imagewidth);
	rows = fz_new_int(ctx, tiff->imagelength);
	black_is_1 = fz_new_bool(ctx, tiff->photometric == 0);
	k = fz_new_int(ctx, comp == 4 ? -1 : 0);
	encoded_byte_align = fz_new_bool(ctx, comp == 2);

	/* ... then the dictionary that receives them ... */
	params = fz_new_dict(ctx, 5);
	fz_dict_puts(ctx, params, "Columns", columns);
	fz_dict_puts(ctx, params, "Rows", rows);
	fz_dict_puts(ctx, params, "BlackIs1", black_is_1);
	fz_dict_puts(ctx, params, "K", k);
	fz_dict_puts(ctx, params, "EncodedByteAlign", encoded_byte_align);

	/* ... and finally our own references to the values are released. */
	fz_drop_obj(ctx, columns);
	fz_drop_obj(ctx, rows);
	fz_drop_obj(ctx, black_is_1);
	fz_drop_obj(ctx, k);
	fz_drop_obj(ctx, encoded_byte_align);

	faxd = fz_open_faxd(chain, params);
	nread = fz_read(faxd, wp, wlen);
	fz_close(faxd);
	fz_drop_obj(ctx, params);

	if (nread < 0)
		return fz_error_note(ctx, nread, "cannot read fax strip");

	return fz_okay;
}
static void writexref(void) { fz_obj *trailer; fz_obj *obj; int startxref; int num; startxref = ftell(out); fprintf(out, "xref\n0 %d\n", xref->len); for (num = 0; num < xref->len; num++) { if (uselist[num]) fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]); else fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]); } fprintf(out, "\n"); trailer = fz_new_dict(ctx, 5); obj = fz_new_int(ctx, xref->len); fz_dict_puts(trailer, "Size", obj); fz_drop_obj(obj); obj = fz_dict_gets(xref->trailer, "Info"); if (obj) fz_dict_puts(trailer, "Info", obj); obj = fz_dict_gets(xref->trailer, "Root"); if (obj) fz_dict_puts(trailer, "Root", obj); obj = fz_dict_gets(xref->trailer, "ID"); if (obj) fz_dict_puts(trailer, "ID", obj); fprintf(out, "trailer\n"); fz_fprint_obj(out, trailer, doexpand == 0); fprintf(out, "\n"); fz_drop_obj(trailer); fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref); }
static void retainpages(int argc, char **argv) { fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ oldroot = fz_dict_gets(xref->trailer, "Root"); pages = fz_dict_gets(oldroot, "Pages"); olddests = pdf_load_name_tree(xref, "Dests"); root = fz_new_dict(ctx, 2); fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type")); fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages")); pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root); fz_drop_obj(root); /* Create a new kids array with only the pages we want to keep */ parent = fz_new_indirect(ctx, fz_to_num(pages), fz_to_gen(pages), xref); kids = fz_new_array(ctx, 1); /* Retain pages specified */ while (argc - fz_optind) { int page, spage, epage; char *spec, *dash; char *pagelist = argv[fz_optind]; spec = fz_strsep(&pagelist, ","); while (spec) { dash = strchr(spec, '-'); if (dash == spec) spage = epage = pdf_count_pages(xref); else spage = epage = atoi(spec); if (dash) { if (strlen(dash) > 1) epage = atoi(dash + 1); else epage = pdf_count_pages(xref); } if (spage > epage) page = spage, spage = epage, epage = page; if (spage < 1) spage = 1; if (epage > pdf_count_pages(xref)) epage = pdf_count_pages(xref); for (page = spage; page <= epage; page++) { fz_obj *pageobj = xref->page_objs[page-1]; fz_obj *pageref = xref->page_refs[page-1]; fz_dict_puts(pageobj, "Parent", parent); /* Store page object in new kids array */ fz_array_push(kids, pageref); } spec = fz_strsep(&pagelist, ","); } fz_optind++; } fz_drop_obj(parent); /* Update page count and kids array */ countobj = fz_new_int(ctx, fz_array_len(kids)); fz_dict_puts(pages, "Count", countobj); fz_drop_obj(countobj); fz_dict_puts(pages, "Kids", kids); fz_drop_obj(kids); /* Also preserve the (partial) Dests name tree */ if (olddests) { int i; fz_obj *names = fz_new_dict(ctx, 1); fz_obj *dests = fz_new_dict(ctx, 1); fz_obj *names_list = 
fz_new_array(ctx, 32); for (i = 0; i < fz_dict_len(olddests); i++) { fz_obj *key = fz_dict_get_key(olddests, i); fz_obj *val = fz_dict_get_val(olddests, i); fz_obj *key_str = fz_new_string(ctx, fz_to_name(key), strlen(fz_to_name(key))); fz_obj *dest = fz_dict_gets(val, "D"); dest = fz_array_get(dest ? dest : val, 0); if (fz_array_contains(fz_dict_gets(pages, "Kids"), dest)) { fz_array_push(names_list, key_str); fz_array_push(names_list, val); } fz_drop_obj(key_str); } root = fz_dict_gets(xref->trailer, "Root"); fz_dict_puts(dests, "Names", names_list); fz_dict_puts(names, "Dests", dests); fz_dict_puts(root, "Names", names); fz_drop_obj(names); fz_drop_obj(dests); fz_drop_obj(names_list); fz_drop_obj(olddests); } }
/*
 * Parse an indirect object definition "<num> <gen> obj <value> ..." from
 * 'file', using buf (capacity cap) as the token buffer.
 *
 * On success *op receives the parsed value. onum, ogen and ostmofs may be
 * NULL; when they are not, they receive the object number, the generation
 * and the stream offset. The stream offset is fz_tell(file) taken after the
 * 'stream' keyword and its line ending, or 0 when the value is not followed
 * by 'stream'. A token other than 'stream' or 'endobj' after the value only
 * produces a warning.
 *
 * Returns fz_okay on success, otherwise an error from fz_rethrow()/fz_throw().
 */
fz_error pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap, int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = NULL;
	int num = 0, gen = 0;
	int stm_ofs = 0;
	int ref_num, ref_gen;
	int tok;
	int len;

	/* Header: object number, generation number, 'obj'. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	/* First token of the value. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	/* Nested parse failures are reported right after the switch. */
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(&obj, xref, file, buf, cap);
		break;
	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(&obj, xref, file, buf, cap);
		break;

	case PDF_TOK_NAME:
		obj = fz_new_name(buf);
		break;
	case PDF_TOK_REAL:
		obj = fz_new_real(fz_atof(buf));
		break;
	case PDF_TOK_STRING:
		obj = fz_new_string(buf, len);
		break;
	case PDF_TOK_TRUE:
		obj = fz_new_bool(1);
		break;
	case PDF_TOK_FALSE:
		obj = fz_new_bool(0);
		break;
	case PDF_TOK_NULL:
		obj = fz_new_null();
		break;

	case PDF_TOK_INT:
		/* Either a plain integer or the start of "a b R". */
		ref_num = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* The terminator has already been lexed. */
			obj = fz_new_int(ref_num);
			goto terminator;
		}

		if (tok == PDF_TOK_INT)
		{
			ref_gen = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TOK_R)
			{
				obj = fz_new_indirect(ref_num, ref_gen, xref);
				break;
			}
		}
		return fz_throw("expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		/* No value at all: the object is null. */
		obj = fz_new_null();
		goto terminator;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	/* Only the array/dict cases can arrive here with an error set. */
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	/* Token following the value. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_drop_obj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

terminator:
	if (tok == PDF_TOK_STREAM)
	{
		int c;

		/* Consume spaces plus the first byte that is not a space. */
		do
			c = fz_read_byte(file);
		while (c == ' ');

		/* After '\r', consume a following '\n' or warn that it is missing. */
		if (c == '\r')
		{
			c = fz_peek_byte(file);
			if (c == '\n')
			{
				fz_read_byte(file);
			}
			else
			{
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			}
		}

		stm_ofs = fz_tell(file);
	}
	else if (tok != PDF_TOK_ENDOBJ)
	{
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stm_ofs;

	*op = obj;
	return fz_okay;
}
/*
 * Parse dictionary entries from 'file' into a new dictionary stored in *op,
 * using buf (capacity cap) as the token buffer. Parsing stops at the
 * closing dictionary token or at the keyword "ID".
 *
 * Every key must be a name token. A value is an array, a dictionary, a
 * scalar, or an integer that may turn out to be the start of an indirect
 * reference "a b R".
 *
 * Returns fz_okay on success. On failure the partially built dictionary and
 * the pending key are released and an error from fz_rethrow()/fz_throw() is
 * returned.
 */
fz_error pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = NULL;
	fz_obj *key = NULL;
	fz_obj *val = NULL;
	int ref_num, ref_gen;
	int tok;
	int len;

	dict = fz_new_dict(8);

	for (;;)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

have_token:
		/* End of dictionary; "ID" is for BI .. ID .. EI in content streams */
		if (tok == PDF_TOK_CLOSE_DICT || (tok == PDF_TOK_KEYWORD && strcmp(buf, "ID") == 0))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TOK_NAME)
		{
			fz_drop_obj(dict);
			return fz_throw("invalid key in dict");
		}

		key = fz_new_name(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		/* Nested parse failures are reported right after the switch. */
		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&val, xref, file, buf, cap);
			break;
		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&val, xref, file, buf, cap);
			break;

		case PDF_TOK_NAME:
			val = fz_new_name(buf);
			break;
		case PDF_TOK_REAL:
			val = fz_new_real(fz_atof(buf));
			break;
		case PDF_TOK_STRING:
			val = fz_new_string(buf, len);
			break;
		case PDF_TOK_TRUE:
			val = fz_new_bool(1);
			break;
		case PDF_TOK_FALSE:
			val = fz_new_bool(0);
			break;
		case PDF_TOK_NULL:
			val = fz_new_null();
			break;

		case PDF_TOK_INT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			ref_num = (int) strtoll(buf, 0, 10);

			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}

			/*
			 * The next token starts a new entry or ends the dictionary, so
			 * the integer was a plain value. Store it and reuse the token
			 * that has already been lexed.
			 */
			if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
				(tok == PDF_TOK_KEYWORD && strcmp(buf, "ID") == 0))
			{
				val = fz_new_int(ref_num);
				fz_dict_put(dict, key, val);
				fz_drop_obj(val);
				fz_drop_obj(key);
				goto have_token;
			}

			if (tok == PDF_TOK_INT)
			{
				ref_gen = atoi(buf);

				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_drop_obj(key);
					fz_drop_obj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}

				if (tok == PDF_TOK_R)
				{
					val = fz_new_indirect(ref_num, ref_gen, xref);
					break;
				}
			}

			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("unknown token in dict");
		}

		/* Only the array/dict cases can arrive here with an error set. */
		if (error)
		{
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		fz_dict_put(dict, key, val);
		fz_drop_obj(val);
		fz_drop_obj(key);
	}
}
/*
 * Parse array elements from 'file' up to the closing array token and store
 * the new array in *op, using buf (capacity cap) as the token buffer.
 *
 * Integers are not pushed immediately: up to two are held back (first,
 * second; 'pending' counts them) because "a b R" forms a single indirect
 * reference. Held-back integers are pushed as plain integers as soon as a
 * token arrives that is neither an integer nor 'R', and the older one is
 * pushed when a third integer follows.
 *
 * Returns fz_okay on success. On failure the partially built array is
 * released and an error from fz_rethrow()/fz_throw() is returned.
 */
fz_error pdf_parse_array(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = NULL;
	fz_obj *obj = NULL;
	int first = 0, second = 0, pending = 0;
	int tok;
	int len;

	ary = fz_new_array(4);

	for (;;)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		/* Anything but an integer or 'R' flushes the held-back integers. */
		if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
		{
			if (pending > 0)
			{
				obj = fz_new_int(first);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			if (pending > 1)
			{
				obj = fz_new_int(second);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			pending = 0;
		}

		/* A third integer in a row pushes out the oldest one. */
		if (tok == PDF_TOK_INT && pending == 2)
		{
			obj = fz_new_int(first);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			first = second;
			pending--;
		}

		switch (tok)
		{
		case PDF_TOK_CLOSE_ARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TOK_INT:
			if (pending == 0)
				first = atoi(buf);
			if (pending == 1)
				second = atoi(buf);
			pending++;
			/* Nothing is pushed yet. */
			continue;

		case PDF_TOK_R:
			if (pending != 2)
			{
				fz_drop_obj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			obj = fz_new_indirect(first, second, xref);
			pending = 0;
			break;

		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			break;

		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			break;

		case PDF_TOK_NAME:
			obj = fz_new_name(buf);
			break;
		case PDF_TOK_REAL:
			obj = fz_new_real(fz_atof(buf));
			break;
		case PDF_TOK_STRING:
			obj = fz_new_string(buf, len);
			break;
		case PDF_TOK_TRUE:
			obj = fz_new_bool(1);
			break;
		case PDF_TOK_FALSE:
			obj = fz_new_bool(0);
			break;
		case PDF_TOK_NULL:
			obj = fz_new_null();
			break;

		default:
			fz_drop_obj(ary);
			return fz_throw("cannot parse token in array");
		}

		/* Every case that breaks out of the switch has a new element in obj. */
		fz_array_push(ary, obj);
		fz_drop_obj(obj);
	}
}