/*
 * Load PDF object number 'num' as an image, convert it to a pixmap and
 * hand it to writepixmap() under the name "img-NNNN" (object number,
 * zero-padded to four digits).
 *
 * ctx, doc and dorgb are defined outside this function. Whatever happens,
 * the image, the pixmap and the indirect reference are released; an
 * exception raised while loading or writing is rethrown to the caller.
 */
static void saveimage(int num)
{
	char stem[32];
	fz_image *img = NULL;
	fz_pixmap *pixmap = NULL;
	pdf_obj *objref = pdf_new_indirect(ctx, doc, num, 0);

	/* Both are assigned inside the try and read in the cleanup handler. */
	fz_var(img);
	fz_var(pixmap);

	fz_try(ctx)
	{
		/* TODO: detect DCTD and save as jpeg */
		img = pdf_load_image(ctx, doc, objref);
		pixmap = fz_get_pixmap_from_image(ctx, img, NULL, NULL, 0, 0);

		snprintf(stem, sizeof stem, "img-%04d", num);
		writepixmap(ctx, pixmap, stem, dorgb);
	}
	fz_always(ctx)
	{
		fz_drop_image(ctx, img);
		fz_drop_pixmap(ctx, pixmap);
		pdf_drop_obj(ctx, objref);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/*
 * Copy one page of doc_src into doc_des.
 *
 * page_from: 1-based page number in the source document.
 * page_to:   1-based position in the destination document.
 * graft_map: map passed through to pdf_graft_object for every copied item.
 *
 * Only the keys listed in known_page_objs are copied, so that nothing
 * referencing other pages is dragged along. ctx, doc_src and doc_des are
 * defined outside this function. Exceptions are rethrown after cleanup.
 */
static void page_merge(int page_from, int page_to, pdf_graft_map *graft_map)
{
	pdf_obj *pageref = NULL;
	pdf_obj *page_dict = NULL;
	pdf_obj *item;
	pdf_obj *obj = NULL, *ref = NULL;

	/* Include minimal number of objects for page. Do not include items that
	 * reference other pages */
	pdf_obj *known_page_objs[] = {
		PDF_NAME_Contents, PDF_NAME_Resources, PDF_NAME_MediaBox,
		PDF_NAME_CropBox, PDF_NAME_BleedBox, PDF_NAME_TrimBox,
		PDF_NAME_ArtBox, PDF_NAME_Rotate, PDF_NAME_UserUnit};
	int n = nelem(known_page_objs);
	int i;
	int num;

	/* These are assigned inside the try and released in the cleanup handler. */
	fz_var(page_dict);
	fz_var(obj);
	fz_var(ref);

	fz_try(ctx)
	{
		pdf_obj *owned;

		pageref = pdf_lookup_page_obj(ctx, doc_src, page_from - 1);

		/* Make a new dictionary and copy over the items from the source
		 * object to the new dict that we want to deep copy. */
		page_dict = pdf_new_dict(ctx, doc_des, 4);

		pdf_dict_put_drop(ctx, page_dict, PDF_NAME_Type, PDF_NAME_Page);

		for (i = 0; i < n; i++)
		{
			/*
			 * The looked-up value is borrowed from the source page, so it
			 * is kept in 'item' and never in a variable that the cleanup
			 * handler drops.
			 */
			item = pdf_dict_get(ctx, pageref, known_page_objs[i]);
			if (item != NULL)
				pdf_dict_put_drop(ctx, page_dict, known_page_objs[i],
					pdf_graft_object(ctx, doc_des, doc_src, item, graft_map));
		}

		/*
		 * Add the dictionary. Ownership of page_dict moves to
		 * pdf_add_object_drop, so forget our pointer first to keep the
		 * cleanup handler from dropping it a second time.
		 * NOTE(review): assumes the _drop variant releases its argument
		 * on failure as well -- confirm against the MuPDF version in use.
		 */
		owned = page_dict;
		page_dict = NULL;
		obj = pdf_add_object_drop(ctx, doc_des, owned);

		/* Get indirect ref */
		num = pdf_to_num(ctx, obj);
		ref = pdf_new_indirect(ctx, doc_des, num, 0);

		/* Insert */
		pdf_insert_page(ctx, doc_des, page_to - 1, ref);
	}
	fz_always(ctx)
	{
		/* page_dict is only non-NULL here if we failed before adding it. */
		pdf_drop_obj(ctx, page_dict);
		pdf_drop_obj(ctx, obj);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/*
 * Insert every page of 'src' into 'dest' in front of page 'dest_index'.
 *
 * The source's root /Pages node is copied into the destination document
 * and hooked into the destination page tree as one additional kid.
 *
 * Returns ERROR_INVALID_RANGE when the chosen destination node is not an
 * indirect dictionary with a /Kids array, ERROR_NO_PAGES when the source
 * has no usable root /Pages node, and NoError otherwise.
 */
ErrorCode juggler_add_pages_from_file(juggler_t *dest, juggler_t *src, int dest_index)
{
	/* Default insertion point: the end of the root node's /Kids array. */
	pdf_obj *target_node = pdf_dict_getp(dest->ctx,
		pdf_trailer(dest->ctx, dest->pdf), "Root/Pages");
	int insert_at = pdf_array_len(dest->ctx,
		pdf_dict_gets(dest->ctx, target_node, "Kids"));

	/* Be aware that this call leaves both variables untouched when the
	 * page index is greater than the number of pages. */
	find_destination_pages(dest->ctx, target_node, dest_index,
		&target_node, &insert_at);

	pdf_obj *target_kids = pdf_dict_gets(dest->ctx, target_node, "Kids");
	if (!(pdf_is_indirect(dest->ctx, target_node)
		&& pdf_is_dict(dest->ctx, target_node)
		&& pdf_is_array(dest->ctx, target_kids)))
	{
		return ERROR_INVALID_RANGE;
	}

	pdf_obj *src_root = pdf_dict_getp(src->ctx,
		pdf_trailer(src->ctx, src->pdf), "Root/Pages");
	if (!(pdf_is_indirect(src->ctx, src_root) && pdf_is_dict(src->ctx, src_root)))
	{
		return ERROR_NO_PAGES;
	}

	/* Copying the root pages node together with the objects it references
	 * copies all pages and every object those pages need. */
	pdf_obj *grafted = copy_object_single(dest->ctx, dest->pdf,
		src->ctx, src->pdf, src_root);

	/* Insert the new pages node. The array keeps referencing it after our
	 * own reference is dropped, which is why it is still used below. */
	pdf_array_insert_drop(dest->ctx, target_kids, grafted, insert_at);

	/* Point the new node's /Parent at the node it was inserted into. */
	pdf_obj *parent_ref = pdf_new_indirect(dest->ctx, dest->pdf,
		pdf_to_num(dest->ctx, target_node),
		pdf_to_gen(dest->ctx, target_node));
	pdf_dict_puts_drop(dest->ctx, grafted, "Parent", parent_ref);

	/* TODO: If the destination node contains anything inheritable that the
	 * new node lacks, empty items must be inserted to block inheritance. */

	/* Update the /Count of the node that received the new kid. */
	int old_count = pdf_to_int(dest->ctx,
		pdf_dict_gets(dest->ctx, target_node, "Count"));
	int total = old_count + src->pagecount;
	pdf_dict_puts_drop(dest->ctx, target_node, "Count",
		pdf_new_int(dest->ctx, dest->pdf, total));

	/* Let MuPDF rebuild the page tree. */
	pdf_finish_edit(dest->ctx, dest->pdf);
	dest->pdf->page_count = total;

	/* Update juggler's own state. */
	juggler_page_tree_changed_due_to_insert(dest, dest_index, src->pagecount);

	return NoError;
}
static void saveimage(int num) { fz_image *image; fz_pixmap *pix; pdf_obj *ref; char buf[32]; ref = pdf_new_indirect(doc, num, 0); /* TODO: detect DCTD and save as jpeg */ image = pdf_load_image(doc, ref); pix = fz_new_pixmap_from_image(ctx, image, 0, 0); fz_drop_image(ctx, image); snprintf(buf, sizeof(buf), "img-%04d", num); writepixmap(ctx, pix, buf, dorgb); fz_drop_pixmap(ctx, pix); pdf_drop_obj(ref); }
/*
 * Parse key/value pairs from 'file' into a newly created dictionary.
 *
 * Tokens are read with pdf_lex() until a close-dict token or the keyword
 * "ID" is seen. Every key must be a name token; the value may be an array,
 * a nested dictionary, a name, real, string, boolean, null, integer or an
 * indirect reference ("a b R"), which is bound to 'xref'.
 *
 * Returns the dictionary. On any error the dictionary and any pending
 * key/value are dropped and "cannot parse dict" is thrown.
 */
pdf_obj *
pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
	pdf_obj *dict;
	pdf_obj *key = NULL;
	pdf_obj *val = NULL;
	int tok;
	int a, b;
	fz_context *ctx = file->ctx;

	dict = pdf_new_dict(ctx, 8);

	/* key and val are assigned inside the try and dropped in the catch. */
	fz_var(key);
	fz_var(val);

	fz_try(ctx)
	{
		while (1)
		{
			tok = pdf_lex(file, buf);
			/* Re-entry point for paths that have already read the next token. */
skip:
			if (tok == PDF_TOK_CLOSE_DICT)
				break;

			/* for BI .. ID .. EI in content streams */
			if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
				break;

			if (tok != PDF_TOK_NAME)
				fz_throw(ctx, "invalid key in dict");

			key = fz_new_name(ctx, buf->scratch);

			tok = pdf_lex(file, buf);

			switch (tok)
			{
			case PDF_TOK_OPEN_ARRAY:
				/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1643 */
				/* A broken array value is tolerated: the entry is discarded and
				 * parsing resumes at the next structural token when possible. */
				fz_try(ctx)
				{
					val = pdf_parse_array(xref, file, buf);
				}
				fz_catch(ctx)
				{
					fz_warn(ctx, "ignoring broken array for '%s'", pdf_to_name(key));
					pdf_drop_obj(key);
					val = key = NULL;
					/* Skip tokens until something structural (or EOF) shows up. */
					do
						tok = pdf_lex(file, buf);
					while (tok != PDF_TOK_CLOSE_DICT && tok != PDF_TOK_CLOSE_ARRAY && tok != PDF_TOK_EOF && tok != PDF_TOK_OPEN_ARRAY && tok != PDF_TOK_OPEN_DICT);
					/* End of this dictionary: let the loop head handle it. */
					if (tok == PDF_TOK_CLOSE_DICT)
						goto skip;
					/* End of the broken array: carry on with the next key. */
					if (tok == PDF_TOK_CLOSE_ARRAY)
						continue;
					fz_throw(ctx, "cannot make sense of broken array after all");
				}
				break;

			case PDF_TOK_OPEN_DICT:
				val = pdf_parse_dict(xref, file, buf);
				break;

			case PDF_TOK_NAME: val = fz_new_name(ctx, buf->scratch); break;
			case PDF_TOK_REAL: val = pdf_new_real(ctx, buf->f); break;
			case PDF_TOK_STRING: val = pdf_new_string(ctx, buf->scratch, buf->len); break;
			case PDF_TOK_TRUE: val = pdf_new_bool(ctx, 1); break;
			case PDF_TOK_FALSE: val = pdf_new_bool(ctx, 0); break;
			case PDF_TOK_NULL: val = pdf_new_null(ctx); break;

			case PDF_TOK_INT:
				/* 64-bit to allow for numbers > INT_MAX and overflow */
				/* NOTE(review): a and b are declared as plain int in this
				 * function, so the remark above does not match the code. */
				a = buf->i;
				tok = pdf_lex(file, buf);
				/* A lone integer is followed by the dict end, the next key, or "ID". */
				if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
					(tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
				{
					val = pdf_new_int(ctx, a);
					fz_dict_put(dict, key, val);
					pdf_drop_obj(val);
					val = NULL;
					pdf_drop_obj(key);
					key = NULL;
					/* The look-ahead token is already in tok; reprocess it. */
					goto skip;
				}
				/* Two integers must be followed by R to form a reference. */
				if (tok == PDF_TOK_INT)
				{
					b = buf->i;
					tok = pdf_lex(file, buf);
					if (tok == PDF_TOK_R)
					{
						val = pdf_new_indirect(ctx, a, b, xref);
						break;
					}
				}
				fz_throw(ctx, "invalid indirect reference in dict");

			default:
				fz_throw(ctx, "unknown token in dict");
			}

			/* Store the pair, then release our own references to it. */
			fz_dict_put(dict, key, val);
			pdf_drop_obj(val);
			val = NULL;
			pdf_drop_obj(key);
			key = NULL;
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(dict);
		pdf_drop_obj(key);
		pdf_drop_obj(val);
		fz_throw(ctx, "cannot parse dict");
	}
	return dict;
}
/*
 * Parse array elements from 'file' up to the closing array token and
 * return them in a newly created array.
 *
 * Up to two integers are held back before being stored, because
 * "a b R" forms an indirect reference (bound to 'xref') rather than two
 * numbers. On any error the array and any pending element are dropped and
 * "cannot parse array" is thrown.
 */
pdf_obj *
pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
	fz_context *ctx = file->ctx;
	pdf_obj *ary = NULL;
	pdf_obj *item = NULL;
	int first = 0, second = 0;
	int pending = 0; /* number of integers currently held back (0..2) */
	int token;

	/* item is assigned inside the try and dropped in the catch. */
	fz_var(item);

	ary = pdf_new_array(ctx, 4);

	fz_try(ctx)
	{
		for (;;)
		{
			token = pdf_lex(file, buf);

			if (token != PDF_TOK_INT && token != PDF_TOK_R)
			{
				/* The held-back integers were plain numbers: store them. */
				if (pending > 0)
				{
					item = pdf_new_int(ctx, first);
					pdf_array_push(ary, item);
					pdf_drop_obj(item);
					item = NULL;
				}
				if (pending > 1)
				{
					item = pdf_new_int(ctx, second);
					pdf_array_push(ary, item);
					pdf_drop_obj(item);
					item = NULL;
				}
				pending = 0;
			}
			else if (token == PDF_TOK_INT && pending == 2)
			{
				/* A third integer in a row: the oldest one is a plain number. */
				item = pdf_new_int(ctx, first);
				pdf_array_push(ary, item);
				pdf_drop_obj(item);
				item = NULL;
				first = second;
				pending--;
			}

			if (token == PDF_TOK_CLOSE_ARRAY)
				break;

			if (token == PDF_TOK_INT)
			{
				if (pending == 0)
					first = buf->i;
				else if (pending == 1)
					second = buf->i;
				pending++;
				continue;
			}

			/* Every remaining token yields exactly one element. */
			switch (token)
			{
			case PDF_TOK_R:
				if (pending != 2)
					fz_throw(ctx, "cannot parse indirect reference in array");
				item = pdf_new_indirect(ctx, first, second, xref);
				pending = 0;
				break;

			case PDF_TOK_OPEN_ARRAY:
				item = pdf_parse_array(xref, file, buf);
				break;

			case PDF_TOK_OPEN_DICT:
				item = pdf_parse_dict(xref, file, buf);
				break;

			case PDF_TOK_NAME:
				item = fz_new_name(ctx, buf->scratch);
				break;

			case PDF_TOK_REAL:
				item = pdf_new_real(ctx, buf->f);
				break;

			case PDF_TOK_STRING:
				item = pdf_new_string(ctx, buf->scratch, buf->len);
				break;

			case PDF_TOK_TRUE:
				item = pdf_new_bool(ctx, 1);
				break;

			case PDF_TOK_FALSE:
				item = pdf_new_bool(ctx, 0);
				break;

			case PDF_TOK_NULL:
				item = pdf_new_null(ctx);
				break;

			default:
				fz_throw(ctx, "cannot parse token in array");
			}

			/* Store the element, then release our own reference to it. */
			pdf_array_push(ary, item);
			pdf_drop_obj(item);
			item = NULL;
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(item);
		pdf_drop_obj(ary);
		fz_throw(ctx, "cannot parse array");
	}
	return ary;
}
/*
 * Rebuild the document in glo->doc so that it contains only the pages
 * selected on the command line.
 *
 * argv[0..argc-1] each hold a comma-separated list of page specs: "N",
 * "N-M", "N-" (through the last page) or "-M" (from the last page). Page
 * numbers are clamped to 1..pagecount and a range may run backwards.
 *
 * Steps: replace the catalog with one that keeps only Type and Pages,
 * build a fresh Kids array via retainpage(), keep the named destinations
 * that point at a retained page, and finally remove GoTo link annotations
 * whose destination is not among the kept names.
 */
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	int pagecount;
	int i;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* A trailing dash means "through the last page". */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The target page is the first element of the /D array when
			 * there is one, otherwise of the value itself. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (pdf_array_contains(ctx, pdf_dict_get(ctx, pages, PDF_NAME_Kids), dest))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		/* names_list is read again below; after this drop it is kept
		 * alive through the dests dictionary stored under the catalog. */
		pdf_drop_obj(ctx, names_list);
		pdf_drop_obj(ctx, olddests);
	}

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	pagecount = pdf_count_pages(ctx, doc);
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);
		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);
		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);
			pdf_obj *p;

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			p = pdf_dict_get(ctx, o, PDF_NAME_A);
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo))
				continue;

			if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
				continue;

			/* FIXME: Should probably look at Next too */

			/* Remove this annotation. The array is now one shorter, so
			 * shrink the bound and revisit index j, which holds the
			 * element that followed the deleted one. */
			pdf_array_delete(ctx, annots, j);
			len--;
			j--;
		}
	}
}
/*
 * Parse key/value pairs from 'file' into a newly created dictionary
 * belonging to 'doc'.
 *
 * Reading stops at a close-dict token or at the keyword "ID". Every key
 * must be a name token; a value may be an array, a nested dictionary, a
 * name, real, string, boolean, null, integer or an indirect reference
 * written as "a b R".
 *
 * Returns the dictionary. On any error the dictionary and any pending
 * key/value are dropped and the exception is rethrown with the message
 * "cannot parse dict".
 */
pdf_obj *
pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{
	fz_context *ctx = file->ctx;
	pdf_obj *result;
	pdf_obj *name = NULL;
	pdf_obj *value = NULL;
	pdf_token token;
	int first, second;

	result = pdf_new_dict(doc, 8);

	/* name and value are assigned inside the try and dropped in the catch. */
	fz_var(name);
	fz_var(value);

	fz_try(ctx)
	{
		for (;;)
		{
			token = pdf_lex(file, buf);

			/* Re-entry point when the next token has already been read. */
next_key:
			/* "ID" ends the dictionary for BI .. ID .. EI in content streams. */
			if (token == PDF_TOK_CLOSE_DICT ||
				(token == PDF_TOK_KEYWORD && strcmp(buf->scratch, "ID") == 0))
				break;

			if (token != PDF_TOK_NAME)
				fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key in dict");

			name = pdf_new_name(doc, buf->scratch);
			token = pdf_lex(file, buf);

			switch (token)
			{
			case PDF_TOK_OPEN_ARRAY:
				value = pdf_parse_array(doc, file, buf);
				break;

			case PDF_TOK_OPEN_DICT:
				value = pdf_parse_dict(doc, file, buf);
				break;

			case PDF_TOK_NAME:
				value = pdf_new_name(doc, buf->scratch);
				break;

			case PDF_TOK_REAL:
				value = pdf_new_real(doc, buf->f);
				break;

			case PDF_TOK_STRING:
				value = pdf_new_string(doc, buf->scratch, buf->len);
				break;

			case PDF_TOK_TRUE:
				value = pdf_new_bool(doc, 1);
				break;

			case PDF_TOK_FALSE:
				value = pdf_new_bool(doc, 0);
				break;

			case PDF_TOK_NULL:
				value = pdf_new_null(doc);
				break;

			case PDF_TOK_INT:
				first = buf->i;
				token = pdf_lex(file, buf);

				if (token == PDF_TOK_INT)
				{
					/* Two integers are only valid as "a b R". */
					second = buf->i;
					token = pdf_lex(file, buf);
					if (token != PDF_TOK_R)
						fz_throw(ctx, FZ_ERROR_GENERIC, "invalid indirect reference in dict");
					value = pdf_new_indirect(doc, first, second);
					break;
				}

				/* A lone integer must be followed by the end of the
				 * dictionary, the next key, or the "ID" keyword. */
				if (token != PDF_TOK_CLOSE_DICT && token != PDF_TOK_NAME &&
					!(token == PDF_TOK_KEYWORD && strcmp(buf->scratch, "ID") == 0))
					fz_throw(ctx, FZ_ERROR_GENERIC, "invalid indirect reference in dict");

				value = pdf_new_int(doc, first);
				pdf_dict_put(result, name, value);
				pdf_drop_obj(value);
				value = NULL;
				pdf_drop_obj(name);
				name = NULL;
				/* The look-ahead token still has to be processed. */
				goto next_key;

			default:
				fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in dict");
			}

			/* Store the pair, then release our own references to it. */
			pdf_dict_put(result, name, value);
			pdf_drop_obj(value);
			value = NULL;
			pdf_drop_obj(name);
			name = NULL;
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(result);
		pdf_drop_obj(name);
		pdf_drop_obj(value);
		fz_rethrow_message(ctx, "cannot parse dict");
	}
	return result;
}
/*
 * Create a new annotation of the given type on 'page'.
 *
 * Builds the annotation dictionary (Type, Subtype, an all-zero Rect and
 * the print flag), registers it as a new object in the page's document,
 * appends an indirect reference to the page's /Annots array (creating the
 * array if the page has none) and links the new pdf_annot at the tail of
 * the page's annotation list. The document is marked dirty.
 *
 * Throws for PDF_ANNOT_UNKNOWN. On failure the partially built annotation
 * is released through pdf_drop_annots() and the exception is rethrown.
 */
pdf_annot *
pdf_create_annot(fz_context *ctx, pdf_page *page, fz_annot_type type)
{
	pdf_annot *annot = NULL;
	pdf_document *doc = page->doc;
	pdf_obj *dict = pdf_new_dict(ctx, doc, 0);
	pdf_obj *ref = NULL;

	/* Both are assigned inside the try and read by the handlers below. */
	fz_var(annot);
	fz_var(ref);

	fz_try(ctx)
	{
		fz_rect empty = { 0, 0, 0, 0 };
		const char *subtype;
		pdf_obj *annots;
		int num;

		subtype = pdf_string_from_annot_type(ctx, type);
		if (type == PDF_ANNOT_UNKNOWN)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create unknown annotation");

		annots = pdf_dict_get(ctx, page->obj, PDF_NAME_Annots);
		if (!annots)
		{
			/* The page dictionary keeps the array after our reference
			 * is dropped, so the pointer is still used below. */
			annots = pdf_new_array(ctx, doc, 0);
			pdf_dict_put_drop(ctx, page->obj, PDF_NAME_Annots, annots);
		}

		pdf_dict_put_drop(ctx, dict, PDF_NAME_Type, PDF_NAME_Annot);
		pdf_dict_put_drop(ctx, dict, PDF_NAME_Subtype, pdf_new_name(ctx, doc, subtype));
		pdf_dict_put_drop(ctx, dict, PDF_NAME_Rect, pdf_new_rect(ctx, doc, &empty));

		/* Annotations are printable by default. */
		pdf_dict_put_drop(ctx, dict, PDF_NAME_F, pdf_new_int(ctx, doc, PDF_ANNOT_IS_PRINT));

		annot = pdf_new_annot(ctx, page);
		annot->ap = NULL;

		/*
		 * The dictionary and the annotation structure both exist now:
		 * register the dictionary as a document object and reference it
		 * from the page's annotation array.
		 */
		num = pdf_create_object(ctx, doc);
		pdf_update_object(ctx, doc, num, dict);
		ref = pdf_new_indirect(ctx, doc, num, 0);
		pdf_array_push(ctx, annots, ref);
		annot->obj = pdf_keep_obj(ctx, ref);

		/*
		 * Linking comes last, after every call that might throw, because
		 * pdf_drop_annots() in the catch frees a whole list. Appending at
		 * the tail means the new annotation is drawn last.
		 */
		*page->annot_tailp = annot;
		page->annot_tailp = &annot->next;

		doc->dirty = 1;
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, dict);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		pdf_drop_annots(ctx, annot);
		fz_rethrow(ctx);
	}

	return annot;
}
pdf_obj * pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair) { pdf_obj *obj = NULL; int num = 0, gen = 0; fz_off_t stm_ofs; pdf_token tok; fz_off_t a, b; fz_var(obj); tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number"); } num = buf->i; tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num); } gen = buf->i; tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_OBJ) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen); } tok = pdf_lex(ctx, file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: obj = pdf_parse_array(ctx, doc, file, buf); break; case PDF_TOK_OPEN_DICT: obj = pdf_parse_dict(ctx, doc, file, buf); break; case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break; case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break; case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break; case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break; case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break; case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break; case PDF_TOK_INT: a = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) { obj = pdf_new_int_offset(ctx, doc, a); goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_R) { obj = pdf_new_indirect(ctx, doc, a, b); break; } } fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen); case PDF_TOK_ENDOBJ: obj = pdf_new_null(ctx, doc); goto skip; default: fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen); } fz_try(ctx) { tok = pdf_lex(ctx, file, buf); } fz_catch(ctx) { pdf_drop_obj(ctx, obj); 
fz_rethrow(ctx); } skip: if (tok == PDF_TOK_STREAM) { int c = fz_read_byte(ctx, file); while (c == ' ') c = fz_read_byte(ctx, file); if (c == '\r') { c = fz_peek_byte(ctx, file); if (c != '\n') fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen); else fz_read_byte(ctx, file); } stm_ofs = fz_tell(ctx, file); } else if (tok == PDF_TOK_ENDOBJ) { stm_ofs = 0; } else { fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen); stm_ofs = 0; } if (onum) *onum = num; if (ogen) *ogen = gen; if (ostmofs) *ostmofs = stm_ofs; return obj; }
pdf_annot * pdf_create_annot(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_annot_type type) { pdf_annot *annot = NULL; pdf_obj *annot_obj = pdf_new_dict(ctx, doc, 0); pdf_obj *ind_obj = NULL; fz_var(annot); fz_var(ind_obj); fz_try(ctx) { int ind_obj_num; fz_rect rect = {0.0, 0.0, 0.0, 0.0}; const char *type_str = annot_type_str(type); pdf_obj *annot_arr = pdf_dict_get(ctx, page->me, PDF_NAME_Annots); if (annot_arr == NULL) { annot_arr = pdf_new_array(ctx, doc, 0); pdf_dict_put_drop(ctx, page->me, PDF_NAME_Annots, annot_arr); } pdf_dict_put_drop(ctx, annot_obj, PDF_NAME_Type, PDF_NAME_Annot); pdf_dict_put_drop(ctx, annot_obj, PDF_NAME_Subtype, pdf_new_name(ctx, doc, type_str)); pdf_dict_put_drop(ctx, annot_obj, PDF_NAME_Rect, pdf_new_rect(ctx, doc, &rect)); /* Make printable as default */ pdf_dict_put_drop(ctx, annot_obj, PDF_NAME_F, pdf_new_int(ctx, doc, F_Print)); annot = fz_malloc_struct(ctx, pdf_annot); annot->page = page; annot->rect = rect; annot->pagerect = rect; annot->ap = NULL; annot->widget_type = PDF_WIDGET_TYPE_NOT_WIDGET; annot->annot_type = type; /* Both annotation object and annotation structure are now created. Insert the object in the hierarchy and the structure in the page's array. */ ind_obj_num = pdf_create_object(ctx, doc); pdf_update_object(ctx, doc, ind_obj_num, annot_obj); ind_obj = pdf_new_indirect(ctx, doc, ind_obj_num, 0); pdf_array_push(ctx, annot_arr, ind_obj); annot->obj = pdf_keep_obj(ctx, ind_obj); /* Linking must be done after any call that might throw because pdf_drop_annot below actually frees a list. Put the new annot at the end of the list, so that it will be drawn last. */ *page->annot_tailp = annot; page->annot_tailp = &annot->next; doc->dirty = 1; } fz_always(ctx) { pdf_drop_obj(ctx, annot_obj); pdf_drop_obj(ctx, ind_obj); } fz_catch(ctx) { pdf_drop_annot(ctx, annot); fz_rethrow(ctx); } return annot; }
/*
 * Rebuild the document in glo->doc so that it contains only the pages
 * selected on the command line.
 *
 * argv[0..argc-1] each hold a comma-separated list of page specs: "N",
 * "N-M", "N-" (through the last page) or "-M" (from the last page). Page
 * numbers are clamped to 1..pagecount and a range may run backwards.
 *
 * Steps: replace the catalog with one that keeps only Type, Pages and
 * Outlines, build a fresh Kids array via retainpage(), record the object
 * numbers of the surviving pages, keep the named destinations that still
 * point at one of them, remove link annotations whose destination is no
 * longer valid, and strip the outlines (dropping the Outlines entry when
 * strip_outlines() returns 0).
 */
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	/* root is edited further below, so it is only dropped at the very end. */
	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* A trailing dash means "through the last page". */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Remember the object number of every page that survived. */
	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The target page is the first element of the /D array when
			 * there is one, otherwise of the value itself. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);
		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);
		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation. The array is now one shorter,
				 * so shrink the bound and revisit index j, which holds
				 * the element that followed the deleted one. */
				pdf_array_delete(ctx, annots, j);
				len--;
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
/*
 * Parse one indirect object definition, "<num> <gen> obj <value> ...",
 * from 'file'. Indirect references found as the value are bound to 'xref'.
 *
 * onum, ogen, ostmofs: optional outputs (each may be NULL) receiving the
 *     object number, the generation number and the offset at which stream
 *     data begins (0 when the object is not followed by a stream).
 *
 * Returns the parsed value. Anything other than 'endobj' or 'stream'
 * after the value only produces a warning.
 */
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf,
	int *onum, int *ogen, int *ostmofs)
{
	fz_context *ctx = file->ctx;
	pdf_obj *obj = NULL;
	int num = 0, gen = 0;
	int stm_ofs = 0;
	int first, second;
	int token;

	fz_var(obj);

	/* Header: object number, generation number, 'obj' keyword. */
	token = pdf_lex(file, buf);
	if (token != PDF_TOK_INT)
		fz_throw(ctx, "expected object number (%d %d R)", num, gen);
	num = buf->i;

	token = pdf_lex(file, buf);
	if (token != PDF_TOK_INT)
		fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
	gen = buf->i;

	token = pdf_lex(file, buf);
	if (token != PDF_TOK_OBJ)
		fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

	/* The object's value. */
	token = pdf_lex(file, buf);
	switch (token)
	{
	case PDF_TOK_OPEN_ARRAY:
		obj = pdf_parse_array(xref, file, buf);
		break;

	case PDF_TOK_OPEN_DICT:
		obj = pdf_parse_dict(xref, file, buf);
		break;

	case PDF_TOK_NAME:
		obj = fz_new_name(ctx, buf->scratch);
		break;

	case PDF_TOK_REAL:
		obj = pdf_new_real(ctx, buf->f);
		break;

	case PDF_TOK_STRING:
		obj = pdf_new_string(ctx, buf->scratch, buf->len);
		break;

	case PDF_TOK_TRUE:
		obj = pdf_new_bool(ctx, 1);
		break;

	case PDF_TOK_FALSE:
		obj = pdf_new_bool(ctx, 0);
		break;

	case PDF_TOK_NULL:
		obj = pdf_new_null(ctx);
		break;

	case PDF_TOK_INT:
		first = buf->i;
		token = pdf_lex(file, buf);

		if (token == PDF_TOK_INT)
		{
			/* Two integers are only valid as "a b R". */
			second = buf->i;
			token = pdf_lex(file, buf);
			if (token != PDF_TOK_R)
				fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);
			obj = pdf_new_indirect(ctx, first, second, xref);
			break;
		}

		/* A lone integer must be followed directly by the trailer token. */
		if (token != PDF_TOK_STREAM && token != PDF_TOK_ENDOBJ)
			fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);

		obj = pdf_new_int(ctx, first);
		goto skip; /* the trailer token is already in 'token' */

	case PDF_TOK_ENDOBJ:
		/* An empty object body is treated as null. */
		obj = pdf_new_null(ctx);
		goto skip;

	default:
		fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
	}

	/* Read the trailer token; the parsed value must not leak on failure. */
	fz_try(ctx)
	{
		token = pdf_lex(file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(obj);
		fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (token == PDF_TOK_STREAM)
	{
		int c;

		/* Skip spaces after the keyword, then an optional CR LF pair. */
		do
			c = fz_read_byte(file);
		while (c == ' ');

		if (c == '\r')
		{
			if (fz_peek_byte(file) == '\n')
				fz_read_byte(file);
			else
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stm_ofs = fz_tell(file);
	}
	else if (token != PDF_TOK_ENDOBJ)
	{
		fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
	}

	if (onum != NULL)
		*onum = num;
	if (ogen != NULL)
		*ogen = gen;
	if (ostmofs != NULL)
		*ostmofs = stm_ofs;

	return obj;
}
/*
 * Split every page of 'xref' into a grid of smaller pages ("poster" tiles).
 *
 * The grid size comes from the external x_factor / y_factor settings. When
 * both are 0 the page is cut in two along its longer edge; when only one is
 * 0 that one is treated as 1. Each tile is a copy of the source page
 * dictionary stored as a new object, with its MediaBox narrowed to one cell
 * of the grid and its Parent pointing at the existing page tree root. Rows
 * are emitted from the highest y to the lowest, columns from left to right.
 *
 * The document catalog is replaced by a minimal one (Type and Pages only),
 * and the page tree root receives a new flat Kids array plus matching Count.
 *
 * pdf_array_push, pdf_dict_puts and pdf_update_object all take their own
 * reference, so every temporary created here is dropped after being handed
 * over.
 */
static void decimatepages(pdf_document *xref)
{
	pdf_obj *oldroot, *root, *pages, *kids, *parent, *countobj;
	fz_context *ctx = xref->ctx;
	int num_pages = pdf_count_pages(xref);
	int page, kidcount;

	/* Keep only pages/type entries to avoid references to unretained pages */
	oldroot = pdf_dict_gets(pdf_trailer(xref), "Root");
	pages = pdf_dict_gets(oldroot, "Pages");

	root = pdf_new_dict(ctx, 2);
	pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type"));
	pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages"));

	pdf_update_object(xref, pdf_to_num(oldroot), root);

	pdf_drop_obj(root);

	/* Create a new kids array holding the tiles */
	parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref);
	kids = pdf_new_array(ctx, 1);

	kidcount = 0;
	for (page = 0; page < num_pages; page++)
	{
		/* NOTE(review): page_details is never released in this function;
		 * free it with the page-release call of the MuPDF version in use. */
		pdf_page *page_details = pdf_load_page(xref, page);
		int xf = x_factor, yf = y_factor;
		int x, y;
		float w = page_details->mediabox.x1 - page_details->mediabox.x0;
		float h = page_details->mediabox.y1 - page_details->mediabox.y0;

		if (xf == 0 && yf == 0)
		{
			/* Nothing specified, so split along the long edge */
			if (w > h)
				xf = 2, yf = 1;
			else
				xf = 1, yf = 2;
		}
		else if (xf == 0)
			xf = 1;
		else if (yf == 0)
			yf = 1;

		for (y = yf-1; y >= 0; y--)
		{
			for (x = 0; x < xf; x++)
			{
				pdf_obj *newpageobj, *newpageref, *newmediabox, *coord;
				fz_rect mb;
				float corner[4];
				int num, c;

				newpageobj = pdf_copy_dict(ctx, xref->page_objs[page]);
				num = pdf_create_object(xref);
				pdf_update_object(xref, num, newpageobj);
				newpageref = pdf_new_indirect(ctx, num, 0, xref);

				newmediabox = pdf_new_array(ctx, 4);

				/* The last column/row reuses the exact page edge so that
				 * rounding in w/xf or h/yf cannot leave a gap. */
				mb.x0 = page_details->mediabox.x0 + (w/xf)*x;
				if (x == xf-1)
					mb.x1 = page_details->mediabox.x1;
				else
					mb.x1 = page_details->mediabox.x0 + (w/xf)*(x+1);
				mb.y0 = page_details->mediabox.y0 + (h/yf)*y;
				if (y == yf-1)
					mb.y1 = page_details->mediabox.y1;
				else
					mb.y1 = page_details->mediabox.y0 + (h/yf)*(y+1);

				corner[0] = mb.x0;
				corner[1] = mb.y0;
				corner[2] = mb.x1;
				corner[3] = mb.y1;
				for (c = 0; c < 4; c++)
				{
					coord = pdf_new_real(ctx, corner[c]);
					pdf_array_push(newmediabox, coord);
					pdf_drop_obj(coord);
				}

				pdf_dict_puts(newpageobj, "Parent", parent);
				pdf_dict_puts(newpageobj, "MediaBox", newmediabox);

				/* Store page object in new kids array */
				pdf_array_push(kids, newpageref);

				/* The xref table, the page dict and the kids array now hold
				 * their own references; release ours. */
				pdf_drop_obj(newmediabox);
				pdf_drop_obj(newpageref);
				pdf_drop_obj(newpageobj);

				kidcount++;
			}
		}
	}

	pdf_drop_obj(parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, kidcount);
	pdf_dict_puts(pages, "Count", countobj);
	pdf_drop_obj(countobj);
	pdf_dict_puts(pages, "Kids", kids);
	pdf_drop_obj(kids);
}
/*
 * Wrap one source content stream and append it to a destination page.
 *
 * dest  - the destination page's content-stream ARRAY (the new stream
 *         reference is pushed onto it)
 * src   - indirect reference to one content stream in info->src_doc
 * info  - source/destination documents and the resource rename dictionary
 * pos   - placement of the source page: outer clip rectangle, rotation
 *         (0, 90, 180 or 270; any other value emits no transform), position
 *         and an optional bleed clip rectangle
 *
 * The generated stream is:
 *     q
 *     <outer clip> re W n
 *     <a b c d e f> cm            (placement for the given rotation)
 *     [<bleed clip> re W n]       (only if any bleed value is non-zero)
 *     <source stream with resource names rewritten>
 *     Q
 *
 * Placement matrices used:
 *     rotation   0:   1  0  0  1
 *     rotation  90:   0  1 -1  0
 *     rotation 180:  -1  0  0 -1
 *     rotation 270:   0 -1  1  0
 *
 * Returns 0 on success, -1 when dest is not an array or src is not an
 * indirect reference.
 */
int copy_content_stream_of_page(fz_context *dest_ctx, pdf_obj *dest,
	fz_context *src_ctx, pdf_obj *src,
	struct put_info *info, struct pos_info *pos)
{
	/* Both conditions are required below (dest is pushed to as an array,
	 * src is opened by object number), so reject if EITHER one fails. */
	if(!pdf_is_array(dest_ctx, dest) || !pdf_is_indirect(src_ctx, src))
		return(-1);

	fz_buffer *buffer = fz_new_buffer(dest_ctx, 1024);
	fz_output *output = fz_new_output_with_buffer(dest_ctx, buffer);

	fz_printf(dest_ctx, output, "q\n");

	/* set the outer clip region */
	fz_printf(dest_ctx, output, "%f %f %f %f re W n\n",
		pos->outer_clip_x, pos->outer_clip_y,
		pos->outer_clip_width, pos->outer_clip_height);

	/* position the page correctly */
	if(pos->rotate == 0) {
		fz_printf(dest_ctx, output, "1 0 0 1 %f %f cm\n",
			pos->x + pos->content_translate_x,
			pos->y + pos->content_translate_y);
	} else if(pos->rotate == 90) {
		fz_printf(dest_ctx, output, "0 1 -1 0 %f %f cm\n",
			pos->x + pos->width, pos->y);
	} else if(pos->rotate == 180) {
		fz_printf(dest_ctx, output, "-1 0 0 -1 %f %f cm\n",
			pos->width + pos->x - pos->content_translate_x,
			pos->height + pos->y - pos->content_translate_y);
	} else if(pos->rotate == 270) {
		fz_printf(dest_ctx, output, "0 -1 1 0 %f %f cm\n",
			pos->x, pos->y + pos->height);
	}

	/* optional second clip, expressed in the transformed coordinate system */
	if(pos->bleed_clip_x != 0.0 || pos->bleed_clip_y != 0.0 ||
		pos->bleed_clip_width != 0.0 || pos->bleed_clip_height != 0.0)
	{
		fz_printf(dest_ctx, output, "%f %f %f %f re W n\n",
			pos->bleed_clip_x, pos->bleed_clip_y,
			pos->bleed_clip_width, pos->bleed_clip_height);
	}

	/* copy the source stream, renaming resources on the way */
	int src_num = pdf_to_num(src_ctx, src);
	int src_gen = pdf_to_gen(src_ctx, src);
	fz_stream *input = pdf_open_stream(src_ctx, info->src_doc, src_num, src_gen);

	rename_res_in_content_stream(src_ctx, input, dest_ctx, output, info->rename_dict);

	fz_printf(dest_ctx, output, "Q");

	fz_drop_output(dest_ctx, output);
	fz_drop_stream(dest_ctx, input);

	/* store the buffer as a new stream object in the destination document */
	int new_num = pdf_create_object(dest_ctx, info->dest_doc);
	pdf_obj *new_ref = pdf_new_indirect(dest_ctx, info->dest_doc, new_num, 0);

	/* each stream has a dict containing at least its length... */
	pdf_obj *stream_info_dict = pdf_new_dict(dest_ctx, info->dest_doc, 1);
	pdf_dict_puts_drop(dest_ctx, stream_info_dict, "Length",
		pdf_new_int(dest_ctx, info->dest_doc, buffer->len));
	pdf_update_object(dest_ctx, info->dest_doc, new_num, stream_info_dict);
	pdf_drop_obj(dest_ctx, stream_info_dict);

	pdf_update_stream(dest_ctx, info->dest_doc, new_ref, buffer, 0);
	fz_drop_buffer(dest_ctx, buffer);

	/* the array keeps its own reference to new_ref */
	pdf_array_push(dest_ctx, dest, new_ref);
	pdf_drop_obj(dest_ctx, new_ref);

	return(0);
}
/*
 * Graft (copy) an object from document 'src' into document 'dst'.
 *
 * 'map' records, per source object number, the destination object number
 * already created for it (map->dst_from_src). When 'map' is NULL a temporary
 * map is created here and remembered in drop_map so that it is dropped again
 * before this call returns.
 *
 * NOTE(review): the definition is truncated in this file after the indirect
 * reference branch; only that branch is documented.
 */
pdf_obj *
pdf_graft_object(fz_context *ctx, pdf_document *dst, pdf_document *src, pdf_obj *obj_ref, pdf_graft_map *map)
{
	pdf_obj *val, *key;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_dict = NULL;
	pdf_obj *new_array = NULL;
	pdf_obj *ref = NULL;
	fz_buffer *buffer = NULL;
	pdf_graft_map *drop_map = NULL;
	int new_num, src_num, len, i;

	/* No caller-supplied map: use a private one for the duration of this call */
	if (map == NULL)
		drop_map = map = pdf_new_graft_map(ctx, src);

	if (pdf_is_indirect(ctx, obj_ref))
	{
		src_num = pdf_to_num(ctx, obj_ref);

		/* Check if we have done this one. If yes, then drop map (if allocated)
		 * and return our indirect ref */
		if (map->dst_from_src[src_num] != 0)
		{
			int dest_num = map->dst_from_src[src_num];
			pdf_drop_graft_map(ctx, drop_map);
			return pdf_new_indirect(ctx, dst, dest_num, 0);
		}

		fz_var(buffer);
		fz_var(ref);

		fz_try(ctx)
		{
			/* Create new slot for our src object, set the mapping and call again
			 * using the resolved indirect reference. The mapping is recorded
			 * before recursing, so a reference back to this same object met
			 * during the recursive call takes the early-return path above. */
			new_num = pdf_create_object(ctx, dst);
			map->dst_from_src[src_num] = new_num;
			new_obj = pdf_graft_object(ctx, dst, src, pdf_resolve_indirect(ctx, obj_ref), map);

			/* Return a ref to the new_obj making sure to attach any stream */
			pdf_update_object(ctx, dst, new_num, new_obj);
			pdf_drop_obj(ctx, new_obj);
			ref = pdf_new_indirect(ctx, dst, new_num, 0);
			if (pdf_is_stream(ctx, obj_ref))
			{
				/* The stream data is loaded raw from src and attached to the new object */
				buffer = pdf_load_raw_stream(ctx, src, src_num, 0);
				pdf_update_stream(ctx, dst, ref, buffer, 1);
			}
		}
		fz_always(ctx)
		{
			/* Runs on both success and failure */
			fz_drop_buffer(ctx, buffer);
			pdf_drop_graft_map(ctx, drop_map);
		}
		fz_catch(ctx)
		{
			/* On failure the partially built reference is released before rethrowing */
			pdf_drop_obj(ctx, ref);
			fz_rethrow(ctx);
		}
		return ref;
	}
	else if (pdf_is_dict(ctx, obj_ref))
/*
 * Create a new, empty Form XObject in 'doc'.
 *
 * A dictionary with BBox, FormType 1, Length 0, Matrix, Resources (holding a
 * ProcSet of /PDF and /Text), Subtype /Form and Type /XObject is stored as a
 * new object in the document. A matching pdf_xobject structure is created,
 * initialised from 'bbox' and 'mat', and placed in the store keyed by the
 * new object's indirect reference.
 *
 * Returns the indirect reference to the new object; this function does not
 * drop it on the success path. On failure every temporary is released and
 * the error is rethrown with the message "failed to create xobject".
 */
pdf_obj *
pdf_new_xobject(pdf_document *doc, const fz_rect *bbox, const fz_matrix *mat)
{
	int idict_num;
	pdf_obj *idict = NULL;
	pdf_obj *dict = NULL;
	pdf_xobject *form = NULL;
	pdf_obj *obj = NULL;
	pdf_obj *res = NULL;
	pdf_obj *procset = NULL;
	fz_context *ctx = doc->ctx;

	fz_var(idict);
	fz_var(dict);
	fz_var(form);
	fz_var(obj);
	fz_var(res);
	fz_var(procset);
	fz_try(ctx)
	{
		/* Each temporary is set back to NULL once its reference has been
		 * handed over, so the catch block below never drops it twice. */
		dict = pdf_new_dict(doc, 0);

		obj = pdf_new_rect(doc, bbox);
		pdf_dict_puts(dict, "BBox", obj);
		pdf_drop_obj(obj);
		obj = NULL;

		obj = pdf_new_int(doc, 1);
		pdf_dict_puts(dict, "FormType", obj);
		pdf_drop_obj(obj);
		obj = NULL;

		obj = pdf_new_int(doc, 0);
		pdf_dict_puts(dict, "Length", obj);
		pdf_drop_obj(obj);
		obj = NULL;

		obj = pdf_new_matrix(doc, mat);
		pdf_dict_puts(dict, "Matrix", obj);
		pdf_drop_obj(obj);
		obj = NULL;

		res = pdf_new_dict(doc, 0);
		procset = pdf_new_array(doc, 2);
		obj = pdf_new_name(doc, "PDF");
		pdf_array_push(procset, obj);
		pdf_drop_obj(obj);
		obj = NULL;
		obj = pdf_new_name(doc, "Text");
		pdf_array_push(procset, obj);
		pdf_drop_obj(obj);
		obj = NULL;
		pdf_dict_puts(res, "ProcSet", procset);
		pdf_drop_obj(procset);
		procset = NULL;
		/* 'res' is kept: its reference is transferred to form->resources below */
		pdf_dict_puts(dict, "Resources", res);

		obj = pdf_new_name(doc, "Form");
		pdf_dict_puts(dict, "Subtype", obj);
		pdf_drop_obj(obj);
		obj = NULL;

		obj = pdf_new_name(doc, "XObject");
		pdf_dict_puts(dict, "Type", obj);
		pdf_drop_obj(obj);
		obj = NULL;

		form = fz_malloc_struct(ctx, pdf_xobject);
		FZ_INIT_STORABLE(form, 1, pdf_free_xobject_imp);
		form->resources = NULL;
		form->contents = NULL;
		form->colorspace = NULL;
		form->me = NULL;
		form->iteration = 0;

		form->bbox = *bbox;

		form->matrix = *mat;

		form->isolated = 0;
		form->knockout = 0;
		form->transparency = 0;

		form->resources = res;
		res = NULL;

		idict_num = pdf_create_object(doc);
		pdf_update_object(doc, idict_num, dict);
		idict = pdf_new_indirect(doc, idict_num, 0);
		pdf_drop_obj(dict);
		dict = NULL;

		pdf_store_item(ctx, idict, form, pdf_xobject_size(form));

		form->contents = pdf_keep_obj(idict);
		form->me = pdf_keep_obj(idict);

		/* Release the local reference to the form */
		pdf_drop_xobject(ctx, form);
		form = NULL;
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(procset);
		pdf_drop_obj(res);
		pdf_drop_obj(obj);
		pdf_drop_obj(dict);
		pdf_drop_obj(idict);
		pdf_drop_xobject(ctx, form);
		fz_rethrow_message(ctx, "failed to create xobject");
	}

	return idict;
}
static void retainpages(int argc, char **argv) { pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ oldroot = pdf_dict_gets(xref->trailer, "Root"); pages = pdf_dict_gets(oldroot, "Pages"); olddests = pdf_load_name_tree(xref, "Dests"); root = pdf_new_dict(ctx, 2); pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type")); pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages")); pdf_update_object(xref, pdf_to_num(oldroot), root); pdf_drop_obj(root); /* Create a new kids array with only the pages we want to keep */ parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref); kids = pdf_new_array(ctx, 1); /* Retain pages specified */ while (argc - fz_optind) { int page, spage, epage, pagecount; char *spec, *dash; char *pagelist = argv[fz_optind]; pagecount = pdf_count_pages(xref); spec = fz_strsep(&pagelist, ","); while (spec) { dash = strchr(spec, '-'); if (dash == spec) spage = epage = pagecount; else spage = epage = atoi(spec); if (dash) { if (strlen(dash) > 1) epage = atoi(dash + 1); else epage = pagecount; } if (spage > epage) page = spage, spage = epage, epage = page; spage = fz_clampi(spage, 1, pagecount); epage = fz_clampi(epage, 1, pagecount); for (page = spage; page <= epage; page++) { pdf_obj *pageobj = xref->page_objs[page-1]; pdf_obj *pageref = xref->page_refs[page-1]; pdf_dict_puts(pageobj, "Parent", parent); /* Store page object in new kids array */ pdf_array_push(kids, pageref); } spec = fz_strsep(&pagelist, ","); } fz_optind++; } pdf_drop_obj(parent); /* Update page count and kids array */ countobj = pdf_new_int(ctx, pdf_array_len(kids)); pdf_dict_puts(pages, "Count", countobj); pdf_drop_obj(countobj); pdf_dict_puts(pages, "Kids", kids); pdf_drop_obj(kids); /* Also preserve the (partial) Dests name tree */ if (olddests) { int i; pdf_obj *names = pdf_new_dict(ctx, 1); pdf_obj *dests = pdf_new_dict(ctx, 1); pdf_obj 
*names_list = pdf_new_array(ctx, 32); int len = pdf_dict_len(olddests); for (i = 0; i < len; i++) { pdf_obj *key = pdf_dict_get_key(olddests, i); pdf_obj *val = pdf_dict_get_val(olddests, i); pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(key), strlen(pdf_to_name(key))); pdf_obj *dest = pdf_dict_gets(val, "D"); dest = pdf_array_get(dest ? dest : val, 0); if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), dest)) { pdf_array_push(names_list, key_str); pdf_array_push(names_list, val); } pdf_drop_obj(key_str); } root = pdf_dict_gets(xref->trailer, "Root"); pdf_dict_puts(dests, "Names", names_list); pdf_dict_puts(names, "Dests", dests); pdf_dict_puts(root, "Names", names); pdf_drop_obj(names); pdf_drop_obj(dests); pdf_drop_obj(names_list); pdf_drop_obj(olddests); } }
/*
 * Split every page of 'doc' into a grid of smaller pages ("poster" tiles).
 *
 * The grid size comes from the external x_factor / y_factor settings. When
 * both are 0 the page is cut in two along its longer edge; when only one is
 * 0 that one is treated as 1. Each tile is a copy of the source page
 * dictionary stored as a new object, with its MediaBox narrowed to one cell
 * of the grid and its Parent pointing at the existing page tree root. Rows
 * are emitted from the highest y to the lowest, columns from left to right.
 *
 * The document catalog is replaced by a minimal one (Type and Pages only),
 * and the page tree root receives a new flat Kids array plus matching Count.
 *
 * pdf_array_push, pdf_dict_put and pdf_update_object all take their own
 * reference, so every temporary created here is dropped after being handed
 * over.
 */
static void decimatepages(fz_context *ctx, pdf_document *doc)
{
	pdf_obj *oldroot, *root, *pages, *kids, *parent, *countobj;
	int num_pages = pdf_count_pages(ctx, doc);
	int page, kidcount;

	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with our new pages in */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	kidcount = 0;
	for (page = 0; page < num_pages; page++)
	{
		/* NOTE(review): page_details is never released in this function;
		 * free it with the page-release call of the MuPDF version in use. */
		pdf_page *page_details = pdf_load_page(ctx, doc, page);
		int xf = x_factor, yf = y_factor;
		int x, y;
		float w = page_details->mediabox.x1 - page_details->mediabox.x0;
		float h = page_details->mediabox.y1 - page_details->mediabox.y0;

		if (xf == 0 && yf == 0)
		{
			/* Nothing specified, so split along the long edge */
			if (w > h)
				xf = 2, yf = 1;
			else
				xf = 1, yf = 2;
		}
		else if (xf == 0)
			xf = 1;
		else if (yf == 0)
			yf = 1;

		for (y = yf-1; y >= 0; y--)
		{
			for (x = 0; x < xf; x++)
			{
				pdf_obj *newpageobj, *newpageref, *newmediabox, *coord;
				fz_rect mb;
				float corner[4];
				int num, c;

				newpageobj = pdf_copy_dict(ctx, pdf_lookup_page_obj(ctx, doc, page));
				num = pdf_create_object(ctx, doc);
				pdf_update_object(ctx, doc, num, newpageobj);
				newpageref = pdf_new_indirect(ctx, doc, num, 0);

				newmediabox = pdf_new_array(ctx, doc, 4);

				/* The last column/row reuses the exact page edge so that
				 * rounding in w/xf or h/yf cannot leave a gap. */
				mb.x0 = page_details->mediabox.x0 + (w/xf)*x;
				if (x == xf-1)
					mb.x1 = page_details->mediabox.x1;
				else
					mb.x1 = page_details->mediabox.x0 + (w/xf)*(x+1);
				mb.y0 = page_details->mediabox.y0 + (h/yf)*y;
				if (y == yf-1)
					mb.y1 = page_details->mediabox.y1;
				else
					mb.y1 = page_details->mediabox.y0 + (h/yf)*(y+1);

				corner[0] = mb.x0;
				corner[1] = mb.y0;
				corner[2] = mb.x1;
				corner[3] = mb.y1;
				for (c = 0; c < 4; c++)
				{
					coord = pdf_new_real(ctx, doc, corner[c]);
					pdf_array_push(ctx, newmediabox, coord);
					pdf_drop_obj(ctx, coord);
				}

				pdf_dict_put(ctx, newpageobj, PDF_NAME_Parent, parent);
				pdf_dict_put(ctx, newpageobj, PDF_NAME_MediaBox, newmediabox);

				/* Store page object in new kids array */
				pdf_array_push(ctx, kids, newpageref);

				/* The xref table, the page dict and the kids array now hold
				 * their own references; release ours. */
				pdf_drop_obj(ctx, newmediabox);
				pdf_drop_obj(ctx, newpageref);
				pdf_drop_obj(ctx, newpageobj);

				kidcount++;
			}
		}
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, kidcount);
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);
}
/*
** Rebuild the page tree of 'xref' from the boxes described in 'pageinfo'.
**
** Each box names a rectangle on a source page (srcbox.pageno, x0, y0, w, h,
** rotation) and where it goes on a destination page (dstpage, x1, y1, scale,
** rotation). Consecutive boxes with the same dstpage are combined into one
** new page object. Leading boxes whose dstpage is not positive are removed
** from pageinfo->boxes first.
**
** use_forms != 0: every referenced source page is registered as an XObject
**                 in the destination page's resources and drawn with "Do";
**                 the drawing commands for one destination page are
**                 accumulated in a growable text buffer and stored as one
**                 stream. Afterwards wmupdf_convert_pages_to_forms() is
**                 called with the table of used source pages.
** use_forms == 0: the source page's Resources are merged into the
**                 destination page and its content stream(s) are referenced
**                 directly, bracketed by a generated "q ... cm ... W n"
**                 stream and a shared "Q" stream.
**
** The catalog is replaced by a minimal one (Type and Pages), the page tree
** root gets the new Kids array and Count, and the old Dests name tree, if
** any, is passed to wmupdf_preserve_old_dests().
**
** 'out' is not used. Always returns 0.
*/
static int wmupdf_pdfdoc_newpages(pdf_document *xref,fz_context *ctx,WPDFPAGEINFO *pageinfo,
                                  int use_forms,FILE *out)
{
    static char *funcname="wmupdf_pdfdoc_newpages";
    pdf_obj *root,*oldroot,*pages,*kids,*countobj,*parent,*olddests;
    pdf_obj *srcpageobj,*srcpagecontents;
    pdf_obj *destpageobj,*destpagecontents,*destpageresources;
    double srcx0,srcy0;
    int qref,i,i0,pagecount,srccount,destpageref,nbb;
    int *srcpageused;
    char *bigbuf;
    double srcpagerot;

    /* Avoid compiler warning */
    destpageref = 0;
    destpageobj = NULL;
    srcx0=srcy0=0.;
    srcpageused=NULL;
    bigbuf=NULL;
    nbb=0;

    pagecount = pdf_count_pages(xref);
    if (use_forms)
    {
        willus_mem_alloc_warn((void **)&srcpageused,sizeof(int)*(pagecount+1),funcname,10);
        /* Mark all source pages as "not done" */
        for (i=0;i<=pagecount;i++)
            srcpageused[i]=0;
        nbb=4096;
        willus_mem_alloc_warn((void **)&bigbuf,nbb,funcname,10);
        bigbuf[0]='\0';
    }
    oldroot = pdf_dict_gets(xref->trailer,"Root");
    /*
    ** pages points to /Pages object in PDF file.
    ** Has:  /Type /Pages, /Count <numpages>, /Kids [ obj obj obj obj ]
    */
    pages = pdf_dict_gets(oldroot,"Pages");
    olddests = pdf_load_name_tree(xref,"Dests");

    /*
    ** Create new root object with only /Pages and /Type (and reduced dest entries)
    ** to avoid references to unretained pages.
    */
    root = pdf_new_dict(ctx,4);
    pdf_dict_puts(root,"Type",pdf_dict_gets(oldroot,"Type"));
    pdf_dict_puts(root,"Pages",pages);
    pdf_update_object(xref,pdf_to_num(oldroot),root);
    pdf_drop_obj(root);

    /* Parent indirectly references the /Pages object in the file */
    /* (Each new page we create has to point to this.)            */
    parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref);
    /* Create a new kids array with only the pages we want to keep */
    kids = pdf_new_array(ctx, 1);

    qref=0;
    /* Avoid compiler warnings */
    destpageresources=NULL;
    destpagecontents=NULL;
    srcpagecontents=NULL;
    srcpagerot=0.;

    /*
    ** Remove leading boxes that have no destination page.
    ** The scan stops at boxes.n so box[boxes.n] is never read, and the
    ** move size is the number of REMAINING boxes times the box size.
    */
    for (i=0;i<pageinfo->boxes.n;i++)
        if (pageinfo->boxes.box[i].dstpage>0)
            break;
    if (i>0)
    {
        if (i<pageinfo->boxes.n)
            memmove(&pageinfo->boxes.box[0],&pageinfo->boxes.box[i],
                    sizeof(WPDFBOX)*(pageinfo->boxes.n-i));
        pageinfo->boxes.n -= i;
    }

    /* Walk through PFDBOXES array (one extra pass at i==boxes.n flushes the last page) */
    for (i=srccount=i0=0;i<=pageinfo->boxes.n;i++)
    {
        WPDFBOX *box;
        int j,k,newsrc;
        static char buf[512];
        pdf_obj *s1indirect,*qindirect,*rotobj;
        static double cpm[3][3],m[3][3],m1[3][3];
        static double xclip[4],yclip[4];

        /* Check to see if we are done with an output page */
        if (srccount>0 && (i==pageinfo->boxes.n
               || (i>0 && pageinfo->boxes.box[i].dstpage!=pageinfo->boxes.box[i-1].dstpage)))
        {
            pdf_obj *newpageref;

            /*
            ** Store destination page into document structure
            */
            if (use_forms)
            {
                pdf_obj *dest_stream;

                /* Create new object in document for destination page stream */
                dest_stream = pdf_new_indirect(ctx,new_stream_object(xref,ctx,bigbuf),
                                               0,(void *)xref);
                /* Store this into the destination page contents array */
                pdf_array_push(destpagecontents,dest_stream);
                pdf_drop_obj(dest_stream);
            }
            newpageref=pdf_new_indirect(ctx,destpageref,0,(void *)xref);
            /* Reference parent list of pages */
            pdf_dict_puts(destpageobj,"Parent",parent);
            pdf_dict_puts(destpageobj,"Contents",destpagecontents);
            pdf_dict_puts(destpageobj,"Resources",destpageresources);
            /* Store page object in document's kids array */
            pdf_array_push(kids,newpageref);
            /* Update document with new page */
            pdf_update_object(xref,destpageref,destpageobj);
            /* Clean up */
            pdf_drop_obj(newpageref);
            pdf_drop_obj(destpageresources);
            pdf_drop_obj(destpagecontents);
            pdf_drop_obj(destpageobj);
            /* Reset source page and index to start of new destination page */
            i0=i;
            srccount=0;
        }
        /* Quit loop if beyond last box */
        if (i>=pageinfo->boxes.n)
            break;
        box=&pageinfo->boxes.box[i];
        if (box->srcbox.pageno<1 || box->srcbox.pageno>pagecount)
            continue;
        /* Is this a source page we haven't processed yet (for this destination page)? */
        for (newsrc=1,j=i0;j<i;j++)
        {
            if (pageinfo->boxes.box[j].srcbox.pageno==box->srcbox.pageno)
            {
                newsrc=0;
                break;
            }
        }
        if (newsrc)
        {
            double v[4];

            srccount++;
            if (use_forms)
                srcpageused[box->srcbox.pageno]=1;
            if (srccount==1)
            {
                /*
                ** Start a new destination page.
                **
                ** Each new page object is a dict type with:
                ** /Type /Page
                ** /Contents (array of objects)
                ** /Resources (dict)
                ** /MediaBox [0 0 612 792]
                ** /Parent <PagesObj>
                ** [Can have /Rotate 90, for example.]
                **
                */
                destpageobj=start_new_destpage(ctx,box->dst_width_pts,box->dst_height_pts);
                destpageresources=pdf_new_dict(ctx,1);
                if (use_forms)
                {
                    /* pdf_dict_puts keeps its own reference, so drop ours */
                    pdf_obj *newxobjdict=pdf_new_dict(ctx,1);

                    pdf_dict_puts(destpageresources,"XObject",newxobjdict);
                    pdf_drop_obj(newxobjdict);
                }
                destpageref=pdf_create_object(xref);
                destpagecontents=pdf_new_array(ctx,1);
                /* Init the destination page stream for forms */
                if (use_forms)
                    bigbuf[0]='\0';
            }
            /* New source page, so get the source page objects */
            srcpageobj = xref->page_objs[box->srcbox.pageno-1];
            wmupdf_page_bbox(srcpageobj,v);
            srcx0=v[0];
            srcy0=v[1];
            rotobj=pdf_dict_gets(srcpageobj,"Rotate");
            srcpagerot = rotobj!=NULL ? pdf_to_real(rotobj) : 0.;
            srcpagecontents=pdf_dict_gets(srcpageobj,"Contents");
            if (use_forms)
            {
                pdf_obj *xobjdict;
                int pageno;

                /*
                ** xobjdict is the dictionary already stored under /XObject,
                ** so adding the entry in place is sufficient; it does not
                ** need to be stored again.
                */
                xobjdict=pdf_dict_gets(destpageresources,"XObject");
                pageno=box->srcbox.pageno;
                pdf_dict_puts(xobjdict,xobject_name(pageno),xref->page_refs[pageno-1]);
            }
            else
            {
                pdf_obj *srcpageresources;

                /* Merge source page resources into destination page resources */
                srcpageresources=pdf_dict_gets(srcpageobj,"Resources");
                wmupdf_dict_merge(ctx,"Resources",destpageresources,srcpageresources);
            }
        }
        /*
        ** Process this source box:
        **
        ** Create a tranformation matrix and clipping path to only show the
        ** desired part of the source page at the appropriate place on the
        ** destination page.
        **
        ** How the tranformation matrix works:
        ** - Translations shall be specified as [ 1 0 0 1 tx ty ], where tx and ty
        **   shall be the distances to translate the origin of the coordinate system
        **   in the horizontal and vertical dimensions, respectively.
        **
        ** - Scaling shall be obtained by [ sx 0 0 sy 0 0 ]. This scales the coordinates
        **   so that 1 unit in the horizontal and vertical dimensions of the new coordinate
        **   system is the same size as sx and sy units, respectively, in the previous
        **   coordinate system.
        **
        ** - Rotations shall be produced by [ cos q sin q -sin q cos q 0 0 ], which has the
        **   effect of rotating the coordinate system axes by an angle q counter-clockwise.
        **
        ** - Skew shall be specified by [ 1 tan a tan b 1 0 0 ], which skews the x axis by
        **   an angle a and the y axis by an angle b.
        **
        */
        wpdfbox_determine_original_source_position(box);
        if (fabs(srcpagerot) > 1.0e-4)
            wpdfbox_unrotate(box,srcpagerot);
        /* Placement matrix: translate, rotate, scale, translate (in that order) */
        matrix_unity(m,1.);
        matrix_translate(m1,-box->x0-srcx0,-box->y0-srcy0);
        matrix_mul(m,m1);
        matrix_rotate(m1,-box->srcrot_deg+box->dstrot_deg);
        matrix_mul(m,m1);
        matrix_unity(m1,box->scale);
        matrix_mul(m,m1);
        matrix_translate(m1,box->x1,box->y1);
        matrix_mul(m,m1);
        matrix_zero_round(m);
        /* Clip path matrix: rotate, then translate */
        matrix_rotate(cpm,box->srcrot_deg);
        matrix_translate(m1,box->x0+srcx0,box->y0+srcy0);
        matrix_mul(cpm,m1);
        set_clip_array(xclip,yclip,box->srcrot_deg,box->w,box->h);
        for (k=0;k<4;k++)
            matrix_xymul(cpm,&xclip[k],&yclip[k]);
        /* "q a b c d e f cm x0 y0 m x1 y1 l ... W n" */
        strcpy(buf,"q");
        for (k=0;k<=2;k++)
        {
            cat_pdf_double(buf,m[k][0]);
            cat_pdf_double(buf,m[k][1]);
        }
        strcat(buf," cm");
        /* Five points: the fifth (k&3 == 0) closes the path on the first */
        for (k=0;k<=4;k++)
        {
            cat_pdf_double(buf,xclip[k&3]);
            cat_pdf_double(buf,yclip[k&3]);
            strcat(buf,k==0 ? " m" : " l");
        }
        strcat(buf," W n");
        if (use_forms)
        {
            size_t needed;

            /* FORM METHOD */
            sprintf(&buf[strlen(buf)]," /%s Do Q\n",xobject_name(box->srcbox.pageno));
            /* Room is needed for both strings plus the terminating NUL */
            needed=strlen(bigbuf)+strlen(buf)+1;
            if (needed > (size_t)nbb)
            {
                int newsize;

                newsize=nbb;
                while ((size_t)newsize < needed)
                    newsize*=2;
                willus_mem_realloc_robust_warn((void **)&bigbuf,newsize,nbb,funcname,10);
                nbb=newsize;
            }
            strcat(bigbuf,buf);
        }
        else
        {
            /* NO-FORMS METHOD */
            strcat(buf,"\n");
            /* Create new objects in document for tx matrix and restore matrix */
            s1indirect = pdf_new_indirect(ctx,new_stream_object(xref,ctx,buf),0,(void *)xref);
            /* The "Q" stream is created once and shared by every box */
            if (qref==0)
                qref=new_stream_object(xref,ctx,"Q\n");
            qindirect = pdf_new_indirect(ctx,qref,0,(void *)xref);
            /* Store this region into the destination page contents array */
            pdf_array_push(destpagecontents,s1indirect);
            if (pdf_is_array(srcpagecontents))
            {
                for (k=0;k<pdf_array_len(srcpagecontents);k++)
                    pdf_array_push(destpagecontents,pdf_array_get(srcpagecontents,k));
            }
            else
                pdf_array_push(destpagecontents,srcpagecontents);
            pdf_array_push(destpagecontents,qindirect);
            pdf_drop_obj(s1indirect);
            pdf_drop_obj(qindirect);
        }
    }
    pdf_drop_obj(parent);

    /* For forms, convert all original source pages to XObject Forms */
    if (use_forms)
        wmupdf_convert_pages_to_forms(xref,ctx,srcpageused);

    /* Update page count and kids array */
    countobj = pdf_new_int(ctx, pdf_array_len(kids));
    pdf_dict_puts(pages, "Count", countobj);
    pdf_drop_obj(countobj);
    pdf_dict_puts(pages, "Kids", kids);
    pdf_drop_obj(kids);

    /* Also preserve the (partial) Dests name tree */
    if (olddests)
        wmupdf_preserve_old_dests(olddests,ctx,xref,pages);
    if (use_forms)
    {
        /* Free memory */
        willus_mem_free((double **)&bigbuf,funcname);
        willus_mem_free((double **)&srcpageused,funcname);
    }
    return(0);
}