static int strip_outlines(fz_context *ctx, pdf_document *doc, pdf_obj *outlines, int page_count, int *page_object_nums, pdf_obj *names_list) { int nc; pdf_obj *first; pdf_obj *last; first = pdf_dict_get(ctx, outlines, PDF_NAME_First); if (first == NULL) nc = 0; else nc = strip_outline(ctx, doc, first, page_count, page_object_nums, names_list, &first, &last); if (nc == 0) { pdf_dict_del(ctx, outlines, PDF_NAME_First); pdf_dict_del(ctx, outlines, PDF_NAME_Last); pdf_dict_del(ctx, outlines, PDF_NAME_Count); } else { int old_count = pdf_to_int(ctx, pdf_dict_get(ctx, outlines, PDF_NAME_Count)); pdf_dict_put(ctx, outlines, PDF_NAME_First, first); pdf_dict_put(ctx, outlines, PDF_NAME_Last, last); pdf_dict_put(ctx, outlines, PDF_NAME_Count, pdf_new_int(ctx, doc, old_count > 0 ? nc : -nc)); } return nc; }
void pdf_delete_portfolio_schema(fz_context *ctx, pdf_document *doc, int entry) { pdf_portfolio **pp; pdf_portfolio *p; pdf_obj *s; if (!doc) fz_throw(ctx, FZ_ERROR_GENERIC, "Bad pdf_delete_portfolio_schema call"); if (doc->portfolio == NULL) load_portfolio(ctx, doc); pp = &doc->portfolio; while (*pp && entry > 0) pp = &(*pp)->next, entry--; p = *pp; if (p == NULL || entry) fz_throw(ctx, FZ_ERROR_GENERIC, "entry out of range in pdf_delete_portfolio_schema"); *pp = p->next; /* Delete the key from the schema */ s = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Collection, PDF_NAME_Schema, NULL); pdf_dict_del(ctx, s, p->key); /* Delete this entry from all the collection entries */ s = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Names, PDF_NAME_EmbeddedFiles, NULL); pdf_name_tree_map(ctx, s, delete_from_node, p->key); pdf_drop_obj(ctx, p->entry.name); pdf_drop_obj(ctx, p->key); pdf_drop_obj(ctx, p->val); fz_free(ctx, p); }
void pdf_clear_annot_quad_points(fz_context *ctx, pdf_annot *annot) { check_allowed_subtypes(ctx, annot, PDF_NAME(QuadPoints), quad_point_subtypes); pdf_dict_del(ctx, annot->obj, PDF_NAME(QuadPoints)); pdf_dirty_annot(ctx, annot); }
void pdf_clear_annot_ink_list(fz_context *ctx, pdf_annot *annot) { check_allowed_subtypes(ctx, annot, PDF_NAME(InkList), ink_list_subtypes); pdf_dict_del(ctx, annot->obj, PDF_NAME(InkList)); pdf_dirty_annot(ctx, annot); }
static int delete_from_node(fz_context *ctx, pdf_obj *container, pdf_obj *key, pdf_obj *val, void *arg) { pdf_obj *delete_key = (pdf_obj *)arg; pdf_dict_del(ctx, pdf_dict_get(ctx, val, PDF_NAME_CI), delete_key); return 0; }
void pdf_set_annot_opacity(fz_context *ctx, pdf_annot *annot, float opacity) { if (opacity != 1) pdf_dict_put_real(ctx, annot->obj, PDF_NAME(CA), opacity); else pdf_dict_del(ctx, annot->obj, PDF_NAME(CA)); pdf_dirty_annot(ctx, annot); }
void pdf_update_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *form, fz_rect bbox, fz_matrix matrix, pdf_obj *res, fz_buffer *contents) { pdf_dict_put_rect(ctx, form, PDF_NAME(BBox), bbox); pdf_dict_put_matrix(ctx, form, PDF_NAME(Matrix), matrix); if (res) pdf_dict_put(ctx, form, PDF_NAME(Resources), res); else pdf_dict_del(ctx, form, PDF_NAME(Resources)); pdf_update_stream(ctx, doc, form, contents, 0); }
void pdf_set_annot_border(fz_context *ctx, pdf_annot *annot, float w) { pdf_document *doc = annot->page->doc; pdf_obj *border = pdf_dict_get(ctx, annot->obj, PDF_NAME_Border); if (pdf_is_array(ctx, border)) pdf_array_put_drop(ctx, border, 2, pdf_new_real(ctx, doc, w)); else { border = pdf_new_array(ctx, doc, 3); pdf_array_push_drop(ctx, border, pdf_new_real(ctx, doc, 0)); pdf_array_push_drop(ctx, border, pdf_new_real(ctx, doc, 0)); pdf_array_push_drop(ctx, border, pdf_new_real(ctx, doc, w)); pdf_dict_put_drop(ctx, annot->obj, PDF_NAME_Border, border); } /* Remove border style and effect dictionaries so they won't interfere. */ pdf_dict_del(ctx, annot->obj, PDF_NAME_BS); pdf_dict_del(ctx, annot->obj, PDF_NAME_BE); annot->changed = 1; }
/* When resetting or submitting a form, the fields to act upon are defined by an array of either field references or field names, plus a flag determining whether to act upon the fields in the array, or all fields other than those in the array. specified_fields interprets this information and produces the array of fields to be acted upon. */ static pdf_obj *specified_fields(fz_context *ctx, pdf_document *doc, pdf_obj *fields, int exclude) { pdf_obj *form = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_AcroForm, PDF_NAME_Fields, NULL); int i, n; pdf_obj *result = pdf_new_array(ctx, doc, 0); pdf_obj *nil = NULL; fz_var(nil); fz_try(ctx) { /* The 'fields' array not being present signals that all fields * should be acted upon, so handle it using the exclude case - excluding none */ if (exclude || !fields) { /* mark the fields we don't want to act upon */ nil = pdf_new_null(ctx, doc); n = pdf_array_len(ctx, fields); for (i = 0; i < n; i++) { pdf_obj *field = pdf_array_get(ctx, fields, i); if (pdf_is_string(ctx, field)) field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field)); if (field) pdf_dict_put(ctx, field, PDF_NAME_Exclude, nil); } /* Act upon all unmarked fields */ n = pdf_array_len(ctx, form); for (i = 0; i < n; i++) add_field_hierarchy_to_array(ctx, result, pdf_array_get(ctx, form, i)); /* Unmark the marked fields */ n = pdf_array_len(ctx, fields); for (i = 0; i < n; i++) { pdf_obj *field = pdf_array_get(ctx, fields, i); if (pdf_is_string(ctx, field)) field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field)); if (field) pdf_dict_del(ctx, field, PDF_NAME_Exclude); } } else { n = pdf_array_len(ctx, fields); for (i = 0; i < n; i++) { pdf_obj *field = pdf_array_get(ctx, fields, i); if (pdf_is_string(ctx, field)) field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field)); if (field) add_field_hierarchy_to_array(ctx, result, field); } } } fz_always(ctx) { pdf_drop_obj(ctx, nil); } fz_catch(ctx) { pdf_drop_obj(ctx, result); fz_rethrow(ctx); } return result; }
static void reset_field(fz_context *ctx, pdf_document *doc, pdf_obj *field) { /* Set V to DV whereever DV is present, and delete V where DV is not. * FIXME: we assume for now that V has not been set unequal * to DV higher in the hierarchy than "field". * * At the bottom of the hierarchy we may find widget annotations * that aren't also fields, but DV and V will not be present in their * dictionaries, and attempts to remove V will be harmless. */ pdf_obj *dv = pdf_dict_get(ctx, field, PDF_NAME_DV); pdf_obj *kids = pdf_dict_get(ctx, field, PDF_NAME_Kids); if (dv) pdf_dict_put(ctx, field, PDF_NAME_V, dv); else pdf_dict_del(ctx, field, PDF_NAME_V); if (kids == NULL) { /* The leaves of the tree are widget annotations * In some cases we need to update the appearance state; * in others we need to mark the field as dirty so that * the appearance stream will be regenerated. */ switch (pdf_field_type(ctx, doc, field)) { case PDF_WIDGET_TYPE_RADIOBUTTON: case PDF_WIDGET_TYPE_CHECKBOX: { pdf_obj *leafv = pdf_get_inheritable(ctx, doc, field, PDF_NAME_V); if (leafv) pdf_keep_obj(ctx, leafv); else leafv = PDF_NAME_Off; fz_try(ctx) { pdf_dict_put(ctx, field, PDF_NAME_AS, leafv); } fz_always(ctx) { pdf_drop_obj(ctx, leafv); } fz_catch(ctx) { fz_rethrow(ctx); } } break; case PDF_WIDGET_TYPE_PUSHBUTTON: break; default: pdf_field_mark_dirty(ctx, doc, field); break; } } if (pdf_field_dirties_document(ctx, doc, field)) doc->dirty = 1; }
void pdf_clear_annot_vertices(fz_context *ctx, pdf_annot *annot) { check_allowed_subtypes(ctx, annot, PDF_NAME(Vertices), vertices_subtypes); pdf_dict_del(ctx, annot->obj, PDF_NAME(Vertices)); pdf_dirty_annot(ctx, annot); }
static void wmupdf_convert_single_page_to_form(pdf_document *xref,fz_context *ctx,int pageno) { pdf_obj *array,*srcpageobj,*srcpagecontents; int i,len,streamlen,pageref,pagegen,compressed; double bbox_array[4]; double matrix[6]; /* New source page, so get the source page objects */ srcpageobj = xref->page_objs[pageno-1]; pageref=pdf_to_num(xref->page_refs[pageno-1]); pagegen=pdf_to_gen(xref->page_refs[pageno-1]); wmupdf_page_bbox(srcpageobj,bbox_array); for (i=0;i<6;i++) matrix[i]=0.; matrix[0]=matrix[3]=1.; srcpagecontents=pdf_dict_gets(srcpageobj,"Contents"); /* Concatenate all indirect streams from source page directly into it. */ // printf("Adding streams to source page %d (pageref=%d, pagegen=%d)...\n",pageno,pageref,pagegen); streamlen=0; if (pdf_is_array(srcpagecontents)) { int k; for (k=0;k<pdf_array_len(srcpagecontents);k++) { pdf_obj *obj; obj=pdf_array_get(srcpagecontents,k); if (pdf_is_indirect(obj)) pdf_resolve_indirect(obj); streamlen=add_to_srcpage_stream(xref,ctx,pageref,pagegen,obj); } } else { if (pdf_is_indirect(srcpagecontents)) pdf_resolve_indirect(srcpagecontents); streamlen=add_to_srcpage_stream(xref,ctx,pageref,pagegen,srcpagecontents); } compressed=stream_deflate(xref,ctx,pageref,pagegen,&streamlen); srcpageobj = xref->page_objs[pageno-1]; pageref=pdf_to_num(xref->page_refs[pageno-1]); len=pdf_dict_len(srcpageobj); for (i=0;i<len;i++) { pdf_obj *key; /* *value */ key=pdf_dict_get_key(srcpageobj,i); /* if (pdf_is_name(key)) printf("key[%d] = name = %s\n",i,pdf_to_name(key)); else printf("key[%d] = ??\n",i); */ /* value=pdf_dict_get_val(srcpageobj,i); */ /* Keep same resources */ if (!pdf_is_name(key)) continue; if (pdf_is_name(key) && !stricmp("Resources",pdf_to_name(key))) continue; /* Drop dictionary entry otherwise */ // printf("Deleting key %s.\n",pdf_to_name(key)); pdf_dict_del(srcpageobj,key); i=-1; len=pdf_dict_len(srcpageobj); } pdf_dict_puts(srcpageobj,"Type",pdf_new_name(ctx,"XObject")); pdf_dict_puts(srcpageobj,"Subtype",pdf_new_name(ctx,"Form")); pdf_dict_puts(srcpageobj,"FormType",pdf_new_int(ctx,1)); if (compressed) pdf_dict_puts(srcpageobj,"Filter",pdf_new_name(ctx,"FlateDecode")); pdf_dict_puts(srcpageobj,"Length",pdf_new_int(ctx,streamlen)); array=pdf_new_array(ctx,4); for (i=0;i<4;i++) pdf_array_push(array,pdf_new_real(ctx,bbox_array[i])); pdf_dict_puts(srcpageobj,"BBox",array); array=pdf_new_array(ctx,6); for (i=0;i<6;i++) pdf_array_push(array,pdf_new_real(ctx,matrix[i])); pdf_dict_puts(srcpageobj,"Matrix",array); /* (It's no longer a "page"--it's a Form-type XObject) */ /* I don't think this call should be made since it will call fz_drop_object on srcpageobj */ /* pdf_update_object(xref,pageref,srcpageobj); */ }
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg) { pdf_processor *proc_buffer = NULL; pdf_processor *proc_filter = NULL; pdf_obj *new_obj = NULL; pdf_obj *new_ref = NULL; pdf_obj *res = NULL; pdf_obj *ref = NULL; pdf_obj *obj; pdf_obj *contents; fz_buffer *buffer; fz_var(new_obj); fz_var(new_ref); fz_var(res); fz_var(ref); fz_var(proc_buffer); fz_var(proc_filter); buffer = fz_new_buffer(ctx, 1024); fz_try(ctx) { res = pdf_new_dict(ctx, doc, 1); proc_buffer = pdf_new_buffer_processor(ctx, buffer); proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res); pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie); contents = page->contents; if (pdf_is_array(ctx, contents)) { /* create a new object to replace the array */ new_obj = pdf_new_dict(ctx, doc, 1); new_ref = pdf_new_ref(ctx, doc, new_obj); page->contents = contents = new_ref; } else { pdf_dict_del(ctx, contents, PDF_NAME_Filter); pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms); } /* Now deal with resources. The spec allows for Type3 fonts and form * XObjects to omit a resource dictionary and look in the parent. * Avoid that by flattening here as part of the cleaning. This could * conceivably cause changes in rendering, but we don't care. */ /* ExtGState */ obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask); if (!o) continue; o = pdf_dict_get(ctx, o, PDF_NAME_G); if (!o) continue; /* Transparency group XObject */ pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1); } } /* ColorSpace - no cleaning possible */ /* Pattern */ obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *pat = pdf_dict_get_val(ctx, obj, i); if (!pat) continue; if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1) pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0); } } /* Shading - no cleaning possible */ /* XObject */ obj = pdf_dict_get(ctx, res, PDF_NAME_XObject); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i); if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype))) continue; pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1); } } /* Font */ obj = pdf_dict_get(ctx, res, PDF_NAME_Font); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get_val(ctx, obj, i); if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype))) { pdf_clean_type3(ctx, doc, o, page->resources, cookie); } } } /* ProcSet - no cleaning possible. Inherit this from the old dict. */ obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet); if (obj) pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj); /* Properties - no cleaning possible. */ if (proc_fn) (*proc_fn)(ctx, buffer, res, proc_arg); pdf_update_stream(ctx, doc, contents, buffer, 0); pdf_drop_obj(ctx, page->resources); ref = pdf_new_ref(ctx, doc, res); page->resources = pdf_keep_obj(ctx, ref); pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref); } fz_always(ctx) { pdf_drop_processor(ctx, proc_filter); pdf_drop_processor(ctx, proc_buffer); fz_drop_buffer(ctx, buffer); pdf_drop_obj(ctx, new_obj); pdf_drop_obj(ctx, new_ref); pdf_drop_obj(ctx, res); pdf_drop_obj(ctx, ref); } fz_catch(ctx) { fz_rethrow_message(ctx, "Failed while cleaning page"); } }
static int strip_outline(fz_context *ctx, pdf_document *doc, pdf_obj *outlines, int page_count, int *page_object_nums, pdf_obj *names_list, pdf_obj **pfirst, pdf_obj **plast) { pdf_obj *prev = NULL; pdf_obj *first = NULL; pdf_obj *current; int count = 0; for (current = outlines; current != NULL; ) { int nc; /* Strip any children to start with. This takes care of * First/Last/Count for us. */ nc = strip_outlines(ctx, doc, current, page_count, page_object_nums, names_list); if (!dest_is_valid(ctx, current, page_count, page_object_nums, names_list)) { if (nc == 0) { /* Outline with invalid dest and no children. Drop it by * pulling the next one in here. */ pdf_obj *next = pdf_dict_get(ctx, current, PDF_NAME_Next); if (next == NULL) { /* There is no next one to pull in */ if (prev != NULL) pdf_dict_del(ctx, prev, PDF_NAME_Next); } else if (prev != NULL) { pdf_dict_put(ctx, prev, PDF_NAME_Next, next); pdf_dict_put(ctx, next, PDF_NAME_Prev, prev); } else { pdf_dict_del(ctx, next, PDF_NAME_Prev); } current = next; } else { /* Outline with invalid dest, but children. Just drop the dest. */ pdf_dict_del(ctx, current, PDF_NAME_Dest); pdf_dict_del(ctx, current, PDF_NAME_A); current = pdf_dict_get(ctx, current, PDF_NAME_Next); } } else { /* Keep this one */ if (first == NULL) first = current; prev = current; current = pdf_dict_get(ctx, current, PDF_NAME_Next); count++; } } *pfirst = first; *plast = prev; return count; }
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv) { pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; pdf_document *doc = glo->doc; int argidx = 0; pdf_obj *names_list = NULL; pdf_obj *outlines; int pagecount; int i; int *page_object_nums; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root); pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages); olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests); outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines); root = pdf_new_dict(ctx, doc, 3); pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type)); pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages)); pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines); pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root); /* Create a new kids array with only the pages we want to keep */ parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages)); kids = pdf_new_array(ctx, doc, 1); /* Retain pages specified */ while (argc - argidx) { int page, spage, epage; char *spec, *dash; char *pagelist = argv[argidx]; pagecount = pdf_count_pages(ctx, doc); spec = fz_strsep(&pagelist, ","); while (spec) { dash = strchr(spec, '-'); if (dash == spec) spage = epage = pagecount; else spage = epage = atoi(spec); if (dash) { if (strlen(dash) > 1) epage = atoi(dash + 1); else epage = pagecount; } spage = fz_clampi(spage, 1, pagecount); epage = fz_clampi(epage, 1, pagecount); if (spage < epage) for (page = spage; page <= epage; ++page) retainpage(ctx, doc, parent, kids, page); else for (page = spage; page >= epage; --page) retainpage(ctx, doc, parent, kids, page); spec = fz_strsep(&pagelist, ","); } argidx++; } pdf_drop_obj(ctx, parent); /* Update page count and kids array */ countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids)); pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj); pdf_drop_obj(ctx, countobj); pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids); pdf_drop_obj(ctx, kids); /* Force the next call to pdf_count_pages to recount */ glo->doc->page_count = 0; pagecount = pdf_count_pages(ctx, doc); page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums)); for (i = 0; i < pagecount; i++) { pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i); page_object_nums[i] = pdf_to_num(ctx, pageref); } /* If we had an old Dests tree (now reformed as an olddests * dictionary), keep any entries in there that point to * valid pages. This may mean we keep more than we need, but * it's safe at least. */ if (olddests) { pdf_obj *names = pdf_new_dict(ctx, doc, 1); pdf_obj *dests = pdf_new_dict(ctx, doc, 1); int len = pdf_dict_len(ctx, olddests); names_list = pdf_new_array(ctx, doc, 32); for (i = 0; i < len; i++) { pdf_obj *key = pdf_dict_get_key(ctx, olddests, i); pdf_obj *val = pdf_dict_get_val(ctx, olddests, i); pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D); dest = pdf_array_get(ctx, dest ? dest : val, 0); if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount)) { pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key))); pdf_array_push(ctx, names_list, key_str); pdf_array_push(ctx, names_list, val); pdf_drop_obj(ctx, key_str); } } pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list); pdf_dict_put(ctx, names, PDF_NAME_Dests, dests); pdf_dict_put(ctx, root, PDF_NAME_Names, names); pdf_drop_obj(ctx, names); pdf_drop_obj(ctx, dests); pdf_drop_obj(ctx, olddests); } /* Edit each pages /Annot list to remove any links that point to * nowhere. */ for (i = 0; i < pagecount; i++) { pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i); pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref); pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots); int len = pdf_array_len(ctx, annots); int j; for (j = 0; j < len; j++) { pdf_obj *o = pdf_array_get(ctx, annots, j); if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link)) continue; if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list)) { /* Remove this annotation */ pdf_array_delete(ctx, annots, j); j--; } } } if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0) { pdf_dict_del(ctx, root, PDF_NAME_Outlines); } fz_free(ctx, page_object_nums); pdf_drop_obj(ctx, names_list); pdf_drop_obj(ctx, root); }
/* Performs the same task as pdf_clean_page_contents, but with an optional text filter function. text_filter: Function to assess whether a given character should be kept (return 0) or removed (return 1). after_text: Function called after each text object is closed to allow other output to be sent. arg: Opaque value to be passed to callback functions. */ void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg, int sanitize, int ascii) { pdf_processor *proc_buffer = NULL; pdf_processor *proc_filter = NULL; pdf_obj *new_obj = NULL; pdf_obj *new_ref = NULL; pdf_obj *res = NULL; pdf_obj *obj; pdf_obj *contents; pdf_obj *resources; fz_buffer *buffer; fz_var(new_obj); fz_var(new_ref); fz_var(res); fz_var(proc_buffer); fz_var(proc_filter); buffer = fz_new_buffer(ctx, 1024); fz_try(ctx) { contents = pdf_page_contents(ctx, page); resources = pdf_page_resources(ctx, page); proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii); if (sanitize) { res = pdf_new_dict(ctx, doc, 1); proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg); pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie); pdf_close_processor(ctx, proc_filter); } else { res = pdf_keep_obj(ctx, resources); pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie); } pdf_close_processor(ctx, proc_buffer); /* Deal with page content stream. */ if (pdf_is_array(ctx, contents)) { /* create a new object to replace the array */ new_obj = pdf_new_dict(ctx, doc, 1); new_ref = pdf_add_object(ctx, doc, new_obj); contents = new_ref; pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents); } else { pdf_dict_del(ctx, contents, PDF_NAME(Filter)); pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms)); } pdf_update_stream(ctx, doc, contents, buffer, 0); /* Now deal with resources. The spec allows for Type3 fonts and form * XObjects to omit a resource dictionary and look in the parent. * Avoid that by flattening here as part of the cleaning. This could * conceivably cause changes in rendering, but we don't care. */ /* ExtGState */ obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask)); if (!o) continue; o = pdf_dict_get(ctx, o, PDF_NAME(G)); if (!o) continue; /* Transparency group XObject */ pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii); } } /* Pattern */ obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *pat_res; pdf_obj *pat = pdf_dict_get_val(ctx, obj, i); if (!pat) continue; pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources)); if (pat_res == NULL) pat_res = resources; if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1) pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii); } } /* XObject */ obj = pdf_dict_get(ctx, res, PDF_NAME(XObject)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *xobj_res; pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i); if (!xobj) continue; xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources)); if (xobj_res == NULL) xobj_res = resources; if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)))) pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii); } } /* Font */ obj = pdf_dict_get(ctx, res, PDF_NAME(Font)); if (obj) { int i, l; l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get_val(ctx, obj, i); if (!o) continue; if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype)))) pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii); } } /* ProcSet - no cleaning possible. Inherit this from the old dict. */ obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet)); if (obj) pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj); /* ColorSpace - no cleaning possible. */ /* Properties - no cleaning possible. */ if (proc_fn) (*proc_fn)(ctx, buffer, res, proc_arg); /* Update resource dictionary */ if (sanitize) { pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res); } } fz_always(ctx) { pdf_drop_processor(ctx, proc_filter); pdf_drop_processor(ctx, proc_buffer); fz_drop_buffer(ctx, buffer); pdf_drop_obj(ctx, new_obj); pdf_drop_obj(ctx, new_ref); pdf_drop_obj(ctx, res); } fz_catch(ctx) { fz_rethrow(ctx); } }
pdf_obj * pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image) { fz_pixmap *pixmap = NULL; pdf_obj *imobj = NULL; pdf_obj *dp; fz_buffer *buffer = NULL; pdf_obj *imref = NULL; fz_compressed_buffer *cbuffer; unsigned char digest[16]; int i, n; /* If we can maintain compression, do so */ cbuffer = fz_compressed_image_buffer(ctx, image); fz_var(pixmap); fz_var(buffer); fz_var(imobj); fz_var(imref); /* Check if the same image already exists in this doc. */ imref = pdf_find_image_resource(ctx, doc, image, digest); if (imref) return imref; imobj = pdf_add_new_dict(ctx, doc, 3); fz_try(ctx) { dp = pdf_dict_put_dict(ctx, imobj, PDF_NAME(DecodeParms), 3); pdf_dict_put(ctx, imobj, PDF_NAME(Type), PDF_NAME(XObject)); pdf_dict_put(ctx, imobj, PDF_NAME(Subtype), PDF_NAME(Image)); if (cbuffer) { fz_compression_params *cp = &cbuffer->params; switch (cp ? cp->type : FZ_IMAGE_UNKNOWN) { default: goto raw_or_unknown_compression; case FZ_IMAGE_JPEG: if (cp->u.jpeg.color_transform != -1) pdf_dict_put_int(ctx, dp, PDF_NAME(ColorTransform), cp->u.jpeg.color_transform); pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(DCTDecode)); break; case FZ_IMAGE_JPX: if (cp->u.jpx.smask_in_data) pdf_dict_put_int(ctx, dp, PDF_NAME(SMaskInData), cp->u.jpx.smask_in_data); pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JPXDecode)); break; case FZ_IMAGE_FAX: if (cp->u.fax.columns) pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.fax.columns); if (cp->u.fax.rows) pdf_dict_put_int(ctx, dp, PDF_NAME(Rows), cp->u.fax.rows); if (cp->u.fax.k) pdf_dict_put_int(ctx, dp, PDF_NAME(K), cp->u.fax.k); if (cp->u.fax.end_of_line) pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfLine), cp->u.fax.end_of_line); if (cp->u.fax.encoded_byte_align) pdf_dict_put_bool(ctx, dp, PDF_NAME(EncodedByteAlign), cp->u.fax.encoded_byte_align); if (cp->u.fax.end_of_block) pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfBlock), cp->u.fax.end_of_block); if (cp->u.fax.black_is_1) pdf_dict_put_bool(ctx, dp, PDF_NAME(BlackIs1), cp->u.fax.black_is_1); if (cp->u.fax.damaged_rows_before_error) pdf_dict_put_int(ctx, dp, PDF_NAME(DamagedRowsBeforeError), cp->u.fax.damaged_rows_before_error); pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode)); break; case FZ_IMAGE_FLATE: if (cp->u.flate.columns) pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.flate.columns); if (cp->u.flate.colors) pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.flate.colors); if (cp->u.flate.predictor) pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.flate.predictor); if (cp->u.flate.bpc) pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.flate.bpc); pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(FlateDecode)); pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), image->bpc); break; case FZ_IMAGE_LZW: if (cp->u.lzw.columns) pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.lzw.columns); if (cp->u.lzw.colors) pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.lzw.colors); if (cp->u.lzw.predictor) pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.lzw.predictor); if (cp->u.lzw.early_change) pdf_dict_put_int(ctx, dp, PDF_NAME(EarlyChange), cp->u.lzw.early_change); if (cp->u.lzw.bpc) pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.lzw.bpc); pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(LZWDecode)); break; case FZ_IMAGE_RLD: pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(RunLengthDecode)); break; } if (!pdf_dict_len(ctx, dp)) pdf_dict_del(ctx, imobj, PDF_NAME(DecodeParms)); buffer = fz_keep_buffer(ctx, cbuffer->buffer); if (image->use_decode) { pdf_obj *ary = pdf_dict_put_array(ctx, imobj, PDF_NAME(Decode), image->n * 2); for (i = 0; i < image->n * 2; ++i) pdf_array_push_real(ctx, ary, image->decode[i]); } } else { unsigned int size; int h; unsigned char *d, *s; raw_or_unknown_compression: /* Currently, set to maintain resolution; should we consider * subsampling here according to desired output res? */ pixmap = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL); n = pixmap->n - pixmap->alpha - pixmap->s; /* number of colorants */ if (n == 0) n = 1; /* treat pixmaps with only alpha or spots as grayscale */ size = image->w * n; h = image->h; s = pixmap->samples; d = fz_malloc(ctx, size * h); buffer = fz_new_buffer_from_data(ctx, d, size * h); if (n == pixmap->n) { /* If we use all channels, we can copy the data as is. */ while (h--) { memcpy(d, s, size); d += size; s += pixmap->stride; } } else { /* Need to remove the alpha and spot planes. */ /* TODO: extract alpha plane to a soft mask. */ /* TODO: convert spots to colors. */ int line_skip = pixmap->stride - pixmap->w * pixmap->n; int skip = pixmap->n - n; while (h--) { int w = pixmap->w; while (w--) { int k; for (k = 0; k < n; ++k) *d++ = *s++; s += skip; } s += line_skip; } } } pdf_dict_put_int(ctx, imobj, PDF_NAME(Width), pixmap ? pixmap->w : image->w); pdf_dict_put_int(ctx, imobj, PDF_NAME(Height), pixmap ? pixmap->h : image->h); if (image->imagemask) { pdf_dict_put_bool(ctx, imobj, PDF_NAME(ImageMask), 1); } else { fz_colorspace *cs; pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), image->bpc); cs = pixmap ? pixmap->colorspace : image->colorspace; switch (fz_colorspace_type(ctx, cs)) { case FZ_COLORSPACE_INDEXED: { fz_colorspace *basecs; unsigned char *lookup = NULL; int high = 0; int basen; pdf_obj *arr; lookup = fz_indexed_colorspace_palette(ctx, cs, &high); basecs = fz_colorspace_base(ctx, cs); basen = fz_colorspace_n(ctx, basecs); arr = pdf_dict_put_array(ctx, imobj, PDF_NAME(ColorSpace), 4); pdf_array_push(ctx, arr, PDF_NAME(Indexed)); switch (fz_colorspace_type(ctx, basecs)) { case FZ_COLORSPACE_GRAY: pdf_array_push(ctx, arr, PDF_NAME(DeviceGray)); break; case FZ_COLORSPACE_RGB: pdf_array_push(ctx, arr, PDF_NAME(DeviceRGB)); break; case FZ_COLORSPACE_CMYK: pdf_array_push(ctx, arr, PDF_NAME(DeviceCMYK)); break; default: // TODO: convert to RGB! fz_throw(ctx, FZ_ERROR_GENERIC, "only indexed Gray, RGB, and CMYK colorspaces supported"); break; } pdf_array_push_int(ctx, arr, high); pdf_array_push_string(ctx, arr, (char *) lookup, basen * (high + 1)); } break; case FZ_COLORSPACE_NONE: case FZ_COLORSPACE_GRAY: pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceGray)); break; case FZ_COLORSPACE_RGB: pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceRGB)); break; case FZ_COLORSPACE_CMYK: pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceCMYK)); break; default: // TODO: convert to RGB! fz_throw(ctx, FZ_ERROR_GENERIC, "only Gray, RGB, and CMYK colorspaces supported"); break; } } if (image->mask) { pdf_dict_put_drop(ctx, imobj, PDF_NAME(SMask), pdf_add_image(ctx, doc, image->mask)); } pdf_update_stream(ctx, doc, imobj, buffer, 1); /* Add ref to our image resource hash table. */ imref = pdf_insert_image_resource(ctx, doc, digest, imobj); } fz_always(ctx) { fz_drop_pixmap(ctx, pixmap); fz_drop_buffer(ctx, buffer); pdf_drop_obj(ctx, imobj); } fz_catch(ctx) fz_rethrow(ctx); return imref; }