/*
 * Append the (decoded) data of stream object `src' to stream object `dst'.
 *
 * Returns 0 when the data was appended unfiltered, the result of
 * add_stream_flate() for FlateDecode streams (zlib builds only), and
 * -1 when the stream cannot be handled (DecodeParms present, more than
 * one filter, or an unsupported filter).
 *
 * The /Filter entry may be a single name or an array of names.  A
 * one-element array is handled like the bare name, and an empty array
 * is handled like a missing /Filter entry (the data is stored as-is).
 */
static int
concat_stream (pdf_obj *dst, pdf_obj *src)
{
  const char *stream_data;
  long        stream_length;
  pdf_obj    *stream_dict;
  pdf_obj    *filter;

  if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src))
    ERROR("Invalid type.");

  stream_data   = pdf_stream_dataptr(src);
  stream_length = pdf_stream_length (src);
  stream_dict   = pdf_stream_dict   (src);

  if (pdf_lookup_dict(stream_dict, "DecodeParms")) {
    WARN("DecodeParams not supported.");
    return -1;
  }

  filter = pdf_lookup_dict(stream_dict, "Filter");

  /*
   * Normalize the array form to a single filter object (or NULL), so
   * that name and array are handled by one code path below.  Never
   * index into an empty array.
   */
  if (filter && PDF_OBJ_ARRAYTYPE(filter)) {
    if (pdf_array_length(filter) > 1) {
      WARN("Multiple DecodeFilter not supported.");
      return -1;
    }
    if (pdf_array_length(filter) == 0)
      filter = NULL;
    else
      filter = pdf_get_array(filter, 0);
  }

  if (!filter) {
    /* No filter at all: the data can be copied verbatim. */
    pdf_add_stream(dst, stream_data, stream_length);
    return 0;
  }

#if HAVE_ZLIB
  if (PDF_OBJ_NAMETYPE(filter)) {
    char *filter_name = pdf_name_value(filter);

    if (filter_name && !strcmp(filter_name, "FlateDecode"))
      return add_stream_flate(dst, stream_data, stream_length);

    /* Passing NULL for "%s" is undefined behavior; print a placeholder. */
    WARN("DecodeFilter \"%s\" not supported.",
         filter_name ? filter_name : "(null)");
    return -1;
  }

  /* Neither a name nor an array of names. */
  ERROR("Broken PDF file?");
#endif /* HAVE_ZLIB */

  /* Filtered stream, but no decoder is available. */
  return -1;
}
/*
 * Finish a Type0 font: when the font has an indirect reference and its
 * font dictionary carries no "ToUnicode" entry yet, add one through
 * add_ToUnicode().  Does nothing for a NULL font or a font without an
 * indirect reference.
 */
static void
Type0Font_dofont (Type0Font *font)
{
  if (!font)
    return;
  if (!font->indirect)
    return;

  /* An existing ToUnicode entry is left untouched. */
  if (pdf_lookup_dict(font->fontdict, "ToUnicode"))
    return;

  /* FIXME */
  add_ToUnicode(font);
}
/* ximage here is the result. DONT USE IT FOR PASSING OPTIONS! */
/*
 * Import one page of the PDF file `image_file' (identified by `ident')
 * as a Form XObject and store it in `ximage'.
 *
 *   options.page_no    page to import; 0 is treated as 1
 *   options.bbox_type  passed through to pdf_doc_get_page()
 *
 * Returns 0 on success and -1 on failure.  On failure every object
 * obtained here is released and the file is closed.
 */
int
pdf_include_page (pdf_ximage *ximage, FILE *image_file, const char *ident,
                  load_options options)
{
  pdf_file   *pf;
  xform_info  info;
  pdf_obj    *contents = NULL, *catalog;
  pdf_obj    *page = NULL, *resources = NULL, *markinfo = NULL;

  pf = pdf_open(ident, image_file);
  if (!pf)
    return -1;

  if (pdf_file_get_version(pf) > pdf_get_version()) {
    WARN("Trying to include PDF file which has newer version number "
         "than output PDF: 1.%d.", pdf_get_version());
  }

  pdf_ximage_init_form_info(&info);

  if (options.page_no == 0)
    options.page_no = 1;

  page = pdf_doc_get_page(pf, options.page_no, options.bbox_type,
                          &info.bbox, &resources);
  if (!page)
    goto error_silent;

  catalog  = pdf_file_get_catalog(pf);
  markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
  if (markinfo) {
    pdf_obj *tmp = pdf_deref_obj(pdf_lookup_dict(markinfo, "Marked"));

    pdf_release_obj(markinfo);
    /*
     * Clear the pointer: the cleanup code below releases `markinfo'
     * whenever it is non-NULL, which would otherwise release it a
     * second time on every later error path.
     */
    markinfo = NULL;

    if (!PDF_OBJ_BOOLEANTYPE(tmp)) {
      if (tmp)
        pdf_release_obj(tmp);
      goto error;
    } else if (pdf_boolean_value(tmp)) {
      WARN("PDF file is tagged... Ignoring tags.");
    }
    pdf_release_obj(tmp);
  }

  contents = pdf_deref_obj(pdf_lookup_dict(page, "Contents"));
  pdf_release_obj(page);
  page = NULL;

  /*
   * Handle page content stream.
   */
  {
    pdf_obj *content_new;

    if (!contents) {
      /*
       * Empty page
       */
      content_new = pdf_new_stream(0);
      /* TODO: better don't include anything if the page is empty */
    } else if (PDF_OBJ_STREAMTYPE(contents)) {
      /*
       * We must import the stream because its dictionary
       * may contain indirect references.
       */
      content_new = pdf_import_object(contents);
    } else if (PDF_OBJ_ARRAYTYPE(contents)) {
      /*
       * Concatenate all content streams.
       */
      int idx, len = pdf_array_length(contents);

      content_new = pdf_new_stream(STREAM_COMPRESS);
      for (idx = 0; idx < len; idx++) {
        pdf_obj *content_seg = pdf_deref_obj(pdf_get_array(contents, idx));

        if (!PDF_OBJ_STREAMTYPE(content_seg) ||
            pdf_concat_stream(content_new, content_seg) < 0) {
          /* content_seg may be NULL when the reference is unresolved. */
          if (content_seg)
            pdf_release_obj(content_seg);
          pdf_release_obj(content_new);
          /* `contents' (the array) is released at the error label. */
          goto error;
        }
        pdf_release_obj(content_seg);
      }
    } else {
      goto error;
    }

    if (contents)
      pdf_release_obj(contents);
    contents = content_new;
  }

  /*
   * Add entries to contents stream dictionary.
   */
  {
    pdf_obj *contents_dict, *bbox, *matrix;

    contents_dict = pdf_stream_dict(contents);
    pdf_add_dict(contents_dict,
                 pdf_new_name("Type"), pdf_new_name("XObject"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("Subtype"), pdf_new_name("Form"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("FormType"), pdf_new_number(1.0));

    bbox = pdf_new_array();
    pdf_add_array(bbox, pdf_new_number(info.bbox.llx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.lly));
    pdf_add_array(bbox, pdf_new_number(info.bbox.urx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.ury));
    pdf_add_dict(contents_dict, pdf_new_name("BBox"), bbox);

    /* Identity matrix [1 0 0 1 0 0]. */
    matrix = pdf_new_array();
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_dict(contents_dict, pdf_new_name("Matrix"), matrix);

    pdf_add_dict(contents_dict, pdf_new_name("Resources"),
                 pdf_import_object(resources));
    pdf_release_obj(resources);
  }

  pdf_close(pf);

  pdf_ximage_set_form(ximage, &info, contents);

  return 0;

 error:
  WARN("Cannot parse document. Broken PDF file?");
 error_silent:
  if (resources)
    pdf_release_obj(resources);
  if (markinfo)
    pdf_release_obj(markinfo);
  if (page)
    pdf_release_obj(page);
  if (contents)
    pdf_release_obj(contents);

  pdf_close(pf);

  return -1;
}
/*
 * Build a single content stream for `page'.
 *
 * The page's "Contents" entry is dereferenced and then handled by type:
 *   - null object:  a new, empty stream is returned;
 *   - array:        all stream elements are concatenated into one new
 *                   compressed stream (null elements are skipped);
 *   - stream:       its data is copied into a new compressed stream.
 *
 * Returns the new stream, or NULL when there is no "Contents" entry,
 * when an entry is not a stream, or when pdf_concat_stream() fails.
 */
static pdf_obj*
pdf_get_page_content (pdf_obj* page)
{
  pdf_obj *raw, *merged;

  raw = pdf_deref_obj(pdf_lookup_dict(page, "Contents"));
  if (!raw)
    return NULL;

  /* Explicit null object: empty page. */
  if (pdf_obj_typeof(raw) == PDF_NULL) {
    pdf_release_obj(raw);
    /* TODO: better don't include anything if the page is empty */
    return pdf_new_stream(0);
  }

  /* Array of segments: concatenate all content streams. */
  if (PDF_OBJ_ARRAYTYPE(raw)) {
    int pos;

    merged = pdf_new_stream(STREAM_COMPRESS);
    for (pos = 0; ; pos++) {
      pdf_obj *seg    = pdf_deref_obj(pdf_get_array(raw, pos));
      int      failed = 0;

      /* The walk ends at the first element that yields no object. */
      if (!seg)
        break;

      /* Null segments are silently ignored. */
      if (!PDF_OBJ_NULLTYPE(seg)) {
        if (!PDF_OBJ_STREAMTYPE(seg)) {
          WARN("Page content not a stream object. Broken PDF file?");
          failed = 1;
        } else if (pdf_concat_stream(merged, seg) < 0) {
          WARN("Could not handle content stream with multiple segments.");
          failed = 1;
        }
      }

      pdf_release_obj(seg);
      if (failed) {
        pdf_release_obj(merged);
        pdf_release_obj(raw);
        return NULL;
      }
    }
    pdf_release_obj(raw);
    return merged;
  }

  /* Anything else must be a single stream. */
  if (!PDF_OBJ_STREAMTYPE(raw)) {
    WARN("Page content not a stream object. Broken PDF file?");
    pdf_release_obj(raw);
    return NULL;
  }

  /* Flate the contents if necessary. */
  merged = pdf_new_stream(STREAM_COMPRESS);
  if (pdf_concat_stream(merged, raw) < 0) {
    WARN("Could not handle a content stream.");
    pdf_release_obj(raw);
    pdf_release_obj(merged);
    return NULL;
  }
  pdf_release_obj(raw);

  return merged;
}
/*
 * Locate page `page_no' of `pf' and return its page object.
 *
 *   page_no        1-based page number; negative values count from the
 *                  back (-1 is the last page)
 *   ret_bbox       if non-NULL, receives the bounding box object
 *   ret_resources  if non-NULL, receives the resources object
 *
 * While descending the page tree, MediaBox/BleedBox/TrimBox/ArtBox,
 * CropBox, Rotate and Resources found on each node replace the values
 * collected so far; a CropBox, when present, finally replaces the
 * bounding box.
 *
 * Returns NULL (after a warning) when the document is encrypted, the
 * catalog or page tree cannot be read, the page does not exist, or no
 * bounding box is available.  On failure nothing is stored through
 * ret_bbox / ret_resources.
 */
static pdf_obj*
pdf_get_page_obj (pdf_file *pf, int page_no,
                  pdf_obj **ret_bbox, pdf_obj **ret_resources)
{
  /* Boxes that replace the current bounding box when they differ. */
  static const char *const override_boxes[] = {
    "BleedBox", "TrimBox", "ArtBox"
  };
  pdf_obj *page_tree = NULL;
  pdf_obj *bbox = NULL, *resources = NULL, *rotate = NULL;
  pdf_obj *crop_box = NULL;
  pdf_obj *tmp;
  int      page_idx;

  /*
   * Get Page Tree.
   */
  {
    pdf_obj *trailer, *catalog;
    pdf_obj *markinfo;

    trailer = pdf_file_get_trailer(pf);

    if (pdf_lookup_dict(trailer, "Encrypt")) {
      WARN("This PDF document is encrypted.");
      pdf_release_obj(trailer);
      return NULL;
    }

    catalog = pdf_deref_obj(pdf_lookup_dict(trailer, "Root"));
    if (!PDF_OBJ_DICTTYPE(catalog)) {
      WARN("Can't read document catalog.");
      pdf_release_obj(trailer);
      if (catalog)
        pdf_release_obj(catalog);
      return NULL;
    }
    pdf_release_obj(trailer);

    markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
    if (markinfo) {
      tmp = pdf_lookup_dict(markinfo, "Marked");
      if (PDF_OBJ_BOOLEANTYPE(tmp) && pdf_boolean_value(tmp))
        WARN("PDF file is tagged... Ignoring tags.");
      pdf_release_obj(markinfo);
    }

    page_tree = pdf_deref_obj(pdf_lookup_dict(catalog, "Pages"));
    pdf_release_obj(catalog);
  }
  if (!page_tree) {
    WARN("Page tree not found.");
    return NULL;
  }

  /*
   * Negative page numbers are counted from the back.
   */
  {
    int count = 0;

    /*
     * Dereference /Count and tolerate its absence (a missing count is
     * treated as zero pages), the same way the count of each kid is
     * read further below.
     */
    tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
    if (tmp) {
      count = pdf_number_value(tmp);
      pdf_release_obj(tmp);
    }

    page_idx = page_no + (page_no >= 0 ? -1 : count);
    if (page_idx < 0 || page_idx >= count) {
      /* page_no is an int: the conversion must be %d, not %ld. */
      WARN("Page %d does not exist.", page_no);
      pdf_release_obj(page_tree);
      return NULL;
    }
    page_no = page_idx + 1;
  }

  /*
   * Seek correct page. Get Media/Crop Box.
   * Media box and resources can be inherited.
   */
  tmp = pdf_lookup_dict(page_tree, "Resources");
  resources = tmp ? pdf_deref_obj(tmp) : pdf_new_dict();

  while (1) {
    pdf_obj *kids_ref, *kids;
    int      kids_length, i, b;

    if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "MediaBox")))) {
      if (bbox)
        pdf_release_obj(bbox);
      bbox = tmp;
    }

    for (b = 0;
         b < (int) (sizeof override_boxes / sizeof override_boxes[0]);
         b++) {
      tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, override_boxes[b]));
      if (!tmp)
        continue;
      /* Without a current box there is nothing to compare against. */
      if (!bbox || !rect_equal(tmp, bbox)) {
        if (bbox)
          pdf_release_obj(bbox);
        bbox = tmp;
      } else {
        pdf_release_obj(tmp);
      }
    }

    if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "CropBox")))) {
      if (crop_box)
        pdf_release_obj(crop_box);
      crop_box = tmp;
    }

    if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Rotate")))) {
      if (rotate)
        pdf_release_obj(rotate);
      rotate = tmp;
    }

    if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Resources")))) {
#if 0
      pdf_merge_dict(tmp, resources);
#endif
      if (resources)
        pdf_release_obj(resources);
      resources = tmp;
    }

    /* A node without Kids is the page itself. */
    kids_ref = pdf_lookup_dict(page_tree, "Kids");
    if (!kids_ref)
      break;

    kids = pdf_deref_obj(kids_ref);
    if (!kids) {
      WARN("Page %d not found! Broken PDF file?", page_no);
      goto fail;
    }
    kids_length = pdf_array_length(kids);

    for (i = 0; i < kids_length; i++) {
      int count;

      pdf_release_obj(page_tree);
      page_tree = pdf_deref_obj(pdf_get_array(kids, i));
      if (!page_tree)
        break; /* unresolved kid: reported below */

      tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
      if (tmp) {
        /* Pages object */
        count = pdf_number_value(tmp);
        pdf_release_obj(tmp);
      } else {
        /* Page object */
        count = 1;
      }

      if (page_idx < count)
        break;

      page_idx -= count;
    }

    pdf_release_obj(kids);

    if (i == kids_length || !page_tree) {
      WARN("Page %d not found! Broken PDF file?", page_no);
      goto fail;
    }
  }

  if (crop_box) {
    /* bbox is still NULL when the tree carries only a CropBox. */
    if (bbox)
      pdf_release_obj(bbox);
    bbox     = crop_box;
    crop_box = NULL;
  }

  if (!bbox) {
    WARN("No BoundingBox information available.");
    goto fail;
  }

  if (rotate) {
    if (pdf_number_value(rotate) != 0.0)
      WARN("<< /Rotate %d >> found. (Not supported yet)",
           (int) pdf_number_value(rotate));
    pdf_release_obj(rotate);
    rotate = NULL;
  }

  /* Hand the objects to the caller, or drop them if they are unwanted. */
  if (ret_bbox != NULL)
    *ret_bbox = bbox;
  else
    pdf_release_obj(bbox);

  if (ret_resources != NULL)
    *ret_resources = resources;
  else if (resources)
    pdf_release_obj(resources);

  return page_tree;

 fail:
  if (bbox)
    pdf_release_obj(bbox);
  if (crop_box)
    pdf_release_obj(crop_box);
  if (rotate)
    pdf_release_obj(rotate);
  if (resources)
    pdf_release_obj(resources);
  if (page_tree)
    pdf_release_obj(page_tree);

  return NULL;
}
/*
 * Set up `font' as one of the built-in CID base fonts listed in
 * cid_basefont[].
 *
 *   name      font name; matches a table entry either exactly or as the
 *             entry's name with its trailing "-Acro" (5 characters)
 *             removed
 *   cmap_csi  CIDSystemInfo of the CMap in use, or NULL to accept any
 *   opt       options; opt->style selects a ",Bold" / ",Italic" /
 *             ",BoldItalic" suffix and opt->embed is reset to 0
 *
 * Returns 0 on success and -1 when `name' is not in the table.
 */
static int
CIDFont_base_open (CIDFont *font, const char *name, CIDSysInfo *cmap_csi, cid_opt *opt)
{
  pdf_obj    *dict, *desc;
  char       *full_name = NULL;
  const char *style_suffix = NULL;
  int         idx = 0;

  ASSERT(font);

  /* Find the table entry for `name'. */
  while (cid_basefont[idx].fontname != NULL) {
    const char *base     = cid_basefont[idx].fontname;
    size_t      stem_len = strlen(base) - strlen("-Acro");

    if (strcmp(name, base) == 0)
      break;
    if (strlen(name) == stem_len && strncmp(name, base, stem_len) == 0)
      break;
    idx++;
  }
  if (cid_basefont[idx].fontname == NULL)
    return -1;

  /* 12 = strlen(",BoldItalic") + 1, the longest suffix plus the NUL. */
  full_name = NEW(strlen(name)+12, char);
  memset(full_name, 0, strlen(name)+12);
  strcpy(full_name, name);

  if (opt->style == FONT_STYLE_BOLD)
    style_suffix = ",Bold";
  else if (opt->style == FONT_STYLE_ITALIC)
    style_suffix = ",Italic";
  else if (opt->style == FONT_STYLE_BOLDITALIC)
    style_suffix = ",BoldItalic";
  if (style_suffix)
    strcat(full_name, style_suffix);

  /* Parse the built-in font dictionary and font descriptor texts. */
  {
    const char *cursor, *limit;

    cursor = cid_basefont[idx].fontdict;
    limit  = cursor + strlen(cursor);
    dict   = parse_pdf_dict(&cursor, limit, NULL);

    cursor = cid_basefont[idx].descriptor;
    limit  = cursor + strlen(cursor);
    desc   = parse_pdf_dict(&cursor, limit, NULL);

    ASSERT(dict && desc);
  }

  font->fontname = full_name;
  font->flags   |= FONT_FLAG_BASEFONT;

  /* Copy the CIDSystemInfo and check it against the CMap's. */
  {
    pdf_obj *csi_dict;
    char    *registry, *ordering;
    int      supplement;

    csi_dict = pdf_lookup_dict(dict, "CIDSystemInfo");
    ASSERT( csi_dict && pdf_obj_typeof(csi_dict) == PDF_DICT );

    registry   = pdf_string_value(pdf_lookup_dict(csi_dict, "Registry"));
    ordering   = pdf_string_value(pdf_lookup_dict(csi_dict, "Ordering"));
    supplement = pdf_number_value(pdf_lookup_dict(csi_dict, "Supplement"));

    if (cmap_csi) { /* NULL for accept any */
      int same_collection = !strcmp(registry, cmap_csi->registry) &&
                            !strcmp(ordering, cmap_csi->ordering);

      if (!same_collection) {
        ERROR("Inconsistent CMap used for CID-keyed font %s.",
              cid_basefont[idx].fontname);
      } else if (supplement < cmap_csi->supplement) {
        WARN("CMap has higher supplement number than CIDFont: %s", full_name);
        WARN("Some chracters may not be displayed or printed.");
      }
    }

    font->csi = NEW(1, CIDSysInfo);

    font->csi->registry = NEW(strlen(registry)+1, char);
    strcpy(font->csi->registry, registry);

    font->csi->ordering = NEW(strlen(ordering)+1, char);
    strcpy(font->csi->ordering, ordering);

    font->csi->supplement = supplement;
  }

  /* Map the Subtype name onto the CIDFont type. */
  {
    pdf_obj *subtype_obj;
    char    *subtype_name;

    subtype_obj = pdf_lookup_dict(dict, "Subtype");
    ASSERT( subtype_obj != NULL && pdf_obj_typeof(subtype_obj) == PDF_NAME );

    subtype_name = pdf_name_value(subtype_obj);
    if (strcmp(subtype_name, "CIDFontType0") == 0) {
      font->subtype = CIDFONT_TYPE0;
    } else if (strcmp(subtype_name, "CIDFontType2") == 0) {
      font->subtype = CIDFONT_TYPE2;
    } else {
      ERROR("Unknown CIDFontType \"%s\"", subtype_name);
    }
  }

  /* With forced fixed pitch, the width arrays are removed. */
  if (cidoptflags & CIDFONT_FORCE_FIXEDPITCH) {
    if (pdf_lookup_dict(dict, "W"))
      pdf_remove_dict(dict, "W");
    if (pdf_lookup_dict(dict, "W2"))
      pdf_remove_dict(dict, "W2");
  }

  pdf_add_dict(dict, pdf_new_name("Type"),     pdf_new_name("Font"));
  pdf_add_dict(dict, pdf_new_name("BaseFont"), pdf_new_name(full_name));
  pdf_add_dict(desc, pdf_new_name("Type"),     pdf_new_name("FontDescriptor"));
  pdf_add_dict(desc, pdf_new_name("FontName"), pdf_new_name(full_name));

  font->fontdict   = dict;
  font->descriptor = desc;

  opt->embed = 0;

  return 0;
}
pdf_obj *pdf_include_page(FILE *image_file, struct xform_info *p, char *res_name) { pdf_obj *trailer = NULL, *catalog = NULL, *page_tree = NULL; pdf_obj *kids_ref, *kids; pdf_obj *media_box = NULL, *crop_box = NULL, *resources = NULL, *contents = NULL, *contents_ref = NULL; pdf_obj *tmp1; #ifdef MEM_DEBUG MEM_START #endif if (!(trailer = pdf_open (image_file))) { fprintf (stderr, "\nCorrupt PDF file?\n"); } /* Now just lookup catalog location */ /* Deref catalog */ if (trailer && (catalog = pdf_deref_obj(pdf_lookup_dict (trailer,"Root"))) == NULL) { fprintf (stderr, "\nCatalog isn't where I expect it.\n"); } if (trailer) pdf_release_obj (trailer); /* Lookup page tree in catalog */ if (catalog) { page_tree = pdf_deref_obj (pdf_lookup_dict (catalog, "Pages")); /* Should be finished with catalog */ pdf_release_obj (catalog); } /* Media box and resources can be inherited so start looking for them here */ if (page_tree) { if ((tmp1 = pdf_lookup_dict (page_tree, "CropBox"))) crop_box = pdf_deref_obj (tmp1); if ((tmp1 = pdf_lookup_dict (page_tree, "MediaBox"))) media_box = pdf_deref_obj (tmp1); resources = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources")); if (resources == NULL) { resources = pdf_new_dict(); } while ((kids_ref = pdf_lookup_dict (page_tree, "Kids")) != NULL) { kids = pdf_deref_obj (kids_ref); pdf_release_obj (page_tree); page_tree = pdf_deref_obj (pdf_get_array(kids, 0)); pdf_release_obj (kids); /* Replace MediaBox if it's here */ tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "MediaBox")); if (tmp1 && media_box) pdf_release_obj (media_box); if (tmp1) media_box = tmp1; /* Do same for CropBox */ tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "CropBox")); if (tmp1 && crop_box) pdf_release_obj (crop_box); if (tmp1) crop_box = tmp1; /* Add resources if they're here */ tmp1 = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources")); if (tmp1) { pdf_merge_dict (tmp1, resources); pdf_release_obj (resources); resources = tmp1; } } /* At this point, 
page_tree contains the first page. media_box, crop_box, and resources should also be set. */ /* If there's a crop_box, replace media_box with crop_box. The rest of this routine assumes crop_box has been released */ if (crop_box) { pdf_release_obj (media_box); media_box = crop_box; crop_box = NULL; } /* This gets bit confusing. In the following code, media_box is the box the image is cropped to. The bounding box is the box the image is scaled to */ /* If user did not supply bounding box, use media_box (which may really be cropbox) as bounding box */ /* Set the crop box parameters in the xform_info structure */ p->c_llx = pdf_number_value (pdf_get_array (media_box, 0)); p->c_lly = pdf_number_value (pdf_get_array (media_box, 1)); p->c_urx = pdf_number_value (pdf_get_array (media_box, 2)); p->c_ury = pdf_number_value (pdf_get_array (media_box, 3)); /* Adjust scaling and clipping information as necessary */ pdf_scale_image (p); /* Set the media box to whatever pdf_scale_image() decided for the crop box (which may be unchanged) */ pdf_release_obj (media_box); media_box = pdf_new_array (); pdf_add_array (media_box, pdf_new_number (p->c_llx)); pdf_add_array (media_box, pdf_new_number (p->c_lly)); pdf_add_array (media_box, pdf_new_number (p->c_urx)); pdf_add_array (media_box, pdf_new_number (p->c_ury)); if ((contents = pdf_deref_obj(pdf_lookup_dict(page_tree,"Contents")))==NULL) { fprintf (stderr, "\nNo Contents found\n"); return NULL; } pdf_release_obj (page_tree); } /* Arrays of contents must be handled very differently (not implemented) */ if (contents && contents -> type != PDF_ARRAY) { doc_make_form_xobj (contents, media_box, p->user_bbox? p->u_llx: 0.0, p->user_bbox? p->u_lly: 0.0, 1.0, 1.0, resources, res_name); } else { fprintf (stderr, "\nIgnoring stream with with multiple segments\n"); contents = NULL; } if (contents) { contents_ref = pdf_ref_obj (contents); pdf_release_obj (contents); } pdf_close (); #ifdef MEM_DEBUG MEM_END #endif return (contents_ref); }