/*
 * Append the data of stream `src` to stream `dst`.
 *
 * Streams without a "Filter" entry are appended verbatim.  When zlib
 * support is compiled in (HAVE_ZLIB), a single "FlateDecode" filter,
 * given either as a name or as a one-element array, is handed to
 * add_stream_flate().  Every other configuration is rejected:
 * a "DecodeParms" entry, more than one filter, or any other filter name.
 *
 * Returns 0 when unfiltered data was appended, the result of
 * add_stream_flate() for FlateDecode data, and -1 otherwise.
 */
static int
concat_stream (pdf_obj *dst, pdf_obj *src)
{
  const char *stream_data;
  long        stream_length;
  pdf_obj    *stream_dict;
  pdf_obj    *filter;

  if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src))
    ERROR("Invalid type.");

  stream_data   = pdf_stream_dataptr(src);
  stream_length = pdf_stream_length (src);
  stream_dict   = pdf_stream_dict   (src);

  if (pdf_lookup_dict(stream_dict, "DecodeParms")) {
    WARN("DecodeParams not supported.");
    return -1;
  }

  filter = pdf_lookup_dict(stream_dict, "Filter");
  if (!filter) {
    /* No filter at all: the raw bytes can be appended as they are. */
    pdf_add_stream(dst, stream_data, stream_length);
    return 0;
  }

#if HAVE_ZLIB
  /* A one-element array is treated exactly like a bare filter name. */
  if (PDF_OBJ_ARRAYTYPE(filter)) {
    if (pdf_array_length(filter) > 1) {
      WARN("Multiple DecodeFilter not supported.");
      return -1;
    }
    /*
     * May yield no object for an empty array; the name check below
     * catches that case as well as a non-name element.
     */
    filter = pdf_get_array(filter, 0);
  }

  if (filter && PDF_OBJ_NAMETYPE(filter)) {
    char *filter_name = pdf_name_value(filter);

    if (filter_name && !strcmp(filter_name, "FlateDecode"))
      return add_stream_flate(dst, stream_data, stream_length);

    /* Never hand a NULL pointer to "%s". */
    WARN("DecodeFilter \"%s\" not supported.",
         filter_name ? filter_name : "(null)");
  } else {
    /* Neither a name nor an array holding a name. */
    ERROR("Broken PDF file?");
  }
#endif /* HAVE_ZLIB */

  return -1;
}
/*
 * Include one page of a PDF file as a Form XObject.
 *
 * ximage here is the result. DON'T USE IT FOR PASSING OPTIONS!
 * The page is selected by options.page_no (0 is treated as 1) and its
 * bounding box by options.bbox_type.
 *
 * The page's content stream (or the concatenation of its content
 * streams) becomes the body of the form.  Type, Subtype, FormType,
 * BBox, an identity Matrix and the imported Resources are added to the
 * stream dictionary before it is passed to pdf_ximage_set_form().
 *
 * Returns 0 on success and -1 on failure.  The PDF file opened here is
 * closed on every path after a successful pdf_open().
 */
int
pdf_include_page (pdf_ximage *ximage, FILE *image_file, const char *ident,
                  load_options options)
{
  pdf_file  *pf;
  xform_info info;
  pdf_obj   *contents = NULL, *catalog;
  pdf_obj   *page = NULL, *resources = NULL, *markinfo = NULL;

  pf = pdf_open(ident, image_file);
  if (!pf)
    return -1;

  if (pdf_file_get_version(pf) > pdf_get_version()) {
    /* The number printed is the value of pdf_get_version(). */
    WARN("Trying to include PDF file which has newer version number "
         "than output PDF: 1.%d.", pdf_get_version());
  }

  pdf_ximage_init_form_info(&info);

  if (options.page_no == 0)
    options.page_no = 1;
  page = pdf_doc_get_page(pf, options.page_no, options.bbox_type,
                          &info.bbox, &resources);
  if (!page)
    goto error_silent;

  catalog  = pdf_file_get_catalog(pf);
  markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
  if (markinfo) {
    pdf_obj *tmp = pdf_deref_obj(pdf_lookup_dict(markinfo, "Marked"));

    pdf_release_obj(markinfo);
    /*
     * Cleared so that the cleanup code below does not release it a
     * second time when any later step jumps to `error`.
     */
    markinfo = NULL;

    if (!PDF_OBJ_BOOLEANTYPE(tmp)) {
      if (tmp)
        pdf_release_obj(tmp);
      goto error;
    } else if (pdf_boolean_value(tmp)) {
      WARN("PDF file is tagged... Ignoring tags.");
    }
    pdf_release_obj(tmp);
  }

  contents = pdf_deref_obj(pdf_lookup_dict(page, "Contents"));
  pdf_release_obj(page);
  page = NULL;

  /*
   * Handle page content stream.
   */
  {
    pdf_obj *content_new;

    if (!contents) {
      /*
       * Empty page
       */
      content_new = pdf_new_stream(0);
      /* TODO: better don't include anything if the page is empty */
    } else if (PDF_OBJ_STREAMTYPE(contents)) {
      /*
       * We must import the stream because its dictionary
       * may contain indirect references.
       */
      content_new = pdf_import_object(contents);
    } else if (PDF_OBJ_ARRAYTYPE(contents)) {
      /*
       * Concatenate all content streams.
       */
      int idx, len = pdf_array_length(contents);

      content_new = pdf_new_stream(STREAM_COMPRESS);
      for (idx = 0; idx < len; idx++) {
        pdf_obj *content_seg = pdf_deref_obj(pdf_get_array(contents, idx));

        if (!PDF_OBJ_STREAMTYPE(content_seg) ||
            pdf_concat_stream(content_new, content_seg) < 0) {
          /*
           * content_seg may be NULL when the element could not be
           * dereferenced; guard the release as is done for every other
           * possibly-NULL object in this function.
           */
          if (content_seg)
            pdf_release_obj(content_seg);
          pdf_release_obj(content_new);
          goto error;
        }
        pdf_release_obj(content_seg);
      }
    } else {
      goto error;
    }

    if (contents)
      pdf_release_obj(contents);
    contents = content_new;
  }

  /*
   * Add entries to contents stream dictionary.
   */
  {
    pdf_obj *contents_dict, *bbox, *matrix;

    contents_dict = pdf_stream_dict(contents);
    pdf_add_dict(contents_dict,
                 pdf_new_name("Type"), pdf_new_name("XObject"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("Subtype"), pdf_new_name("Form"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("FormType"), pdf_new_number(1.0));

    bbox = pdf_new_array();
    pdf_add_array(bbox, pdf_new_number(info.bbox.llx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.lly));
    pdf_add_array(bbox, pdf_new_number(info.bbox.urx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.ury));
    pdf_add_dict(contents_dict, pdf_new_name("BBox"), bbox);

    /* Identity matrix: [1 0 0 1 0 0]. */
    matrix = pdf_new_array();
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_dict(contents_dict, pdf_new_name("Matrix"), matrix);

    pdf_add_dict(contents_dict, pdf_new_name("Resources"),
                 pdf_import_object(resources));
    pdf_release_obj(resources);
  }

  pdf_close(pf);

  pdf_ximage_set_form(ximage, &info, contents);

  return 0;

 error:
  WARN("Cannot parse document. Broken PDF file?");
 error_silent:
  if (resources)
    pdf_release_obj(resources);
  if (markinfo)
    pdf_release_obj(markinfo);
  if (page)
    pdf_release_obj(page);
  if (contents)
    pdf_release_obj(contents);

  pdf_close(pf);

  return -1;
}
/*
 * Build a single content stream for `page` from its "Contents" entry.
 *
 *  - No dereferenceable "Contents" object: returns NULL.
 *  - Null object: returns a fresh stream created with pdf_new_stream(0).
 *  - Array: the segments are concatenated into a new STREAM_COMPRESS
 *    stream; null-object segments are skipped.
 *  - Stream: its data is passed through pdf_concat_stream() into a new
 *    STREAM_COMPRESS stream.
 *
 * On any failure a warning is issued, every object obtained here is
 * released, and NULL is returned.
 */
static pdf_obj*
pdf_get_page_content (pdf_obj* page)
{
  pdf_obj *src, *merged, *seg;
  int      i;

  src = pdf_deref_obj(pdf_lookup_dict(page, "Contents"));
  if (!src)
    return NULL;

  if (pdf_obj_typeof(src) == PDF_NULL) {
    /* empty page */
    pdf_release_obj(src);
    /* TODO: better don't include anything if the page is empty */
    return pdf_new_stream(0);
  }

  if (!PDF_OBJ_ARRAYTYPE(src)) {
    /* Anything that is not an array has to be a single stream. */
    if (!PDF_OBJ_STREAMTYPE(src)) {
      WARN("Page content not a stream object. Broken PDF file?");
      pdf_release_obj(src);
      return NULL;
    }

    /* Flate the contents if necessary. */
    merged = pdf_new_stream(STREAM_COMPRESS);
    if (pdf_concat_stream(merged, src) < 0) {
      WARN("Could not handle a content stream.");
      pdf_release_obj(src);
      pdf_release_obj(merged);
      return NULL;
    }
    pdf_release_obj(src);
    return merged;
  }

  /*
   * Concatenate all content streams.  The walk ends at the first index
   * for which no object can be obtained.
   */
  merged = pdf_new_stream(STREAM_COMPRESS);
  for (i = 0; (seg = pdf_deref_obj(pdf_get_array(src, i))) != NULL; i++) {
    if (PDF_OBJ_NULLTYPE(seg)) {
      /* Silently ignore. */
      pdf_release_obj(seg);
      continue;
    }
    if (!PDF_OBJ_STREAMTYPE(seg)) {
      WARN("Page content not a stream object. Broken PDF file?");
      goto fail;
    }
    if (pdf_concat_stream(merged, seg) < 0) {
      WARN("Could not handle content stream with multiple segments.");
      goto fail;
    }
    pdf_release_obj(seg);
  }

  pdf_release_obj(src);
  return merged;

 fail:
  pdf_release_obj(seg);
  pdf_release_obj(merged);
  pdf_release_obj(src);
  return NULL;
}