Exemple #1
0
/*
 * Append the contents of stream object `src' to stream object `dst'.
 *
 * A source stream without a filter is copied as-is with pdf_add_stream().
 * When built with zlib, a source stream whose only filter is FlateDecode
 * is handed to add_stream_flate().  Streams that carry DecodeParms, more
 * than one filter, or any other filter are rejected with a warning.
 *
 * The Filter entry may be a name or an array of names.  An array with
 * one element is treated like that element and an empty array like a
 * missing Filter entry.
 *
 * Returns 0 after appending unfiltered data, the result of
 * add_stream_flate() for FlateDecode data, and -1 for unsupported input.
 */
static int
concat_stream (pdf_obj *dst, pdf_obj *src)
{
  const char *stream_data;
  long        stream_length;
  pdf_obj    *stream_dict;
  pdf_obj    *filter;

  if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src))
    ERROR("Invalid type.");

  stream_data   = pdf_stream_dataptr(src);
  stream_length = pdf_stream_length (src);
  stream_dict   = pdf_stream_dict   (src);

  if (pdf_lookup_dict(stream_dict, "DecodeParms")) {
    WARN("DecodeParams not supported.");
    return -1;
  }

  filter = pdf_lookup_dict(stream_dict, "Filter");

  /*
   * Reduce the array form to the single-name form so that both are
   * handled by the same code below.
   */
  if (filter && PDF_OBJ_ARRAYTYPE(filter)) {
    if (pdf_array_length(filter) > 1) {
      WARN("Multiple DecodeFilter not supported.");
      return -1;
    }
    /* Do not index into an empty array; it simply means "no filter". */
    filter = (pdf_array_length(filter) == 1) ? pdf_get_array(filter, 0) : NULL;
  }

  if (!filter) {
    pdf_add_stream(dst, stream_data, stream_length);
    return 0;
  }

#if HAVE_ZLIB
  /* Only ask for the name value once we know the object is a name. */
  if (PDF_OBJ_NAMETYPE(filter)) {
    const char *filter_name = pdf_name_value(filter);

    if (filter_name && !strcmp(filter_name, "FlateDecode"))
      return add_stream_flate(dst, stream_data, stream_length);

    /* Never pass a NULL pointer to a "%s" conversion. */
    WARN("DecodeFilter \"%s\" not supported.",
         filter_name ? filter_name : "(null)");
  } else
    ERROR("Broken PDF file?");
#else
  /* Without zlib no filtered stream can be decoded; say so explicitly. */
  WARN("Filtered streams are not supported (built without zlib).");
#endif /* HAVE_ZLIB */

  return -1;
}
Exemple #2
0
/*
 * Include one page of a PDF file as a Form XObject.
 *
 * ximage here is the result. DONT USE IT FOR PASSING OPTIONS!
 *
 * ximage      receives the form via pdf_ximage_set_form().
 * image_file  open file passed to pdf_open() together with `ident'.
 * ident       identifier passed to pdf_open().
 * options     options.page_no selects the page (0 is treated as 1) and
 *             options.bbox_type is forwarded to pdf_doc_get_page().
 *
 * The page content (a single stream, or an array of streams which are
 * concatenated) becomes the form's content stream; Type, Subtype,
 * FormType, BBox, Matrix and Resources entries are added to its
 * dictionary.
 *
 * Returns 0 on success and -1 on failure.  The PDF file is closed in
 * both cases.
 */
int
pdf_include_page (pdf_ximage        *ximage,
                  FILE              *image_file,
                  const char        *ident,
                  load_options       options)
{
  pdf_file *pf;
  xform_info info;
  pdf_obj *contents = NULL, *catalog;
  pdf_obj *page = NULL, *resources = NULL, *markinfo = NULL;

  pf = pdf_open(ident, image_file);
  if (!pf)
    return -1;

  if (pdf_file_get_version(pf) > pdf_get_version()) {
    WARN("Trying to include PDF file which has newer version number " \
         "than output PDF: 1.%d.", pdf_get_version());
  }

  pdf_ximage_init_form_info(&info);

  if (options.page_no == 0)
    options.page_no = 1;
  page = pdf_doc_get_page(pf,
                          options.page_no, options.bbox_type,
                          &info.bbox, &resources);

  if (!page)
    goto error_silent;

  catalog = pdf_file_get_catalog(pf);
  markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
  if (markinfo) {
    pdf_obj *marked = pdf_deref_obj(pdf_lookup_dict(markinfo, "Marked"));

    pdf_release_obj(markinfo);
    /*
     * Forget the pointer right away: the error path below releases
     * markinfo when it is non-NULL and must not release it twice.
     */
    markinfo = NULL;

    /*
     * Marked is optional (absent means "not tagged"); only a value of
     * the wrong type is an error.
     */
    if (marked) {
      if (!PDF_OBJ_BOOLEANTYPE(marked)) {
        pdf_release_obj(marked);
        goto error;
      }
      if (pdf_boolean_value(marked))
        WARN("PDF file is tagged... Ignoring tags.");
      pdf_release_obj(marked);
    }
  }

  contents = pdf_deref_obj(pdf_lookup_dict(page, "Contents"));
  pdf_release_obj(page);
  page = NULL;

  /*
   * Handle page content stream.
   */
  {
    pdf_obj *content_new;

    if (!contents) {
      /*
       * Empty page
       */
      content_new = pdf_new_stream(0);
      /* TODO: better don't include anything if the page is empty */
    } else if (PDF_OBJ_STREAMTYPE(contents)) {
      /*
       * We must import the stream because its dictionary
       * may contain indirect references.
       */
      content_new = pdf_import_object(contents);
    } else if (PDF_OBJ_ARRAYTYPE(contents)) {
      /*
       * Concatenate all content streams.
       */
      int idx, len = pdf_array_length(contents);

      content_new = pdf_new_stream(STREAM_COMPRESS);
      for (idx = 0; idx < len; idx++) {
        pdf_obj *content_seg = pdf_deref_obj(pdf_get_array(contents, idx));

        if (!PDF_OBJ_STREAMTYPE(content_seg) ||
            pdf_concat_stream(content_new, content_seg) < 0) {
          /* The segment may be missing altogether. */
          if (content_seg)
            pdf_release_obj(content_seg);
          pdf_release_obj(content_new);
          goto error;
        }
        pdf_release_obj(content_seg);
      }
    } else {
      goto error;
    }

    if (contents)
      pdf_release_obj(contents);
    contents = content_new;
  }

  /*
   * Add entries to contents stream dictionary.
   */
  {
    pdf_obj *contents_dict, *bbox, *matrix;

    contents_dict = pdf_stream_dict(contents);
    pdf_add_dict(contents_dict,
                 pdf_new_name("Type"), pdf_new_name("XObject"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("Subtype"), pdf_new_name("Form"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("FormType"), pdf_new_number(1.0));

    bbox = pdf_new_array();
    pdf_add_array(bbox, pdf_new_number(info.bbox.llx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.lly));
    pdf_add_array(bbox, pdf_new_number(info.bbox.urx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.ury));

    pdf_add_dict(contents_dict, pdf_new_name("BBox"), bbox);

    /* Identity matrix: [1 0 0 1 0 0] */
    matrix = pdf_new_array();
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));

    pdf_add_dict(contents_dict, pdf_new_name("Matrix"), matrix);

    /* Defensive: only import and release resources we actually got. */
    if (resources) {
      pdf_add_dict(contents_dict, pdf_new_name("Resources"),
                   pdf_import_object(resources));
      pdf_release_obj(resources);
      resources = NULL;
    }
  }

  pdf_close(pf);

  pdf_ximage_set_form(ximage, &info, contents);

  return 0;

 error:
  WARN("Cannot parse document. Broken PDF file?");
 error_silent:
  if (resources)
    pdf_release_obj(resources);
  if (markinfo)
    pdf_release_obj(markinfo);
  if (page)
    pdf_release_obj(page);
  if (contents)
    pdf_release_obj(contents);

  pdf_close(pf);

  return -1;
}
Exemple #3
0
/*
 * Find the page object for page `page_no' of the PDF file `pf'.
 *
 * page_no        page number starting at 1; negative numbers are counted
 *                from the back (-1 is the last page).
 * ret_bbox       if not NULL, receives the bounding box object chosen
 *                for the page.
 * ret_resources  if not NULL, receives the resource dictionary found for
 *                the page (a new empty dictionary if there is none).
 *
 * The page tree is descended from its root.  MediaBox, BleedBox, TrimBox,
 * ArtBox, CropBox, Rotate and Resources entries are picked up on every
 * node visited, so a value set on an ancestor stays in effect until a
 * node further down provides its own.  A CropBox, when present, replaces
 * whatever other box was selected.
 *
 * Returns the page object, or NULL after a warning when the document is
 * encrypted, the catalog or page tree cannot be read, the page does not
 * exist, or no bounding box is available.  On success the returned
 * object and the objects stored through ret_bbox / ret_resources are
 * references handed over to the caller.
 */
static pdf_obj*
pdf_get_page_obj (pdf_file *pf, int page_no,
                  pdf_obj **ret_bbox, pdf_obj **ret_resources)
{
  pdf_obj *page_tree;
  pdf_obj *bbox = NULL, *crop_box = NULL;
  pdf_obj *resources = NULL, *rotate = NULL;
  int page_idx;

  /*
   * Get Page Tree.
   */
  page_tree = NULL;
  {
    pdf_obj *trailer, *catalog;
    pdf_obj *markinfo, *tmp;

    trailer = pdf_file_get_trailer(pf);

    if (pdf_lookup_dict(trailer, "Encrypt")) {
      WARN("This PDF document is encrypted.");
      pdf_release_obj(trailer);
      return NULL;
    }

    catalog = pdf_deref_obj(pdf_lookup_dict(trailer, "Root"));
    pdf_release_obj(trailer);
    if (!PDF_OBJ_DICTTYPE(catalog)) {
      WARN("Can't read document catalog.");
      if (catalog)
        pdf_release_obj(catalog);
      return NULL;
    }

    markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
    if (markinfo) {
      /* Only look inside MarkInfo when it really is a dictionary. */
      if (PDF_OBJ_DICTTYPE(markinfo)) {
        tmp = pdf_lookup_dict(markinfo, "Marked");
        if (PDF_OBJ_BOOLEANTYPE(tmp) && pdf_boolean_value(tmp))
          WARN("PDF file is tagged... Ignoring tags.");
      }
      pdf_release_obj(markinfo);
    }

    page_tree = pdf_deref_obj(pdf_lookup_dict(catalog, "Pages"));
    pdf_release_obj(catalog);
  }
  if (!PDF_OBJ_DICTTYPE(page_tree)) {
    WARN("Page tree not found.");
    if (page_tree)
      pdf_release_obj(page_tree);
    return NULL;
  }

  /*
   * Negative page numbers are counted from the back.
   */
  {
    /* Count may be an indirect object and may be missing in a broken file. */
    pdf_obj *count_obj = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
    int count = 0;

    if (count_obj) {
      count = pdf_number_value(count_obj);
      pdf_release_obj(count_obj);
    }
    page_idx = page_no + (page_no >= 0 ? -1 : count);
    if (page_idx < 0 || page_idx >= count) {
      WARN("Page %d does not exist.", page_no);
      pdf_release_obj(page_tree);
      return NULL;
    }
    page_no = page_idx+1;
  }

  /*
   * Seek correct page. Get Media/Crop Box.
   * Media box and resources can be inherited.
   */
  {
    /* Boxes that replace the current box when they differ from it. */
    static const char *const other_boxes[] = {
      "BleedBox", "TrimBox", "ArtBox"
    };
    pdf_obj *kids_ref, *kids;
    pdf_obj *tmp;

    while (1) {
      int kids_length, i;
      unsigned int b;

      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "MediaBox")))) {
        if (bbox)
          pdf_release_obj(bbox);
        bbox = tmp;
      }
      for (b = 0; b < sizeof(other_boxes) / sizeof(other_boxes[0]); b++) {
        tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, other_boxes[b]));
        if (!tmp)
          continue;
        if (!rect_equal(tmp, bbox)) {
          if (bbox)
            pdf_release_obj(bbox);
          bbox = tmp;
        } else {
          pdf_release_obj(tmp);
        }
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "CropBox")))) {
        if (crop_box)
          pdf_release_obj(crop_box);
        crop_box = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Rotate")))) {
        if (rotate)
          pdf_release_obj(rotate);
        rotate = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Resources")))) {
        if (resources)
          pdf_release_obj(resources);
        resources = tmp;
      }

      /* A node without Kids is the page itself. */
      kids_ref = pdf_lookup_dict(page_tree, "Kids");
      if (!kids_ref)
        break;
      kids = pdf_deref_obj(kids_ref);
      if (!PDF_OBJ_ARRAYTYPE(kids)) {
        if (kids)
          pdf_release_obj(kids);
        goto not_found;
      }
      kids_length = pdf_array_length(kids);

      for (i = 0; i < kids_length; i++) {
        int count;

        pdf_release_obj(page_tree);
        page_tree = pdf_deref_obj(pdf_get_array(kids, i));
        if (!PDF_OBJ_DICTTYPE(page_tree)) {
          /* Dangling or malformed kid: give up instead of looking into it. */
          pdf_release_obj(kids);
          goto not_found;
        }

        tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
        if (tmp) {
          /* Pages object */
          count = pdf_number_value(tmp);
          pdf_release_obj(tmp);
        } else {
          /* Page object */
          count = 1;
        }
        if (page_idx < count)
          break;

        page_idx -= count;
      }

      pdf_release_obj(kids);

      if (i == kids_length)
        goto not_found;
    }
  }

  if (crop_box) {
    /* There may be a CropBox without any other box. */
    if (bbox)
      pdf_release_obj(bbox);
    bbox = crop_box;
    crop_box = NULL;
  }

  if (!bbox) {
    WARN("No BoundingBox information available.");
    goto error;
  }

  if (!resources)
    resources = pdf_new_dict();

  if (rotate) {
    if (pdf_number_value(rotate) != 0.0)
      WARN("<< /Rotate %d >> found. (Not supported yet)",
            (int)pdf_number_value(rotate));
    pdf_release_obj(rotate);
    rotate = NULL;
  }

  /* Hand the objects to the caller, or drop them if they are not wanted. */
  if (ret_bbox != NULL)
    *ret_bbox = bbox;
  else
    pdf_release_obj(bbox);
  if (ret_resources != NULL)
    *ret_resources = resources;
  else
    pdf_release_obj(resources);

  return page_tree;

 not_found:
  WARN("Page %d not found! Broken PDF file?", page_no);
 error:
  if (bbox)
    pdf_release_obj(bbox);
  if (crop_box)
    pdf_release_obj(crop_box);
  if (rotate)
    pdf_release_obj(rotate);
  if (resources)
    pdf_release_obj(resources);
  if (page_tree)
    pdf_release_obj(page_tree);

  return NULL;
}