Beispiel #1
0
static pdf_obj*
pdf_get_page_obj (pdf_file *pf, int page_no,
                  pdf_obj **ret_bbox, pdf_obj **ret_resources)
{
  pdf_obj *page_tree;
  pdf_obj *bbox = NULL, *resources = NULL, *rotate = NULL;
  int page_idx;

  /*
   * Get Page Tree.
   */
  page_tree = NULL;
  {
    pdf_obj *trailer, *catalog;
    pdf_obj *markinfo, *tmp;

    trailer = pdf_file_get_trailer(pf);

    if (pdf_lookup_dict(trailer, "Encrypt")) {
      WARN("This PDF document is encrypted.");
      pdf_release_obj(trailer);
      return NULL;
    }

    catalog = pdf_deref_obj(pdf_lookup_dict(trailer, "Root"));
    if (!PDF_OBJ_DICTTYPE(catalog)) {
      WARN("Can't read document catalog.");
      pdf_release_obj(trailer);
      if (catalog)
	pdf_release_obj(catalog);
      return NULL;
    }
    pdf_release_obj(trailer);

    markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
    if (markinfo) {
      tmp = pdf_lookup_dict(markinfo, "Marked");
      if (PDF_OBJ_BOOLEANTYPE(tmp) && pdf_boolean_value(tmp))
        WARN("PDF file is tagged... Ignoring tags.");
      pdf_release_obj(markinfo);
    }

    page_tree = pdf_deref_obj(pdf_lookup_dict(catalog, "Pages"));
    pdf_release_obj(catalog);
  }
  if (!page_tree) {
    WARN("Page tree not found.");
    return NULL;
  }

  /*
   * Negative page numbers are counted from the back.
   */
  {
    int count = pdf_number_value(pdf_lookup_dict(page_tree, "Count"));
    page_idx = page_no + (page_no >= 0 ? -1 : count);
    if (page_idx < 0 || page_idx >= count) {
	WARN("Page %ld does not exist.", page_no);
	pdf_release_obj(page_tree);
	return NULL;
      }
    page_no = page_idx+1;
  }

  /*
   * Seek correct page. Get Media/Crop Box.
   * Media box and resources can be inherited.
   */
  {
    pdf_obj *kids_ref, *kids;
    pdf_obj *crop_box = NULL;
    pdf_obj *tmp;

    tmp = pdf_lookup_dict(page_tree, "Resources");
    resources = tmp ? pdf_deref_obj(tmp) : pdf_new_dict();

    while (1) {
      int kids_length, i;
 
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "MediaBox")))) {
	if (bbox)
	  pdf_release_obj(bbox);
	bbox = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "BleedBox")))) {
        if (!rect_equal(tmp, bbox)) {
	  if (bbox)
	    pdf_release_obj(bbox);
	  bbox = tmp;
        } else {
          pdf_release_obj(tmp);
      }
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "TrimBox")))) {
        if (!rect_equal(tmp, bbox)) {
	  if (bbox)
	    pdf_release_obj(bbox);
	  bbox = tmp;
        } else {
          pdf_release_obj(tmp);
      }
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "ArtBox")))) {
        if (!rect_equal(tmp, bbox)) {
	  if (bbox)
	    pdf_release_obj(bbox);
	  bbox = tmp;
        } else {
          pdf_release_obj(tmp);
      }
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "CropBox")))) {
	if (crop_box)
	  pdf_release_obj(crop_box);
	crop_box = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Rotate")))) {
	if (rotate)
	  pdf_release_obj(rotate);
	rotate = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Resources")))) {
#if 0
	pdf_merge_dict(tmp, resources);
#endif
	if (resources)
	  pdf_release_obj(resources);
	resources = tmp;
      }

      kids_ref = pdf_lookup_dict(page_tree, "Kids");
      if (!kids_ref)
	break;
      kids = pdf_deref_obj(kids_ref);
      kids_length = pdf_array_length(kids);

      for (i = 0; i < kids_length; i++) {
	int count;

	pdf_release_obj(page_tree);
	page_tree = pdf_deref_obj(pdf_get_array(kids, i));

	tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
	if (tmp) {
	  /* Pages object */
	  count = pdf_number_value(tmp);
	  pdf_release_obj(tmp);
        } else {
	  /* Page object */
	  count = 1;
        }
	if (page_idx < count)
	  break;

	page_idx -= count;
      }
      
      pdf_release_obj(kids);

      if (i == kids_length) {
	WARN("Page %ld not found! Broken PDF file?", page_no);
	if (bbox)
	  pdf_release_obj(bbox);
	if (crop_box)
	  pdf_release_obj(crop_box);
	if (rotate)
	  pdf_release_obj(rotate);
	pdf_release_obj(resources);
	pdf_release_obj(page_tree);
	return NULL;
      }
    }
    if (crop_box) {
      pdf_release_obj(bbox);
      bbox = crop_box;
    }
  }

  if (!bbox) {
    WARN("No BoundingBox information available.");
    pdf_release_obj(page_tree);
    pdf_release_obj(resources);
    if (rotate)
      pdf_release_obj(rotate);
    return NULL;
  }

  if (rotate) {
    if (pdf_number_value(rotate) != 0.0)
      WARN("<< /Rotate %d >> found. (Not supported yet)",
            (int)pdf_number_value(rotate));
    pdf_release_obj(rotate);
    rotate = NULL;
  }
  
  if (ret_bbox != NULL)
    *ret_bbox = bbox;
  if (ret_resources != NULL)
    *ret_resources = resources;

  return page_tree;
}
Beispiel #2
0
pdf_obj *pdf_include_page(FILE *image_file, struct xform_info *p, char *res_name)
{
  pdf_obj *trailer = NULL, *catalog = NULL, *page_tree = NULL;
  pdf_obj *kids_ref, *kids;
  pdf_obj *media_box = NULL, *crop_box = NULL, *resources = NULL,
    *contents = NULL, *contents_ref = NULL;
  pdf_obj *tmp1;
#ifdef MEM_DEBUG
MEM_START
#endif
  if (!(trailer = pdf_open (image_file))) {
    fprintf (stderr, "\nCorrupt PDF file?\n");
  }
 
  /* Now just lookup catalog location */
  /* Deref catalog */
  if (trailer && (catalog = pdf_deref_obj(pdf_lookup_dict (trailer,"Root"))) ==
      NULL) {
    fprintf (stderr, "\nCatalog isn't where I expect it.\n");
  }
  if (trailer)
    pdf_release_obj (trailer);

  /* Lookup page tree in catalog */
  if (catalog) {
    page_tree = pdf_deref_obj (pdf_lookup_dict (catalog, "Pages"));
    /* Should be finished with catalog */
    pdf_release_obj (catalog);
  }
  /* Media box and resources can be inherited so start looking for
     them here */
  if (page_tree) {
    if ((tmp1 = pdf_lookup_dict (page_tree, "CropBox")))
      crop_box = pdf_deref_obj (tmp1);
    if ((tmp1 = pdf_lookup_dict (page_tree, "MediaBox")))
      media_box = pdf_deref_obj (tmp1);
    resources = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources"));
    if (resources == NULL) {
      resources = pdf_new_dict();
    }
    while ((kids_ref = pdf_lookup_dict (page_tree, "Kids")) != NULL) {
      kids = pdf_deref_obj (kids_ref);
      pdf_release_obj (page_tree);
      page_tree = pdf_deref_obj (pdf_get_array(kids, 0));
      pdf_release_obj (kids);
      /* Replace MediaBox if it's here */
      tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "MediaBox"));
      if (tmp1 && media_box)
	pdf_release_obj (media_box);
      if (tmp1) 
	media_box = tmp1;
      /* Do same for CropBox */
      tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "CropBox"));
      if (tmp1 && crop_box)
	pdf_release_obj (crop_box);
      if (tmp1) 
	crop_box = tmp1;
      /* Add resources if they're here */
      tmp1 = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources"));
      if (tmp1) {
	pdf_merge_dict (tmp1, resources);
	pdf_release_obj (resources);
	resources = tmp1;
      }
    }
    /* At this point, page_tree contains the first page.  media_box,
       crop_box,  and resources should also be set. */
    /* If there's a crop_box, replace media_box with crop_box.
       The rest of this routine assumes crop_box has been released */
    if (crop_box) {
      pdf_release_obj (media_box);
      media_box = crop_box;
      crop_box = NULL;
    }
    /* This gets bit confusing.  In the following code,
       media_box is the box the image is cropped to. 
       The bounding box is the box the image is scaled to */
    /* If user did not supply bounding box, use media_box
       (which may really be cropbox) as bounding box */
    /* Set the crop box parameters in the xform_info structure */
    p->c_llx = pdf_number_value (pdf_get_array (media_box, 0));
    p->c_lly = pdf_number_value (pdf_get_array (media_box, 1));
    p->c_urx = pdf_number_value (pdf_get_array (media_box, 2));
    p->c_ury = pdf_number_value (pdf_get_array (media_box, 3));

    /* Adjust scaling and clipping information as necessary */
    pdf_scale_image (p);

    /* Set the media box to whatever pdf_scale_image() decided
       for the crop box (which may be unchanged) */
    pdf_release_obj (media_box);
    media_box = pdf_new_array ();
    pdf_add_array (media_box, pdf_new_number (p->c_llx));
    pdf_add_array (media_box, pdf_new_number (p->c_lly));
    pdf_add_array (media_box, pdf_new_number (p->c_urx));
    pdf_add_array (media_box, pdf_new_number (p->c_ury));

    if ((contents =
	 pdf_deref_obj(pdf_lookup_dict(page_tree,"Contents")))==NULL) {
      fprintf (stderr, "\nNo Contents found\n");
      return NULL;
    }
    pdf_release_obj (page_tree);
  }
  /* Arrays of contents must be handled very differently (not implemented) */
  if (contents && contents -> type != PDF_ARRAY) {
    doc_make_form_xobj (contents, media_box,
			p->user_bbox? p->u_llx: 0.0,
			p->user_bbox? p->u_lly: 0.0,
			1.0, 1.0,
			resources, res_name);
  } else {
    fprintf (stderr, "\nIgnoring stream with with multiple segments\n");
    contents = NULL;
  }
  if (contents) {
    contents_ref = pdf_ref_obj (contents);
    pdf_release_obj (contents);
  }
  pdf_close ();
#ifdef MEM_DEBUG
MEM_END
#endif
  return (contents_ref);
}