Пример #1
0
/* ximage here is the result. DONT USE IT FOR PASSING OPTIONS! */
int
pdf_include_page (pdf_ximage        *ximage,
                  FILE              *image_file,
                  const char        *ident,
                  load_options       options)
{
  pdf_file *pf;
  xform_info info;
  pdf_obj *contents = NULL, *catalog;
  pdf_obj *page = NULL, *resources = NULL, *markinfo = NULL;

  pf = pdf_open(ident, image_file);
  if (!pf)
    return -1;

  if (pdf_file_get_version(pf) > pdf_get_version()) {
    WARN("Trying to include PDF file which has newer version number " \
         "than output PDF: 1.%d.", pdf_get_version());
  }

  pdf_ximage_init_form_info(&info);

  if (options.page_no == 0)
    options.page_no = 1;
  page = pdf_doc_get_page(pf,
                          options.page_no, options.bbox_type,
                          &info.bbox, &resources);

  if(!page)
    goto error_silent;

  catalog = pdf_file_get_catalog(pf);
  markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
  if (markinfo) {
    pdf_obj *tmp = pdf_deref_obj(pdf_lookup_dict(markinfo, "Marked"));
    pdf_release_obj(markinfo);
    if (!PDF_OBJ_BOOLEANTYPE(tmp)) {
      if (tmp)
	pdf_release_obj(tmp);
      goto error;
    } else if (pdf_boolean_value(tmp)) {
      WARN("PDF file is tagged... Ignoring tags.");
    }
    pdf_release_obj(tmp);
  }

  contents = pdf_deref_obj(pdf_lookup_dict(page, "Contents"));
  pdf_release_obj(page);
  page = NULL;

  /*
   * Handle page content stream.
   */
  {
    pdf_obj *content_new;

    if (!contents) {
      /*
       * Empty page
       */
      content_new = pdf_new_stream(0);
      /* TODO: better don't include anything if the page is empty */
    } else if (PDF_OBJ_STREAMTYPE(contents)) {
      /* 
       * We must import the stream because its dictionary
       * may contain indirect references.
       */
      content_new = pdf_import_object(contents);
    } else if (PDF_OBJ_ARRAYTYPE(contents)) {
      /*
       * Concatenate all content streams.
       */
      int idx, len = pdf_array_length(contents);
      content_new = pdf_new_stream(STREAM_COMPRESS);
      for (idx = 0; idx < len; idx++) {
	pdf_obj *content_seg = pdf_deref_obj(pdf_get_array(contents, idx));
	if (!PDF_OBJ_STREAMTYPE(content_seg) ||
	    pdf_concat_stream(content_new, content_seg) < 0) {
	  pdf_release_obj(content_seg);
	  pdf_release_obj(content_new);
	  goto error;
	}
	pdf_release_obj(content_seg);
      }
    } else {
      goto error;
    }

    if (contents)
      pdf_release_obj(contents);
    contents = content_new;
  }

  /*
   * Add entries to contents stream dictionary.
   */
  {
    pdf_obj *contents_dict, *bbox, *matrix;

    contents_dict = pdf_stream_dict(contents);
    pdf_add_dict(contents_dict,
                 pdf_new_name("Type"), pdf_new_name("XObject"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("Subtype"), pdf_new_name("Form"));
    pdf_add_dict(contents_dict,
                 pdf_new_name("FormType"), pdf_new_number(1.0));

    bbox = pdf_new_array();
    pdf_add_array(bbox, pdf_new_number(info.bbox.llx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.lly));
    pdf_add_array(bbox, pdf_new_number(info.bbox.urx));
    pdf_add_array(bbox, pdf_new_number(info.bbox.ury));

    pdf_add_dict(contents_dict, pdf_new_name("BBox"), bbox);

    matrix = pdf_new_array();
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(1.0));
    pdf_add_array(matrix, pdf_new_number(0.0));
    pdf_add_array(matrix, pdf_new_number(0.0));

    pdf_add_dict(contents_dict, pdf_new_name("Matrix"), matrix);

    pdf_add_dict(contents_dict, pdf_new_name("Resources"),
                 pdf_import_object(resources));
    pdf_release_obj(resources);
  }

  pdf_close(pf);

  pdf_ximage_set_form(ximage, &info, contents);

  return 0;

 error:
  WARN("Cannot parse document. Broken PDF file?");
 error_silent:
  if (resources)
    pdf_release_obj(resources);
  if (markinfo)
    pdf_release_obj(markinfo);
  if (page)
    pdf_release_obj(page);
  if (contents)
    pdf_release_obj(contents);

  pdf_close(pf);

  return -1;
}
Пример #2
0
int
pdf_copy_clip (FILE *image_file, int pageNo, double x_user, double y_user)
{
  pdf_obj *page_tree, *contents;
  int depth = 0, top = -1;
  const char *clip_path, *end_path;
  char *save_path, *temp;
  pdf_tmatrix M;
  double stack[6];
  pdf_file *pf;
  
  pf = pdf_open(NULL, image_file);
  if (!pf)
    return -1;

  pdf_dev_currentmatrix(&M);
  pdf_invertmatrix(&M);
  M.e += x_user; M.f += y_user;
  page_tree = pdf_get_page_obj (pf, pageNo, NULL, NULL);
  if (!page_tree) {
    pdf_close(pf);
    return -1;
  }

  contents = pdf_get_page_content(page_tree);
  pdf_release_obj(page_tree);
  if (!contents) {
    pdf_close(pf);
    return -1;
  }

  pdf_doc_add_page_content(" ", 1);

  save_path = malloc(pdf_stream_length(contents) + 1);
  strncpy(save_path, (const char *) pdf_stream_dataptr(contents),  pdf_stream_length(contents));
  clip_path = save_path;
  end_path = clip_path + pdf_stream_length(contents);
  depth = 0;

  for (; clip_path < end_path; clip_path++) {
    int color_dimen = 0;	/* silence uninitialized warning */
    char *token;
    skip_white(&clip_path, end_path);
    if (clip_path == end_path)
      break;
    if (depth > 1) {
      if (*clip_path == 'q')
        depth++;
      if (*clip_path == 'Q')
	depth--;
      parse_ident(&clip_path, end_path);
      continue;
    } else if (*clip_path == '-'
	    || *clip_path == '+'
	    || *clip_path == '.'
	    || isdigit((unsigned char)*clip_path)) {
      stack[++top] = strtod(clip_path, &temp);
      clip_path = temp;
    } else if (*clip_path == '[') {
      /* Ignore, but put a dummy value on the stack (in case of d operator) */
      parse_pdf_array(&clip_path, end_path, pf);
      stack[++top] = 0;
    } else if (*clip_path == '/') {
      if  (strncmp("/DeviceGray",	clip_path, 11) == 0
	|| strncmp("/Indexed",		clip_path, 8)  == 0
	|| strncmp("/CalGray",		clip_path, 8)  == 0) {
	color_dimen = 1;
	continue;
      }
      else if  (strncmp("/DeviceRGB",	clip_path, 10) == 0
	|| strncmp("/CalRGB",		clip_path, 7)  == 0
	|| strncmp("/Lab",		clip_path, 4)  == 0) {
	color_dimen = 3;
	continue;
      }
      else if  (strncmp("/DeviceCMYK",	clip_path, 11) == 0) {
	color_dimen = 4;
	continue;
      }
      else {
        clip_path++;
        parse_ident(&clip_path, end_path);
	skip_white(&clip_path, end_path);
	token = parse_ident(&clip_path, end_path);
        if (strcmp(token, "gs") == 0) {
	  continue;
	}
        return -1;
      }
    } else {
      int j;
      pdf_tmatrix T;
      pdf_coord  p0, p1, p2, p3;

      token = parse_ident(&clip_path, end_path);
      for (j = 0; j < sizeof(pdf_operators) / sizeof(pdf_operators[0]); j++)
        if (strcmp(token, pdf_operators[j].token) == 0)
	  break;
      if (j == sizeof(pdf_operators) / sizeof(pdf_operators[0])) {
        return -1;
      }
      switch (pdf_operators[j].opcode) {
	case  0:
	case -1:
	case -2:
	case -3:
	case -4:
	  /* Just pop the stack and do nothing. */
	  top += pdf_operators[j].opcode;
	  if (top < -1)
	    return -1;
	  break;
	case OP_SETCOLOR:
	  top -= color_dimen;
	  if (top < -1)
	    return -1;
	  break;
	case OP_CLOSEandCLIP:
	  pdf_dev_closepath();
	case OP_CLIP:
#if 0
	  pdf_dev_clip();
#else
	  pdf_dev_flushpath('W', PDF_FILL_RULE_NONZERO);
#endif
	  break;
	case OP_CONCATMATRIX:
	  if (top < 5)
	    return -1;
	  T.f = stack[top--];
	  T.e = stack[top--];
	  T.d = stack[top--];
	  T.c = stack[top--];
	  T.b = stack[top--];
	  T.a = stack[top--];
	  pdf_concatmatrix(&M, &T);
	  break;
	case OP_SETCOLORSPACE:
	  /* Do nothing. */
	  break;
	case OP_RECTANGLE:
	  if (top < 3)
	    return -1;
	  p1.y = stack[top--];
	  p1.x = stack[top--];
	  p0.y = stack[top--];
	  p0.x = stack[top--];
	  if (M.b == 0 && M.c == 0) {
	    pdf_tmatrix M0;
	    M0.a = M.a; M0.b = M.b; M0.c = M.c; M0.d = M.d;
	    M0.e = 0; M0.f = 0;
	    pdf_dev_transform(&p0, &M);
	    pdf_dev_transform(&p1, &M0);
	    pdf_dev_rectadd(p0.x, p0.y, p1.x, p1.y);
	  } else {
	    p2.x = p0.x + p1.x; p2.y = p0.y + p1.y;
	    p3.x = p0.x; p3.y = p0.y + p1.y;
	    p1.x += p0.x; p1.y = p0.y;
	    pdf_dev_transform(&p0, &M);
	    pdf_dev_transform(&p1, &M);
	    pdf_dev_transform(&p2, &M);
	    pdf_dev_transform(&p3, &M);
	    pdf_dev_moveto(p0.x, p0.y);
	    pdf_dev_lineto(p1.x, p1.y);
	    pdf_dev_lineto(p2.x, p2.y);
	    pdf_dev_lineto(p3.x, p3.y);
	    pdf_dev_closepath();
	  }
	  break;
	case OP_CURVETO:
	  if (top < 5)
	    return -1;
	  p0.y = stack[top--];
	  p0.x = stack[top--];
	  pdf_dev_transform(&p0, &M);
	  p1.y = stack[top--];
	  p1.x = stack[top--];
	  pdf_dev_transform(&p1, &M);
	  p2.y = stack[top--];
	  p2.x = stack[top--];
	  pdf_dev_transform(&p2, &M);
	  pdf_dev_curveto(p2.x, p2.y, p1.x, p1.y, p0.x, p0.y);
	  break;
	case OP_CLOSEPATH:
	  pdf_dev_closepath();
	  break;
	case OP_LINETO:
	  if (top < 1)
	    return -1;
	  p0.y = stack[top--];
	  p0.x = stack[top--];
	  pdf_dev_transform(&p0, &M);
	  pdf_dev_lineto(p0.x, p0.y);
	  break;
	case OP_MOVETO:
	  if (top < 1)
	    return -1;
	  p0.y = stack[top--];
	  p0.x = stack[top--];
	  pdf_dev_transform(&p0, &M);
	  pdf_dev_moveto(p0.x, p0.y);
	  break;
	case OP_NOOP:
	  pdf_doc_add_page_content(" n", 2);
	  break;
	case OP_GSAVE:
	  depth++;
	  break;
	case OP_GRESTORE:
	  depth--;
	  break;
	case OP_CURVETO1:
	  if (top < 3)
	    return -1;
	  p0.y = stack[top--];
	  p0.x = stack[top--];
	  pdf_dev_transform(&p0, &M);
	  p1.y = stack[top--];
	  p1.x = stack[top--];
	  pdf_dev_transform(&p1, &M);
	  pdf_dev_vcurveto(p1.x, p1.y, p0.x, p0.y);
	  break;
	case OP_CURVETO2:
	  if (top < 3)
	    return -1;
	  p0.y = stack[top--];
	  p0.x = stack[top--];
	  pdf_dev_transform(&p0, &M);
	  p1.y = stack[top--];
	  p1.x = stack[top--];
	  pdf_dev_transform(&p1, &M);
	  pdf_dev_ycurveto(p1.x, p1.y, p0.x, p0.y);
	  break;
	default:
	  return -1;
      }
    }
  }
  free(save_path);

  pdf_release_obj(contents);
  pdf_close(pf);

  return 0;
}
Пример #3
0
pdf_obj *pdf_include_page(FILE *image_file, struct xform_info *p, char *res_name)
{
  pdf_obj *trailer = NULL, *catalog = NULL, *page_tree = NULL;
  pdf_obj *kids_ref, *kids;
  pdf_obj *media_box = NULL, *crop_box = NULL, *resources = NULL,
    *contents = NULL, *contents_ref = NULL;
  pdf_obj *tmp1;
#ifdef MEM_DEBUG
MEM_START
#endif
  if (!(trailer = pdf_open (image_file))) {
    fprintf (stderr, "\nCorrupt PDF file?\n");
  }
 
  /* Now just lookup catalog location */
  /* Deref catalog */
  if (trailer && (catalog = pdf_deref_obj(pdf_lookup_dict (trailer,"Root"))) ==
      NULL) {
    fprintf (stderr, "\nCatalog isn't where I expect it.\n");
  }
  if (trailer)
    pdf_release_obj (trailer);

  /* Lookup page tree in catalog */
  if (catalog) {
    page_tree = pdf_deref_obj (pdf_lookup_dict (catalog, "Pages"));
    /* Should be finished with catalog */
    pdf_release_obj (catalog);
  }
  /* Media box and resources can be inherited so start looking for
     them here */
  if (page_tree) {
    if ((tmp1 = pdf_lookup_dict (page_tree, "CropBox")))
      crop_box = pdf_deref_obj (tmp1);
    if ((tmp1 = pdf_lookup_dict (page_tree, "MediaBox")))
      media_box = pdf_deref_obj (tmp1);
    resources = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources"));
    if (resources == NULL) {
      resources = pdf_new_dict();
    }
    while ((kids_ref = pdf_lookup_dict (page_tree, "Kids")) != NULL) {
      kids = pdf_deref_obj (kids_ref);
      pdf_release_obj (page_tree);
      page_tree = pdf_deref_obj (pdf_get_array(kids, 0));
      pdf_release_obj (kids);
      /* Replace MediaBox if it's here */
      tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "MediaBox"));
      if (tmp1 && media_box)
	pdf_release_obj (media_box);
      if (tmp1) 
	media_box = tmp1;
      /* Do same for CropBox */
      tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "CropBox"));
      if (tmp1 && crop_box)
	pdf_release_obj (crop_box);
      if (tmp1) 
	crop_box = tmp1;
      /* Add resources if they're here */
      tmp1 = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources"));
      if (tmp1) {
	pdf_merge_dict (tmp1, resources);
	pdf_release_obj (resources);
	resources = tmp1;
      }
    }
    /* At this point, page_tree contains the first page.  media_box,
       crop_box,  and resources should also be set. */
    /* If there's a crop_box, replace media_box with crop_box.
       The rest of this routine assumes crop_box has been released */
    if (crop_box) {
      pdf_release_obj (media_box);
      media_box = crop_box;
      crop_box = NULL;
    }
    /* This gets bit confusing.  In the following code,
       media_box is the box the image is cropped to. 
       The bounding box is the box the image is scaled to */
    /* If user did not supply bounding box, use media_box
       (which may really be cropbox) as bounding box */
    /* Set the crop box parameters in the xform_info structure */
    p->c_llx = pdf_number_value (pdf_get_array (media_box, 0));
    p->c_lly = pdf_number_value (pdf_get_array (media_box, 1));
    p->c_urx = pdf_number_value (pdf_get_array (media_box, 2));
    p->c_ury = pdf_number_value (pdf_get_array (media_box, 3));

    /* Adjust scaling and clipping information as necessary */
    pdf_scale_image (p);

    /* Set the media box to whatever pdf_scale_image() decided
       for the crop box (which may be unchanged) */
    pdf_release_obj (media_box);
    media_box = pdf_new_array ();
    pdf_add_array (media_box, pdf_new_number (p->c_llx));
    pdf_add_array (media_box, pdf_new_number (p->c_lly));
    pdf_add_array (media_box, pdf_new_number (p->c_urx));
    pdf_add_array (media_box, pdf_new_number (p->c_ury));

    if ((contents =
	 pdf_deref_obj(pdf_lookup_dict(page_tree,"Contents")))==NULL) {
      fprintf (stderr, "\nNo Contents found\n");
      return NULL;
    }
    pdf_release_obj (page_tree);
  }
  /* Arrays of contents must be handled very differently (not implemented) */
  if (contents && contents -> type != PDF_ARRAY) {
    doc_make_form_xobj (contents, media_box,
			p->user_bbox? p->u_llx: 0.0,
			p->user_bbox? p->u_lly: 0.0,
			1.0, 1.0,
			resources, res_name);
  } else {
    fprintf (stderr, "\nIgnoring stream with with multiple segments\n");
    contents = NULL;
  }
  if (contents) {
    contents_ref = pdf_ref_obj (contents);
    pdf_release_obj (contents);
  }
  pdf_close ();
#ifdef MEM_DEBUG
MEM_END
#endif
  return (contents_ref);
}