Exemplo n.º 1
0
static void info_update(fz_context *ctx,pdf_document *xref,char *producer)

    {
    char moddate[64];
    time_t now;
    struct tm date;
    pdf_obj *info;
    int newinfo;

    if (xref->trailer==NULL)
        return;
    time(&now);
    date=(*localtime(&now));
    sprintf(moddate,"D:%04d%02d%02d%02d%02d%02d%s",
           date.tm_year+1900,date.tm_mon+1,date.tm_mday,
           date.tm_hour,date.tm_min,date.tm_sec,
           wsys_utc_string());
    info=pdf_dict_gets(xref->trailer,"Info");
    if (info==NULL)
        {
        newinfo=1;
        info=pdf_new_dict(ctx,2);
        }
    else
        newinfo=0;
    dict_put_string(ctx,info,"Producer",producer);
    dict_put_string(ctx,info,"ModDate",moddate);
    if (newinfo)
        {
        pdf_dict_puts(xref->trailer,"Info",info);
        pdf_drop_obj(info);
        }
    }
Exemplo n.º 2
0
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_document *doc;
	pdf_obj *dict;
	int i, n;

	RESOLVE(obj);
	if (!OBJ_IS_DICT(obj))
		fz_throw(ctx, FZ_ERROR_GENERIC, "not a dict (%s)", pdf_objkindstr(obj));

	doc = DICT(obj)->doc;
	n = pdf_dict_len(ctx, obj);
	dict = pdf_new_dict(ctx, doc, n);
	fz_try(ctx)
		for (i = 0; i < n; i++)
			pdf_dict_put(ctx, dict, pdf_dict_get_key(ctx, obj, i), pdf_dict_get_val(ctx, obj, i));
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, dict);
		fz_rethrow(ctx);
	}

	return dict;
}
Exemplo n.º 3
0
/* Creates the PDF Encoding entry for the encoding.
 * If baseenc is non-null, it is used as BaseEncoding entry.
 */
static pdf_obj *
create_encoding_resource (pdf_encoding *encoding, pdf_encoding *baseenc)
{
  pdf_obj *differences;
  ASSERT(encoding);
  ASSERT(!encoding->resource);

  differences = make_encoding_differences(encoding->glyphs,
					  baseenc ? baseenc->glyphs : NULL,
					  encoding->is_used);
  
  if (differences) {
    pdf_obj *resource = pdf_new_dict();
    if (baseenc)
      pdf_add_dict(resource, pdf_new_name("BaseEncoding"),
		   pdf_link_obj(baseenc->resource));
    pdf_add_dict(resource, pdf_new_name("Differences"),  differences);
    return resource; 
  } else {
    /* Fix a bug with the MinionPro package using MnSymbol fonts
     * in its virtual fonts:
     *
     * Some font may have font_id even if no character is used.
     * For example, suppose that a virtual file A.vf uses two
     * other fonts, B and C. Even if only characters of B are used
     * in a DVI document, C will have font_id too.
     * In this case, both baseenc and differences can be NULL.
     *
     * Actually these fonts will be ignored in pdffont.c.
     */
    return baseenc ? pdf_link_obj(baseenc->resource) : NULL;
  }
}
Exemplo n.º 4
0
static void
pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int own_res,
		pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	fz_buffer *buffer;

	if (!obj)
		return;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		if (own_res)
		{
			pdf_obj *r = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
			if (r)
				orig_res = r;
		}

		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, orig_res, res, text_filter, after_text, arg);

		pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);
		pdf_close_processor(ctx, proc_filter);
		pdf_close_processor(ctx, proc_buffer);

		pdf_update_stream(ctx, doc, obj, buffer, 0);

		if (own_res)
		{
			ref = pdf_add_object(ctx, doc, res);
			pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemplo n.º 5
0
pdf_page *
pdf_create_page(pdf_document *doc, fz_rect mediabox, int res, int rotate)
{
	pdf_page *page = NULL;
	pdf_obj *pageobj;
	float userunit = 1;
	fz_context *ctx = doc->ctx;
	fz_matrix ctm, tmp;
	fz_rect realbox;

	page = fz_malloc_struct(ctx, pdf_page);

	fz_try(ctx)
	{
		page->resources = NULL;
		page->contents = NULL;
		page->transparency = 0;
		page->links = NULL;
		page->annots = NULL;
		page->me = pageobj = pdf_new_dict(doc, 4);

		pdf_dict_puts_drop(pageobj, "Type", pdf_new_name(doc, "Page"));

		page->mediabox.x0 = fz_min(mediabox.x0, mediabox.x1) * userunit;
		page->mediabox.y0 = fz_min(mediabox.y0, mediabox.y1) * userunit;
		page->mediabox.x1 = fz_max(mediabox.x0, mediabox.x1) * userunit;
		page->mediabox.y1 = fz_max(mediabox.y0, mediabox.y1) * userunit;
		pdf_dict_puts_drop(pageobj, "MediaBox", pdf_new_rect(doc, &page->mediabox));

		/* Snap page->rotate to 0, 90, 180 or 270 */
		if (page->rotate < 0)
			page->rotate = 360 - ((-page->rotate) % 360);
		if (page->rotate >= 360)
			page->rotate = page->rotate % 360;
		page->rotate = 90*((page->rotate + 45)/90);
		if (page->rotate > 360)
			page->rotate = 0;
		pdf_dict_puts_drop(pageobj, "Rotate", pdf_new_int(doc, page->rotate));

		fz_pre_rotate(fz_scale(&ctm, 1, -1), -page->rotate);
		realbox = page->mediabox;
		fz_transform_rect(&realbox, &ctm);
		fz_pre_scale(fz_translate(&tmp, -realbox.x0, -realbox.y0), userunit, userunit);
		fz_concat(&ctm, &ctm, &tmp);
		page->ctm = ctm;
		/* Do not create a Contents, as an empty Contents dict is not
		 * valid. See Bug 694712 */
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(page->me);
		fz_free(ctx, page);
		fz_rethrow_message(ctx, "Failed to create page");
	}

	return page;
}
Exemplo n.º 6
0
static void
pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int own_res)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	fz_buffer *buffer;

	if (!obj)
		return;

	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		if (own_res)
		{
			pdf_obj *r = pdf_dict_get(ctx, obj, PDF_NAME_Resources);
			if (r)
				orig_res = r;
		}

		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);

		pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);

		pdf_update_stream(ctx, doc, obj, buffer, 0);

		if (own_res)
		{
			ref = pdf_new_ref(ctx, doc, res);
			pdf_dict_put(ctx, obj, PDF_NAME_Resources, ref);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning xobject");
	}
}
Exemplo n.º 7
0
static void
pdf_dev_alpha(fz_context *ctx, pdf_device *pdev, float alpha, int stroke)
{
	int i;
	pdf_document *doc = pdev->doc;
	gstate *gs = CURRENT_GSTATE(pdev);

	/* If the alpha is unchanged, nothing to do */
	if (gs->alpha[stroke] == alpha)
		return;

	/* Have we sent such an alpha before? */
	for (i = 0; i < pdev->num_alphas; i++)
		if (pdev->alphas[i].alpha == alpha && pdev->alphas[i].stroke == stroke)
			break;

	if (i == pdev->num_alphas)
	{
		pdf_obj *o;
		pdf_obj *ref = NULL;

		fz_var(ref);

		/* No. Need to make a new one */
		if (pdev->num_alphas == pdev->max_alphas)
		{
			int newmax = pdev->max_alphas * 2;
			if (newmax == 0)
				newmax = 4;
			pdev->alphas = fz_resize_array(ctx, pdev->alphas, newmax, sizeof(*pdev->alphas));
			pdev->max_alphas = newmax;
		}
		pdev->alphas[i].alpha = alpha;
		pdev->alphas[i].stroke = stroke;

		o = pdf_new_dict(ctx, doc, 1);
		fz_try(ctx)
		{
			char text[32];
			pdf_dict_put_drop(ctx, o, (stroke ? PDF_NAME_CA : PDF_NAME_ca), pdf_new_real(ctx, doc, alpha));
			ref = pdf_add_object(ctx, doc, o);
			fz_snprintf(text, sizeof(text), "ExtGState/Alp%d", i);
			pdf_dict_putp(ctx, pdev->resources, text, ref);
		}
		fz_always(ctx)
		{
			pdf_drop_obj(ctx, o);
			pdf_drop_obj(ctx, ref);
		}
		fz_catch(ctx)
		{
			fz_rethrow(ctx);
		}
		pdev->num_alphas++;
	}
Exemplo n.º 8
0
static void page_merge(int page_from, int page_to, pdf_graft_map *graft_map)
{
	pdf_obj *pageref = NULL;
	pdf_obj *page_dict;
	pdf_obj *obj = NULL, *ref = NULL;
	/* Include minimal number of objects for page.  Do not include items that
	 * reference other pages */
	pdf_obj *known_page_objs[] = { PDF_NAME_Contents, PDF_NAME_Resources,
		PDF_NAME_MediaBox, PDF_NAME_CropBox, PDF_NAME_BleedBox, PDF_NAME_TrimBox,
		PDF_NAME_ArtBox, PDF_NAME_Rotate, PDF_NAME_UserUnit};
	int n = nelem(known_page_objs);
	int i;
	int num;

	fz_var(obj);
	fz_var(ref);

	fz_try(ctx)
	{
		pageref = pdf_lookup_page_obj(ctx, doc_src, page_from - 1);

		/* Make a new dictionary and copy over the items from the source object to
		* the new dict that we want to deep copy. */
		page_dict = pdf_new_dict(ctx, doc_des, 4);

		pdf_dict_put_drop(ctx, page_dict, PDF_NAME_Type, PDF_NAME_Page);

		for (i = 0; i < n; i++)
		{
			obj = pdf_dict_get(ctx, pageref, known_page_objs[i]);
			if (obj != NULL)
				pdf_dict_put_drop(ctx, page_dict, known_page_objs[i], pdf_graft_object(ctx, doc_des, doc_src, obj, graft_map));
		}

		/* Add the dictionary */
		obj = pdf_add_object_drop(ctx, doc_des, page_dict);

		/* Get indirect ref */
		num = pdf_to_num(ctx, obj);
		ref = pdf_new_indirect(ctx, doc_des, num, 0);

		/* Insert */
		pdf_insert_page(ctx, doc_des, page_to - 1, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, obj);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemplo n.º 9
0
pdf_obj *parse_pdf_dict (char **start, char *end)
{
  pdf_obj *result, *tmp1, *tmp2;
  char *save = *start;
  skip_white(start, end);
  if (*((*start)++) != '<' ||
      *((*start)++) != '<') {
    *start = save;
    dump (*start, end);
    return NULL;
  }
  result = pdf_new_dict ();
    skip_white(start, end);
  while (*start < end &&
	 **start != '>') {
    if ((tmp1 = parse_pdf_name (start, end)) == NULL) {
      pdf_release_obj (result); 
      {
	*start = save;
	dump (*start, end);
	return NULL;
      }
    };
    if ((tmp2 = parse_pdf_object (start, end)) == NULL) {
      pdf_release_obj (result);
      pdf_release_obj (tmp1); 
      {
	*start = save;
	dump (*start, end);
	return NULL;
      }
    }
    pdf_add_dict (result, tmp1, tmp2);
    skip_white(start, end);
  }
  if (*start >= end) {
    pdf_release_obj (result);
    *start = save;
    dump (*start, end);
    return NULL;
  }
  if (*((*start)++) == '>' &&
      *((*start)++) == '>') {
    return result;
  } else {
    pdf_release_obj (result);
    fprintf (stderr, "\nDictionary object ended prematurely\n");
    *start = save;
    dump (*start, end);
    return NULL;
  }
}
Exemplo n.º 10
0
static void html_make_link_dict (char *name)
{
  pdf_obj *color;
  if (!link_dict) {
    link_dict = pdf_new_dict();
    pdf_add_dict(link_dict, pdf_new_name("Type"), pdf_new_name ("Annot"));
    pdf_add_dict(link_dict, pdf_new_name("Subtype"), pdf_new_name ("Link"));
    color = pdf_new_array ();
    pdf_add_array (color, pdf_new_number (0));
    pdf_add_array (color, pdf_new_number (1));
    pdf_add_array (color, pdf_new_number (1));
    pdf_add_dict(link_dict, pdf_new_name("C"), color);
    if (name && *name == '#' && !(base_value)) {
      pdf_add_dict (link_dict, pdf_new_name("Dest"), pdf_new_string(name+1,strlen(name+1)));
    } else if (name) {    /* Assume its a URL */
      char *url;
      int len;
      pdf_obj *action;
      len = strlen(name)+1;
      if (base_value)
	len+=strlen(base_value);
      url = NEW (len, char);
      if (base_value)
	strcpy (url, base_value);
      else
	url[0] = 0;
      strcat (url, name);
      action = pdf_new_dict();
      pdf_add_dict (action, pdf_new_name ("Type"), pdf_new_name ("Action"));
      pdf_add_dict (action, pdf_new_name ("S"), pdf_new_name ("URI"));
      pdf_add_dict (action, pdf_new_name ("URI"),
		    pdf_new_string (url, len));
      pdf_add_dict (link_dict, pdf_new_name ("A"), pdf_ref_obj (action));
      pdf_release_obj (action);
      RELEASE (url);
    }
    pdf_doc_begin_annot (link_dict);
  } else {
Exemplo n.º 11
0
/*
** From MuPDF pdfclean.c
*/
static void wmupdf_preserve_old_dests(pdf_obj *olddests,fz_context *ctx,pdf_document *xref,
                                      pdf_obj *pages)

    {
    int i;
    pdf_obj *names = pdf_new_dict(ctx,1);
    pdf_obj *dests = pdf_new_dict(ctx,1);
    pdf_obj *names_list = pdf_new_array(ctx,32);
    int len = pdf_dict_len(olddests);
    pdf_obj *root;

    for (i=0;i<len;i++)
        {
        pdf_obj *key = pdf_dict_get_key(olddests,i);
        pdf_obj *val = pdf_dict_get_val(olddests,i);
        pdf_obj *key_str = pdf_new_string(ctx,pdf_to_name(key),strlen(pdf_to_name(key)));
        pdf_obj *dest = pdf_dict_gets(val,"D");

        dest = pdf_array_get(dest ? dest : val, 0);
        if (pdf_array_contains(pdf_dict_gets(pages,"Kids"),dest))
            {
            pdf_array_push(names_list, key_str);
            pdf_array_push(names_list, val);
            }
        pdf_drop_obj(key_str);
        }

    root = pdf_dict_gets(xref->trailer,"Root");
    pdf_dict_puts(dests,"Names",names_list);
    pdf_dict_puts(names,"Dests",dests);
    pdf_dict_puts(root,"Names",names);

    pdf_drop_obj(names);
    pdf_drop_obj(dests);
    pdf_drop_obj(names_list);
    pdf_drop_obj(olddests);
    }
Exemplo n.º 12
0
static pdf_obj *
hash_to_pdf_dict (fz_context *ctx, pdf_document *doc, pdfout_data *hash,
		  int *page)
{
  pdf_obj *dict_obj = pdf_new_dict (ctx, doc, 3);
  int len = pdfout_data_hash_len (ctx, hash);
  for (int j = 0; j < len; ++j)
    {
      char *key, *value;
      int value_len;
      pdfout_data_hash_get_key_value (ctx, hash, &key, &value, &value_len, j);

      if (streq (key, "page"))
	*page = pdfout_strtoint_null (ctx, value);
      else if (streq (key, "prefix"))
	{
	  pdf_obj *string = pdfout_utf8_to_str_obj (ctx, doc, value,
						    value_len);
	  pdf_dict_puts_drop (ctx, dict_obj, "P", string);
	}
      else if (streq (key, "first"))
	{
	  int first = pdfout_strtoint_null (ctx, value);
	  pdf_dict_puts_drop (ctx, dict_obj, "St",
			      pdf_new_int (ctx, doc, first));
	}
      else if (streq (key, "style"))
	{
	  const char *name;
	  if (streq (value, "arabic"))
	    name = "D";
	  else if (streq (value, "Roman"))
	    name = "R";
	  else if (streq (value, "roman"))
	    name = "r";
	  else if (streq (value, "Letters"))
	    name = "A";
	  else if (streq (value, "letters"))
	    name = "a";
	  else
	    abort ();
	  pdf_obj *name_obj = pdf_new_name (ctx, doc, name);
	  pdf_dict_puts_drop (ctx, dict_obj, "S", name_obj);
	}
     
    }

  return dict_obj;
}
Exemplo n.º 13
0
/*
  put_pages_on_new_sheet() - put the contents of pages onto one page in dest_doc

  How does it work?
  1.  Create a page for the new sheet that will contain all source-pages
  2.  Analyze their sizes! If too small, just center. If too big, warn
  3.  Analye their userunit-value... if one of them != 1 we have to insert 
      scale-operations to all others and need to use userunit in the sheet too
  4.  Copy each entry of the resource-dicts of every page of the sheet 
      (if it has not already been copied for a previous sheet)
  5.  Create a dict that contains src-resource-name to dest-resource-name for 
      this sheet (this is an n:m relation where n >= m)
  6.  For each source-page-stream create a new content-stream and change
      scale (if user-unit is needed), translation and rotation... And add a 
      clipping-path around the page
  7.  Copy all source content-streams of the current page to this new content-
      stream renaming all its resources to the new names
  8.  TODO: Annots?
  9.  TODO: Preseparated pages
  10. TODO: Merge procedure sets 
*/
static int put_pages_on_new_sheet(fz_context *dest_ctx, pdf_document *dest_doc, 
	fz_context *src_ctx, pdf_document *src_doc, 
	struct pos_info *positions, size_t put_count)
{
	/* copy each entry of the resource dict */
	static const int RENAME_INITIAL_CAP = 128;
	struct put_info put_info = { dest_doc, src_doc, NULL, NULL, 0 };
	
	/* what destianation page is currently opened? */
	int sheet_pagenum = -1;
	pdf_page *sheet = NULL;
	
	size_t i;
	for(i = 0; i < put_count; i++) {
		/* if the current sheet changes, we need to close the current and open 
		   the new one*/
		if(sheet_pagenum != positions[i].sheet_pagenum) {
			if(sheet != NULL)
				pdf_drop_page(dest_ctx, sheet);
			sheet_pagenum = positions[i].sheet_pagenum;
			sheet = pdf_load_page(dest_ctx, dest_doc, sheet_pagenum);
			// TODO: We assume that the destination-page uses arrays for the
			// content and has a usable resoruce-dict... TODO: add those checks
		}
		
		/* load the source-page */
		pdf_page *src_page = pdf_load_page(src_ctx, src_doc, positions[i].src_pagenum);
		
		/* create a rename_dict */
		put_info.rename_dict = pdf_new_dict(dest_ctx, dest_doc, RENAME_INITIAL_CAP);
	
		/* copy all resources, adjust the page and finally copy the content */
		copy_and_rename_resources(dest_ctx, sheet->resources, 
			src_ctx, src_page->resources, &put_info);
		adjust_page_position(src_ctx, src_doc, src_page, positions + i);
		copy_content_streams_of_page(
			dest_ctx, sheet, src_ctx, src_page, &put_info, positions + i);

		/* free everything we created for that source-page */
		pdf_drop_obj(dest_ctx, put_info.rename_dict);
		pdf_drop_page(src_ctx, src_page);
	}

	if(sheet != NULL)
		pdf_drop_page(dest_ctx, sheet);

	return(0);
}
Exemplo n.º 14
0
static pdf_obj *start_new_destpage(fz_context *ctx,double width_pts,double height_pts)

    {
    pdf_obj *pageobj;
    pdf_obj *mbox;

    pageobj=pdf_new_dict(ctx,2);
    pdf_dict_puts(pageobj,"Type",pdf_new_name(ctx,"Page"));
    mbox=pdf_new_array(ctx,4);
    pdf_array_push(mbox,pdf_new_real(ctx,0.));
    pdf_array_push(mbox,pdf_new_real(ctx,0.));
    pdf_array_push(mbox,pdf_new_real(ctx,width_pts));
    pdf_array_push(mbox,pdf_new_real(ctx,height_pts));
    pdf_dict_puts(pageobj,"MediaBox",mbox);
    return(pageobj);
    }
Exemplo n.º 15
0
pdf_obj *
pdf_load_name_tree(pdf_document *xref, char *which)
{
	fz_context *ctx = xref->ctx;

	pdf_obj *root = pdf_dict_gets(xref->trailer, "Root");
	pdf_obj *names = pdf_dict_gets(root, "Names");
	pdf_obj *tree = pdf_dict_gets(names, which);
	if (pdf_is_dict(tree))
	{
		pdf_obj *dict = pdf_new_dict(ctx, 100);
		pdf_load_name_tree_imp(dict, xref, tree);
		return dict;
	}
	return NULL;
}
Exemplo n.º 16
0
static void page_merge(int page_from, int page_to, pdf_graft_map *graft_map)
{
	pdf_obj *page_ref;
	pdf_obj *page_dict;
	pdf_obj *obj;
	pdf_obj *ref = NULL;
	int i;

	/* Copy as few key/value pairs as we can. Do not include items that reference other pages. */
	static pdf_obj * const copy_list[] = { PDF_NAME(Contents), PDF_NAME(Resources),
		PDF_NAME(MediaBox), PDF_NAME(CropBox), PDF_NAME(BleedBox), PDF_NAME(TrimBox), PDF_NAME(ArtBox),
		PDF_NAME(Rotate), PDF_NAME(UserUnit) };

	fz_var(ref);

	fz_try(ctx)
	{
		page_ref = pdf_lookup_page_obj(ctx, doc_src, page_from - 1);
		pdf_flatten_inheritable_page_items(ctx, page_ref);

		/* Make a new page object dictionary to hold the items we copy from the source page. */
		page_dict = pdf_new_dict(ctx, doc_des, 4);

		pdf_dict_put(ctx, page_dict, PDF_NAME(Type), PDF_NAME(Page));

		for (i = 0; i < nelem(copy_list); i++)
		{
			obj = pdf_dict_get(ctx, page_ref, copy_list[i]);
			if (obj != NULL)
				pdf_dict_put_drop(ctx, page_dict, copy_list[i], pdf_graft_mapped_object(ctx, graft_map, obj));
		}

		/* Add the page object to the destination document. */
		ref = pdf_add_object_drop(ctx, doc_des, page_dict);

		/* Insert it into the page tree. */
		pdf_insert_page(ctx, doc_des, page_to - 1, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemplo n.º 17
0
static ErrorCode juggler_impose_create_sheet(fz_context *ctx, pdf_document *dest_doc, int width, int height)
{
	/* create and insert page to dest_doc */
	fz_rect rect = { 0, 0, width, height };// { 0, 0, 1390, 1684 };
	pdf_page *sheet = pdf_create_page(ctx, dest_doc, rect, 0, 0);

	sheet->resources = pdf_new_dict(ctx, dest_doc, 16);
	pdf_dict_puts(ctx, sheet->me, "Resources", sheet->resources); /* will be droped when freeing the page */
	sheet->contents = pdf_new_array(ctx, dest_doc, 8);
	pdf_dict_puts(ctx, sheet->me, "Contents", sheet->contents); /* will be droped when freeing the page */

	pdf_insert_page(ctx, dest_doc, sheet, INT_MAX);
	
	pdf_drop_page(ctx, sheet);
	
	return(NoError);
}
Exemplo n.º 18
0
static void insert_resource_name(pdf_csi *csi, pdf_filter_state *state, const char *key, const char *name)
{
	pdf_obj *xobj;
	pdf_obj *obj;

	if (!state->resources || !name || name[0] == 0)
		return;

	xobj = pdf_dict_gets(csi->rdb, key);
	obj = pdf_dict_gets(xobj, name);

	xobj = pdf_dict_gets(state->resources, key);
	if (xobj == NULL) {
		xobj = pdf_new_dict(csi->doc, 1);
		pdf_dict_puts_drop(state->resources, key, xobj);
	}
	pdf_dict_putp(xobj, name, obj);
}
Exemplo n.º 19
0
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *dict;
	int i, n;

	RESOLVE(obj);
	if (!obj)
		return NULL; /* Can't warn :( */
	if (obj->kind != PDF_DICT)
		fz_warn(ctx, "assert: not a dict (%s)", pdf_objkindstr(obj));

	n = pdf_dict_len(obj);
	dict = pdf_new_dict(ctx, n);
	for (i = 0; i < n; i++)
		fz_dict_put(dict, pdf_dict_get_key(obj, i), pdf_dict_get_val(obj, i));

	return dict;
}
Exemplo n.º 20
0
static int new_stream_object(pdf_document *xref,fz_context *ctx,char *buf)

    {
    int ref;
    pdf_obj *obj,*len;
    fz_buffer *fzbuf;

    ref = pdf_create_object(xref);
    obj = pdf_new_dict(ctx,1);
    len=pdf_new_int(ctx,strlen(buf));
    pdf_dict_puts(obj,"Length",len);
    pdf_drop_obj(len);
    pdf_update_object(xref,ref,obj);
    pdf_drop_obj(obj);
    fzbuf=fz_new_buffer(ctx,strlen(buf));
    fz_write_buffer(ctx,fzbuf,(unsigned char *)buf,strlen(buf));
    pdf_update_stream(xref,ref,fzbuf);
    fz_drop_buffer(ctx,fzbuf);
    return(ref);
    }
Exemplo n.º 21
0
static void
copy_resource(fz_context *ctx, pdf_filter_processor *p, pdf_obj *key, const char *name)
{
	pdf_obj *res, *obj;

	if (!name || name[0] == 0)
		return;

	res = pdf_dict_get(ctx, p->old_rdb, key);
	obj = pdf_dict_gets(ctx, res, name);
	if (obj)
	{
		res = pdf_dict_get(ctx, p->new_rdb, key);
		if (!res)
		{
			res = pdf_new_dict(ctx, p->doc, 1);
			pdf_dict_put_drop(ctx, p->new_rdb, key, res);
		}
		pdf_dict_putp(ctx, res, name, obj);
	}
}
Exemplo n.º 22
0
pdf_obj *
pdf_new_xobject(fz_context *ctx, pdf_document *doc, fz_rect bbox, fz_matrix matrix, pdf_obj *res, fz_buffer *contents)
{
	pdf_obj *ind = NULL;
	pdf_obj *form = pdf_new_dict(ctx, doc, 5);
	fz_try(ctx)
	{
		pdf_dict_put(ctx, form, PDF_NAME(Type), PDF_NAME(XObject));
		pdf_dict_put(ctx, form, PDF_NAME(Subtype), PDF_NAME(Form));
		pdf_dict_put_rect(ctx, form, PDF_NAME(BBox), bbox);
		pdf_dict_put_matrix(ctx, form, PDF_NAME(Matrix), matrix);
		if (res)
			pdf_dict_put(ctx, form, PDF_NAME(Resources), res);
		ind = pdf_add_stream(ctx, doc, contents, form, 0);
	}
	fz_always(ctx)
		pdf_drop_obj(ctx, form);
	fz_catch(ctx)
		fz_rethrow(ctx);
	return ind;
}
Exemplo n.º 23
0
void
pdfout_page_labels_set (fz_context *ctx, pdf_document *doc,
			pdfout_data *labels)
{
  if (labels)
    check_page_labels (ctx, labels);
  
  pdf_obj *root = pdf_dict_get (ctx, pdf_trailer (ctx, doc), PDF_NAME_Root);
  
  if (root == NULL)
    pdfout_throw (ctx, "no document catalog, cannot set/unset page labels");
  
  if (labels == NULL)
    {
      /* Remove page labels.  */
      pdf_dict_dels (ctx, root, "PageLabels");
      return;
    }

  int num = pdfout_data_array_len (ctx, labels);
  pdf_obj *array_obj = pdf_new_array (ctx, doc, 2 * num);
  
  for (int i = 0; i < num; ++i)
    {
      pdfout_data *hash = pdfout_data_array_get (ctx, labels, i);
      int page;
      pdf_obj *dict_obj = hash_to_pdf_dict (ctx, doc, hash, &page);

      pdf_obj *page_obj = pdf_new_int (ctx, doc, page);
      pdf_array_push_drop (ctx, array_obj, page_obj);
      
      pdf_array_push_drop (ctx, array_obj, dict_obj);
    }
      
  pdf_obj *labels_obj = pdf_new_dict (ctx, doc, 1);
  pdf_dict_puts_drop (ctx, labels_obj, "Nums", array_obj);
  pdf_dict_puts_drop (ctx, root, "PageLabels", labels_obj);
  
}
Exemplo n.º 24
0
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *dict;
	int i, n;

	RESOLVE(obj);
	if (obj >= PDF_OBJ__LIMIT)
	{
		pdf_document *doc = DICT(obj)->doc;

		if (obj->kind != PDF_DICT)
			fz_warn(ctx, "assert: not a dict (%s)", pdf_objkindstr(obj));

		n = pdf_dict_len(ctx, obj);
		dict = pdf_new_dict(ctx, doc, n);
		for (i = 0; i < n; i++)
			pdf_dict_put(ctx, dict, pdf_dict_get_key(ctx, obj, i), pdf_dict_get_val(ctx, obj, i));

		return dict;
	}
	return NULL; /* Can't warn :( */
}
Exemplo n.º 25
0
static int copy_and_rename_resources(fz_context *dest_ctx, pdf_obj *dest, 
	fz_context *src_ctx, pdf_obj *src, struct put_info *info)
{
    static char *RESOURCE_TYPES[] = 
		{ "ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", 
		  "Font", "Properties" };
	static char *RESOURCE_PREFIXES[] =
		{ "E_", "C_", "P_", "S_", "X_", "F_", "T_" };
    static const size_t RESOURCE_TYPES_COUNT = 
		sizeof(RESOURCE_TYPES) / sizeof(RESOURCE_TYPES[0]);

    size_t i;
    for(i = 0; i < RESOURCE_TYPES_COUNT; i++) {
        pdf_obj *src_type = pdf_dict_gets(src_ctx, src, RESOURCE_TYPES[i]);
        pdf_obj *dest_type = pdf_dict_gets(dest_ctx, dest, RESOURCE_TYPES[i]);

		/* we only copy resource-dicts that exist in the source-page ;) */
		if(pdf_is_dict(src_ctx, src_type)) {printf("TODO: REMOVEME: Copying and renaming resources of type %s!!!\n", RESOURCE_TYPES[i]);
			/* if this kind of dict does not exists in the dest resources, 
			   we must create it */
			if(!pdf_is_dict(dest_ctx, dest_type)) {
				dest_type = pdf_new_dict(dest_ctx, info->dest_doc, 8);
				pdf_dict_puts_drop(dest_ctx, dest, RESOURCE_TYPES[i], dest_type);
			}

			if(copy_and_rename_resource(dest_ctx, dest_type, 
				src_ctx, src_type, RESOURCE_PREFIXES[i], info))
			{
				return(2);
			}
		}
	}

    // TODO: Merge Procedure-Sets (although they are obsolete)

	return(0);
}
Exemplo n.º 26
0
static void
pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	pdf_obj *charprocs;
	int i, l;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	fz_try(ctx)
	{
		res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
		if (res)
			orig_res = res;
		res = NULL;

		res = pdf_new_dict(ctx, doc, 1);

		charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
		l = pdf_dict_len(ctx, charprocs);

		for (i = 0; i < l; i++)
		{
			pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
			fz_buffer *buffer = fz_new_buffer(ctx, 1024);
			fz_try(ctx)
			{
				proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
				if (sanitize)
				{
					proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, orig_res, res);
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
					pdf_close_processor(ctx, proc_filter);
				}
				else
				{
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
				}
				pdf_close_processor(ctx, proc_buffer);

				pdf_update_stream(ctx, doc, val, buffer, 0);
			}
			fz_always(ctx)
			{
				pdf_drop_processor(ctx, proc_filter);
				pdf_drop_processor(ctx, proc_buffer);
				fz_drop_buffer(ctx, buffer);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		pdf_dict_put(ctx, res, PDF_NAME(ProcSet), pdf_dict_get(ctx, orig_res, PDF_NAME(ProcSet)));

		ref = pdf_add_object(ctx, doc, res);
		pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemplo n.º 27
0
int
pdf_font_load_pkfont (pdf_font *font)
{
  pdf_obj  *fontdict;
  char     *usedchars;
  char     *ident;
  unsigned  dpi;
  FILE     *fp;
  double    point_size, pix2charu;
  int       opcode, code, firstchar, lastchar, prev;
  pdf_obj  *charprocs, *procset, *encoding, *tmp_array;
  double    widths[256];
  pdf_rect  bbox;
  char      charavail[256];
#if  ENABLE_GLYPHENC
  int       encoding_id;
  char    **enc_vec;
#endif /* ENABLE_GLYPHENC */
  int       error = 0;

  if (!pdf_font_is_in_use(font)) {
    return 0;
  }

  ident       = pdf_font_get_ident(font);
  point_size  = pdf_font_get_param(font, PDF_FONT_PARAM_POINT_SIZE);
  usedchars   = pdf_font_get_usedchars(font);
#if  ENABLE_GLYPHENC
  encoding_id = pdf_font_get_encoding(font);
  if (encoding_id < 0)
    enc_vec = NULL;
  else {
    enc_vec = pdf_encoding_get_encoding(encoding_id);
  }
#endif /* ENABLE_GLYPHENC */

  ASSERT(ident && usedchars && point_size > 0.0);

  dpi  = truedpi(ident, point_size, base_dpi);
  
  {
    char *fontfile =   pdf_font_get_fontfile  (font);
    
    if (fontfile)
      fp = MFOPEN(fontfile, FOPEN_RBIN_MODE);
    else
      fp = dpx_open_pk_font_at(ident, dpi);
  }
  
  
  
  if (!fp) {
    ERROR("Could not find/open PK font file: %s (at %udpi)", ident, dpi);
  }

  memset(charavail, 0, 256);
  charprocs  = pdf_new_dict();
  /* Include bitmap as 72dpi image:
   * There seems to be problems in "scaled" bitmap glyph
   * rendering in several viewers.
   */
  pix2charu  = 72. * 1000. / ((double) base_dpi) / point_size;
  bbox.llx = bbox.lly =  HUGE_VAL;
  bbox.urx = bbox.ury = -HUGE_VAL;
  while ((opcode = fgetc(fp)) >= 0 && opcode != PK_POST) {
    if (opcode < 240) {
      struct pk_header_  pkh;

      error = read_pk_char_header(&pkh, opcode, fp);
      if (error)
        ERROR("Error in reading PK character header.");
      else if (charavail[pkh.chrcode & 0xff])
        WARN("More than two bitmap image for single glyph?: font=\"%s\" code=0x%02x",
             ident, pkh.chrcode);

      if (!usedchars[pkh.chrcode & 0xff])
        do_skip(fp, pkh.pkt_len);
      else {
        char          *charname;
        pdf_obj       *charproc;
        unsigned char *pkt_ptr;
        size_t         bytesread;
        double         charwidth;

        /* Charwidth in PDF units */
        charwidth = ROUND(1000.0 * pkh.wd / (((double) (1<<20))*pix2charu), 0.1);
        widths[pkh.chrcode & 0xff] = charwidth;

        /* Update font BBox info */
        bbox.llx = MIN(bbox.llx, -pkh.bm_hoff);
        bbox.lly = MIN(bbox.lly,  pkh.bm_voff - pkh.bm_ht);
        bbox.urx = MAX(bbox.urx,  pkh.bm_wd - pkh.bm_hoff);
        bbox.ury = MAX(bbox.ury,  pkh.bm_voff);

        pkt_ptr = NEW(pkh.pkt_len, unsigned char);
        if ((bytesread = fread(pkt_ptr, 1, pkh.pkt_len, fp))!= pkh.pkt_len) {
          ERROR("Only %ld bytes PK packet read. (expected %ld bytes)",
                bytesread, pkh.pkt_len);
        }
        charproc = create_pk_CharProc_stream(&pkh, charwidth, pkt_ptr, bytesread);
        RELEASE(pkt_ptr);
        if (!charproc)
          ERROR("Unpacking PK character data failed.");
#if  ENABLE_GLYPHENC
        if (encoding_id >= 0 && enc_vec) {
          charname = (char *) enc_vec[pkh.chrcode & 0xff];
          if (!charname) {
            WARN("\".notdef\" glyph used in font (code=0x%02x): %s", pkh.chrcode, ident);
            charname = work_buffer;
            pk_char2name(charname, pkh.chrcode);
          }
        }
        else
#endif /* ENABLE_GLYPHENC */
        {
          charname = work_buffer;
          pk_char2name(charname, pkh.chrcode);
        }

        pdf_add_dict(charprocs, pdf_new_name(charname), pdf_ref_obj(charproc)); /* _FIXME_ */
        pdf_release_obj(charproc);
      }
      charavail[pkh.chrcode & 0xff] = 1;
    } else { /* A command byte */
      switch (opcode) {
      case PK_NO_OP: break;
      case PK_XXX1: do_skip(fp, get_unsigned_byte(fp));   break;
      case PK_XXX2: do_skip(fp, get_unsigned_pair(fp));   break;
      case PK_XXX3: do_skip(fp, get_unsigned_triple(fp)); break;
      case PK_XXX4: do_skip(fp, get_unsigned_quad(fp));   break;
      case PK_YYY:  do_skip(fp, 4);  break;
      case PK_PRE:  do_preamble(fp); break;
      }
    }
  }
Exemplo n.º 28
0
pdf_obj *
pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
    pdf_obj *dict;
    pdf_obj *key = NULL;
    pdf_obj *val = NULL;
    int tok;
    int a, b;
    fz_context *ctx = file->ctx;

    dict = pdf_new_dict(ctx, 8);

    fz_var(key);
    fz_var(val);

    fz_try(ctx)
    {
        while (1)
        {
            tok = pdf_lex(file, buf);
skip:
            if (tok == PDF_TOK_CLOSE_DICT)
                break;

            /* for BI .. ID .. EI in content streams */
            if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
                break;

            if (tok != PDF_TOK_NAME)
                fz_throw(ctx, "invalid key in dict");

            key = fz_new_name(ctx, buf->scratch);

            tok = pdf_lex(file, buf);

            switch (tok)
            {
            case PDF_TOK_OPEN_ARRAY:
                /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1643 */
                fz_try(ctx)
                {
                    val = pdf_parse_array(xref, file, buf);
                }
                fz_catch(ctx)
                {
                    fz_warn(ctx, "ignoring broken array for '%s'", pdf_to_name(key));
                    pdf_drop_obj(key);
                    val = key = NULL;
                    do
                        tok = pdf_lex(file, buf);
                    while (tok != PDF_TOK_CLOSE_DICT && tok != PDF_TOK_CLOSE_ARRAY &&
                            tok != PDF_TOK_EOF && tok != PDF_TOK_OPEN_ARRAY && tok != PDF_TOK_OPEN_DICT);
                    if (tok == PDF_TOK_CLOSE_DICT)
                        goto skip;
                    if (tok == PDF_TOK_CLOSE_ARRAY)
                        continue;
                    fz_throw(ctx, "cannot make sense of broken array after all");
                }
                break;

            case PDF_TOK_OPEN_DICT:
                val = pdf_parse_dict(xref, file, buf);
                break;

            case PDF_TOK_NAME:
                val = fz_new_name(ctx, buf->scratch);
                break;
            case PDF_TOK_REAL:
                val = pdf_new_real(ctx, buf->f);
                break;
            case PDF_TOK_STRING:
                val = pdf_new_string(ctx, buf->scratch, buf->len);
                break;
            case PDF_TOK_TRUE:
                val = pdf_new_bool(ctx, 1);
                break;
            case PDF_TOK_FALSE:
                val = pdf_new_bool(ctx, 0);
                break;
            case PDF_TOK_NULL:
                val = pdf_new_null(ctx);
                break;

            case PDF_TOK_INT:
                /* 64-bit to allow for numbers > INT_MAX and overflow */
                a = buf->i;
                tok = pdf_lex(file, buf);
                if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
                        (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
                {
                    val = pdf_new_int(ctx, a);
                    fz_dict_put(dict, key, val);
                    pdf_drop_obj(val);
                    val = NULL;
                    pdf_drop_obj(key);
                    key = NULL;
                    goto skip;
                }
                if (tok == PDF_TOK_INT)
                {
                    b = buf->i;
                    tok = pdf_lex(file, buf);
                    if (tok == PDF_TOK_R)
                    {
                        val = pdf_new_indirect(ctx, a, b, xref);
                        break;
                    }
                }
                fz_throw(ctx, "invalid indirect reference in dict");

            default:
                fz_throw(ctx, "unknown token in dict");
            }

            fz_dict_put(dict, key, val);
            pdf_drop_obj(val);
            val = NULL;
            pdf_drop_obj(key);
            key = NULL;
        }
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(dict);
        pdf_drop_obj(key);
        pdf_drop_obj(val);
        fz_throw(ctx, "cannot parse dict");
    }
    return dict;
}
Exemplo n.º 29
0
/*
	Performs the same task as
	pdf_clean_page_contents, but with an optional text filter
	function.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
		pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	pdf_obj *resources;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		contents = pdf_page_contents(ctx, page);
		resources = pdf_page_resources(ctx, page);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		if (sanitize)
		{
			res = pdf_new_dict(ctx, doc, 1);
			proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg);
			pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
			pdf_close_processor(ctx, proc_filter);
		}
		else
		{
			res = pdf_keep_obj(ctx, resources);
			pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie);
		}
		pdf_close_processor(ctx, proc_buffer);

		/* Deal with page content stream. */

		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_add_object(ctx, doc, new_obj);
			contents = new_ref;
			pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents);
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME(Filter));
			pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms));
		}

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME(G));
				if (!o)
					continue;
				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat_res;
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
				if (!pat)
					continue;
				pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources));
				if (pat_res == NULL)
					pat_res = resources;
				if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj_res;
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
				if (!xobj)
					continue;
				xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources));
				if (xobj_res == NULL)
					xobj_res = resources;
				if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype))))
					pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
				if (!o)
					continue;
				if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype))))
					pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet));
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj);

		/* ColorSpace - no cleaning possible. */
		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		/* Update resource dictionary */
		if (sanitize)
		{
			pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemplo n.º 30
0
static pdf_obj*
pdf_get_page_obj (pdf_file *pf, int page_no,
                  pdf_obj **ret_bbox, pdf_obj **ret_resources)
{
  pdf_obj *page_tree;
  pdf_obj *bbox = NULL, *resources = NULL, *rotate = NULL;
  int page_idx;

  /*
   * Get Page Tree.
   */
  page_tree = NULL;
  {
    pdf_obj *trailer, *catalog;
    pdf_obj *markinfo, *tmp;

    trailer = pdf_file_get_trailer(pf);

    if (pdf_lookup_dict(trailer, "Encrypt")) {
      WARN("This PDF document is encrypted.");
      pdf_release_obj(trailer);
      return NULL;
    }

    catalog = pdf_deref_obj(pdf_lookup_dict(trailer, "Root"));
    if (!PDF_OBJ_DICTTYPE(catalog)) {
      WARN("Can't read document catalog.");
      pdf_release_obj(trailer);
      if (catalog)
	pdf_release_obj(catalog);
      return NULL;
    }
    pdf_release_obj(trailer);

    markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo"));
    if (markinfo) {
      tmp = pdf_lookup_dict(markinfo, "Marked");
      if (PDF_OBJ_BOOLEANTYPE(tmp) && pdf_boolean_value(tmp))
        WARN("PDF file is tagged... Ignoring tags.");
      pdf_release_obj(markinfo);
    }

    page_tree = pdf_deref_obj(pdf_lookup_dict(catalog, "Pages"));
    pdf_release_obj(catalog);
  }
  if (!page_tree) {
    WARN("Page tree not found.");
    return NULL;
  }

  /*
   * Negative page numbers are counted from the back.
   */
  {
    int count = pdf_number_value(pdf_lookup_dict(page_tree, "Count"));
    page_idx = page_no + (page_no >= 0 ? -1 : count);
    if (page_idx < 0 || page_idx >= count) {
	WARN("Page %ld does not exist.", page_no);
	pdf_release_obj(page_tree);
	return NULL;
      }
    page_no = page_idx+1;
  }

  /*
   * Seek correct page. Get Media/Crop Box.
   * Media box and resources can be inherited.
   */
  {
    pdf_obj *kids_ref, *kids;
    pdf_obj *crop_box = NULL;
    pdf_obj *tmp;

    tmp = pdf_lookup_dict(page_tree, "Resources");
    resources = tmp ? pdf_deref_obj(tmp) : pdf_new_dict();

    while (1) {
      int kids_length, i;
 
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "MediaBox")))) {
	if (bbox)
	  pdf_release_obj(bbox);
	bbox = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "BleedBox")))) {
        if (!rect_equal(tmp, bbox)) {
	  if (bbox)
	    pdf_release_obj(bbox);
	  bbox = tmp;
        } else {
          pdf_release_obj(tmp);
      }
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "TrimBox")))) {
        if (!rect_equal(tmp, bbox)) {
	  if (bbox)
	    pdf_release_obj(bbox);
	  bbox = tmp;
        } else {
          pdf_release_obj(tmp);
      }
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "ArtBox")))) {
        if (!rect_equal(tmp, bbox)) {
	  if (bbox)
	    pdf_release_obj(bbox);
	  bbox = tmp;
        } else {
          pdf_release_obj(tmp);
      }
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "CropBox")))) {
	if (crop_box)
	  pdf_release_obj(crop_box);
	crop_box = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Rotate")))) {
	if (rotate)
	  pdf_release_obj(rotate);
	rotate = tmp;
      }
      if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Resources")))) {
#if 0
	pdf_merge_dict(tmp, resources);
#endif
	if (resources)
	  pdf_release_obj(resources);
	resources = tmp;
      }

      kids_ref = pdf_lookup_dict(page_tree, "Kids");
      if (!kids_ref)
	break;
      kids = pdf_deref_obj(kids_ref);
      kids_length = pdf_array_length(kids);

      for (i = 0; i < kids_length; i++) {
	int count;

	pdf_release_obj(page_tree);
	page_tree = pdf_deref_obj(pdf_get_array(kids, i));

	tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
	if (tmp) {
	  /* Pages object */
	  count = pdf_number_value(tmp);
	  pdf_release_obj(tmp);
        } else {
	  /* Page object */
	  count = 1;
        }
	if (page_idx < count)
	  break;

	page_idx -= count;
      }
      
      pdf_release_obj(kids);

      if (i == kids_length) {
	WARN("Page %ld not found! Broken PDF file?", page_no);
	if (bbox)
	  pdf_release_obj(bbox);
	if (crop_box)
	  pdf_release_obj(crop_box);
	if (rotate)
	  pdf_release_obj(rotate);
	pdf_release_obj(resources);
	pdf_release_obj(page_tree);
	return NULL;
      }
    }
    if (crop_box) {
      pdf_release_obj(bbox);
      bbox = crop_box;
    }
  }

  if (!bbox) {
    WARN("No BoundingBox information available.");
    pdf_release_obj(page_tree);
    pdf_release_obj(resources);
    if (rotate)
      pdf_release_obj(rotate);
    return NULL;
  }

  if (rotate) {
    if (pdf_number_value(rotate) != 0.0)
      WARN("<< /Rotate %d >> found. (Not supported yet)",
            (int)pdf_number_value(rotate));
    pdf_release_obj(rotate);
    rotate = NULL;
  }
  
  if (ret_bbox != NULL)
    *ret_bbox = bbox;
  if (ret_resources != NULL)
    *ret_resources = resources;

  return page_tree;
}