Ejemplo n.º 1
0
static void
pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int own_res,
		pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	fz_buffer *buffer;

	if (!obj)
		return;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		if (own_res)
		{
			pdf_obj *r = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
			if (r)
				orig_res = r;
		}

		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, orig_res, res, text_filter, after_text, arg);

		pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);
		pdf_close_processor(ctx, proc_filter);
		pdf_close_processor(ctx, proc_buffer);

		pdf_update_stream(ctx, doc, obj, buffer, 0);

		if (own_res)
		{
			ref = pdf_add_object(ctx, doc, res);
			pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Ejemplo n.º 2
0
static void
pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int own_res)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	fz_buffer *buffer;

	if (!obj)
		return;

	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		if (own_res)
		{
			pdf_obj *r = pdf_dict_get(ctx, obj, PDF_NAME_Resources);
			if (r)
				orig_res = r;
		}

		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);

		pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);

		pdf_update_stream(ctx, doc, obj, buffer, 0);

		if (own_res)
		{
			ref = pdf_new_ref(ctx, doc, res);
			pdf_dict_put(ctx, obj, PDF_NAME_Resources, ref);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning xobject");
	}
}
Ejemplo n.º 3
0
static void
pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	pdf_obj *charprocs;
	int i, l;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	fz_try(ctx)
	{
		res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
		if (res)
			orig_res = res;
		res = NULL;

		res = pdf_new_dict(ctx, doc, 1);

		charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
		l = pdf_dict_len(ctx, charprocs);

		for (i = 0; i < l; i++)
		{
			pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
			fz_buffer *buffer = fz_new_buffer(ctx, 1024);
			fz_try(ctx)
			{
				proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
				if (sanitize)
				{
					proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, orig_res, res);
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
					pdf_close_processor(ctx, proc_filter);
				}
				else
				{
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
				}
				pdf_close_processor(ctx, proc_buffer);

				pdf_update_stream(ctx, doc, val, buffer, 0);
			}
			fz_always(ctx)
			{
				pdf_drop_processor(ctx, proc_filter);
				pdf_drop_processor(ctx, proc_buffer);
				fz_drop_buffer(ctx, buffer);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		pdf_dict_put(ctx, res, PDF_NAME(ProcSet), pdf_dict_get(ctx, orig_res, PDF_NAME(ProcSet)));

		ref = pdf_add_object(ctx, doc, res);
		pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Ejemplo n.º 4
0
/*
	Performs the same task as
	pdf_clean_page_contents, but with an optional text filter
	function.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
		pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	pdf_obj *resources;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		contents = pdf_page_contents(ctx, page);
		resources = pdf_page_resources(ctx, page);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		if (sanitize)
		{
			res = pdf_new_dict(ctx, doc, 1);
			proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg);
			pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
			pdf_close_processor(ctx, proc_filter);
		}
		else
		{
			res = pdf_keep_obj(ctx, resources);
			pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie);
		}
		pdf_close_processor(ctx, proc_buffer);

		/* Deal with page content stream. */

		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_add_object(ctx, doc, new_obj);
			contents = new_ref;
			pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents);
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME(Filter));
			pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms));
		}

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME(G));
				if (!o)
					continue;
				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat_res;
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
				if (!pat)
					continue;
				pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources));
				if (pat_res == NULL)
					pat_res = resources;
				if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj_res;
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
				if (!xobj)
					continue;
				xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources));
				if (xobj_res == NULL)
					xobj_res = resources;
				if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype))))
					pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
				if (!o)
					continue;
				if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype))))
					pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet));
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj);

		/* ColorSpace - no cleaning possible. */
		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		/* Update resource dictionary */
		if (sanitize)
		{
			pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Ejemplo n.º 5
0
static void
pdf_run_page_contents_with_usage(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_device *dev, const fz_matrix *ctm, const char *usage, fz_cookie *cookie)
{
	fz_matrix local_ctm, page_ctm;
	pdf_obj *resources;
	pdf_obj *contents;
	fz_rect mediabox;
	pdf_processor *proc = NULL;
	fz_default_colorspaces *default_cs;

	fz_var(proc);

	default_cs = pdf_load_default_colorspaces(ctx, doc, page);
	if (default_cs)
		fz_set_default_colorspaces(ctx, dev, default_cs);

	fz_try(ctx)
	{
		pdf_page_transform(ctx, page, &mediabox, &page_ctm);
		fz_concat(&local_ctm, &page_ctm, ctm);

		resources = pdf_page_resources(ctx, page);
		contents = pdf_page_contents(ctx, page);

		if (page->transparency)
		{
			fz_colorspace *colorspace = NULL;
			pdf_obj *group = pdf_page_group(ctx, page);

			if (group)
			{
				pdf_obj *cs = pdf_dict_get(ctx, group, PDF_NAME_CS);
				if (cs)
				{
					fz_try(ctx)
						colorspace = pdf_load_colorspace(ctx, cs);
					fz_catch(ctx)
						colorspace = NULL;
				}
			}
			else
				colorspace = fz_keep_colorspace(ctx, fz_default_output_intent(ctx, default_cs));

			fz_begin_group(ctx, dev, fz_transform_rect(&mediabox, &local_ctm), colorspace, 1, 0, 0, 1);
			fz_drop_colorspace(ctx, colorspace);
		}

		proc = pdf_new_run_processor(ctx, dev, &local_ctm, usage, NULL, 0);
		pdf_process_contents(ctx, proc, doc, resources, contents, cookie);
		pdf_close_processor(ctx, proc);
	}
	fz_always(ctx)
	{
		fz_drop_default_colorspaces(ctx, default_cs);
		pdf_drop_processor(ctx, proc);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);

	if (page->transparency)
		fz_end_group(ctx, dev);
}
Ejemplo n.º 6
0
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);

		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);

		contents = page->contents;
		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_new_ref(ctx, doc, new_obj);
			page->contents = contents = new_ref;
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME_Filter);
			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
		}

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME_G);
				if (!o)
					continue;

				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1);
			}
		}

		/* ColorSpace - no cleaning possible */

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0);
			}
		}

		/* Shading - no cleaning possible */

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
					continue;

				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Font);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
				{
					pdf_clean_type3(ctx, doc, o, page->resources, cookie);
				}
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);

		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		pdf_update_stream(ctx, doc, contents, buffer, 0);
		pdf_drop_obj(ctx, page->resources);
		ref = pdf_new_ref(ctx, doc, res);
		page->resources = pdf_keep_obj(ctx, ref);
		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning page");
	}
}