Exemplo n.º 1
0
/*
	Report whether any ExtGState, Pattern or XObject entry of the
	resource dictionary 'rdb' uses blending.

	Returns 0 for a NULL dictionary. A previously memoised answer is
	returned directly. A dictionary that is already marked (i.e. we are
	inside a cyclic resource chain) yields 0 without being memoised.
	Otherwise the answer is computed, memoised on 'rdb' and returned.
	Errors raised while scanning are rethrown after the mark is cleared.
*/
static int
pdf_resources_use_blending(pdf_document *doc, pdf_obj *rdb)
{
	fz_context *ctx = doc->ctx;
	pdf_obj *entries;
	int idx, count;
	int found = 0;
	int useBM = 0;

	if (!rdb)
		return 0;

	/* A remembered answer short-circuits the whole scan. */
	if (pdf_obj_memo(rdb, &useBM))
		return useBM;

	/* Already being visited further up the call chain: break the cycle. */
	if (pdf_mark_obj(rdb))
		return 0;

	fz_try(ctx)
	{
		/* Each category is only examined while nothing has been found yet. */
		entries = pdf_dict_gets(rdb, "ExtGState");
		count = pdf_dict_len(entries);
		for (idx = 0; !found && idx < count; idx++)
			found = pdf_extgstate_uses_blending(doc, pdf_dict_get_val(entries, idx)) != 0;

		if (!found)
		{
			entries = pdf_dict_gets(rdb, "Pattern");
			count = pdf_dict_len(entries);
			for (idx = 0; !found && idx < count; idx++)
				found = pdf_pattern_uses_blending(doc, pdf_dict_get_val(entries, idx)) != 0;
		}

		if (!found)
		{
			entries = pdf_dict_gets(rdb, "XObject");
			count = pdf_dict_len(entries);
			for (idx = 0; !found && idx < count; idx++)
				found = pdf_xobject_uses_blending(doc, pdf_dict_get_val(entries, idx)) != 0;
		}

		if (found)
			useBM = 1;
	}
	fz_always(ctx)
	{
		/* The mark must be cleared on both the normal and the error path. */
		pdf_unmark_obj(rdb);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	pdf_set_obj_memo(rdb, useBM);
	return useBM;
}
Exemplo n.º 2
0
/*
	Create a new dictionary holding every key/value pair of 'obj'.

	'obj' is resolved first; if the result is not a dictionary an
	FZ_ERROR_GENERIC exception is thrown. The new dictionary is created
	against the same document as 'obj'. If inserting an entry throws,
	the partially filled copy is dropped before the error is rethrown.
*/
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *copy;
	int count, k;

	RESOLVE(obj);
	if (!OBJ_IS_DICT(obj))
		fz_throw(ctx, FZ_ERROR_GENERIC, "not a dict (%s)", pdf_objkindstr(obj));

	count = pdf_dict_len(ctx, obj);
	copy = pdf_new_dict(ctx, DICT(obj)->doc, count);

	fz_try(ctx)
	{
		for (k = 0; k < count; k++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, obj, k);
			pdf_obj *val = pdf_dict_get_val(ctx, obj, k);

			pdf_dict_put(ctx, copy, key, val);
		}
	}
	fz_catch(ctx)
	{
		/* Do not leak the half-built copy. */
		pdf_drop_obj(ctx, copy);
		fz_rethrow(ctx);
	}

	return copy;
}
Exemplo n.º 3
0
/*
	Build doc->portfolio from the Root/Collection/Schema dictionary of
	the trailer.

	One pdf_portfolio node is created per schema entry. Nodes are linked
	in ascending order of their O (sort) value; a new node is placed after
	existing nodes with an equal sort value.
*/
static void
load_portfolio(fz_context *ctx, pdf_document *doc)
{
	/* Schema Subtype names and the entry type each one maps to. */
	static const struct
	{
		const char *subtype;
		int type;
	} schema_types[] = {
		{ "S", PDF_SCHEMA_TEXT },
		{ "D", PDF_SCHEMA_DATE },
		{ "N", PDF_SCHEMA_NUMBER },
		{ "F", PDF_SCHEMA_FILENAME },
		{ "Desc", PDF_SCHEMA_DESC },
		{ "ModDate", PDF_SCHEMA_MODDATE },
		{ "CreationDate", PDF_SCHEMA_CREATIONDATE },
		{ "Size", PDF_SCHEMA_SIZE },
	};
	const int ntypes = (int)(sizeof schema_types / sizeof schema_types[0]);
	pdf_obj *schema;
	int idx, count;

	schema = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Collection, PDF_NAME_Schema, NULL);
	count = pdf_dict_len(ctx, schema);

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *key = pdf_dict_get_key(ctx, schema, idx);
		pdf_obj *val = pdf_dict_get_val(ctx, schema, idx);
		int sort = pdf_to_int(ctx, pdf_dict_get(ctx, val, PDF_NAME_O));
		pdf_obj *e_obj = pdf_dict_get(ctx, val, PDF_NAME_E);
		/* E absent means not editable; V absent means visible. */
		int editable = 0;
		pdf_obj *v_obj;
		int visible = 1;
		char *subtype;
		pdf_obj *name;
		pdf_portfolio *node;
		pdf_portfolio **link;
		int t;

		if (e_obj)
			editable = pdf_to_bool(ctx, e_obj);
		v_obj = pdf_dict_get(ctx, val, PDF_NAME_V);
		if (v_obj)
			visible = pdf_to_bool(ctx, v_obj);
		subtype = pdf_to_name(ctx, pdf_dict_get(ctx, val, PDF_NAME_Subtype));
		name = pdf_dict_get(ctx, val, PDF_NAME_N);

		/* All lookups are done before allocating, as in the original flow. */
		node = fz_malloc_struct(ctx, pdf_portfolio);
		node->key = pdf_keep_obj(ctx, key);
		node->val = pdf_keep_obj(ctx, val);
		node->sort = sort;
		node->entry.visible = visible;
		node->entry.editable = editable;
		node->entry.name = pdf_keep_obj(ctx, name);

		node->entry.type = PDF_SCHEMA_UNKNOWN;
		for (t = 0; t < ntypes; t++)
		{
			if (strcmp(subtype, schema_types[t].subtype) == 0)
			{
				node->entry.type = schema_types[t].type;
				break;
			}
		}

		/* Walk past every node whose sort value is not greater than ours. */
		for (link = &doc->portfolio; *link && (*link)->sort <= node->sort; link = &(*link)->next)
			;

		node->next = *link;
		*link = node;
	}
}
Exemplo n.º 4
0
static void
gatherforms(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int i, n;

	n = pdf_dict_len(dict);
	for (i = 0; i < n; i++)
	{
		pdf_obj *xobjdict;
		pdf_obj *type;
		pdf_obj *subtype;
		pdf_obj *group;
		pdf_obj *groupsubtype;
		pdf_obj *reference;
		int k;

		xobjdict = pdf_dict_get_val(dict, i);
		if (!pdf_is_dict(xobjdict))
		{
			fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(xobjdict), pdf_to_gen(xobjdict));
			continue;
		}

		type = pdf_dict_gets(xobjdict, "Subtype");
		if (strcmp(pdf_to_name(type), "Form"))
			continue;

		subtype = pdf_dict_gets(xobjdict, "Subtype2");
		if (!strcmp(pdf_to_name(subtype), "PS"))
			continue;

		group = pdf_dict_gets(xobjdict, "Group");
		groupsubtype = pdf_dict_gets(group, "S");
		reference = pdf_dict_gets(xobjdict, "Ref");

		for (k = 0; k < forms; k++)
			if (!pdf_objcmp(form[k].u.form.obj, xobjdict))
				break;

		if (k < forms)
			continue;

		form = fz_resize_array(ctx, form, forms+1, sizeof(struct info));
		forms++;

		form[forms - 1].page = page;
		form[forms - 1].pageref = pageref;
		form[forms - 1].pageobj = pageobj;
		form[forms - 1].u.form.obj = xobjdict;
		form[forms - 1].u.form.groupsubtype = groupsubtype;
		form[forms - 1].u.form.reference = reference;
	}
}
Exemplo n.º 5
0
/*
** Merge srcdict into dstdict: every key/value pair of srcdict is handed,
** in index order, to wmupdf_dict_merge_keyval() together with dstdict.
** dictname is accepted for the caller's convenience but is not used here.
*/
static void wmupdf_dict_merge(fz_context *ctx,char *dictname,pdf_obj *dstdict,pdf_obj *srcdict)

    {
    int idx,count;

    count=pdf_dict_len(srcdict);
    idx=0;
    while (idx<count)
        {
        pdf_obj *k=pdf_dict_get_key(srcdict,idx);
        pdf_obj *v=pdf_dict_get_val(srcdict,idx);

        wmupdf_dict_merge_keyval(ctx,dstdict,k,v);
        idx++;
        }
    }
Exemplo n.º 6
0
/*
	Append to the global 'font' table every font dictionary in 'dict'
	that is not already recorded (compared with pdf_objcmp).

	Entries that are not dictionaries are reported with a warning and
	skipped. Each new record stores the page number, page reference,
	page object, the font dictionary, its Subtype and a display name.

	The display name is BaseFont when that entry exists and is not null;
	otherwise it is the Name entry (which may itself be absent, giving
	NULL).
*/
static void
gatherfonts(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int i, n;

	n = pdf_dict_len(dict);
	for (i = 0; i < n; i++)
	{
		pdf_obj *fontdict = NULL;
		pdf_obj *subtype = NULL;
		pdf_obj *basefont = NULL;
		pdf_obj *name = NULL;
		int k;

		fontdict = pdf_dict_get_val(dict, i);
		if (!pdf_is_dict(fontdict))
		{
			fz_warn(ctx, "not a font dict (%d %d R)", pdf_to_num(fontdict), pdf_to_gen(fontdict));
			continue;
		}

		subtype = pdf_dict_gets(fontdict, "Subtype");
		basefont = pdf_dict_gets(fontdict, "BaseFont");

		/*
			Select the name to record. A BaseFont that is present but
			null must not win over Name: testing only the pointer (as
			the code previously did) made the Name fallback dead in
			that case.
		*/
		if (!basefont || pdf_is_null(basefont))
			name = pdf_dict_gets(fontdict, "Name");
		else
			name = basefont;

		/* Skip fonts that have been recorded before. */
		for (k = 0; k < fonts; k++)
			if (!pdf_objcmp(font[k].u.font.obj, fontdict))
				break;

		if (k < fonts)
			continue;

		font = fz_resize_array(ctx, font, fonts+1, sizeof(struct info));
		fonts++;

		font[fonts - 1].page = page;
		font[fonts - 1].pageref = pageref;
		font[fonts - 1].pageobj = pageobj;
		font[fonts - 1].u.font.obj = fontdict;
		font[fonts - 1].u.font.subtype = subtype;
		font[fonts - 1].u.font.name = name;
	}
}
Exemplo n.º 7
0
/*
	Create a new dictionary holding every key/value pair of 'obj'.

	'obj' is resolved first. A NULL object yields NULL (no context is
	available to warn through at that point in the original design). A
	non-dictionary object only triggers a warning; the copy loop then
	runs over whatever length pdf_dict_len reports for it.
*/
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *copy;
	int count, k;

	RESOLVE(obj);
	if (obj == NULL)
		return NULL; /* Can't warn :( */

	if (obj->kind != PDF_DICT)
		fz_warn(ctx, "assert: not a dict (%s)", pdf_objkindstr(obj));

	count = pdf_dict_len(obj);
	copy = pdf_new_dict(ctx, count);

	k = 0;
	while (k < count)
	{
		fz_dict_put(copy, pdf_dict_get_key(obj, k), pdf_dict_get_val(obj, k));
		k++;
	}

	return copy;
}
Exemplo n.º 8
0
static void
gathershadings(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int i, n;

	n = pdf_dict_len(dict);
	for (i = 0; i < n; i++)
	{
		pdf_obj *shade;
		pdf_obj *type;
		int k;

		shade = pdf_dict_get_val(dict, i);
		if (!pdf_is_dict(shade))
		{
			fz_warn(ctx, "not a shading dict (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade));
			continue;
		}

		type = pdf_dict_gets(shade, "ShadingType");
		if (!pdf_is_int(type) || pdf_to_int(type) < 1 || pdf_to_int(type) > 7)
		{
			fz_warn(ctx, "not a shading type (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade));
			type = NULL;
		}

		for (k = 0; k < shadings; k++)
			if (!pdf_objcmp(shading[k].u.shading.obj, shade))
				break;

		if (k < shadings)
			continue;

		shading = fz_resize_array(ctx, shading, shadings+1, sizeof(struct info));
		shadings++;

		shading[shadings - 1].page = page;
		shading[shadings - 1].pageref = pageref;
		shading[shadings - 1].pageobj = pageobj;
		shading[shadings - 1].u.shading.obj = shade;
		shading[shadings - 1].u.shading.type = type;
	}
}
Exemplo n.º 9
0
static void
gatherpsobjs(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int i, n;

	n = pdf_dict_len(dict);
	for (i = 0; i < n; i++)
	{
		pdf_obj *xobjdict;
		pdf_obj *type;
		pdf_obj *subtype;
		int k;

		xobjdict = pdf_dict_get_val(dict, i);
		if (!pdf_is_dict(xobjdict))
		{
			fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(xobjdict), pdf_to_gen(xobjdict));
			continue;
		}

		type = pdf_dict_gets(xobjdict, "Subtype");
		subtype = pdf_dict_gets(xobjdict, "Subtype2");
		if (strcmp(pdf_to_name(type), "PS") &&
			(strcmp(pdf_to_name(type), "Form") || strcmp(pdf_to_name(subtype), "PS")))
			continue;

		for (k = 0; k < psobjs; k++)
			if (!pdf_objcmp(psobj[k].u.form.obj, xobjdict))
				break;

		if (k < psobjs)
			continue;

		psobj = fz_resize_array(ctx, psobj, psobjs+1, sizeof(struct info));
		psobjs++;

		psobj[psobjs - 1].page = page;
		psobj[psobjs - 1].pageref = pageref;
		psobj[psobjs - 1].pageobj = pageobj;
		psobj[psobjs - 1].u.form.obj = xobjdict;
	}
}
Exemplo n.º 10
0
/*
	Performs the same task as pdf_clean_annot_contents, but with an
	optional text filter function.

	Every non-NULL value stored directly in the annotation's AP
	dictionary is passed to pdf_clean_stream_object. Nothing is done
	when the annotation has no AP entry.

	proc: Not used by this function.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.

	sanitize, ascii: Forwarded unchanged to pdf_clean_stream_object.
*/
void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, fz_cookie *cookie,
	pdf_page_contents_process_fn *proc, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg, int sanitize, int ascii)
{
	pdf_obj *ap = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP));
	int count, idx;

	if (!ap)
		return;

	count = pdf_dict_len(ctx, ap);
	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *stream = pdf_dict_get_val(ctx, ap, idx);

		if (stream != NULL)
			pdf_clean_stream_object(ctx, doc, stream, NULL, cookie, 1, text_filter, after_text, arg, sanitize, ascii);
	}
}
Exemplo n.º 11
0
pdf_obj *
pdf_copy_dict(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *dict;
	int i, n;

	RESOLVE(obj);
	if (obj >= PDF_OBJ__LIMIT)
	{
		pdf_document *doc = DICT(obj)->doc;

		if (obj->kind != PDF_DICT)
			fz_warn(ctx, "assert: not a dict (%s)", pdf_objkindstr(obj));

		n = pdf_dict_len(ctx, obj);
		dict = pdf_new_dict(ctx, doc, n);
		for (i = 0; i < n; i++)
			pdf_dict_put(ctx, dict, pdf_dict_get_key(ctx, obj, i), pdf_dict_get_val(ctx, obj, i));

		return dict;
	}
	return NULL; /* Can't warn :( */
}
Exemplo n.º 12
0
/*
** From MuPDF pdfclean.c
**
** Rebuild a reduced Dests name tree in the document root.  For every entry
** of olddests whose destination (the first element of its "D" array, or of
** the entry itself when it has no "D") is contained in the "Kids" array of
** pages, the key (as a string) and the value are appended to a names list.
** That list is stored as Root/Names/Dests/Names.
**
** olddests is dropped by this function.
*/
static void wmupdf_preserve_old_dests(pdf_obj *olddests,fz_context *ctx,pdf_document *xref,
                                      pdf_obj *pages)

    {
    pdf_obj *names,*dests,*names_list,*root;
    int idx,count;

    names=pdf_new_dict(ctx,1);
    dests=pdf_new_dict(ctx,1);
    names_list=pdf_new_array(ctx,32);
    count=pdf_dict_len(olddests);

    idx=0;
    while (idx<count)
        {
        pdf_obj *key,*val,*key_str,*target;

        key=pdf_dict_get_key(olddests,idx);
        val=pdf_dict_get_val(olddests,idx);
        key_str=pdf_new_string(ctx,pdf_to_name(key),strlen(pdf_to_name(key)));

        /* Use the "D" entry when there is one, otherwise the value itself. */
        target=pdf_dict_gets(val,"D");
        if (!target)
            target=val;
        target=pdf_array_get(target, 0);

        /* Only destinations pointing at a retained page are kept. */
        if (pdf_array_contains(pdf_dict_gets(pages,"Kids"),target))
            {
            pdf_array_push(names_list, key_str);
            pdf_array_push(names_list, val);
            }
        pdf_drop_obj(key_str);
        idx++;
        }

    root=pdf_dict_gets(xref->trailer,"Root");
    pdf_dict_puts(dests,"Names",names_list);
    pdf_dict_puts(names,"Dests",dests);
    pdf_dict_puts(root,"Names",names);

    /* Release our own references, plus the caller-supplied olddests. */
    pdf_drop_obj(names);
    pdf_drop_obj(dests);
    pdf_drop_obj(names_list);
    pdf_drop_obj(olddests);
    }
Exemplo n.º 13
0
static void
pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	pdf_obj *charprocs;
	int i, l;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	fz_try(ctx)
	{
		res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
		if (res)
			orig_res = res;
		res = NULL;

		res = pdf_new_dict(ctx, doc, 1);

		charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
		l = pdf_dict_len(ctx, charprocs);

		for (i = 0; i < l; i++)
		{
			pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
			fz_buffer *buffer = fz_new_buffer(ctx, 1024);
			fz_try(ctx)
			{
				proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
				if (sanitize)
				{
					proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, orig_res, res);
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
					pdf_close_processor(ctx, proc_filter);
				}
				else
				{
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
				}
				pdf_close_processor(ctx, proc_buffer);

				pdf_update_stream(ctx, doc, val, buffer, 0);
			}
			fz_always(ctx)
			{
				pdf_drop_processor(ctx, proc_filter);
				pdf_drop_processor(ctx, proc_buffer);
				fz_drop_buffer(ctx, buffer);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		pdf_dict_put(ctx, res, PDF_NAME(ProcSet), pdf_dict_get(ctx, orig_res, PDF_NAME(ProcSet)));

		ref = pdf_add_object(ctx, doc, res);
		pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemplo n.º 14
0
/*
	Performs the same task as
	pdf_clean_page_contents, but with an optional text filter
	function.

	The page's content stream is re-run through a buffer processor and
	written back as a single stream; the ExtGState soft masks, tiling
	patterns, form XObjects and Type3 fonts found in the resulting
	resource dictionary are then cleaned in turn.

	proc_fn: Optional callback, invoked with the output buffer, the
	resulting resource dictionary and proc_arg once the resources have
	been processed.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.

	sanitize: When non-zero, contents are run through a filter
	processor that fills a new resource dictionary, which then replaces
	the page's Resources entry. When zero, the page's existing
	resources are reused and the Resources entry is left alone.

	ascii: Forwarded to the buffer processor and to the nested cleaning
	calls.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
		pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	pdf_obj *resources;
	fz_buffer *buffer;

	/* These are assigned inside the try block and dropped in the always block. */
	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	/* Created before the try block; dropped unconditionally in the always block. */
	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		contents = pdf_page_contents(ctx, page);
		resources = pdf_page_resources(ctx, page);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		if (sanitize)
		{
			/* The filter processor writes into proc_buffer and is handed 'res' to fill in. */
			res = pdf_new_dict(ctx, doc, 1);
			proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg);
			pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
			pdf_close_processor(ctx, proc_filter);
		}
		else
		{
			/* No filtering: keep using the existing resources and write straight to the buffer. */
			res = pdf_keep_obj(ctx, resources);
			pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie);
		}
		pdf_close_processor(ctx, proc_buffer);

		/* Deal with page content stream. */

		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_add_object(ctx, doc, new_obj);
			contents = new_ref;
			pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents);
		}
		else
		{
			/* Existing single object is reused; its Filter and DecodeParms
			 * entries are removed before new data is stored (presumably
			 * because they describe the old stream encoding). */
			pdf_dict_del(ctx, contents, PDF_NAME(Filter));
			pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms));
		}

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				/* Only graphics states with an SMask that has a G entry are of interest. */
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME(G));
				if (!o)
					continue;
				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat_res;
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
				if (!pat)
					continue;
				/* Fall back on the page resources when the pattern has none of its own. */
				pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources));
				if (pat_res == NULL)
					pat_res = resources;
				/* Only patterns with PatternType 1 are cleaned. */
				if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj_res;
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
				if (!xobj)
					continue;
				/* Fall back on the page resources when the XObject has none of its own. */
				xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources));
				if (xobj_res == NULL)
					xobj_res = resources;
				/* Only XObjects whose Subtype is Form are cleaned. */
				if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype))))
					pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
				if (!o)
					continue;
				/* Only fonts whose Subtype is Type3 carry content streams to clean. */
				if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype))))
					pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet));
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj);

		/* ColorSpace - no cleaning possible. */
		/* Properties - no cleaning possible. */

		/* NOTE(review): the callback runs after pdf_update_stream above, so
		 * changes it makes to 'buffer' do not appear to reach the stream
		 * from this function - confirm this is intended. */
		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		/* Update resource dictionary */
		if (sanitize)
		{
			pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res);
		}
	}
	fz_always(ctx)
	{
		/* Release everything created above, on both the success and the error path. */
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemplo n.º 15
0
static void
gatherresourceinfo(int page, pdf_obj *rsrc, int show)
{
	pdf_obj *pageobj;
	pdf_obj *pageref;
	pdf_obj *font;
	pdf_obj *xobj;
	pdf_obj *shade;
	pdf_obj *pattern;
	pdf_obj *subrsrc;
	int i;

	pageref = pdf_lookup_page_obj(doc, page-1);
	pageobj = pdf_resolve_indirect(pageref);

	if (!pageobj)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);

	font = pdf_dict_gets(rsrc, "Font");
	if (show & FONTS && font)
	{
		int n;

		gatherfonts(page, pageref, pageobj, font);
		n = pdf_dict_len(font);
		for (i = 0; i < n; i++)
		{
			pdf_obj *obj = pdf_dict_get_val(font, i);

			subrsrc = pdf_dict_gets(obj, "Resources");
			if (subrsrc && pdf_objcmp(rsrc, subrsrc))
				gatherresourceinfo(page, subrsrc, show);
		}
	}

	xobj = pdf_dict_gets(rsrc, "XObject");
	if (show & XOBJS && xobj)
	{
		int n;

		gatherimages(page, pageref, pageobj, xobj);
		gatherforms(page, pageref, pageobj, xobj);
		gatherpsobjs(page, pageref, pageobj, xobj);
		n = pdf_dict_len(xobj);
		for (i = 0; i < n; i++)
		{
			pdf_obj *obj = pdf_dict_get_val(xobj, i);
			subrsrc = pdf_dict_gets(obj, "Resources");
			if (subrsrc && pdf_objcmp(rsrc, subrsrc))
				gatherresourceinfo(page, subrsrc, show);
		}
	}

	shade = pdf_dict_gets(rsrc, "Shading");
	if (show & SHADINGS && shade)
		gathershadings(page, pageref, pageobj, shade);

	pattern = pdf_dict_gets(rsrc, "Pattern");
	if (show & PATTERNS && pattern)
	{
		int n;
		gatherpatterns(page, pageref, pageobj, pattern);
		n = pdf_dict_len(pattern);
		for (i = 0; i < n; i++)
		{
			pdf_obj *obj = pdf_dict_get_val(pattern, i);
			subrsrc = pdf_dict_gets(obj, "Resources");
			if (subrsrc && pdf_objcmp(rsrc, subrsrc))
				gatherresourceinfo(page, subrsrc, show);
		}
	}
}
Exemplo n.º 16
0
static void
gatherpatterns(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int i, n;

	n = pdf_dict_len(dict);
	for (i = 0; i < n; i++)
	{
		pdf_obj *patterndict;
		pdf_obj *type;
		pdf_obj *paint = NULL;
		pdf_obj *tiling = NULL;
		pdf_obj *shading = NULL;
		int k;

		patterndict = pdf_dict_get_val(dict, i);
		if (!pdf_is_dict(patterndict))
		{
			fz_warn(ctx, "not a pattern dict (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
			continue;
		}

		type = pdf_dict_gets(patterndict, "PatternType");
		if (!pdf_is_int(type) || pdf_to_int(type) < 1 || pdf_to_int(type) > 2)
		{
			fz_warn(ctx, "not a pattern type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
			type = NULL;
		}

		if (pdf_to_int(type) == 1)
		{
			paint = pdf_dict_gets(patterndict, "PaintType");
			if (!pdf_is_int(paint) || pdf_to_int(paint) < 1 || pdf_to_int(paint) > 2)
			{
				fz_warn(ctx, "not a pattern paint type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
				paint = NULL;
			}

			tiling = pdf_dict_gets(patterndict, "TilingType");
			if (!pdf_is_int(tiling) || pdf_to_int(tiling) < 1 || pdf_to_int(tiling) > 3)
			{
				fz_warn(ctx, "not a pattern tiling type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict));
				tiling = NULL;
			}
		}
		else
		{
			shading = pdf_dict_gets(patterndict, "Shading");
		}

		for (k = 0; k < patterns; k++)
			if (!pdf_objcmp(pattern[k].u.pattern.obj, patterndict))
				break;

		if (k < patterns)
			continue;

		pattern = fz_resize_array(ctx, pattern, patterns+1, sizeof(struct info));
		patterns++;

		pattern[patterns - 1].page = page;
		pattern[patterns - 1].pageref = pageref;
		pattern[patterns - 1].pageobj = pageobj;
		pattern[patterns - 1].u.pattern.obj = patterndict;
		pattern[patterns - 1].u.pattern.type = type;
		pattern[patterns - 1].u.pattern.paint = paint;
		pattern[patterns - 1].u.pattern.tiling = tiling;
		pattern[patterns - 1].u.pattern.shading = shading;
	}
}
Exemplo n.º 17
0
/*
	Reduce the document to the pages named on the command line.

	The arguments from argv[fz_optind] onwards are comma separated page
	specifications: a single number, "a-b", "a-" (up to the last page)
	or "-b" (starting from the last page). Ranges given in reverse are
	swapped, and both ends are clamped to 1..pagecount.

	The root is replaced by a dictionary holding only Type and Pages,
	the page tree gets a new flat Kids array and Count, and the Dests
	name tree (if any) is rebuilt with only the destinations that point
	at a retained page.
*/
static void retainpages(int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_obj *names, *dests, *names_list;
	int idx, count;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_gets(xref->trailer, "Root");
	pages = pdf_dict_gets(oldroot, "Pages");
	olddests = pdf_load_name_tree(xref, "Dests");

	root = pdf_new_dict(ctx, 2);
	pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type"));
	pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages"));
	pdf_update_object(xref, pdf_to_num(oldroot), root);
	pdf_drop_obj(root);

	/* Every retained page is re-parented to the page tree root and
	 * collected in a new kids array. */
	parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref);
	kids = pdf_new_array(ctx, 1);

	for (; fz_optind != argc; fz_optind++)
	{
		char *pagelist = argv[fz_optind];
		int pagecount = pdf_count_pages(xref);
		char *spec;

		for (spec = fz_strsep(&pagelist, ","); spec != NULL; spec = fz_strsep(&pagelist, ","))
		{
			char *dash = strchr(spec, '-');
			int first, last, page;

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				first = pagecount;
			else
				first = atoi(spec);
			last = first;

			/* A trailing dash means "up to the last page". */
			if (dash)
				last = (strlen(dash) > 1) ? atoi(dash + 1) : pagecount;

			if (first > last)
			{
				int tmp = first;
				first = last;
				last = tmp;
			}

			first = fz_clampi(first, 1, pagecount);
			last = fz_clampi(last, 1, pagecount);

			for (page = first; page <= last; page++)
			{
				pdf_obj *retained_obj = xref->page_objs[page-1];
				pdf_obj *retained_ref = xref->page_refs[page-1];

				pdf_dict_puts(retained_obj, "Parent", parent);

				/* Store page object in new kids array */
				pdf_array_push(kids, retained_ref);
			}
		}
	}

	pdf_drop_obj(parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, pdf_array_len(kids));
	pdf_dict_puts(pages, "Count", countobj);
	pdf_drop_obj(countobj);
	pdf_dict_puts(pages, "Kids", kids);
	pdf_drop_obj(kids);

	/* Without an old Dests name tree there is nothing left to do. */
	if (!olddests)
		return;

	/* Also preserve the (partial) Dests name tree */
	names = pdf_new_dict(ctx, 1);
	dests = pdf_new_dict(ctx, 1);
	names_list = pdf_new_array(ctx, 32);
	count = pdf_dict_len(olddests);

	for (idx = 0; idx < count; idx++)
	{
		pdf_obj *key = pdf_dict_get_key(olddests, idx);
		pdf_obj *val = pdf_dict_get_val(olddests, idx);
		pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(key), strlen(pdf_to_name(key)));
		pdf_obj *target = pdf_dict_gets(val, "D");

		/* Use the "D" entry when there is one, otherwise the value itself. */
		if (!target)
			target = val;
		target = pdf_array_get(target, 0);

		if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), target))
		{
			pdf_array_push(names_list, key_str);
			pdf_array_push(names_list, val);
		}
		pdf_drop_obj(key_str);
	}

	root = pdf_dict_gets(xref->trailer, "Root");
	pdf_dict_puts(dests, "Names", names_list);
	pdf_dict_puts(names, "Dests", dests);
	pdf_dict_puts(root, "Names", names);

	pdf_drop_obj(names);
	pdf_drop_obj(dests);
	pdf_drop_obj(names_list);
	pdf_drop_obj(olddests);
}
Exemplo n.º 18
0
static void
gatherimages(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict)
{
	int i, n;

	n = pdf_dict_len(dict);
	for (i = 0; i < n; i++)
	{
		pdf_obj *imagedict;
		pdf_obj *type;
		pdf_obj *width;
		pdf_obj *height;
		pdf_obj *bpc = NULL;
		pdf_obj *filter = NULL;
		pdf_obj *cs = NULL;
		pdf_obj *altcs;
		int k;

		imagedict = pdf_dict_get_val(dict, i);
		if (!pdf_is_dict(imagedict))
		{
			fz_warn(ctx, "not an image dict (%d %d R)", pdf_to_num(imagedict), pdf_to_gen(imagedict));
			continue;
		}

		type = pdf_dict_gets(imagedict, "Subtype");
		if (strcmp(pdf_to_name(type), "Image"))
			continue;

		filter = pdf_dict_gets(imagedict, "Filter");

		altcs = NULL;
		cs = pdf_dict_gets(imagedict, "ColorSpace");
		if (pdf_is_array(cs))
		{
			pdf_obj *cses = cs;

			cs = pdf_array_get(cses, 0);
			if (pdf_is_name(cs) && (!strcmp(pdf_to_name(cs), "DeviceN") || !strcmp(pdf_to_name(cs), "Separation")))
			{
				altcs = pdf_array_get(cses, 2);
				if (pdf_is_array(altcs))
					altcs = pdf_array_get(altcs, 0);
			}
		}

		width = pdf_dict_gets(imagedict, "Width");
		height = pdf_dict_gets(imagedict, "Height");
		bpc = pdf_dict_gets(imagedict, "BitsPerComponent");

		for (k = 0; k < images; k++)
			if (!pdf_objcmp(image[k].u.image.obj, imagedict))
				break;

		if (k < images)
			continue;

		image = fz_resize_array(ctx, image, images+1, sizeof(struct info));
		images++;

		image[images - 1].page = page;
		image[images - 1].pageref = pageref;
		image[images - 1].pageobj = pageobj;
		image[images - 1].u.image.obj = imagedict;
		image[images - 1].u.image.width = width;
		image[images - 1].u.image.height = height;
		image[images - 1].u.image.bpc = bpc;
		image[images - 1].u.image.filter = filter;
		image[images - 1].u.image.cs = cs;
		image[images - 1].u.image.altcs = altcs;
	}
}
Exemplo n.º 19
0
/*
	Re-run the page's content stream through a filter processor,
	replace the page contents with the resulting single stream and
	install a freshly built resource dictionary on the page.

	The ExtGState soft masks, tiling patterns, form XObjects and Type3
	fonts listed in the new resource dictionary are cleaned in turn.

	proc_fn: Optional callback, invoked with the output buffer, the new
	resource dictionary and proc_arg before the stream is written back.

	proc_arg: Opaque value passed to proc_fn.

	Errors are rethrown with the message "Failed while cleaning page".
*/
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	fz_buffer *buffer;

	/* These are assigned inside the try block and dropped in the always block. */
	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	/* Created before the try block; dropped unconditionally in the always block. */
	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		res = pdf_new_dict(ctx, doc, 1);

		/* The filter processor writes into proc_buffer and is handed 'res' to fill in. */
		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);

		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);

		contents = page->contents;
		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_new_ref(ctx, doc, new_obj);
			/* NOTE(review): new_ref is stored in page->contents without an
			 * extra keep, yet it is dropped in the always block below, and
			 * the previous page->contents is not dropped here - confirm the
			 * intended ownership. */
			page->contents = contents = new_ref;
		}
		else
		{
			/* Existing single object is reused; its Filter and DecodeParms
			 * entries are removed before new data is stored (presumably
			 * because they describe the old stream encoding). */
			pdf_dict_del(ctx, contents, PDF_NAME_Filter);
			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
		}

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				/* Only graphics states with an SMask that has a G entry are of interest. */
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME_G);
				if (!o)
					continue;

				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1);
			}
		}

		/* ColorSpace - no cleaning possible */

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				/* Only patterns with PatternType 1 are cleaned. */
				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0);
			}
		}

		/* Shading - no cleaning possible */

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				/* Only XObjects whose Subtype is Form are cleaned. */
				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
					continue;

				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Font);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				/* Only fonts whose Subtype is Type3 carry content streams to clean. */
				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
				{
					pdf_clean_type3(ctx, doc, o, page->resources, cookie);
				}
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);

		/* Properties - no cleaning possible. */

		/* The callback sees the buffer before it is written to the stream. */
		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		pdf_update_stream(ctx, doc, contents, buffer, 0);
		/* NOTE(review): page->resources is dropped before pdf_new_ref is
		 * called; if that call throws, page->resources appears to be left
		 * pointing at a dropped object - confirm. */
		pdf_drop_obj(ctx, page->resources);
		ref = pdf_new_ref(ctx, doc, res);
		page->resources = pdf_keep_obj(ctx, ref);
		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
	}
	fz_always(ctx)
	{
		/* Release everything created above, on both the success and the error path. */
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning page");
	}
}
Exemplo n.º 20
0
/*
 * Reduce the document in glo->doc to the pages named on the command line.
 *
 * Each argv[0..argc-1] entry is a comma separated list of page specs. A spec
 * is a single page number or a range "A-B"; a missing A or B stands for the
 * last page, every number is clamped to [1, pagecount], and a range whose
 * start is not below its end is walked in descending order.
 *
 * Steps, in order:
 *   - install a fresh catalog holding only Type, Pages and (if present)
 *     Outlines;
 *   - rebuild the page tree's Kids/Count from the retained pages;
 *   - rebuild the Dests name tree from the entries accepted by
 *     dest_is_valid_page();
 *   - delete Link annotations rejected by dest_is_valid();
 *   - drop the Outlines entry again when strip_outlines() returns 0.
 */
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	/* Only carry the outline tree over when the old catalog actually had
	 * one, so that a null value is never put into the new catalog. */
	if (outlines)
		pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* "A-" (nothing after the dash) ends at the last page. */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Remember the object number of every retained page; the destination
	 * checks below are handed this table. */
	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The destination array is either val's /D entry or val
			 * itself; its first element is what gets validated. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation. The array is now one entry
				 * shorter and the following entry has moved into
				 * slot j, so adjust both the bound and the index. */
				pdf_array_delete(ctx, annots, j);
				len--;
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
/*
 * Reduce the document in glo->doc to the pages named on the command line.
 *
 * Each argv[0..argc-1] entry is a comma separated list of page specs. A spec
 * is a single page number or a range "A-B"; a missing A or B stands for the
 * last page, every number is clamped to [1, pagecount], and a range whose
 * start is not below its end is walked in descending order.
 *
 * Steps, in order:
 *   - install a fresh catalog holding only Type and Pages;
 *   - rebuild the page tree's Kids/Count from the retained pages;
 *   - rebuild the Dests name tree from the entries whose target is found in
 *     the new Kids array;
 *   - delete GoTo Link annotations whose /D is not accepted by
 *     string_in_names_list().
 */
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	int pagecount;
	int i;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* "A-" (nothing after the dash) ends at the last page. */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The destination array is either val's /D entry or val
			 * itself; its first element is looked up in the new Kids. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (pdf_array_contains(ctx, pdf_dict_get(ctx, pages, PDF_NAME_Kids), dest))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		/* root was dropped above; fetch the catalog again via the trailer. */
		root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
		/* names_list is still read by the annotation pass below, so our
		 * own reference is released only at the end of the function. */
	}

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	pagecount = pdf_count_pages(ctx, doc);
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);
			pdf_obj *p;

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			/* Only links carrying a GoTo action are candidates for removal. */
			p = pdf_dict_get(ctx, o, PDF_NAME_A);
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo))
				continue;

			if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
				continue;

			/* FIXME: Should probably look at Next too */

			/* Remove this annotation. The array is now one entry
			 * shorter and the following entry has moved into slot j,
			 * so adjust both the bound and the index. */
			pdf_array_delete(ctx, annots, j);
			len--;
			j--;
		}
	}

	pdf_drop_obj(ctx, names_list);
}
Exemplo n.º 22
0
/*
 * Gather information about everything referenced from the resource
 * dictionary rsrc, on behalf of page number `page` (used as a 1-based index
 * into the xref page tables).
 *
 * Fonts, XObjects, shadings and patterns are each handed to their gather*
 * helper. For fonts, XObjects and patterns every entry's own "Resources"
 * dictionary is then visited recursively, unless pdf_objcmp() reports it as
 * equal to rsrc.
 *
 * NOTE(review): only a direct self-reference is filtered out here; longer
 * reference cycles are not detected by this function.
 *
 * Throws if the page object for `page` is missing.
 */
static void
gatherresourceinfo(int page, pdf_obj *rsrc)
{
	pdf_obj *page_dict = xref->page_objs[page-1];
	pdf_obj *page_ref = xref->page_refs[page-1];
	pdf_obj *fonts;
	pdf_obj *xobjects;
	pdf_obj *shadings;
	pdf_obj *patterns;
	pdf_obj *nested;
	int idx;
	int count;

	if (!page_dict)
		fz_throw(ctx, "cannot retrieve info from page %d", page);

	/* Fonts, then the resources nested inside each font. */
	fonts = pdf_dict_gets(rsrc, "Font");
	if (fonts)
	{
		gatherfonts(page, page_ref, page_dict, fonts);

		count = pdf_dict_len(fonts);
		for (idx = 0; idx < count; idx++)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(fonts, idx), "Resources");
			if (!nested)
				continue;
			if (pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested);
		}
	}

	/* XObjects: images, forms and PostScript objects, then nested resources. */
	xobjects = pdf_dict_gets(rsrc, "XObject");
	if (xobjects)
	{
		gatherimages(page, page_ref, page_dict, xobjects);
		gatherforms(page, page_ref, page_dict, xobjects);
		gatherpsobjs(page, page_ref, page_dict, xobjects);

		count = pdf_dict_len(xobjects);
		for (idx = 0; idx < count; idx++)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(xobjects, idx), "Resources");
			if (!nested)
				continue;
			if (pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested);
		}
	}

	/* Shadings are gathered without any recursion. */
	shadings = pdf_dict_gets(rsrc, "Shading");
	if (shadings)
		gathershadings(page, page_ref, page_dict, shadings);

	/* Patterns, then the resources nested inside each pattern. */
	patterns = pdf_dict_gets(rsrc, "Pattern");
	if (patterns)
	{
		gatherpatterns(page, page_ref, page_dict, patterns);

		count = pdf_dict_len(patterns);
		for (idx = 0; idx < count; idx++)
		{
			nested = pdf_dict_gets(pdf_dict_get_val(patterns, idx), "Resources");
			if (!nested)
				continue;
			if (pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested);
		}
	}
}
Exemplo n.º 23
0
/*
 * Copy one resource sub-dictionary (e.g. /Font) from the source document into
 * `dest`, giving every entry a new name, and record the mapping from the
 * original name to the new name in info->rename_dict.
 *
 * Entries whose value is a direct (inline) object are copied under the name
 * "<prefix>inline_<id>", where <id> comes from info->next_inline_id.
 * Entries whose value is an indirect reference are named
 * "<prefix><num>_<gen>" after the referenced source object, and are copied
 * only if `dest` does not already contain that name.
 *
 * Objects belonging to the source document (src and everything read from it)
 * are always queried through src_ctx; objects of the destination document
 * through dest_ctx.
 *
 * Returns 0 on success, 1 if a generated name does not fit into the name
 * buffer, 2 if a key of `src` is not a name object.
 */
static int copy_and_rename_resource(fz_context *dest_ctx, pdf_obj *dest, 
	fz_context *src_ctx, pdf_obj *src, char *prefix, struct put_info *info)
{
	/* Holds prefix plus the generated suffix; truncation is detected via
	   the snprintf return value below. */
	char new_name[64];
	int i;
	int count = pdf_dict_len(src_ctx, src);

	for(i = 0; i < count; i++) {
		pdf_obj *src_key = pdf_dict_get_key(src_ctx, src, i);
		pdf_obj *src_val = pdf_dict_get_val(src_ctx, src, i);
		int written;

		if(!pdf_is_name(src_ctx, src_key)) {
			return(2);
		}

		/* if this is an inline resource, just copy this object into the new
		   resource dict */
		if(!pdf_is_indirect(src_ctx, src_val)) {
			written = snprintf(new_name, sizeof(new_name),
				"%sinline_%d", prefix, info->next_inline_id++);
			if(written < 0 || (size_t)written >= sizeof(new_name))
				return(1); /* not enough space */

			pdf_obj *new_res = copy_unassigned_object_continue(dest_ctx,
				info->dest_doc, src_ctx, info->src_doc, src_val, &info->new_ids);

			/* NOTE(review): this looks like leftover debug output on
			   stdout; kept as is, confirm whether it can be removed. */
			printf("dump it...\n");
			pdf_fprint_obj(dest_ctx, stdout, new_res, 0);

			/* now reference this new object in the resource object of this sheet */
			pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name);

			pdf_dict_put(dest_ctx, dest, dest_key, new_res);
			pdf_drop_obj(dest_ctx, dest_key);
			pdf_drop_obj(dest_ctx, new_res);
		} else {
			/* The new name of resource objects is always the num/gen of the
			   referenced object in the src-file. Thus we can check by that name
			   if the object was already referenced by another page of this sheet.
			   src_val belongs to the source document, so it is queried with
			   src_ctx. */
			written = snprintf(new_name, sizeof(new_name),
				"%s%d_%d", prefix, pdf_to_num(src_ctx, src_val), pdf_to_gen(src_ctx, src_val));
			if(written < 0 || (size_t)written >= sizeof(new_name))
				return(1); /* not enough space */

			if(pdf_dict_gets(dest_ctx, dest, new_name) == NULL) {
				/* if this resource is not inlined and not already in the
				   resource-dict of the current sheet... */

				/* ...copy the referenced resource to the new document!
				   If this object has copied already (for another sheet in dest_doc),
				   copy_object_continue() will do nothing */
				pdf_obj *new_res = copy_object_continue(dest_ctx, info->dest_doc,
					src_ctx, info->src_doc, src_val, &info->new_ids);

				/* now reference this new object in the resource object of this sheet */
				pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name);

				pdf_dict_put(dest_ctx, dest, dest_key, new_res);
				pdf_drop_obj(dest_ctx, dest_key);
				pdf_drop_obj(dest_ctx, new_res);
			}
		}

		/* even if it was used on another sheet or on this sheet, add it to the
		   rename-dict for this sheet! Because it could have different names
		   on different source-pages. src_key belongs to the source document,
		   so its name is read with src_ctx. */
		pdf_obj *rename_key = pdf_new_name(dest_ctx, info->dest_doc, pdf_to_name(src_ctx, src_key));
		pdf_obj *rename_val = pdf_new_name(dest_ctx, info->dest_doc, new_name);
		pdf_dict_put(dest_ctx, info->rename_dict, rename_key, rename_val);
		pdf_drop_obj(dest_ctx, rename_key);
		pdf_drop_obj(dest_ctx, rename_val);
	}

	return(0);
}