Exemple #1
0
/*
 * Create a filter given a name and param dictionary.
 */
static fz_stream *
build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params)
{
	fz_compression_params local_params;

	if (params == NULL)
		params = &local_params;

	build_compression_params(ctx, f, p, params);

	/* If we were using params we were passed in, and we successfully
	 * recognised the image type, we can use the existing filter and
	 * shortstop here. */
	if (params != &local_params && params->type != FZ_IMAGE_RAW)
		return chain;

	if (params->type != FZ_IMAGE_RAW)
		return fz_open_image_decomp_stream(ctx, chain, params, NULL);

	if (pdf_name_eq(ctx, f, PDF_NAME_ASCIIHexDecode) || pdf_name_eq(ctx, f, PDF_NAME_AHx))
		return fz_open_ahxd(ctx, chain);

	else if (pdf_name_eq(ctx, f, PDF_NAME_ASCII85Decode) || pdf_name_eq(ctx, f, PDF_NAME_A85))
		return fz_open_a85d(ctx, chain);

	else if (pdf_name_eq(ctx, f, PDF_NAME_JBIG2Decode))
	{
		fz_jbig2_globals *globals = NULL;
		pdf_obj *obj = pdf_dict_get(ctx, p, PDF_NAME_JBIG2Globals);
		if (pdf_is_indirect(ctx, obj))
			globals = pdf_load_jbig2_globals(ctx, doc, obj);
		/* fz_open_jbig2d takes possession of globals */
		return fz_open_jbig2d(ctx, chain, globals);
	}

	else if (pdf_name_eq(ctx, f, PDF_NAME_JPXDecode))
		return chain; /* JPX decoding is special cased in the image loading code */

	else if (pdf_name_eq(ctx, f, PDF_NAME_Crypt))
	{
		pdf_obj *name;

		if (!doc->crypt)
		{
			fz_warn(ctx, "crypt filter in unencrypted document");
			return chain;
		}

		name = pdf_dict_get(ctx, p, PDF_NAME_Name);
		if (pdf_is_name(ctx, name))
			return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen);

		return chain;
	}

	fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f));
	return chain;
}
Exemple #2
0
static void
pdf_write_widget_appearance(fz_context *ctx, pdf_annot *annot, fz_buffer *buf,
	fz_rect *rect, fz_rect *bbox, fz_matrix *matrix, pdf_obj **res)
{
	pdf_obj *ft = pdf_dict_get_inheritable(ctx, annot->obj, PDF_NAME(FT));
	if (pdf_name_eq(ctx, ft, PDF_NAME(Tx)))
	{
		int ff = pdf_field_flags(ctx, annot->obj);
		char *format = NULL;
		const char *text = NULL;
		if (!annot->ignore_trigger_events)
		{
			format = pdf_field_event_format(ctx, annot->page->doc, annot->obj);
			if (format)
				text = format;
			else
				text = pdf_field_value(ctx, annot->obj);
		}
		else
		{
			text = pdf_field_value(ctx, annot->obj);
		}
		fz_try(ctx)
			pdf_write_tx_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res, text, ff);
		fz_always(ctx)
			fz_free(ctx, format);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}
	else if (pdf_name_eq(ctx, ft, PDF_NAME(Ch)))
	{
		pdf_write_ch_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res);
	}
	else if (pdf_name_eq(ctx, ft, PDF_NAME(Sig)))
	{
		pdf_write_sig_widget_appearance(ctx, annot, buf, rect, bbox, matrix, res);
	}
	else
	{
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create appearance stream for %s widgets", pdf_to_name(ctx, ft));
	}
}
Exemple #3
0
static int is_allowed_subtype(fz_context *ctx, pdf_annot *annot, pdf_obj *property, pdf_obj **allowed)
{
	pdf_obj *subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype));
	while (*allowed) {
		if (pdf_name_eq(ctx, subtype, *allowed))
			return 1;
		allowed++;
	}

	return 0;
}
static void check_allowed_subtypes(fz_context *ctx, pdf_annot *annot, pdf_obj *property, pdf_obj **allowed)
{
	pdf_obj *subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME_Subtype);
	while (allowed) {
		if (pdf_name_eq(ctx, subtype, *allowed))
			return;
		allowed++;
	}

	fz_throw(ctx, FZ_ERROR_GENERIC, "%s annotations have no %s property", pdf_to_name(ctx, subtype), pdf_to_name(ctx, property));
}
Exemple #5
0
const char *
pdf_annot_icon_name(fz_context *ctx, pdf_annot *annot)
{
	pdf_obj *name;
	check_allowed_subtypes(ctx, annot, PDF_NAME(Name), icon_name_subtypes);
	name = pdf_dict_get(ctx, annot->obj, PDF_NAME(Name));
	if (!name)
	{
		pdf_obj *subtype = pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype));
		if (pdf_name_eq(ctx, subtype, PDF_NAME(Text)))
			return "Note";
		if (pdf_name_eq(ctx, subtype, PDF_NAME(Stamp)))
			return "Draft";
		if (pdf_name_eq(ctx, subtype, PDF_NAME(FileAttachment)))
			return "PushPin";
		if (pdf_name_eq(ctx, subtype, PDF_NAME(Sound)))
			return "Speaker";
	}
	return pdf_to_name(ctx, name);
}
Exemple #6
0
/* Image specific methods */
static void
res_image_init(fz_context *ctx, pdf_document *doc, pdf_res_table *table)
{
	int len, k;
	pdf_obj *obj;
	pdf_obj *type;
	pdf_obj *res = NULL;
	fz_image *image = NULL;
	unsigned char digest[16];

	fz_var(obj);
	fz_var(image);
	fz_var(res);

	fz_try(ctx)
	{
		table->hash = fz_new_hash_table(ctx, 4096, 16, -1);
		len = pdf_count_objects(ctx, doc);
		for (k = 1; k < len; k++)
		{
			obj = pdf_load_object(ctx, doc, k, 0);
			type = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);
			if (pdf_name_eq(ctx, type, PDF_NAME_Image))
			{
				image = pdf_load_image(ctx, doc, obj);
				res_image_get_md5(ctx, image, digest);
				fz_drop_image(ctx, image);
				image = NULL;

				/* Don't allow overwrites. */
				if (fz_hash_find(ctx, table->hash, digest) == NULL)
					fz_hash_insert(ctx, table->hash, digest, obj);
			}
			else
			{
				pdf_drop_obj(ctx, obj);
			}
			obj = NULL;
		}
	}
	fz_always(ctx)
	{
		fz_drop_image(ctx, image);
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		res_table_free(ctx, table);
		fz_rethrow(ctx);
	}
}
Exemple #7
0
char *
pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_obj *dest)
{
	pdf_obj *filename = NULL;
	const char *path;
	char *uri;
	char frag[256];

	if (pdf_is_string(ctx, file_spec))
		filename = file_spec;

	if (pdf_is_dict(ctx, file_spec)) {
#ifdef _WIN32
		filename = pdf_dict_get(ctx, file_spec, PDF_NAME(DOS));
#else
		filename = pdf_dict_get(ctx, file_spec, PDF_NAME(Unix));
#endif
		if (!filename)
			filename = pdf_dict_geta(ctx, file_spec, PDF_NAME(UF), PDF_NAME(F));
	}

	if (!pdf_is_string(ctx, filename))
	{
		fz_warn(ctx, "cannot parse file specification");
		return NULL;
	}

	if (pdf_is_array(ctx, dest))
		fz_snprintf(frag, sizeof frag, "#page=%d", pdf_array_get_int(ctx, dest, 0) + 1);
	else if (pdf_is_name(ctx, dest))
		fz_snprintf(frag, sizeof frag, "#%s", pdf_to_name(ctx, dest));
	else if (pdf_is_string(ctx, dest))
		fz_snprintf(frag, sizeof frag, "#%s", pdf_to_str_buf(ctx, dest));
	else
		frag[0] = 0;

	path = pdf_to_text_string(ctx, filename);
	uri = NULL;
#ifdef _WIN32
	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, file_spec, PDF_NAME(FS)), PDF_NAME(URL)))
	{
		/* Fix up the drive letter (change "/C/Documents/Foo" to "C:/Documents/Foo") */
		if (path[0] == '/' && (('A' <= path[1] && path[1] <= 'Z') || ('a' <= path[1] && path[1] <= 'z')) && path[2] == '/')
			uri = fz_asprintf(ctx, "file://%c:%s%s", path[1], path+2, frag);
	}
#endif
	if (!uri)
		uri = fz_asprintf(ctx, "file://%s%s", path, frag);

	return uri;
}
Exemple #8
0
static void execute_action(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *a)
{
    if (a)
    {
        pdf_obj *type = pdf_dict_get(ctx, a, PDF_NAME_S);

        if (pdf_name_eq(ctx, type, PDF_NAME_JavaScript))
        {
            pdf_obj *js = pdf_dict_get(ctx, a, PDF_NAME_JS);
            if (js)
            {
                char *code = pdf_to_utf8(ctx, doc, js);
                fz_try(ctx)
                {
                    pdf_js_execute(doc->js, code);
                }
                fz_always(ctx)
                {
                    fz_free(ctx, code);
                }
                fz_catch(ctx)
                {
                    fz_rethrow(ctx);
                }
            }
        }
        else if (pdf_name_eq(ctx, type, PDF_NAME_ResetForm))
        {
            reset_form(ctx, doc, pdf_dict_get(ctx, a, PDF_NAME_Fields), pdf_to_int(ctx, pdf_dict_get(ctx, a, PDF_NAME_Flags)) & 1);
        }
        else if (pdf_name_eq(ctx, type, PDF_NAME_Named))
        {
            pdf_obj *name = pdf_dict_get(ctx, a, PDF_NAME_N);

            if (pdf_name_eq(ctx, name, PDF_NAME_Print))
                pdf_event_issue_print(ctx, doc);
        }
    }
Exemple #9
0
static fz_link *
pdf_load_link(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int pagenum, const fz_matrix *page_ctm)
{
	pdf_obj *action;
	pdf_obj *obj;
	fz_rect bbox;
	char *uri;
	fz_link *link = NULL;

	obj = pdf_dict_get(ctx, dict, PDF_NAME_Subtype);
	if (!pdf_name_eq(ctx, obj, PDF_NAME_Link))
		return NULL;

	obj = pdf_dict_get(ctx, dict, PDF_NAME_Rect);
	if (!obj)
		return NULL;

	pdf_to_rect(ctx, obj, &bbox);
	fz_transform_rect(&bbox, page_ctm);

	obj = pdf_dict_get(ctx, dict, PDF_NAME_Dest);
	if (obj)
		uri = pdf_parse_link_dest(ctx, doc, obj);
	else
	{
		action = pdf_dict_get(ctx, dict, PDF_NAME_A);
		/* fall back to additional action button's down/up action */
		if (!action)
			action = pdf_dict_geta(ctx, pdf_dict_get(ctx, dict, PDF_NAME_AA), PDF_NAME_U, PDF_NAME_D);
		uri = pdf_parse_link_action(ctx, doc, action, pagenum);
	}

	if (!uri)
		return NULL;

	fz_try(ctx)
		link = fz_new_link(ctx, &bbox, doc, uri);
	fz_always(ctx)
		fz_free(ctx, uri);
	fz_catch(ctx)
		fz_rethrow(ctx);

	return link;
}
static int dest_is_valid(fz_context *ctx, pdf_obj *o, int page_count, int *page_object_nums, pdf_obj *names_list)
{
	pdf_obj *p;

	p = pdf_dict_get(ctx, o, PDF_NAME_A);
	if (pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo) &&
		!string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
		return 0;

	p = pdf_dict_get(ctx, o, PDF_NAME_Dest);
	if (p == NULL)
	{}
	else if (pdf_is_string(ctx, p))
	{
		return string_in_names_list(ctx, p, names_list);
	}
	else if (!dest_is_valid_page(ctx, pdf_array_get(ctx, p, 0), page_object_nums, page_count))
		return 0;

	return 1;
}
Exemple #11
0
/*
	Performs the same task as
	pdf_clean_page_contents, but with an optional text filter
	function.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
		pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	pdf_obj *resources;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		contents = pdf_page_contents(ctx, page);
		resources = pdf_page_resources(ctx, page);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		if (sanitize)
		{
			res = pdf_new_dict(ctx, doc, 1);
			proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg);
			pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
			pdf_close_processor(ctx, proc_filter);
		}
		else
		{
			res = pdf_keep_obj(ctx, resources);
			pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie);
		}
		pdf_close_processor(ctx, proc_buffer);

		/* Deal with page content stream. */

		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_add_object(ctx, doc, new_obj);
			contents = new_ref;
			pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents);
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME(Filter));
			pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms));
		}

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME(G));
				if (!o)
					continue;
				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat_res;
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
				if (!pat)
					continue;
				pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources));
				if (pat_res == NULL)
					pat_res = resources;
				if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj_res;
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
				if (!xobj)
					continue;
				xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources));
				if (xobj_res == NULL)
					xobj_res = resources;
				if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype))))
					pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
				if (!o)
					continue;
				if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype))))
					pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet));
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj);

		/* ColorSpace - no cleaning possible. */
		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		/* Update resource dictionary */
		if (sanitize)
		{
			pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Exemple #12
0
static int isfontdesc(pdf_obj *obj)
{
	pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME_Type);
	return pdf_name_eq(ctx, type, PDF_NAME_FontDescriptor);
}
Exemple #13
0
static void savefont(pdf_obj *dict, int num)
{
	char namebuf[1024];
	fz_buffer *buf;
	pdf_obj *stream = NULL;
	pdf_obj *obj;
	char *ext = "";
	fz_output *out;
	char *fontname = "font";
	size_t len;
	unsigned char *data;

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontName);
	if (obj)
		fontname = pdf_to_name(ctx, obj);

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile);
	if (obj)
	{
		stream = obj;
		ext = "pfa";
	}

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile2);
	if (obj)
	{
		stream = obj;
		ext = "ttf";
	}

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile3);
	if (obj)
	{
		stream = obj;

		obj = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);
		if (obj && !pdf_is_name(ctx, obj))
			fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype");

		if (pdf_name_eq(ctx, obj, PDF_NAME_Type1C))
			ext = "cff";
		else if (pdf_name_eq(ctx, obj, PDF_NAME_CIDFontType0C))
			ext = "cid";
		else if (pdf_name_eq(ctx, obj, PDF_NAME_OpenType))
			ext = "otf";
		else
			fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", pdf_to_name(ctx, obj));
	}

	if (!stream)
	{
		fz_warn(ctx, "unhandled font type");
		return;
	}

	buf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, stream));
	len = fz_buffer_storage(ctx, buf, &data);
	fz_try(ctx)
	{
		snprintf(namebuf, sizeof(namebuf), "%s-%04d.%s", fontname, num, ext);
		printf("extracting font %s\n", namebuf);
		out = fz_new_output_with_path(ctx, namebuf, 0);
		fz_try(ctx)
			fz_write(ctx, out, data, len);
		fz_always(ctx)
			fz_drop_output(ctx, out);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}
	fz_always(ctx)
		fz_drop_buffer(ctx, buf);
	fz_catch(ctx)
		fz_rethrow(ctx);
}
Exemple #14
0
static void
pdf_process_extgstate(fz_context *ctx, pdf_processor *proc, pdf_csi *csi, pdf_obj *dict)
{
	pdf_obj *obj;

	obj = pdf_dict_get(ctx, dict, PDF_NAME_LW);
	if (pdf_is_number(ctx, obj) && proc->op_w)
		proc->op_w(ctx, proc, pdf_to_real(ctx, obj));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_LC);
	if (pdf_is_int(ctx, obj) && proc->op_J)
		proc->op_J(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_LJ);
	if (pdf_is_int(ctx, obj) && proc->op_j)
		proc->op_j(ctx, proc, fz_clampi(pdf_to_int(ctx, obj), 0, 2));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_ML);
	if (pdf_is_number(ctx, obj) && proc->op_M)
		proc->op_M(ctx, proc, pdf_to_real(ctx, obj));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_D);
	if (pdf_is_array(ctx, obj) && proc->op_d)
	{
		pdf_obj *dash_array = pdf_array_get(ctx, obj, 0);
		pdf_obj *dash_phase = pdf_array_get(ctx, obj, 1);
		proc->op_d(ctx, proc, dash_array, pdf_to_real(ctx, dash_phase));
	}

	obj = pdf_dict_get(ctx, dict, PDF_NAME_RI);
	if (pdf_is_name(ctx, obj) && proc->op_ri)
		proc->op_ri(ctx, proc, pdf_to_name(ctx, obj));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_FL);
	if (pdf_is_number(ctx, obj) && proc->op_i)
		proc->op_i(ctx, proc, pdf_to_real(ctx, obj));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_Font);
	if (pdf_is_array(ctx, obj) && proc->op_Tf)
	{
		pdf_obj *font_ref = pdf_array_get(ctx, obj, 0);
		pdf_obj *font_size = pdf_array_get(ctx, obj, 1);
		pdf_font_desc *font = load_font_or_hail_mary(ctx, csi->doc, csi->rdb, font_ref, 0, csi->cookie);
		fz_try(ctx)
			proc->op_Tf(ctx, proc, "ExtGState", font, pdf_to_real(ctx, font_size));
		fz_always(ctx)
			pdf_drop_font(ctx, font);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}

	/* transfer functions */

	obj = pdf_dict_get(ctx, dict, PDF_NAME_TR2);
	if (pdf_is_name(ctx, obj))
		if (!pdf_name_eq(ctx, obj, PDF_NAME_Identity) && !pdf_name_eq(ctx, obj, PDF_NAME_Default))
			fz_warn(ctx, "ignoring transfer function");
	if (!obj) /* TR is ignored in the presence of TR2 */
	{
		pdf_obj *tr = pdf_dict_get(ctx, dict, PDF_NAME_TR);
		if (pdf_is_name(ctx, tr))
			if (!pdf_name_eq(ctx, tr, PDF_NAME_Identity))
				fz_warn(ctx, "ignoring transfer function");
	}

	/* transparency state */

	obj = pdf_dict_get(ctx, dict, PDF_NAME_CA);
	if (pdf_is_number(ctx, obj) && proc->op_gs_CA)
		proc->op_gs_CA(ctx, proc, pdf_to_real(ctx, obj));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_ca);
	if (pdf_is_number(ctx, obj) && proc->op_gs_ca)
		proc->op_gs_ca(ctx, proc, pdf_to_real(ctx, obj));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_BM);
	if (pdf_is_array(ctx, obj))
		obj = pdf_array_get(ctx, obj, 0);
	if (pdf_is_name(ctx, obj) && proc->op_gs_BM)
		proc->op_gs_BM(ctx, proc, pdf_to_name(ctx, obj));

	obj = pdf_dict_get(ctx, dict, PDF_NAME_SMask);
	if (proc->op_gs_SMask)
	{
		if (pdf_is_dict(ctx, obj))
		{
			pdf_xobject *xobj;
			pdf_obj *group, *s, *bc, *tr;
			float softmask_bc[FZ_MAX_COLORS];
			fz_colorspace *colorspace;
			int colorspace_n = 1;
			int k, luminosity;

			fz_var(xobj);

			group = pdf_dict_get(ctx, obj, PDF_NAME_G);
			if (!group)
				fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load softmask xobject (%d 0 R)", pdf_to_num(ctx, obj));
			xobj = pdf_load_xobject(ctx, csi->doc, group);

			fz_try(ctx)
			{
				colorspace = pdf_xobject_colorspace(ctx, xobj);
				if (colorspace)
				{
					colorspace_n = fz_colorspace_n(ctx, colorspace);
					fz_drop_colorspace(ctx, colorspace);
				}

				/* Default background color is black. */
				for (k = 0; k < colorspace_n; k++)
					softmask_bc[k] = 0;
				/* Which in CMYK means not all zeros! This should really be
				 * a test for subtractive color spaces, but this will have
				 * to do for now. */
				if (colorspace == fz_device_cmyk(ctx))
					softmask_bc[3] = 1.0;

				bc = pdf_dict_get(ctx, obj, PDF_NAME_BC);
				if (pdf_is_array(ctx, bc))
				{
					for (k = 0; k < colorspace_n; k++)
						softmask_bc[k] = pdf_to_real(ctx, pdf_array_get(ctx, bc, k));
				}

				s = pdf_dict_get(ctx, obj, PDF_NAME_S);
				if (pdf_name_eq(ctx, s, PDF_NAME_Luminosity))
					luminosity = 1;
				else
					luminosity = 0;

				tr = pdf_dict_get(ctx, obj, PDF_NAME_TR);
				if (tr && !pdf_name_eq(ctx, tr, PDF_NAME_Identity))
					fz_warn(ctx, "ignoring transfer function");

				proc->op_gs_SMask(ctx, proc, xobj, csi->rdb, softmask_bc, luminosity);
			}
			fz_always(ctx)
			{
				pdf_drop_xobject(ctx, xobj);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}
		else if (pdf_is_name(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_None))
		{
			proc->op_gs_SMask(ctx, proc, NULL, NULL, NULL, 0);
		}
	}
Exemple #15
0
static int isimage(pdf_obj *obj)
{
	pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);
	return pdf_name_eq(ctx, type, PDF_NAME_Image);
}
Exemple #16
0
static void savefont(pdf_obj *dict, int num)
{
    char namebuf[1024];
    fz_buffer *buf;
    pdf_obj *stream = NULL;
    pdf_obj *obj;
    char *ext = "";
    FILE *f;
    char *fontname = "font";
    int n, len;
    unsigned char *data;

    obj = pdf_dict_get(ctx, dict, PDF_NAME_FontName);
    if (obj)
        fontname = pdf_to_name(ctx, obj);

    obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile);
    if (obj)
    {
        stream = obj;
        ext = "pfa";
    }

    obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile2);
    if (obj)
    {
        stream = obj;
        ext = "ttf";
    }

    obj = pdf_dict_get(ctx, dict, PDF_NAME_FontFile3);
    if (obj)
    {
        stream = obj;

        obj = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);
        if (obj && !pdf_is_name(ctx, obj))
            fz_throw(ctx, FZ_ERROR_GENERIC, "invalid font descriptor subtype");

        if (pdf_name_eq(ctx, obj, PDF_NAME_Type1C))
            ext = "cff";
        else if (pdf_name_eq(ctx, obj, PDF_NAME_CIDFontType0C))
            ext = "cid";
        else if (pdf_name_eq(ctx, obj, PDF_NAME_OpenType))
            ext = "otf";
        else
            fz_throw(ctx, FZ_ERROR_GENERIC, "unhandled font type '%s'", pdf_to_name(ctx, obj));
    }

    if (!stream)
    {
        fz_warn(ctx, "unhandled font type");
        return;
    }

    buf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, stream), pdf_to_gen(ctx, stream));

    snprintf(namebuf, sizeof(namebuf), "%s-%04d.%s", fontname, num, ext);
    printf("extracting font %s\n", namebuf);

    f = fz_fopen(namebuf, "wb");
    if (!f)
        fz_throw(ctx, FZ_ERROR_GENERIC, "cannot create font file");

    len = fz_buffer_storage(ctx, buf, &data);
    n = fwrite(data, 1, len, f);
    if (n < len)
        fz_throw(ctx, FZ_ERROR_GENERIC, "cannot write font file");

    if (fclose(f) < 0)
        fz_throw(ctx, FZ_ERROR_GENERIC, "cannot close font file");

    fz_drop_buffer(ctx, buf);
}
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	int pagecount;
	int i;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (pdf_array_contains(ctx, pdf_dict_get(ctx, pages, PDF_NAME_Kids), dest))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, names_list);
		pdf_drop_obj(ctx, olddests);
	}

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	pagecount = pdf_count_pages(ctx, doc);
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);
			pdf_obj *p;

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			p = pdf_dict_get(ctx, o, PDF_NAME_A);
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo))
				continue;

			if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
				continue;

			/* FIXME: Should probably look at Next too */

			/* Remove this annotation */
			pdf_array_delete(ctx, annots, j);
			j--;
		}
	}
}
Exemple #18
0
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);

		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);

		contents = page->contents;
		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_new_ref(ctx, doc, new_obj);
			page->contents = contents = new_ref;
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME_Filter);
			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
		}

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME_G);
				if (!o)
					continue;

				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1);
			}
		}

		/* ColorSpace - no cleaning possible */

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0);
			}
		}

		/* Shading - no cleaning possible */

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
					continue;

				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Font);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
				{
					pdf_clean_type3(ctx, doc, o, page->resources, cookie);
				}
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);

		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		pdf_update_stream(ctx, doc, contents, buffer, 0);
		pdf_drop_obj(ctx, page->resources);
		ref = pdf_new_ref(ctx, doc, res);
		page->resources = pdf_keep_obj(ctx, ref);
		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning page");
	}
}
Exemple #19
0
int
pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs, pdf_obj **root)
{
	fz_stream *file = doc->file;
	pdf_token tok;
	int stm_len;

	*stmofsp = 0;
	if (stmlenp)
		*stmlenp = -1;

	stm_len = 0;

	/* On entry to this function, we know that we've just seen
	 * '<int> <int> obj'. We expect the next thing we see to be a
	 * pdf object. Regardless of the type of thing we meet next
	 * we only need to fully parse it if it is a dictionary. */
	tok = pdf_lex(ctx, file, buf);

	if (tok == PDF_TOK_OPEN_DICT)
	{
		pdf_obj *dict, *obj;

		fz_try(ctx)
		{
			dict = pdf_parse_dict(ctx, doc, file, buf);
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			/* Don't let a broken object at EOF overwrite a good one */
			if (file->eof)
				fz_rethrow(ctx);
			/* Silently swallow the error */
			dict = pdf_new_dict(ctx, NULL, 2);
		}

		/* We must be careful not to try to resolve any indirections
		 * here. We have just read dict, so we know it to be a non
		 * indirected dictionary. Before we look at any values that
		 * we get back from looking up in it, we need to check they
		 * aren't indirected. */

		if (encrypt || id || root)
		{
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_XRef))
			{
				if (encrypt)
				{
					obj = pdf_dict_get(ctx, dict, PDF_NAME_Encrypt);
					if (obj)
					{
						pdf_drop_obj(ctx, *encrypt);
						*encrypt = pdf_keep_obj(ctx, obj);
					}
				}

				if (id)
				{
					obj = pdf_dict_get(ctx, dict, PDF_NAME_ID);
					if (obj)
					{
						pdf_drop_obj(ctx, *id);
						*id = pdf_keep_obj(ctx, obj);
					}
				}

				if (root)
					*root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Root));
			}
		}

		obj = pdf_dict_get(ctx, dict, PDF_NAME_Length);
		if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj))
			stm_len = pdf_to_int(ctx, obj);

		if (doc->file_reading_linearly && page)
		{
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_Page))
			{
				pdf_drop_obj(ctx, *page);
				*page = pdf_keep_obj(ctx, dict);
			}
		}

		pdf_drop_obj(ctx, dict);
	}
Exemple #20
0
void pdf_add_portfolio_schema(fz_context *ctx, pdf_document *doc, int entry, const pdf_portfolio_schema *info)
{
	pdf_portfolio **pp;
	pdf_portfolio *p;
	pdf_obj *s;
	pdf_obj *sc;
	int num;
	char str_name[32];
	pdf_obj *num_name = NULL;

	if (!doc)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Bad pdf_add_portfolio_schema call");

	if (doc->portfolio == NULL)
		load_portfolio(ctx, doc);

	fz_var(num_name);

	pp = &doc->portfolio;
	while (*pp && entry > 0)
		pp = &(*pp)->next, entry--;

	fz_try(ctx)
	{
		/* Find a name for the new schema entry */
		num = 0;
		do
		{
			pdf_drop_obj(ctx, num_name);
			num_name = NULL;
			num++;
			sprintf(str_name, "%d", num);
			num_name = pdf_new_name(ctx, doc, str_name);
			p = doc->portfolio;
			for (p = doc->portfolio; p; p = p->next)
				if (pdf_name_eq(ctx, num_name, p->key))
					break;
		}
		while (p);

		sc = pdf_new_dict(ctx, doc, 4);
		pdf_dict_put_drop(ctx, sc, PDF_NAME_E, pdf_new_bool(ctx, doc, !!info->editable));
		pdf_dict_put_drop(ctx, sc, PDF_NAME_V, pdf_new_bool(ctx, doc, !!info->visible));
		pdf_dict_put_drop(ctx, sc, PDF_NAME_N, info->name);
		pdf_dict_put(ctx, sc, PDF_NAME_Subtype, PDF_NAME_S);

		/* Add to our linked list (in the correct sorted place) */
		p = fz_malloc_struct(ctx, pdf_portfolio);
		p->entry = *info;
		p->sort = 0; /* Will be rewritten in a mo */
		p->key = pdf_keep_obj(ctx, num_name);
		p->val = pdf_keep_obj(ctx, sc);
		p->next = *pp;
		*pp = p;

		/* Add the key to the schema */
		s = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Collection, PDF_NAME_Schema, NULL);
		pdf_dict_put(ctx, s, num_name, sc);

		/* Renumber the schema entries */
		for (num = 0, p = doc->portfolio; p; num++, p = p->next)
		{
			pdf_dict_put_drop(ctx, p->val, PDF_NAME_O, pdf_new_int(ctx, doc, num));
			p->sort = num;
		}
	}
	fz_always(ctx)
		pdf_drop_obj(ctx, num_name);
	fz_catch(ctx)
		fz_rethrow(ctx);
}
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation */
				pdf_array_delete(ctx, annots, j);
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
Exemple #22
0
char *
pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest)
{
	pdf_obj *obj, *pageobj;
	fz_rect mediabox;
	fz_matrix pagectm;
	const char *ld;
	int page, x, y, h;

	dest = resolve_dest(ctx, doc, dest);
	if (dest == NULL)
	{
		fz_warn(ctx, "undefined link destination");
		return NULL;
	}

	if (pdf_is_name(ctx, dest))
	{
		ld = pdf_to_name(ctx, dest);
		return fz_strdup(ctx, ld);
	}
	else if (pdf_is_string(ctx, dest))
	{
		ld = pdf_to_str_buf(ctx, dest);
		return fz_strdup(ctx, ld);
	}

	pageobj = pdf_array_get(ctx, dest, 0);
	if (pdf_is_int(ctx, pageobj))
	{
		page = pdf_to_int(ctx, pageobj);
		pageobj = pdf_lookup_page_obj(ctx, doc, page);
	}
	else
	{
		fz_try(ctx)
			page = pdf_lookup_page_number(ctx, doc, pageobj);
		fz_catch(ctx)
			page = -1;
	}

	if (page < 0)
		return NULL;

	obj = pdf_array_get(ctx, dest, 1);
	if (obj)
	{
		/* Link coords use a coordinate space that does not seem to respect Rotate or UserUnit. */
		/* All we need to do is figure out the page height to flip the coordinate space. */
		pdf_page_obj_transform(ctx, pageobj, &mediabox, &pagectm);
		mediabox = fz_transform_rect(mediabox, pagectm);
		h = mediabox.y1 - mediabox.y0;

		if (pdf_name_eq(ctx, obj, PDF_NAME(XYZ)))
		{
			x = pdf_array_get_int(ctx, dest, 2);
			y = h - pdf_array_get_int(ctx, dest, 3);
		}
		else if (pdf_name_eq(ctx, obj, PDF_NAME(FitR)))
		{
			x = pdf_array_get_int(ctx, dest, 2);
			y = h - pdf_array_get_int(ctx, dest, 5);
		}
		else if (pdf_name_eq(ctx, obj, PDF_NAME(FitH)) || pdf_name_eq(ctx, obj, PDF_NAME(FitBH)))
		{
			x = 0;
			y = h - pdf_array_get_int(ctx, dest, 2);
		}
		else if (pdf_name_eq(ctx, obj, PDF_NAME(FitV)) || pdf_name_eq(ctx, obj, PDF_NAME(FitBV)))
		{
			x = pdf_array_get_int(ctx, dest, 2);
			y = 0;
		}
		else
		{
			x = 0;
			y = 0;
		}
		return fz_asprintf(ctx, "#%d,%d,%d", page + 1, x, y);
	}

	return fz_asprintf(ctx, "#%d", page + 1);
}
Exemple #23
0
char *
pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest)
{
	pdf_obj *obj;
	char buf[256];
	const char *ld;
	int page;
	int x, y;

	dest = resolve_dest(ctx, doc, dest);
	if (dest == NULL)
	{
		fz_warn(ctx, "undefined link destination");
		return NULL;
	}

	if (pdf_is_name(ctx, dest))
	{
		ld = pdf_to_name(ctx, dest);
		return fz_strdup(ctx, ld);
	}
	else if (pdf_is_string(ctx, dest))
	{
		ld = pdf_to_str_buf(ctx, dest);
		return fz_strdup(ctx, ld);
	}

	obj = pdf_array_get(ctx, dest, 0);
	if (pdf_is_int(ctx, obj))
		page = pdf_to_int(ctx, obj);
	else
	{
		fz_try(ctx)
			page = pdf_lookup_page_number(ctx, doc, obj);
		fz_catch(ctx)
			page = -1;
	}

	x = y = 0;
	obj = pdf_array_get(ctx, dest, 1);
	if (pdf_name_eq(ctx, obj, PDF_NAME_XYZ))
	{
		x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));
		y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 3));
	}
	else if (pdf_name_eq(ctx, obj, PDF_NAME_FitR))
	{
		x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));
		y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 5));
	}
	else if (pdf_name_eq(ctx, obj, PDF_NAME_FitH) || pdf_name_eq(ctx, obj, PDF_NAME_FitBH))
		y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));
	else if (pdf_name_eq(ctx, obj, PDF_NAME_FitV) || pdf_name_eq(ctx, obj, PDF_NAME_FitBV))
		x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2));

	if (page >= 0)
	{
		if (x != 0 || y != 0)
			fz_snprintf(buf, sizeof buf, "#%d,%d,%d", page + 1, x, y);
		else
			fz_snprintf(buf, sizeof buf, "#%d", page + 1);
		return fz_strdup(ctx, buf);
	}

	return NULL;
}
Exemple #24
0
char *
pdf_parse_link_action(fz_context *ctx, pdf_document *doc, pdf_obj *action, int pagenum)
{
	pdf_obj *obj, *dest, *file_spec;

	if (!action)
		return NULL;

	obj = pdf_dict_get(ctx, action, PDF_NAME_S);
	if (pdf_name_eq(ctx, PDF_NAME_GoTo, obj))
	{
		dest = pdf_dict_get(ctx, action, PDF_NAME_D);
		return pdf_parse_link_dest(ctx, doc, dest);
	}
	else if (pdf_name_eq(ctx, PDF_NAME_URI, obj))
	{
		/* URI entries are ASCII strings */
		const char *uri = pdf_to_str_buf(ctx, pdf_dict_get(ctx, action, PDF_NAME_URI));
		if (!fz_is_external_link(ctx, uri))
		{
			pdf_obj *uri_base_obj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/URI/Base");
			const char *uri_base = uri_base_obj ? pdf_to_str_buf(ctx, uri_base_obj) : "file://";
			char *new_uri = fz_malloc(ctx, strlen(uri_base) + strlen(uri) + 1);
			strcpy(new_uri, uri_base);
			strcat(new_uri, uri);
			return new_uri;
		}
		return fz_strdup(ctx, uri);
	}
	else if (pdf_name_eq(ctx, PDF_NAME_Launch, obj))
	{
		file_spec = pdf_dict_get(ctx, action, PDF_NAME_F);
		return pdf_parse_file_spec(ctx, doc, file_spec, NULL);
	}
	else if (pdf_name_eq(ctx, PDF_NAME_GoToR, obj))
	{
		dest = pdf_dict_get(ctx, action, PDF_NAME_D);
		file_spec = pdf_dict_get(ctx, action, PDF_NAME_F);
		return pdf_parse_file_spec(ctx, doc, file_spec, dest);
	}
	else if (pdf_name_eq(ctx, PDF_NAME_Named, obj))
	{
		dest = pdf_dict_get(ctx, action, PDF_NAME_N);

		if (pdf_name_eq(ctx, PDF_NAME_FirstPage, dest))
			pagenum = 0;
		else if (pdf_name_eq(ctx, PDF_NAME_LastPage, dest))
			pagenum = pdf_count_pages(ctx, doc) - 1;
		else if (pdf_name_eq(ctx, PDF_NAME_PrevPage, dest) && pagenum >= 0)
		{
			if (pagenum > 0)
				pagenum--;
		}
		else if (pdf_name_eq(ctx, PDF_NAME_NextPage, dest) && pagenum >= 0)
		{
			if (pagenum < pdf_count_pages(ctx, doc) - 1)
				pagenum++;
		}
		else
			return NULL;

		return fz_asprintf(ctx, "#%d", pagenum + 1);
	}

	return NULL;
}