Ejemplo n.º 1
0
/*
	Resolve an internal link URI of the form "#page" or "#page,x,y"
	to a zero-based page number.

	uri: the link URI. A NULL URI, or one that does not start with
	'#', is reported with a warning and rejected.

	xp, yp: optional outputs (either may be NULL). When at least one
	is supplied and the URI contains a comma, the integers following
	the first and the last comma are taken as a point, transformed by
	the target page's matrix, and stored here. They are left untouched
	when the URI has no comma.

	Returns the number following '#' minus one, or -1 for an
	unrecognised URI.
*/
int
pdf_resolve_link(fz_context *ctx, pdf_document *doc, const char *uri, float *xp, float *yp)
{
	const char *first_comma;
	const char *last_comma;
	int page_index;

	if (!uri || uri[0] != '#')
	{
		fz_warn(ctx, "unknown link uri '%s'", uri);
		return -1;
	}

	page_index = fz_atoi(uri + 1) - 1;

	/* Nobody asked for coordinates: the page number is all we need. */
	if (!xp && !yp)
		return page_index;

	first_comma = strchr(uri, ',');
	last_comma = strrchr(uri, ',');
	if (first_comma && last_comma)
	{
		fz_matrix page_ctm;
		fz_point pt;

		pt.x = fz_atoi(first_comma + 1);
		pt.y = fz_atoi(last_comma + 1);

		/* Map the point through the destination page's transform. */
		pdf_page_obj_transform(ctx, pdf_lookup_page_obj(ctx, doc, page_index), NULL, &page_ctm);
		fz_transform_point(&pt, &page_ctm);

		if (xp)
			*xp = pt.x;
		if (yp)
			*yp = pt.y;
	}

	return page_index;
}
Ejemplo n.º 2
0
/*
	Write one <page> element describing a page to 'out'.

	page: one-based page number.

	The page object is looked up first; if that fails (or yields NULL)
	a failure line is printed instead of the page details. Otherwise
	the five page boxes are reported through showbox and Rotate and
	UserUnit through shownum.

	Returns non-zero when the lookup failed or when any showbox/shownum
	call returned non-zero.
*/
static int
showpage(fz_context *ctx, pdf_document *doc, fz_output *out, int page)
{
	pdf_obj *page_ref;
	int status = 0;

	fz_printf(ctx, out, "<page pagenum=\"%d\">\n", page);

	fz_try(ctx)
	{
		page_ref = pdf_lookup_page_obj(ctx, doc, page - 1);
		if (page_ref == NULL)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
	}
	fz_catch(ctx)
	{
		fz_printf(ctx, out, "Failed to gather information for page %d\n", page);
		status = 1;
	}

	if (status == 0)
	{
		/* Boxes are reported in this fixed order, before the numeric entries. */
		struct { char *label; pdf_obj *key; } boxes[] = {
			{ "MediaBox", PDF_NAME_MediaBox },
			{ "CropBox", PDF_NAME_CropBox },
			{ "ArtBox", PDF_NAME_ArtBox },
			{ "BleedBox", PDF_NAME_BleedBox },
			{ "TrimBox", PDF_NAME_TrimBox },
		};
		size_t k;

		/* Every entry is always attempted; failures are accumulated. */
		for (k = 0; k < sizeof boxes / sizeof boxes[0]; k++)
			status |= showbox(ctx, out, page_ref, boxes[k].label, boxes[k].key);

		status |= shownum(ctx, out, page_ref, "Rotate", PDF_NAME_Rotate);
		status |= shownum(ctx, out, page_ref, "UserUnit", PDF_NAME_UserUnit);
	}

	fz_printf(ctx, out, "</page>\n");

	return status;
}
/*
	Re-parent one page and append it to a replacement kids array.

	page is one-based. The page dictionary's Parent entry is set to
	'parent', and the page reference is pushed onto 'kids'.
*/
static void retainpage(fz_context *ctx, pdf_document *doc, pdf_obj *parent, pdf_obj *kids, int page)
{
	pdf_obj *page_ref;
	pdf_obj *page_dict;

	page_ref = pdf_lookup_page_obj(ctx, doc, page - 1);
	page_dict = pdf_resolve_indirect(ctx, page_ref);

	/* Point the page at its new parent node. */
	pdf_dict_put(ctx, page_dict, PDF_NAME_Parent, parent);

	/* The kids array stores the reference, not the resolved dictionary. */
	pdf_array_push(ctx, kids, page_ref);
}
Ejemplo n.º 4
0
/*
	Copy one page from the source document (doc_src) into the
	destination document (doc_des).

	page_from: one-based page number in the source document.
	page_to: one-based position at which the copy is inserted in the
	destination document.
	graft_map: passed through to pdf_graft_object for every copied value.

	Only the keys listed in known_page_objs are copied. Any error is
	rethrown after the locally owned objects have been released.
*/
static void page_merge(int page_from, int page_to, pdf_graft_map *graft_map)
{
	pdf_obj *pageref = NULL;
	pdf_obj *page_dict = NULL;
	pdf_obj *handoff;
	pdf_obj *obj = NULL, *ref = NULL;
	/* Include minimal number of objects for page.  Do not include items that
	 * reference other pages */
	pdf_obj *known_page_objs[] = { PDF_NAME_Contents, PDF_NAME_Resources,
		PDF_NAME_MediaBox, PDF_NAME_CropBox, PDF_NAME_BleedBox, PDF_NAME_TrimBox,
		PDF_NAME_ArtBox, PDF_NAME_Rotate, PDF_NAME_UserUnit};
	int n = nelem(known_page_objs);
	int i;
	int num;

	fz_var(page_dict);
	fz_var(obj);
	fz_var(ref);

	fz_try(ctx)
	{
		pageref = pdf_lookup_page_obj(ctx, doc_src, page_from - 1);

		/* Make a new dictionary and copy over the items from the source object to
		* the new dict that we want to deep copy. */
		page_dict = pdf_new_dict(ctx, doc_des, 4);

		pdf_dict_put_drop(ctx, page_dict, PDF_NAME_Type, PDF_NAME_Page);

		for (i = 0; i < n; i++)
		{
			/* Borrowed from the source page: kept in its own variable so the
			 * cleanup code below never drops a reference we do not own. */
			pdf_obj *val = pdf_dict_get(ctx, pageref, known_page_objs[i]);
			if (val != NULL)
				pdf_dict_put_drop(ctx, page_dict, known_page_objs[i], pdf_graft_object(ctx, doc_des, doc_src, val, graft_map));
		}

		/* Add the dictionary. Ownership of page_dict passes to the _drop call,
		 * so forget our pointer first to avoid releasing it a second time. */
		handoff = page_dict;
		page_dict = NULL;
		obj = pdf_add_object_drop(ctx, doc_des, handoff);

		/* Get indirect ref */
		num = pdf_to_num(ctx, obj);
		ref = pdf_new_indirect(ctx, doc_des, num, 0);

		/* Insert */
		pdf_insert_page(ctx, doc_des, page_to - 1, ref);
	}
	fz_always(ctx)
	{
		/* page_dict is only non-NULL here if we failed before handing it over. */
		pdf_drop_obj(ctx, page_dict);
		pdf_drop_obj(ctx, obj);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Ejemplo n.º 5
0
/*
	Re-parent one page and append it to a replacement kids array.

	page is one-based. Inheritable page items are flattened on the
	page before its Parent entry is replaced with 'parent'; the page
	reference is then pushed onto 'kids'.
*/
static void retainpage(fz_context *ctx, pdf_document *doc, pdf_obj *parent, pdf_obj *kids, int page)
{
	pdf_obj *page_ref;

	page_ref = pdf_lookup_page_obj(ctx, doc, page - 1);

	/* Must happen while the page still hangs off its original parent. */
	pdf_flatten_inheritable_page_items(ctx, page_ref);

	/* Point the page at its new parent node. */
	pdf_dict_put(ctx, page_ref, PDF_NAME_Parent, parent);

	/* Record the page in the replacement kids array. */
	pdf_array_push(ctx, kids, page_ref);
}
Ejemplo n.º 6
0
/* SumatraPDF: allow replacing potentially slow pdf_lookup_page_obj */
/*
	Load a page by number.

	When the document is being read linearly the page object comes
	from pdf_progressive_advance; if that yields NULL an
	FZ_ERROR_TRYLATER exception is thrown. Otherwise the page object
	is found with pdf_lookup_page_obj. In both cases the actual
	loading is delegated to pdf_load_page_by_obj.
*/
pdf_page *
pdf_load_page(pdf_document *doc, int number)
{
	pdf_obj *page_ref;

	/* Common case: the whole file is available. */
	if (!doc->file_reading_linearly)
		return pdf_load_page_by_obj(doc, number, pdf_lookup_page_obj(doc, number));

	page_ref = pdf_progressive_advance(doc, number);
	if (!page_ref)
		fz_throw(doc->ctx, FZ_ERROR_TRYLATER, "page %d not available yet", number);

	return pdf_load_page_by_obj(doc, number, page_ref);
}
Ejemplo n.º 7
0
/*
	Copy one page from the source document (doc_src) into the
	destination document (doc_des).

	page_from: one-based page number in the source document.
	page_to: one-based position at which the copy is inserted in the
	destination document.
	graft_map: used to graft every copied value into the destination.

	Inheritable items are flattened onto the source page first, then
	only the keys in copy_list are copied. Any error is rethrown after
	the locally owned objects have been released.
*/
static void page_merge(int page_from, int page_to, pdf_graft_map *graft_map)
{
	pdf_obj *page_ref;
	pdf_obj *page_dict = NULL;
	pdf_obj *handoff;
	pdf_obj *obj;
	pdf_obj *ref = NULL;
	int i;

	/* Copy as few key/value pairs as we can. Do not include items that reference other pages. */
	static pdf_obj * const copy_list[] = { PDF_NAME(Contents), PDF_NAME(Resources),
		PDF_NAME(MediaBox), PDF_NAME(CropBox), PDF_NAME(BleedBox), PDF_NAME(TrimBox), PDF_NAME(ArtBox),
		PDF_NAME(Rotate), PDF_NAME(UserUnit) };

	fz_var(page_dict);
	fz_var(ref);

	fz_try(ctx)
	{
		page_ref = pdf_lookup_page_obj(ctx, doc_src, page_from - 1);
		pdf_flatten_inheritable_page_items(ctx, page_ref);

		/* Make a new page object dictionary to hold the items we copy from the source page. */
		page_dict = pdf_new_dict(ctx, doc_des, 4);

		pdf_dict_put(ctx, page_dict, PDF_NAME(Type), PDF_NAME(Page));

		for (i = 0; i < (int)nelem(copy_list); i++)
		{
			/* obj is borrowed from the source page and must not be dropped. */
			obj = pdf_dict_get(ctx, page_ref, copy_list[i]);
			if (obj != NULL)
				pdf_dict_put_drop(ctx, page_dict, copy_list[i], pdf_graft_mapped_object(ctx, graft_map, obj));
		}

		/* Add the page object to the destination document. Ownership of
		 * page_dict passes to the _drop call, so forget our pointer first
		 * to avoid releasing it a second time in the cleanup below. */
		handoff = page_dict;
		page_dict = NULL;
		ref = pdf_add_object_drop(ctx, doc_des, handoff);

		/* Insert it into the page tree. */
		pdf_insert_page(ctx, doc_des, page_to - 1, ref);
	}
	fz_always(ctx)
	{
		/* page_dict is only non-NULL here if we failed before handing it over. */
		pdf_drop_obj(ctx, page_dict);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Ejemplo n.º 8
0
static void
gatherpageinfo(int page, int show)
{
	pdf_obj *pageobj;
	pdf_obj *pageref;
	pdf_obj *rsrc;

	pageref = pdf_lookup_page_obj(doc, page-1);
	pageobj = pdf_resolve_indirect(pageref);

	if (!pageobj)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);

	gatherdimensions(page, pageref, pageobj);

	rsrc = pdf_dict_gets(pageobj, "Resources");
	gatherresourceinfo(page, rsrc, show);
}
Ejemplo n.º 9
0
/*
	Collect information from one resource dictionary of a page.

	page: one-based page number the resources belong to.
	rsrc: the resource dictionary to examine.
	show: bit mask (FONTS, XOBJS, SHADINGS, PATTERNS) selecting which
	categories are gathered.

	For fonts, XObjects and patterns, every entry that has its own
	"Resources" dictionary comparing different from rsrc (per
	pdf_objcmp) is visited recursively.

	Throws FZ_ERROR_GENERIC when the page object cannot be resolved.
*/
static void
gatherresourceinfo(int page, pdf_obj *rsrc, int show)
{
	pdf_obj *page_ref = pdf_lookup_page_obj(doc, page - 1);
	pdf_obj *page_dict = pdf_resolve_indirect(page_ref);
	pdf_obj *fonts;
	pdf_obj *xobjects;
	pdf_obj *shadings;
	pdf_obj *patterns;
	int idx;
	int count;

	if (page_dict == NULL)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);

	/* Fonts, plus any resources nested inside individual font entries. */
	fonts = pdf_dict_gets(rsrc, "Font");
	if (fonts != NULL && (show & FONTS) != 0)
	{
		gatherfonts(page, page_ref, page_dict, fonts);
		count = pdf_dict_len(fonts);
		for (idx = 0; idx < count; idx++)
		{
			pdf_obj *entry = pdf_dict_get_val(fonts, idx);
			pdf_obj *nested = pdf_dict_gets(entry, "Resources");

			if (nested != NULL && pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested, show);
		}
	}

	/* XObjects: images, forms and PostScript objects share one dictionary. */
	xobjects = pdf_dict_gets(rsrc, "XObject");
	if (xobjects != NULL && (show & XOBJS) != 0)
	{
		gatherimages(page, page_ref, page_dict, xobjects);
		gatherforms(page, page_ref, page_dict, xobjects);
		gatherpsobjs(page, page_ref, page_dict, xobjects);
		count = pdf_dict_len(xobjects);
		for (idx = 0; idx < count; idx++)
		{
			pdf_obj *entry = pdf_dict_get_val(xobjects, idx);
			pdf_obj *nested = pdf_dict_gets(entry, "Resources");

			if (nested != NULL && pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested, show);
		}
	}

	/* Shadings are gathered without descending any further. */
	shadings = pdf_dict_gets(rsrc, "Shading");
	if (shadings != NULL && (show & SHADINGS) != 0)
		gathershadings(page, page_ref, page_dict, shadings);

	/* Patterns, plus any resources nested inside individual pattern entries. */
	patterns = pdf_dict_gets(rsrc, "Pattern");
	if (patterns != NULL && (show & PATTERNS) != 0)
	{
		gatherpatterns(page, page_ref, page_dict, patterns);
		count = pdf_dict_len(patterns);
		for (idx = 0; idx < count; idx++)
		{
			pdf_obj *entry = pdf_dict_get_val(patterns, idx);
			pdf_obj *nested = pdf_dict_gets(entry, "Resources");

			if (nested != NULL && pdf_objcmp(rsrc, nested) != 0)
				gatherresourceinfo(page, nested, show);
		}
	}
}
/*
	Rewrite the document so that only the pages selected on the
	command line remain, in the order they were requested.

	argv[0..argc-1]: each argument is a comma separated list of
	one-based page specifications: "N", "N-M", "N-" (N to the last
	page) or "-M" (from the last page to M). Page numbers are clamped
	to 1..pagecount; when the start is not below the end the range is
	emitted in descending order.

	Steps:
	- the catalog is replaced by one holding only Type and Pages;
	- the root Pages node gets a new Kids array and Count covering
	  just the selected pages;
	- entries of the old Dests name tree whose first destination
	  element is in the new Kids array are re-attached under
	  Names/Dests on the new catalog;
	- Link annotations with a GoTo action whose destination is not
	  accepted by string_in_names_list are removed from each
	  retained page's Annots array.
*/
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	int pagecount;
	int i;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* A trailing dash means "up to the last page". */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
			{
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			}
			else
			{
				/* Reversed (or single page) range: walk downwards. */
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);
			}

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The destination array is either under /D or is the value itself;
			 * its first element is what gets matched against the kept pages. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (pdf_array_contains(ctx, pdf_dict_get(ctx, pages, PDF_NAME_Kids), dest))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		/* The catalog was replaced above, so fetch the current one. */
		root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	pagecount = pdf_count_pages(ctx, doc);
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);
			pdf_obj *p;

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			p = pdf_dict_get(ctx, o, PDF_NAME_A);
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo))
				continue;

			if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
				continue;

			/* FIXME: Should probably look at Next too */

			/* Remove this annotation. The array is now one shorter, so
			 * shrink the bound and revisit the same index. */
			pdf_array_delete(ctx, annots, j);
			len--;
			j--;
		}
	}

	/* Released only now because the annotation pass above still reads it.
	 * It is NULL when there was no Dests tree. */
	pdf_drop_obj(ctx, names_list);
}
Ejemplo n.º 11
0
/*
	Load the page with the given zero-based number and return a newly
	allocated pdf_page for it.

	The function:
	- computes the page box from the inherited MediaBox (falling back
	  to 612x792 when it is empty), intersected with the inherited
	  CropBox when there is one, scaled by UserUnit;
	- snaps the inherited Rotate value to 0, 90, 180 or 270;
	- builds page->ctm from the rotation, box origin and UserUnit;
	- loads link and other annotations, ignoring failures with a
	  warning;
	- records duration, transition, resources and contents;
	- sets page->transparency when the page resources, the page
	  Group/S entry, or any annotation appearance uses blending.

	If determining transparency throws, the page is freed and the
	error is rethrown with the page number attached.
*/
pdf_page *
pdf_load_page(pdf_document *doc, int number)
{
	fz_context *ctx = doc->ctx;
	pdf_page *page;
	pdf_annot *annot;
	pdf_obj *pageobj, *pageref, *obj;
	fz_rect mediabox, cropbox, realbox;
	float userunit;
	fz_matrix mat;

	pageref = pdf_lookup_page_obj(doc, number);
	pageobj = pdf_resolve_indirect(pageref);

	page = fz_malloc_struct(ctx, pdf_page);
	page->resources = NULL;
	page->contents = NULL;
	page->transparency = 0;
	page->links = NULL;
	page->annots = NULL;
	page->deleted_annots = NULL;
	page->tmp_annots = NULL;
	page->me = pdf_keep_obj(pageobj);

	/* UserUnit is honoured only when stored as a real number. */
	obj = pdf_dict_gets(pageobj, "UserUnit");
	if (pdf_is_real(obj))
		userunit = pdf_to_real(obj);
	else
		userunit = 1;

	pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "MediaBox"), &mediabox);
	if (fz_is_empty_rect(&mediabox))
	{
		fz_warn(ctx, "cannot find page size for page %d", number + 1);
		mediabox.x0 = 0;
		mediabox.y0 = 0;
		mediabox.x1 = 612;
		mediabox.y1 = 792;
	}

	pdf_to_rect(ctx, pdf_lookup_inherited_page_item(doc, pageobj, "CropBox"), &cropbox);
	if (!fz_is_empty_rect(&cropbox))
		fz_intersect_rect(&mediabox, &cropbox);

	/* Normalise so that x0 <= x1 and y0 <= y1 whatever order the file used. */
	page->mediabox.x0 = fz_min(mediabox.x0, mediabox.x1) * userunit;
	page->mediabox.y0 = fz_min(mediabox.y0, mediabox.y1) * userunit;
	page->mediabox.x1 = fz_max(mediabox.x0, mediabox.x1) * userunit;
	page->mediabox.y1 = fz_max(mediabox.y0, mediabox.y1) * userunit;

	if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1)
	{
		fz_warn(ctx, "invalid page size in page %d", number + 1);
		page->mediabox = fz_unit_rect;
	}

	page->rotate = pdf_to_int(pdf_lookup_inherited_page_item(doc, pageobj, "Rotate"));
	/* Snap page->rotate to 0, 90, 180 or 270 */
	if (page->rotate < 0)
		page->rotate = 360 - ((-page->rotate) % 360);
	if (page->rotate >= 360)
		page->rotate = page->rotate % 360;
	page->rotate = 90*((page->rotate + 45)/90);
	/* Values 315..359 round up to exactly 360, which must wrap to 0. */
	if (page->rotate >= 360)
		page->rotate = 0;

	fz_pre_rotate(fz_scale(&page->ctm, 1, -1), -page->rotate);
	realbox = page->mediabox;
	fz_transform_rect(&realbox, &page->ctm);
	fz_pre_scale(fz_translate(&mat, -realbox.x0, -realbox.y0), userunit, userunit);
	fz_concat(&page->ctm, &page->ctm, &mat);

	obj = pdf_dict_gets(pageobj, "Annots");
	if (obj)
	{
		/* SumatraPDF: ignore annotations in case of unexpected errors */
		fz_try(ctx)
		{
			page->links = pdf_load_link_annots(doc, obj, &page->ctm);
			page->annots = pdf_load_annots(doc, obj, page);
		}
		fz_catch(ctx)
		{
			fz_warn(ctx, "unexpectedly failed to load page annotations");
		}
	}

	page->duration = pdf_to_real(pdf_dict_gets(pageobj, "Dur"));

	obj = pdf_dict_gets(pageobj, "Trans");
	page->transition_present = (obj != NULL);
	if (obj)
	{
		pdf_load_transition(doc, page, obj);
	}

	// TODO: inherit
	page->resources = pdf_lookup_inherited_page_item(doc, pageobj, "Resources");
	if (page->resources)
		pdf_keep_obj(page->resources);

	obj = pdf_dict_gets(pageobj, "Contents");
	fz_try(ctx)
	{
		page->contents = pdf_keep_obj(obj);

		if (pdf_resources_use_blending(doc, page->resources))
			page->transparency = 1;
		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2107 */
		else if (!strcmp(pdf_to_name(pdf_dict_getp(pageobj, "Group/S")), "Transparency"))
			page->transparency = 1;

		/* Stop at the first annotation appearance that needs blending. */
		for (annot = page->annots; annot && !page->transparency; annot = annot->next)
			if (annot->ap && pdf_resources_use_blending(doc, annot->ap->resources))
				page->transparency = 1;
	}
	fz_catch(ctx)
	{
		pdf_free_page(doc, page);
		fz_rethrow_message(ctx, "cannot load page %d contents (%d 0 R)", number + 1, pdf_to_num(pageref));
	}

	return page;
}
Ejemplo n.º 12
0
/*
	Split every page of the document into a grid of smaller pages.

	The grid is x_factor columns by y_factor rows. When both factors
	are zero the page is cut in two along its longer edge; when only
	one is zero it is treated as 1. Tiles are emitted row by row from
	the highest y band down, left to right within a row.

	Each tile is a copy of the original page dictionary with its
	MediaBox replaced by the tile rectangle and its Parent set to the
	root Pages node. The catalog is replaced by one holding only Type
	and Pages, and the root Pages node gets the new Kids array and
	Count.
*/
static void decimatepages(fz_context *ctx, pdf_document *doc)
{
	pdf_obj *oldroot, *root, *pages, *kids, *parent, *countobj;
	int num_pages = pdf_count_pages(ctx, doc);
	int page, kidcount;

	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with our new pages in */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	kidcount = 0;
	for (page=0; page < num_pages; page++)
	{
		/* NOTE(review): page_details is never released in this function;
		 * confirm which page-release call this MuPDF version provides and
		 * add it at the end of the loop body. */
		pdf_page *page_details = pdf_load_page(ctx, doc, page);
		int xf = x_factor, yf = y_factor;
		int x, y;
		float w = page_details->mediabox.x1 - page_details->mediabox.x0;
		float h = page_details->mediabox.y1 - page_details->mediabox.y0;

		if (xf == 0 && yf == 0)
		{
			/* Nothing specified, so split along the long edge */
			if (w > h)
				xf = 2, yf = 1;
			else
				xf = 1, yf = 2;
		}
		else if (xf == 0)
			xf = 1;
		else if (yf == 0)
			yf = 1;

		for (y = yf-1; y >= 0; y--)
		{
			for (x = 0; x < xf; x++)
			{
				pdf_obj *newpageobj, *newpageref, *newmediabox;
				fz_rect mb;
				float corners[4];
				int num;
				int k;

				newpageobj = pdf_copy_dict(ctx, pdf_lookup_page_obj(ctx, doc, page));
				num = pdf_create_object(ctx, doc);
				pdf_update_object(ctx, doc, num, newpageobj);
				newpageref = pdf_new_indirect(ctx, doc, num, 0);

				newmediabox = pdf_new_array(ctx, doc, 4);

				/* The last column/row takes the exact page edge so that
				 * rounding in w/xf or h/yf cannot leave a gap. */
				mb.x0 = page_details->mediabox.x0 + (w/xf)*x;
				if (x == xf-1)
					mb.x1 = page_details->mediabox.x1;
				else
					mb.x1 = page_details->mediabox.x0 + (w/xf)*(x+1);
				mb.y0 = page_details->mediabox.y0 + (h/yf)*y;
				if (y == yf-1)
					mb.y1 = page_details->mediabox.y1;
				else
					mb.y1 = page_details->mediabox.y0 + (h/yf)*(y+1);

				/* The array keeps its own reference to each number, so our
				 * temporary reference has to be released after the push. */
				corners[0] = mb.x0;
				corners[1] = mb.y0;
				corners[2] = mb.x1;
				corners[3] = mb.y1;
				for (k = 0; k < 4; k++)
				{
					pdf_obj *coord = pdf_new_real(ctx, doc, corners[k]);
					pdf_array_push(ctx, newmediabox, coord);
					pdf_drop_obj(ctx, coord);
				}

				pdf_dict_put(ctx, newpageobj, PDF_NAME_Parent, parent);
				pdf_dict_put(ctx, newpageobj, PDF_NAME_MediaBox, newmediabox);

				/* Both are now held by the document / page dictionary. */
				pdf_drop_obj(ctx, newmediabox);
				pdf_drop_obj(ctx, newpageobj);

				/* Store page object in new kids array */
				pdf_array_push(ctx, kids, newpageref);
				pdf_drop_obj(ctx, newpageref);

				kidcount++;
			}
		}
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, kidcount);
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);
}
Ejemplo n.º 13
0
/*
	Rewrite the document so that only the pages selected on the
	command line remain, in the order they were requested.

	argv[0..argc-1]: each argument is a comma separated list of
	one-based page specifications: "N", "N-M", "N-" (N to the last
	page) or "-M" (from the last page to M). Page numbers are clamped
	to 1..pagecount; when the start is not below the end the range is
	emitted in descending order.

	Steps:
	- the catalog is replaced by one holding only Type, Pages and
	  Outlines;
	- the root Pages node gets a new Kids array and Count covering
	  just the selected pages;
	- entries of the old Dests name tree accepted by
	  dest_is_valid_page are re-attached under Names/Dests;
	- Link annotations rejected by dest_is_valid are removed from
	  each retained page's Annots array;
	- the Outlines entry is deleted from the catalog when
	  strip_outlines returns 0.
*/
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	/* root stays referenced until the end of the function because the
	 * Names and Outlines entries are still edited below. */
	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means "start from the last page". */
			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* A trailing dash means "up to the last page". */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
			{
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			}
			else
			{
				/* Reversed (or single page) range: walk downwards. */
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);
			}

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Record the object number of every retained page; the validity
	 * helpers below are given this table. */
	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			/* The destination array is either under /D or is the value itself;
			 * its first element is what gets checked. */
			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation. The array is now one shorter,
				 * so shrink the bound and revisit the same index. */
				pdf_array_delete(ctx, annots, j);
				len--;
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
Ejemplo n.º 14
0
/*
	Turn a PDF link destination into a URI string.

	dest: the destination object; it is first passed through
	resolve_dest.

	Returns:
	- NULL (with a warning) when the destination resolves to nothing;
	- a copy of the name or string when the resolved destination is a
	  name or a string;
	- NULL when the target page cannot be determined;
	- otherwise "#page,x,y" (or "#page" when the destination array has
	  no second element), where page is one-based and x/y are derived
	  from the destination type (XYZ, FitR, FitH/FitBH, FitV/FitBV;
	  anything else gives 0,0).

	Returned strings come from fz_strdup / fz_asprintf.
*/
char *
pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest)
{
	pdf_obj *obj, *pageobj;
	fz_rect mediabox;
	fz_matrix pagectm;
	const char *ld;
	int page, x, y, h;

	dest = resolve_dest(ctx, doc, dest);
	if (dest == NULL)
	{
		fz_warn(ctx, "undefined link destination");
		return NULL;
	}

	if (pdf_is_name(ctx, dest))
	{
		ld = pdf_to_name(ctx, dest);
		return fz_strdup(ctx, ld);
	}
	else if (pdf_is_string(ctx, dest))
	{
		ld = pdf_to_str_buf(ctx, dest);
		return fz_strdup(ctx, ld);
	}

	pageobj = pdf_array_get(ctx, dest, 0);
	if (pdf_is_int(ctx, pageobj))
	{
		/* The first element is a page index rather than a page reference. */
		page = pdf_to_int(ctx, pageobj);
		if (page >= 0)
		{
			/* An index beyond the page tree must not abort link parsing;
			 * treat it like an unresolvable page reference below. */
			fz_try(ctx)
				pageobj = pdf_lookup_page_obj(ctx, doc, page);
			fz_catch(ctx)
				page = -1;
		}
	}
	else
	{
		fz_try(ctx)
			page = pdf_lookup_page_number(ctx, doc, pageobj);
		fz_catch(ctx)
			page = -1;
	}

	if (page < 0)
		return NULL;

	obj = pdf_array_get(ctx, dest, 1);
	if (obj)
	{
		/* Link coords use a coordinate space that does not seem to respect Rotate or UserUnit. */
		/* All we need to do is figure out the page height to flip the coordinate space. */
		pdf_page_obj_transform(ctx, pageobj, &mediabox, &pagectm);
		mediabox = fz_transform_rect(mediabox, pagectm);
		h = mediabox.y1 - mediabox.y0;

		if (pdf_name_eq(ctx, obj, PDF_NAME(XYZ)))
		{
			x = pdf_array_get_int(ctx, dest, 2);
			y = h - pdf_array_get_int(ctx, dest, 3);
		}
		else if (pdf_name_eq(ctx, obj, PDF_NAME(FitR)))
		{
			x = pdf_array_get_int(ctx, dest, 2);
			y = h - pdf_array_get_int(ctx, dest, 5);
		}
		else if (pdf_name_eq(ctx, obj, PDF_NAME(FitH)) || pdf_name_eq(ctx, obj, PDF_NAME(FitBH)))
		{
			x = 0;
			y = h - pdf_array_get_int(ctx, dest, 2);
		}
		else if (pdf_name_eq(ctx, obj, PDF_NAME(FitV)) || pdf_name_eq(ctx, obj, PDF_NAME(FitBV)))
		{
			x = pdf_array_get_int(ctx, dest, 2);
			y = 0;
		}
		else
		{
			x = 0;
			y = 0;
		}
		return fz_asprintf(ctx, "#%d,%d,%d", page + 1, x, y);
	}

	return fz_asprintf(ctx, "#%d", page + 1);
}