static fz_error
pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
    fz_error error;
    int len;
    char *s;
    int n;
    int t;
    pdf_token_e tok;
    int c;

    pdf_logxref("load old xref format trailer\n");

    fz_readline(xref->file, buf, cap);
    if (strncmp(buf, "xref", 4) != 0)
        return fz_throw("cannot find xref marker");

    while (1)
    {
        c = fz_peekbyte(xref->file);
        if (!(c >= '0' && c <= '9'))
            break;

        fz_readline(xref->file, buf, cap);
        s = buf;
        fz_strsep(&s, " "); /* ignore ofs */
        if (!s)
            return fz_throw("invalid range marker in xref");
        len = atoi(fz_strsep(&s, " "));

        /* broken pdfs where the section is not on a separate line */
        if (s && *s != '\0')
            fz_seek(xref->file, -(2 + (int)strlen(s)), 1);

        t = fz_tell(xref->file);
        if (t < 0)
            return fz_throw("cannot tell in file");

        fz_seek(xref->file, t + 20 * len, 0);
    }

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TTRAILER)
        return fz_throw("expected trailer marker");

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TODICT)
        return fz_throw("expected trailer dictionary");

    error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    return fz_okay;
}
Пример #2
0
static void
showinfo(char *filename, int show, char *pagelist)
{
	int page, spage, epage;
	char *spec, *dash;
	int allpages;
	int pagecount;

	if (!doc)
		infousage();

	allpages = !strcmp(pagelist, "1-");

	pagecount = pdf_count_pages(doc);
	spec = fz_strsep(&pagelist, ",");
	while (spec && pagecount)
	{
		dash = strchr(spec, '-');

		if (dash == spec)
			spage = epage = pagecount;
		else
			spage = epage = atoi(spec);

		if (dash)
		{
			if (strlen(dash) > 1)
				epage = atoi(dash + 1);
			else
				epage = pagecount;
		}

		if (spage > epage)
			page = spage, spage = epage, epage = page;

		spage = fz_clampi(spage, 1, pagecount);
		epage = fz_clampi(epage, 1, pagecount);

		if (allpages)
			printf("Retrieving info from pages %d-%d...\n", spage, epage);
		for (page = spage; page <= epage; page++)
		{
			gatherpageinfo(page, show);
			if (!allpages)
			{
				printf("Page %d:\n", page);
				printinfo(filename, show, page);
				printf("\n");
			}
		}

		spec = fz_strsep(&pagelist, ",");
	}

	if (allpages)
		printinfo(filename, show, -1);
}
Пример #3
0
static void merge_range(char *range)
{
	int page, spage, epage, src_pagecount, des_pagecount;
	char *spec, *dash;
	pdf_graft_map *graft_map;

	src_pagecount = fz_count_pages(ctx, (fz_document*) doc_src);
	des_pagecount = fz_count_pages(ctx, (fz_document*) doc_des);
	spec = fz_strsep(&range, ",");
	graft_map = pdf_new_graft_map(ctx, doc_src);

	fz_try(ctx)
	{
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = src_pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = src_pagecount;
			}

			spage = fz_clampi(spage, 1, src_pagecount);
			epage = fz_clampi(epage, 1, src_pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; page++, des_pagecount++)
					page_merge(page, des_pagecount + 1, graft_map);
			else
				for (page = spage; page >= epage; page--, des_pagecount++)
					page_merge(page, des_pagecount + 1, graft_map);
			spec = fz_strsep(&range, ",");
		}
	}
	fz_always(ctx)
	{
		pdf_drop_graft_map(ctx, graft_map);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Пример #4
0
static int
showpages(fz_context *ctx, pdf_document *doc, fz_output *out, char *pagelist)
{
	int page, spage, epage;
	char *spec, *dash;
	int pagecount;
	int ret = 0;

	if (!doc)
		infousage();

	pagecount = pdf_count_pages(ctx, doc);
	spec = fz_strsep(&pagelist, ",");
	while (spec && pagecount)
	{
		dash = strchr(spec, '-');

		if (dash == spec)
			spage = epage = pagecount;
		else
			spage = epage = atoi(spec);

		if (dash)
		{
			if (strlen(dash) > 1)
				epage = atoi(dash + 1);
			else
				epage = pagecount;
		}

		if (spage > epage)
			page = spage, spage = epage, epage = page;

		spage = fz_clampi(spage, 1, pagecount);
		epage = fz_clampi(epage, 1, pagecount);

		for (page = spage; page <= epage; page++)
		{
			ret |= showpage(ctx, doc, out, page);
		}

		spec = fz_strsep(&pagelist, ",");
	}

	return ret;
}
Пример #5
0
static void drawrange(xps_context *ctx, char *range)
{
	int page, spage, epage;
	char *spec, *dash;

	spec = fz_strsep(&range, ",");
	while (spec)
	{
		dash = strchr(spec, '-');

		if (dash == spec)
			spage = epage = xps_count_pages(ctx);
		else
			spage = epage = atoi(spec);

		if (dash)
		{
			if (strlen(dash) > 1)
				epage = atoi(dash + 1);
			else
				epage = xps_count_pages(ctx);
		}

		spage = CLAMP(spage, 1, xps_count_pages(ctx));
		epage = CLAMP(epage, 1, xps_count_pages(ctx));

		if (spage < epage)
			for (page = spage; page <= epage; page++)
				drawpage(ctx, page);
		else
			for (page = spage; page >= epage; page--)
				drawpage(ctx, page);

		spec = fz_strsep(&range, ",");
	}
}
Пример #6
0
xml_element *
xps_lookup_alternate_content(xml_element *node)
{
	for (node = xml_down(node); node; node = xml_next(node))
	{
		if (!strcmp(xml_tag(node), "mc:Choice") && xml_att(node, "Requires"))
		{
			char list[64];
			char *next = list, *item;
			fz_strlcpy(list, xml_att(node, "Requires"), sizeof(list));
			while ((item = fz_strsep(&next, " \t\r\n")) && (!*item || !strcmp(item, "xps")));
			if (!item)
				return xml_down(node);
		}
		else if (!strcmp(xml_tag(node), "mc:Fallback"))
			return xml_down(node);
	}
	return NULL;
}
Пример #7
0
fz_xml *
xps_lookup_alternate_content(fz_context *ctx, xps_document *doc, fz_xml *node)
{
	for (node = fz_xml_down(node); node; node = fz_xml_next(node))
	{
		if (fz_xml_is_tag(node, "Choice") && fz_xml_att(node, "Requires"))
		{
			char list[64];
			char *next = list, *item;
			fz_strlcpy(list, fz_xml_att(node, "Requires"), sizeof(list));
			while ((item = fz_strsep(&next, " \t\r\n")) != NULL && (!*item || !strcmp(item, "xps")));
			if (!item)
				return fz_xml_down(node);
		}
		else if (fz_xml_is_tag(node, "Fallback"))
			return fz_xml_down(node);
	}
	return NULL;
}
Пример #8
0
static void retainpages(int argc, char **argv)
{
	fz_error error;
	fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent;

	/* Load the old page tree */
	error = pdf_loadpagetree(xref);
	if (error)
		die(fz_rethrow(error, "cannot load page tree"));

	/* Keep only pages/type entry to avoid references to unretained pages */
	oldroot = fz_dictgets(xref->trailer, "Root");
	pages = fz_dictgets(oldroot, "Pages");

	root = fz_newdict(2);
	fz_dictputs(root, "Type", fz_dictgets(oldroot, "Type"));
	fz_dictputs(root, "Pages", fz_dictgets(oldroot, "Pages"));

	pdf_updateobject(xref, fz_tonum(oldroot), fz_togen(oldroot), root);

	fz_dropobj(root);

	/* Create a new kids array with only the pages we want to keep */
	parent = fz_newindirect(fz_tonum(pages), fz_togen(pages), xref);
	kids = fz_newarray(1);

	/* Retain pages specified */
	while (argc - fz_optind)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[fz_optind];

		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pdf_getpagecount(xref);
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pdf_getpagecount(xref);
			}

			if (spage > epage)
				page = spage, spage = epage, epage = page;

			if (spage < 1)
				spage = 1;
			if (epage > pdf_getpagecount(xref))
				epage = pdf_getpagecount(xref);

			for (page = spage; page <= epage; page++)
			{
				fz_obj *pageobj = pdf_getpageobject(xref, page);
				fz_obj *pageref = pdf_getpageref(xref, page);

				fz_dictputs(pageobj, "Parent", parent);

				/* Store page object in new kids array */
				fz_arraypush(kids, pageref);
			}

			spec = fz_strsep(&pagelist, ",");
		}

		fz_optind++;
	}

	fz_dropobj(parent);

	/* Update page count and kids array */
	countobj = fz_newint(fz_arraylen(kids));
	fz_dictputs(pages, "Count", countobj);
	fz_dropobj(countobj);
	fz_dictputs(pages, "Kids", kids);
	fz_dropobj(kids);
}
Пример #9
0
static void drawrange(fz_context *ctx, fz_document *doc, char *range) {
	int page, spage, epage, pagecount;
	char *spec, *dash;

	pagecount = fz_count_pages(doc);
	spec = fz_strsep(&range, ",");
	while (spec)
	{
		dash = strchr(spec, '-');

		if (dash == spec)
			spage = epage = pagecount;
		else
			spage = epage = atoi(spec);

		if (dash)
		{
			if (strlen(dash) > 1)
				epage = atoi(dash + 1);
			else
				epage = pagecount;
		}

		spage = fz_clampi(spage, 1, pagecount);
		epage = fz_clampi(epage, 1, pagecount);

		current_doc = doc;
		current_ctx = ctx;
		current_epage = epage;

		for (page = spage; page <= epage; ) {
			if (page == 0) page = 1;

			current_page = page;

			drawpage(ctx, doc, page);
			draw_decors(page, epage);

			yutani_flip(yctx, window);

			yutani_msg_t * m = NULL;
			while (1) {
				m = yutani_poll(yctx);
				if (m) {
					switch (m->type) {
						case YUTANI_MSG_KEY_EVENT:
							{
								struct yutani_msg_key_event * ke = (void*)m->data;
								if (ke->event.action == KEY_ACTION_DOWN) {
									switch (ke->event.keycode) {
										case KEY_ESCAPE:
										case 'q':
											yutani_close(yctx, window);
											exit(0);
											break;
										case KEY_ARROW_LEFT:
										case 'a':
											page--;
											goto _continue;
										case KEY_ARROW_RIGHT:
										case 's':
											page++;
											if (page > epage) page = epage;
											goto _continue;
										case KEY_F12:
											toggle_decorations();
											goto _continue;
										default:
											break;
									}
								}
							}
							break;
						case YUTANI_MSG_SESSION_END:
							yutani_close(yctx, window);
							exit(0);
							break;
						case YUTANI_MSG_WINDOW_FOCUS_CHANGE:
							{
								struct yutani_msg_window_focus_change * wf = (void*)m->data;
								yutani_window_t * win = hashmap_get(yctx->windows, (void*)wf->wid);
								if (win) {
									win->focused = wf->focused;
									goto _continue;
								}
							}
							break;
						case YUTANI_MSG_RESIZE_OFFER:
							{
								struct yutani_msg_window_resize * wr = (void*)m->data;
								resize_finish(wr->width, wr->height);
								goto _continue;
							}
							break;
						case YUTANI_MSG_WINDOW_MOUSE_EVENT:
							if (decor_handle_event(yctx, m) == DECOR_CLOSE) {
								yutani_close(yctx, window);
								exit(0);
							}
							break;
						default:
							break;
					}
				}
				free(m);
			}
_continue:
			free(m);
		}

		spec = fz_strsep(&range, ",");
	}
}
static fz_error
pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
    fz_error error;
    int ofs, len;
    char *s;
    int n;
    pdf_token_e tok;
    int i;
    int c;

    pdf_logxref("load old xref format\n");

    fz_readline(xref->file, buf, cap);
    if (strncmp(buf, "xref", 4) != 0)
        return fz_throw("cannot find xref marker");

    while (1)
    {
        c = fz_peekbyte(xref->file);
        if (!(c >= '0' && c <= '9'))
            break;

        fz_readline(xref->file, buf, cap);
        s = buf;
        ofs = atoi(fz_strsep(&s, " "));
        len = atoi(fz_strsep(&s, " "));

        /* broken pdfs where the section is not on a separate line */
        if (s && *s != '\0')
        {
            fz_warn("broken xref section. proceeding anyway.");
            fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
        }

        /* broken pdfs where size in trailer undershoots entries in xref sections */
        if (ofs + len > xref->cap)
        {
            fz_warn("broken xref section, proceeding anyway.");
            xref->cap = ofs + len;
            xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry));
        }

        if ((ofs + len) > xref->len)
        {
            for (i = xref->len; i < (ofs + len); i++)
            {
                xref->table[i].ofs = 0;
                xref->table[i].gen = 0;
                xref->table[i].stmofs = 0;
                xref->table[i].obj = nil;
                xref->table[i].type = 0;
            }
            xref->len = ofs + len;
        }

        for (i = ofs; i < ofs + len; i++)
        {
            n = fz_read(xref->file, (unsigned char *) buf, 20);
            if (n < 0)
                return fz_rethrow(n, "cannot read xref table");
            if (!xref->table[i].type)
            {
                s = buf;

                /* broken pdfs where line start with white space */
                while (*s != '\0' && iswhite(*s))
                    s++;

                xref->table[i].ofs = atoi(s);
                xref->table[i].gen = atoi(s + 11);
                xref->table[i].type = s[17];
            }
        }
    }

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TTRAILER)
        return fz_throw("expected trailer marker");

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TODICT)
        return fz_throw("expected trailer dictionary");

    error = pdf_parsedict(trailerp, xref, xref->file, buf, cap);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    return fz_okay;
}
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	int pagecount;
	int i;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (pdf_array_contains(ctx, pdf_dict_get(ctx, pages, PDF_NAME_Kids), dest))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, names_list);
		pdf_drop_obj(ctx, olddests);
	}

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	pagecount = pdf_count_pages(ctx, doc);
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);
			pdf_obj *p;

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			p = pdf_dict_get(ctx, o, PDF_NAME_A);
			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, p, PDF_NAME_S), PDF_NAME_GoTo))
				continue;

			if (string_in_names_list(ctx, pdf_dict_get(ctx, p, PDF_NAME_D), names_list))
				continue;

			/* FIXME: Should probably look at Next too */

			/* Remove this annotation */
			pdf_array_delete(ctx, annots, j);
			j--;
		}
	}
}
Пример #12
0
static void
showinfo(char *filename, int show, char *pagelist)
{
	int page, spage, epage;
	char *spec, *dash;
	int allpages;

	if (!xref)
		infousage();

	allpages = !strcmp(pagelist, "1-");

	spec = fz_strsep(&pagelist, ",");
	while (spec)
	{
		dash = strchr(spec, '-');

		if (dash == spec)
			spage = epage = 1;
		else
			spage = epage = atoi(spec);

		if (dash)
		{
			if (strlen(dash) > 1)
				epage = atoi(dash + 1);
			else
				epage = pagecount;
		}

		if (spage > epage)
			page = spage, spage = epage, epage = page;

		if (spage < 1)
			spage = 1;
		if (epage > pagecount)
			epage = pagecount;
		if (spage > pagecount)
			spage = pagecount;

		if (allpages)
			printf("Retrieving info from pages %d-%d...\n", spage, epage);
		if (spage >= 1)
		{
			for (page = spage; page <= epage; page++)
			{
				gatherinfo(show, page);
				if (!allpages)
				{
					printf("Page %05d:\n", page);
					printinfo(filename, show, page);
					printf("\n");
				}
			}
		}

		spec = fz_strsep(&pagelist, ",");
	}

	if (allpages)
		printinfo(filename, show, -1);
}
Пример #13
0
static void retainpages(int argc, char **argv)
{
	fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = fz_dict_gets(xref->trailer, "Root");
	pages = fz_dict_gets(oldroot, "Pages");
	olddests = pdf_load_name_tree(xref, "Dests");

	root = fz_new_dict(ctx, 2);
	fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type"));
	fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages"));

	pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root);

	fz_drop_obj(root);

	/* Create a new kids array with only the pages we want to keep */
	parent = fz_new_indirect(ctx, fz_to_num(pages), fz_to_gen(pages), xref);
	kids = fz_new_array(ctx, 1);

	/* Retain pages specified */
	while (argc - fz_optind)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[fz_optind];

		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pdf_count_pages(xref);
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pdf_count_pages(xref);
			}

			if (spage > epage)
				page = spage, spage = epage, epage = page;

			if (spage < 1)
				spage = 1;
			if (epage > pdf_count_pages(xref))
				epage = pdf_count_pages(xref);

			for (page = spage; page <= epage; page++)
			{
				fz_obj *pageobj = xref->page_objs[page-1];
				fz_obj *pageref = xref->page_refs[page-1];

				fz_dict_puts(pageobj, "Parent", parent);

				/* Store page object in new kids array */
				fz_array_push(kids, pageref);
			}

			spec = fz_strsep(&pagelist, ",");
		}

		fz_optind++;
	}

	fz_drop_obj(parent);

	/* Update page count and kids array */
	countobj = fz_new_int(ctx, fz_array_len(kids));
	fz_dict_puts(pages, "Count", countobj);
	fz_drop_obj(countobj);
	fz_dict_puts(pages, "Kids", kids);
	fz_drop_obj(kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		int i;
		fz_obj *names = fz_new_dict(ctx, 1);
		fz_obj *dests = fz_new_dict(ctx, 1);
		fz_obj *names_list = fz_new_array(ctx, 32);

		for (i = 0; i < fz_dict_len(olddests); i++)
		{
			fz_obj *key = fz_dict_get_key(olddests, i);
			fz_obj *val = fz_dict_get_val(olddests, i);
			fz_obj *key_str = fz_new_string(ctx, fz_to_name(key), strlen(fz_to_name(key)));
			fz_obj *dest = fz_dict_gets(val, "D");

			dest = fz_array_get(dest ? dest : val, 0);
			if (fz_array_contains(fz_dict_gets(pages, "Kids"), dest))
			{
				fz_array_push(names_list, key_str);
				fz_array_push(names_list, val);
			}
			fz_drop_obj(key_str);
		}

		root = fz_dict_gets(xref->trailer, "Root");
		fz_dict_puts(dests, "Names", names_list);
		fz_dict_puts(names, "Dests", dests);
		fz_dict_puts(root, "Names", names);

		fz_drop_obj(names);
		fz_drop_obj(dests);
		fz_drop_obj(names_list);
		fz_drop_obj(olddests);
	}
}
Пример #14
0
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation */
				pdf_array_delete(ctx, annots, j);
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
Пример #15
0
static fz_error
pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int ofs, len;
	char *s;
	int n;
	int tok;
	int i;
	int c;

	fz_read_line(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		c = fz_peek_byte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_read_line(xref->file, buf, cap);
		s = buf;
		ofs = atoi(fz_strsep(&s, " "));
		len = atoi(fz_strsep(&s, " "));

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (ofs + len > xref->len)
		{
			fz_warn("broken xref section, proceeding anyway.");
			pdf_resize_xref(xref, ofs + len);
		}

		for (i = ofs; i < ofs + len; i++)
		{
			n = fz_read(xref->file, (unsigned char *) buf, 20);
			if (n < 0)
				return fz_rethrow(n, "cannot read xref table");
			if (!xref->table[i].type)
			{
				s = buf;

				/* broken pdfs where line start with white space */
				while (*s != '\0' && iswhite(*s))
					s++;

				xref->table[i].ofs = atoi(s);
				xref->table[i].gen = atoi(s + 11);
				xref->table[i].type = s[17];
				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
					return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen);
			}
		}
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_TRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_OPEN_DICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	return fz_okay;
}