Example #1
0
static int strip_outlines(fz_context *ctx, pdf_document *doc, pdf_obj *outlines, int page_count, int *page_object_nums, pdf_obj *names_list)
{
	int nc;
	pdf_obj *first;
	pdf_obj *last;

	first = pdf_dict_get(ctx, outlines, PDF_NAME_First);
	if (first == NULL)
		nc = 0;
	else
		nc = strip_outline(ctx, doc, first, page_count, page_object_nums, names_list, &first, &last);

	if (nc == 0)
	{
		pdf_dict_del(ctx, outlines, PDF_NAME_First);
		pdf_dict_del(ctx, outlines, PDF_NAME_Last);
		pdf_dict_del(ctx, outlines, PDF_NAME_Count);
	}
	else
	{
		int old_count = pdf_to_int(ctx, pdf_dict_get(ctx, outlines, PDF_NAME_Count));
		pdf_dict_put(ctx, outlines, PDF_NAME_First, first);
		pdf_dict_put(ctx, outlines, PDF_NAME_Last, last);
		pdf_dict_put(ctx, outlines, PDF_NAME_Count, pdf_new_int(ctx, doc, old_count > 0 ? nc : -nc));
	}

	return nc;
}
Example #2
0
void pdf_delete_portfolio_schema(fz_context *ctx, pdf_document *doc, int entry)
{
	pdf_portfolio **pp;
	pdf_portfolio *p;
	pdf_obj *s;

	if (!doc)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Bad pdf_delete_portfolio_schema call");

	if (doc->portfolio == NULL)
		load_portfolio(ctx, doc);

	pp = &doc->portfolio;
	while (*pp && entry > 0)
		pp = &(*pp)->next, entry--;

	p = *pp;
	if (p == NULL || entry)
		fz_throw(ctx, FZ_ERROR_GENERIC, "entry out of range in pdf_delete_portfolio_schema");
	*pp = p->next;

	/* Delete the key from the schema */
	s = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Collection, PDF_NAME_Schema, NULL);
	pdf_dict_del(ctx, s, p->key);

	/* Delete this entry from all the collection entries */
	s = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Names, PDF_NAME_EmbeddedFiles, NULL);
	pdf_name_tree_map(ctx, s, delete_from_node, p->key);

	pdf_drop_obj(ctx, p->entry.name);
	pdf_drop_obj(ctx, p->key);
	pdf_drop_obj(ctx, p->val);
	fz_free(ctx, p);
}
Example #3
0
void
pdf_clear_annot_quad_points(fz_context *ctx, pdf_annot *annot)
{
	check_allowed_subtypes(ctx, annot, PDF_NAME(QuadPoints), quad_point_subtypes);
	pdf_dict_del(ctx, annot->obj, PDF_NAME(QuadPoints));
	pdf_dirty_annot(ctx, annot);
}
Example #4
0
void
pdf_clear_annot_ink_list(fz_context *ctx, pdf_annot *annot)
{
	check_allowed_subtypes(ctx, annot, PDF_NAME(InkList), ink_list_subtypes);
	pdf_dict_del(ctx, annot->obj, PDF_NAME(InkList));
	pdf_dirty_annot(ctx, annot);
}
Example #5
0
static int delete_from_node(fz_context *ctx, pdf_obj *container, pdf_obj *key, pdf_obj *val, void *arg)
{
	pdf_obj *delete_key = (pdf_obj *)arg;

	pdf_dict_del(ctx, pdf_dict_get(ctx, val, PDF_NAME_CI), delete_key);

	return 0;
}
Example #6
0
void pdf_set_annot_opacity(fz_context *ctx, pdf_annot *annot, float opacity)
{
	if (opacity != 1)
		pdf_dict_put_real(ctx, annot->obj, PDF_NAME(CA), opacity);
	else
		pdf_dict_del(ctx, annot->obj, PDF_NAME(CA));
	pdf_dirty_annot(ctx, annot);
}
Example #7
0
void
pdf_update_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *form, fz_rect bbox, fz_matrix matrix, pdf_obj *res, fz_buffer *contents)
{
	pdf_dict_put_rect(ctx, form, PDF_NAME(BBox), bbox);
	pdf_dict_put_matrix(ctx, form, PDF_NAME(Matrix), matrix);
	if (res)
		pdf_dict_put(ctx, form, PDF_NAME(Resources), res);
	else
		pdf_dict_del(ctx, form, PDF_NAME(Resources));
	pdf_update_stream(ctx, doc, form, contents, 0);
}
void
pdf_set_annot_border(fz_context *ctx, pdf_annot *annot, float w)
{
	pdf_document *doc = annot->page->doc;
	pdf_obj *border = pdf_dict_get(ctx, annot->obj, PDF_NAME_Border);
	if (pdf_is_array(ctx, border))
		pdf_array_put_drop(ctx, border, 2, pdf_new_real(ctx, doc, w));
	else
	{
		border = pdf_new_array(ctx, doc, 3);
		pdf_array_push_drop(ctx, border, pdf_new_real(ctx, doc, 0));
		pdf_array_push_drop(ctx, border, pdf_new_real(ctx, doc, 0));
		pdf_array_push_drop(ctx, border, pdf_new_real(ctx, doc, w));
		pdf_dict_put_drop(ctx, annot->obj, PDF_NAME_Border, border);
	}

	/* Remove border style and effect dictionaries so they won't interfere. */
	pdf_dict_del(ctx, annot->obj, PDF_NAME_BS);
	pdf_dict_del(ctx, annot->obj, PDF_NAME_BE);

	annot->changed = 1;
}
Example #9
0
/*
	When resetting or submitting a form, the fields to act upon are defined
	by an array of either field references or field names, plus a flag determining
	whether to act upon the fields in the array, or all fields other than those in
	the array. specified_fields interprets this information and produces the array
	of fields to be acted upon.
*/
static pdf_obj *specified_fields(fz_context *ctx, pdf_document *doc, pdf_obj *fields, int exclude)
{
    pdf_obj *form = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_AcroForm, PDF_NAME_Fields, NULL);
    int i, n;
    pdf_obj *result = pdf_new_array(ctx, doc, 0);
    pdf_obj *nil = NULL;

    fz_var(nil);
    fz_try(ctx)
    {
        /* The 'fields' array not being present signals that all fields
        * should be acted upon, so handle it using the exclude case - excluding none */
        if (exclude || !fields)
        {
            /* mark the fields we don't want to act upon */
            nil = pdf_new_null(ctx, doc);

            n = pdf_array_len(ctx, fields);

            for (i = 0; i < n; i++)
            {
                pdf_obj *field = pdf_array_get(ctx, fields, i);

                if (pdf_is_string(ctx, field))
                    field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field));

                if (field)
                    pdf_dict_put(ctx, field, PDF_NAME_Exclude, nil);
            }

            /* Act upon all unmarked fields */
            n = pdf_array_len(ctx, form);

            for (i = 0; i < n; i++)
                add_field_hierarchy_to_array(ctx, result, pdf_array_get(ctx, form, i));

            /* Unmark the marked fields */
            n = pdf_array_len(ctx, fields);

            for (i = 0; i < n; i++)
            {
                pdf_obj *field = pdf_array_get(ctx, fields, i);

                if (pdf_is_string(ctx, field))
                    field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field));

                if (field)
                    pdf_dict_del(ctx, field, PDF_NAME_Exclude);
            }
        }
        else
        {
            n = pdf_array_len(ctx, fields);

            for (i = 0; i < n; i++)
            {
                pdf_obj *field = pdf_array_get(ctx, fields, i);

                if (pdf_is_string(ctx, field))
                    field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field));

                if (field)
                    add_field_hierarchy_to_array(ctx, result, field);
            }
        }
    }
    fz_always(ctx)
    {
        pdf_drop_obj(ctx, nil);
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(ctx, result);
        fz_rethrow(ctx);
    }

    return result;
}
Example #10
0
static void reset_field(fz_context *ctx, pdf_document *doc, pdf_obj *field)
{
    /* Set V to DV whereever DV is present, and delete V where DV is not.
     * FIXME: we assume for now that V has not been set unequal
     * to DV higher in the hierarchy than "field".
     *
     * At the bottom of the hierarchy we may find widget annotations
     * that aren't also fields, but DV and V will not be present in their
     * dictionaries, and attempts to remove V will be harmless. */
    pdf_obj *dv = pdf_dict_get(ctx, field, PDF_NAME_DV);
    pdf_obj *kids = pdf_dict_get(ctx, field, PDF_NAME_Kids);

    if (dv)
        pdf_dict_put(ctx, field, PDF_NAME_V, dv);
    else
        pdf_dict_del(ctx, field, PDF_NAME_V);

    if (kids == NULL)
    {
        /* The leaves of the tree are widget annotations
         * In some cases we need to update the appearance state;
         * in others we need to mark the field as dirty so that
         * the appearance stream will be regenerated. */
        switch (pdf_field_type(ctx, doc, field))
        {
        case PDF_WIDGET_TYPE_RADIOBUTTON:
        case PDF_WIDGET_TYPE_CHECKBOX:
        {
            pdf_obj *leafv = pdf_get_inheritable(ctx, doc, field, PDF_NAME_V);

            if (leafv)
                pdf_keep_obj(ctx, leafv);
            else
                leafv = PDF_NAME_Off;

            fz_try(ctx)
            {
                pdf_dict_put(ctx, field, PDF_NAME_AS, leafv);
            }
            fz_always(ctx)
            {
                pdf_drop_obj(ctx, leafv);
            }
            fz_catch(ctx)
            {
                fz_rethrow(ctx);
            }
        }
        break;

        case PDF_WIDGET_TYPE_PUSHBUTTON:
            break;

        default:
            pdf_field_mark_dirty(ctx, doc, field);
            break;
        }
    }

    if (pdf_field_dirties_document(ctx, doc, field))
        doc->dirty = 1;
}
Example #11
0
void pdf_clear_annot_vertices(fz_context *ctx, pdf_annot *annot)
{
	check_allowed_subtypes(ctx, annot, PDF_NAME(Vertices), vertices_subtypes);
	pdf_dict_del(ctx, annot->obj, PDF_NAME(Vertices));
	pdf_dirty_annot(ctx, annot);
}
Example #12
0
static void wmupdf_convert_single_page_to_form(pdf_document *xref,fz_context *ctx,int pageno)

    {
    pdf_obj *array,*srcpageobj,*srcpagecontents;
    int i,len,streamlen,pageref,pagegen,compressed;
    double bbox_array[4];
    double matrix[6];

    /* New source page, so get the source page objects */
    srcpageobj = xref->page_objs[pageno-1];
    pageref=pdf_to_num(xref->page_refs[pageno-1]);
    pagegen=pdf_to_gen(xref->page_refs[pageno-1]);
    wmupdf_page_bbox(srcpageobj,bbox_array);
    for (i=0;i<6;i++)
        matrix[i]=0.;
    matrix[0]=matrix[3]=1.;
    srcpagecontents=pdf_dict_gets(srcpageobj,"Contents");
    /* Concatenate all indirect streams from source page directly into it. */
// printf("Adding streams to source page %d (pageref=%d, pagegen=%d)...\n",pageno,pageref,pagegen);
    streamlen=0;
    if (pdf_is_array(srcpagecontents))
        {
        int k;
        for (k=0;k<pdf_array_len(srcpagecontents);k++)
            {
            pdf_obj *obj;
            obj=pdf_array_get(srcpagecontents,k);
            if (pdf_is_indirect(obj))
                pdf_resolve_indirect(obj);
            streamlen=add_to_srcpage_stream(xref,ctx,pageref,pagegen,obj);
            }
        }
    else
        {
        if (pdf_is_indirect(srcpagecontents))
            pdf_resolve_indirect(srcpagecontents);
        streamlen=add_to_srcpage_stream(xref,ctx,pageref,pagegen,srcpagecontents);
        }
    compressed=stream_deflate(xref,ctx,pageref,pagegen,&streamlen);
    srcpageobj = xref->page_objs[pageno-1];
    pageref=pdf_to_num(xref->page_refs[pageno-1]);
    len=pdf_dict_len(srcpageobj);
    for (i=0;i<len;i++)
        {
        pdf_obj *key; /* *value */

        key=pdf_dict_get_key(srcpageobj,i);
/*
if (pdf_is_name(key))
printf("key[%d] = name = %s\n",i,pdf_to_name(key));
else
printf("key[%d] = ??\n",i);
*/
        /* value=pdf_dict_get_val(srcpageobj,i); */
        /* Keep same resources */
        if (!pdf_is_name(key))
            continue;
        if (pdf_is_name(key) && !stricmp("Resources",pdf_to_name(key)))
            continue;
        /* Drop dictionary entry otherwise */
// printf("Deleting key %s.\n",pdf_to_name(key));
        pdf_dict_del(srcpageobj,key);
        i=-1;
        len=pdf_dict_len(srcpageobj);
        }
    pdf_dict_puts(srcpageobj,"Type",pdf_new_name(ctx,"XObject"));
    pdf_dict_puts(srcpageobj,"Subtype",pdf_new_name(ctx,"Form"));
    pdf_dict_puts(srcpageobj,"FormType",pdf_new_int(ctx,1));
    if (compressed)
        pdf_dict_puts(srcpageobj,"Filter",pdf_new_name(ctx,"FlateDecode"));
    pdf_dict_puts(srcpageobj,"Length",pdf_new_int(ctx,streamlen));
    array=pdf_new_array(ctx,4);
    for (i=0;i<4;i++)
        pdf_array_push(array,pdf_new_real(ctx,bbox_array[i]));
    pdf_dict_puts(srcpageobj,"BBox",array);
    array=pdf_new_array(ctx,6);
    for (i=0;i<6;i++)
        pdf_array_push(array,pdf_new_real(ctx,matrix[i]));
    pdf_dict_puts(srcpageobj,"Matrix",array);
    /* (It's no longer a "page"--it's a Form-type XObject) */
    /* I don't think this call should be made since it will call fz_drop_object on srcpageobj */
    /* pdf_update_object(xref,pageref,srcpageobj); */
    }
Example #13
0
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);

		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);

		contents = page->contents;
		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_new_ref(ctx, doc, new_obj);
			page->contents = contents = new_ref;
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME_Filter);
			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
		}

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME_G);
				if (!o)
					continue;

				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1);
			}
		}

		/* ColorSpace - no cleaning possible */

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0);
			}
		}

		/* Shading - no cleaning possible */

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
					continue;

				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Font);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
				{
					pdf_clean_type3(ctx, doc, o, page->resources, cookie);
				}
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);

		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		pdf_update_stream(ctx, doc, contents, buffer, 0);
		pdf_drop_obj(ctx, page->resources);
		ref = pdf_new_ref(ctx, doc, res);
		page->resources = pdf_keep_obj(ctx, ref);
		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning page");
	}
}
Example #14
0
static int strip_outline(fz_context *ctx, pdf_document *doc, pdf_obj *outlines, int page_count, int *page_object_nums, pdf_obj *names_list, pdf_obj **pfirst, pdf_obj **plast)
{
	pdf_obj *prev = NULL;
	pdf_obj *first = NULL;
	pdf_obj *current;
	int count = 0;

	for (current = outlines; current != NULL; )
	{
		int nc;

		/* Strip any children to start with. This takes care of
		 * First/Last/Count for us. */
		nc = strip_outlines(ctx, doc, current, page_count, page_object_nums, names_list);

		if (!dest_is_valid(ctx, current, page_count, page_object_nums, names_list))
		{
			if (nc == 0)
			{
				/* Outline with invalid dest and no children. Drop it by
				 * pulling the next one in here. */
				pdf_obj *next = pdf_dict_get(ctx, current, PDF_NAME_Next);
				if (next == NULL)
				{
					/* There is no next one to pull in */
					if (prev != NULL)
						pdf_dict_del(ctx, prev, PDF_NAME_Next);
				}
				else if (prev != NULL)
				{
					pdf_dict_put(ctx, prev, PDF_NAME_Next, next);
					pdf_dict_put(ctx, next, PDF_NAME_Prev, prev);
				}
				else
				{
					pdf_dict_del(ctx, next, PDF_NAME_Prev);
				}
				current = next;
			}
			else
			{
				/* Outline with invalid dest, but children. Just drop the dest. */
				pdf_dict_del(ctx, current, PDF_NAME_Dest);
				pdf_dict_del(ctx, current, PDF_NAME_A);
				current = pdf_dict_get(ctx, current, PDF_NAME_Next);
			}
		}
		else
		{
			/* Keep this one */
			if (first == NULL)
				first = current;
			prev = current;
			current = pdf_dict_get(ctx, current, PDF_NAME_Next);
			count++;
		}
	}

	*pfirst = first;
	*plast = prev;

	return count;
}
Example #15
0
static void retainpages(fz_context *ctx, globals *glo, int argc, char **argv)
{
	pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;
	pdf_document *doc = glo->doc;
	int argidx = 0;
	pdf_obj *names_list = NULL;
	pdf_obj *outlines;
	int pagecount;
	int i;
	int *page_object_nums;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);
	olddests = pdf_load_name_tree(ctx, doc, PDF_NAME_Dests);
	outlines = pdf_dict_get(ctx, oldroot, PDF_NAME_Outlines);

	root = pdf_new_dict(ctx, doc, 3);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));
	pdf_dict_put(ctx, root, PDF_NAME_Outlines, outlines);

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	/* Create a new kids array with only the pages we want to keep */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));
	kids = pdf_new_array(ctx, doc, 1);

	/* Retain pages specified */
	while (argc - argidx)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[argidx];

		pagecount = pdf_count_pages(ctx, doc);
		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pagecount;
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pagecount;
			}

			spage = fz_clampi(spage, 1, pagecount);
			epage = fz_clampi(epage, 1, pagecount);

			if (spage < epage)
				for (page = spage; page <= epage; ++page)
					retainpage(ctx, doc, parent, kids, page);
			else
				for (page = spage; page >= epage; --page)
					retainpage(ctx, doc, parent, kids, page);

			spec = fz_strsep(&pagelist, ",");
		}

		argidx++;
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	countobj = pdf_new_int(ctx, doc, pdf_array_len(ctx, kids));
	pdf_dict_put(ctx, pages, PDF_NAME_Count, countobj);
	pdf_drop_obj(ctx, countobj);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);

	/* Force the next call to pdf_count_pages to recount */
	glo->doc->page_count = 0;

	pagecount = pdf_count_pages(ctx, doc);
	page_object_nums = fz_calloc(ctx, pagecount, sizeof(*page_object_nums));
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		page_object_nums[i] = pdf_to_num(ctx, pageref);
	}

	/* If we had an old Dests tree (now reformed as an olddests
	 * dictionary), keep any entries in there that point to
	 * valid pages. This may mean we keep more than we need, but
	 * it's safe at least. */
	if (olddests)
	{
		pdf_obj *names = pdf_new_dict(ctx, doc, 1);
		pdf_obj *dests = pdf_new_dict(ctx, doc, 1);
		int len = pdf_dict_len(ctx, olddests);

		names_list = pdf_new_array(ctx, doc, 32);

		for (i = 0; i < len; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, olddests, i);
			pdf_obj *val = pdf_dict_get_val(ctx, olddests, i);
			pdf_obj *dest = pdf_dict_get(ctx, val, PDF_NAME_D);

			dest = pdf_array_get(ctx, dest ? dest : val, 0);
			if (dest_is_valid_page(ctx, dest, page_object_nums, pagecount))
			{
				pdf_obj *key_str = pdf_new_string(ctx, doc, pdf_to_name(ctx, key), strlen(pdf_to_name(ctx, key)));
				pdf_array_push(ctx, names_list, key_str);
				pdf_array_push(ctx, names_list, val);
				pdf_drop_obj(ctx, key_str);
			}
		}

		pdf_dict_put(ctx, dests, PDF_NAME_Names, names_list);
		pdf_dict_put(ctx, names, PDF_NAME_Dests, dests);
		pdf_dict_put(ctx, root, PDF_NAME_Names, names);

		pdf_drop_obj(ctx, names);
		pdf_drop_obj(ctx, dests);
		pdf_drop_obj(ctx, olddests);
	}

	/* Edit each pages /Annot list to remove any links that point to
	 * nowhere. */
	for (i = 0; i < pagecount; i++)
	{
		pdf_obj *pageref = pdf_lookup_page_obj(ctx, doc, i);
		pdf_obj *pageobj = pdf_resolve_indirect(ctx, pageref);

		pdf_obj *annots = pdf_dict_get(ctx, pageobj, PDF_NAME_Annots);

		int len = pdf_array_len(ctx, annots);
		int j;

		for (j = 0; j < len; j++)
		{
			pdf_obj *o = pdf_array_get(ctx, annots, j);

			if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME_Subtype), PDF_NAME_Link))
				continue;

			if (!dest_is_valid(ctx, o, pagecount, page_object_nums, names_list))
			{
				/* Remove this annotation */
				pdf_array_delete(ctx, annots, j);
				j--;
			}
		}
	}

	if (strip_outlines(ctx, doc, outlines, pagecount, page_object_nums, names_list) == 0)
	{
		pdf_dict_del(ctx, root, PDF_NAME_Outlines);
	}

	fz_free(ctx, page_object_nums);
	pdf_drop_obj(ctx, names_list);
	pdf_drop_obj(ctx, root);
}
Example #16
0
/*
	Performs the same task as
	pdf_clean_page_contents, but with an optional text filter
	function.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
		pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	pdf_obj *resources;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		contents = pdf_page_contents(ctx, page);
		resources = pdf_page_resources(ctx, page);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		if (sanitize)
		{
			res = pdf_new_dict(ctx, doc, 1);
			proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg);
			pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
			pdf_close_processor(ctx, proc_filter);
		}
		else
		{
			res = pdf_keep_obj(ctx, resources);
			pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie);
		}
		pdf_close_processor(ctx, proc_buffer);

		/* Deal with page content stream. */

		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_add_object(ctx, doc, new_obj);
			contents = new_ref;
			pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents);
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME(Filter));
			pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms));
		}

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME(G));
				if (!o)
					continue;
				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat_res;
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
				if (!pat)
					continue;
				pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources));
				if (pat_res == NULL)
					pat_res = resources;
				if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj_res;
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
				if (!xobj)
					continue;
				xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources));
				if (xobj_res == NULL)
					xobj_res = resources;
				if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype))))
					pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
				if (!o)
					continue;
				if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype))))
					pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet));
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj);

		/* ColorSpace - no cleaning possible. */
		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		/* Update resource dictionary */
		if (sanitize)
		{
			pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Example #17
0
pdf_obj *
pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
{
	fz_pixmap *pixmap = NULL;
	pdf_obj *imobj = NULL;
	pdf_obj *dp;
	fz_buffer *buffer = NULL;
	pdf_obj *imref = NULL;
	fz_compressed_buffer *cbuffer;
	unsigned char digest[16];
	int i, n;

	/* If we can maintain compression, do so */
	cbuffer = fz_compressed_image_buffer(ctx, image);

	fz_var(pixmap);
	fz_var(buffer);
	fz_var(imobj);
	fz_var(imref);

	/* Check if the same image already exists in this doc. */
	imref = pdf_find_image_resource(ctx, doc, image, digest);
	if (imref)
		return imref;

	imobj = pdf_add_new_dict(ctx, doc, 3);
	fz_try(ctx)
	{
		dp = pdf_dict_put_dict(ctx, imobj, PDF_NAME(DecodeParms), 3);
		pdf_dict_put(ctx, imobj, PDF_NAME(Type), PDF_NAME(XObject));
		pdf_dict_put(ctx, imobj, PDF_NAME(Subtype), PDF_NAME(Image));

		if (cbuffer)
		{
			fz_compression_params *cp = &cbuffer->params;
			switch (cp ? cp->type : FZ_IMAGE_UNKNOWN)
			{
			default:
				goto raw_or_unknown_compression;
			case FZ_IMAGE_JPEG:
				if (cp->u.jpeg.color_transform != -1)
					pdf_dict_put_int(ctx, dp, PDF_NAME(ColorTransform), cp->u.jpeg.color_transform);
				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(DCTDecode));
				break;
			case FZ_IMAGE_JPX:
				if (cp->u.jpx.smask_in_data)
					pdf_dict_put_int(ctx, dp, PDF_NAME(SMaskInData), cp->u.jpx.smask_in_data);
				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JPXDecode));
				break;
			case FZ_IMAGE_FAX:
				if (cp->u.fax.columns)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.fax.columns);
				if (cp->u.fax.rows)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Rows), cp->u.fax.rows);
				if (cp->u.fax.k)
					pdf_dict_put_int(ctx, dp, PDF_NAME(K), cp->u.fax.k);
				if (cp->u.fax.end_of_line)
					pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfLine), cp->u.fax.end_of_line);
				if (cp->u.fax.encoded_byte_align)
					pdf_dict_put_bool(ctx, dp, PDF_NAME(EncodedByteAlign), cp->u.fax.encoded_byte_align);
				if (cp->u.fax.end_of_block)
					pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfBlock), cp->u.fax.end_of_block);
				if (cp->u.fax.black_is_1)
					pdf_dict_put_bool(ctx, dp, PDF_NAME(BlackIs1), cp->u.fax.black_is_1);
				if (cp->u.fax.damaged_rows_before_error)
					pdf_dict_put_int(ctx, dp, PDF_NAME(DamagedRowsBeforeError), cp->u.fax.damaged_rows_before_error);
				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
				break;
			case FZ_IMAGE_FLATE:
				if (cp->u.flate.columns)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.flate.columns);
				if (cp->u.flate.colors)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.flate.colors);
				if (cp->u.flate.predictor)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.flate.predictor);
				if (cp->u.flate.bpc)
					pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.flate.bpc);
				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
				pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), image->bpc);
				break;
			case FZ_IMAGE_LZW:
				if (cp->u.lzw.columns)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.lzw.columns);
				if (cp->u.lzw.colors)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.lzw.colors);
				if (cp->u.lzw.predictor)
					pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.lzw.predictor);
				if (cp->u.lzw.early_change)
					pdf_dict_put_int(ctx, dp, PDF_NAME(EarlyChange), cp->u.lzw.early_change);
				if (cp->u.lzw.bpc)
					pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.lzw.bpc);
				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(LZWDecode));
				break;
			case FZ_IMAGE_RLD:
				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(RunLengthDecode));
				break;
			}

			if (!pdf_dict_len(ctx, dp))
				pdf_dict_del(ctx, imobj, PDF_NAME(DecodeParms));

			buffer = fz_keep_buffer(ctx, cbuffer->buffer);

			if (image->use_decode)
			{
				pdf_obj *ary = pdf_dict_put_array(ctx, imobj, PDF_NAME(Decode), image->n * 2);
				for (i = 0; i < image->n * 2; ++i)
					pdf_array_push_real(ctx, ary, image->decode[i]);
			}
		}
		else
		{
			unsigned int size;
			int h;
			unsigned char *d, *s;

raw_or_unknown_compression:
			/* Currently, set to maintain resolution; should we consider
			 * subsampling here according to desired output res? */
			pixmap = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL);
			n = pixmap->n - pixmap->alpha - pixmap->s; /* number of colorants */
			if (n == 0)
				n = 1; /* treat pixmaps with only alpha or spots as grayscale */

			size = image->w * n;
			h = image->h;
			s = pixmap->samples;
			d = fz_malloc(ctx, size * h);
			buffer = fz_new_buffer_from_data(ctx, d, size * h);

			if (n == pixmap->n)
			{
				/* If we use all channels, we can copy the data as is. */
				while (h--)
				{
					memcpy(d, s, size);
					d += size;
					s += pixmap->stride;
				}
			}
			else
			{
				/* Need to remove the alpha and spot planes. */
				/* TODO: extract alpha plane to a soft mask. */
				/* TODO: convert spots to colors. */

				int line_skip = pixmap->stride - pixmap->w * pixmap->n;
				int skip = pixmap->n - n;
				while (h--)
				{
					int w = pixmap->w;
					while (w--)
					{
						int k;
						for (k = 0; k < n; ++k)
							*d++ = *s++;
						s += skip;
					}
					s += line_skip;
				}
			}
		}

		pdf_dict_put_int(ctx, imobj, PDF_NAME(Width), pixmap ? pixmap->w : image->w);
		pdf_dict_put_int(ctx, imobj, PDF_NAME(Height), pixmap ? pixmap->h : image->h);

		if (image->imagemask)
		{
			pdf_dict_put_bool(ctx, imobj, PDF_NAME(ImageMask), 1);
		}
		else
		{
			fz_colorspace *cs;

			pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), image->bpc);

			cs = pixmap ? pixmap->colorspace : image->colorspace;
			switch (fz_colorspace_type(ctx, cs))
			{
			case FZ_COLORSPACE_INDEXED:
				{
					fz_colorspace *basecs;
					unsigned char *lookup = NULL;
					int high = 0;
					int basen;
					pdf_obj *arr;

					lookup = fz_indexed_colorspace_palette(ctx, cs, &high);
					basecs = fz_colorspace_base(ctx, cs);
					basen = fz_colorspace_n(ctx, basecs);

					arr = pdf_dict_put_array(ctx, imobj, PDF_NAME(ColorSpace), 4);

					pdf_array_push(ctx, arr, PDF_NAME(Indexed));
					switch (fz_colorspace_type(ctx, basecs))
					{
					case FZ_COLORSPACE_GRAY:
						pdf_array_push(ctx, arr, PDF_NAME(DeviceGray));
						break;
					case FZ_COLORSPACE_RGB:
						pdf_array_push(ctx, arr, PDF_NAME(DeviceRGB));
						break;
					case FZ_COLORSPACE_CMYK:
						pdf_array_push(ctx, arr, PDF_NAME(DeviceCMYK));
						break;
					default:
						// TODO: convert to RGB!
						fz_throw(ctx, FZ_ERROR_GENERIC, "only indexed Gray, RGB, and CMYK colorspaces supported");
						break;
					}

					pdf_array_push_int(ctx, arr, high);
					pdf_array_push_string(ctx, arr, (char *) lookup, basen * (high + 1));
				}
				break;
			case FZ_COLORSPACE_NONE:
			case FZ_COLORSPACE_GRAY:
				pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceGray));
				break;
			case FZ_COLORSPACE_RGB:
				pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceRGB));
				break;
			case FZ_COLORSPACE_CMYK:
				pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceCMYK));
				break;
			default:
				// TODO: convert to RGB!
				fz_throw(ctx, FZ_ERROR_GENERIC, "only Gray, RGB, and CMYK colorspaces supported");
				break;
			}
		}

		if (image->mask)
		{
			pdf_dict_put_drop(ctx, imobj, PDF_NAME(SMask), pdf_add_image(ctx, doc, image->mask));
		}

		pdf_update_stream(ctx, doc, imobj, buffer, 1);

		/* Add ref to our image resource hash table. */
		imref = pdf_insert_image_resource(ctx, doc, digest, imobj);
	}
	fz_always(ctx)
	{
		fz_drop_pixmap(ctx, pixmap);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, imobj);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
	return imref;
}