Beispiel #1
0
/*
 * Mark the object referenced by 'obj' as used and sweep its contents.
 *
 * References whose object number lies outside the xref table, and objects
 * that are already flagged in uselist, are ignored. For stream objects an
 * indirect /Length is replaced in the stream dictionary by the resolved
 * value, and the object that held the length is flagged as unused again.
 * Finally the resolved object is handed to sweepobj().
 */
static void sweepref(fz_obj *obj)
{
	int num = fz_to_num(obj);
	int gen = fz_to_gen(obj);

	if (num < 0 || num >= xref->len)
		return;
	if (uselist[num])
		return;

	uselist[num] = 1;

	/* Bake in /Length in stream objects */
	fz_try(ctx)
	{
		if (pdf_is_stream(xref, num, gen))
		{
			fz_obj *len = fz_dict_gets(obj, "Length");
			if (fz_is_indirect(len))
			{
				int lennum = fz_to_num(len);

				/* The reference number comes from the file, so it gets
				 * the same range check as 'num' before it is used as an
				 * index into uselist. */
				if (lennum >= 0 && lennum < xref->len)
					uselist[lennum] = 0;
				len = fz_resolve_indirect(len);
				fz_dict_puts(obj, "Length", len);
			}
		}
	}
	fz_catch(ctx)
	{
		/* Leave broken */
	}

	sweepobj(fz_resolve_indirect(obj));
}
Beispiel #2
0
/*
 * Write stream object (num, gen) to 'out' using the data returned by
 * pdf_load_stream().
 *
 * The /Filter and /DecodeParms entries are removed from 'obj'. When doascii
 * is set and isbinarystream() reports binary data, the data is replaced by
 * the output of hexbuf() and addhexfilter() is applied to 'obj'. /Length is
 * always rewritten to the size of the data actually written.
 */
static void expandstream(fz_obj *obj, int num, int gen)
{
	fz_buffer *data;
	fz_obj *length;

	/* Load the stream before its filter entries are removed. */
	data = pdf_load_stream(xref, num, gen);

	fz_dict_dels(obj, "Filter");
	fz_dict_dels(obj, "DecodeParms");

	if (doascii && isbinarystream(data))
	{
		fz_buffer *hex = hexbuf(data->data, data->len);

		fz_drop_buffer(ctx, data);
		data = hex;

		addhexfilter(obj);
	}

	/* The dictionary must describe the bytes that follow. */
	length = fz_new_int(ctx, data->len);
	fz_dict_puts(obj, "Length", length);
	fz_drop_obj(length);

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(out, obj, !doexpand);
	fputs("stream\n", out);
	fwrite(data->data, 1, data->len, out);
	fputs("endstream\nendobj\n\n", out);

	fz_drop_buffer(ctx, data);
}
Beispiel #3
0
/*
 * Write stream object (num, gen) to 'out' using the data returned by
 * pdf_load_raw_stream(); a load failure is passed to die().
 *
 * When doascii is set and isbinarystream() reports binary data, the data is
 * replaced by the output of hexbuf(), addhexfilter() is applied to 'obj' and
 * /Length is updated to the new size. Otherwise the dictionary is written
 * unchanged.
 */
static void copystream(fz_obj *obj, int num, int gen)
{
	fz_buffer *raw;
	fz_error error = pdf_load_raw_stream(&raw, xref, num, gen);

	if (error)
		die(error);

	if (doascii && isbinarystream(raw))
	{
		fz_buffer *hex = hexbuf(raw->data, raw->len);
		fz_obj *length;

		fz_drop_buffer(ctx, raw);
		raw = hex;

		addhexfilter(obj);

		/* Only the hex-encoded case changes the byte count. */
		length = fz_new_int(ctx, raw->len);
		fz_dict_puts(ctx, obj, "Length", length);
		fz_drop_obj(ctx, length);
	}

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(ctx, out, obj, !doexpand);
	fputs("stream\n", out);
	fwrite(raw->data, 1, raw->len, out);
	fputs("endstream\nendobj\n\n", out);

	fz_drop_buffer(ctx, raw);
}
Beispiel #4
0
/*
 * Add an /ASCIIHexDecode entry to the /Filter of stream dictionary 'dict',
 * keeping /DecodeParms in step with a null entry.
 *
 *  - /Filter is a name:  it becomes the array [ASCIIHexDecode, old name];
 *    a dictionary /DecodeParms becomes [null, old dictionary].
 *  - /Filter is an array: ASCIIHexDecode is added with fz_array_insert();
 *    an array /DecodeParms gets a null added the same way.
 *  - anything else:       /Filter is set to the name ASCIIHexDecode.
 */
static void addhexfilter(fz_obj *dict)
{
	fz_obj *hexname = fz_new_name(ctx, "ASCIIHexDecode");
	fz_obj *nullparm = fz_new_null(ctx);
	fz_obj *filter_arr = NULL;
	fz_obj *parms_arr = NULL;
	fz_obj *filter = fz_dict_gets(dict, "Filter");
	fz_obj *parms = fz_dict_gets(dict, "DecodeParms");

	if (fz_is_name(filter))
	{
		/* Single filter: wrap it in a two-element array. */
		filter_arr = fz_new_array(ctx, 2);
		fz_array_push(filter_arr, hexname);
		fz_array_push(filter_arr, filter);
		filter = filter_arr;

		if (fz_is_dict(parms))
		{
			parms_arr = fz_new_array(ctx, 2);
			fz_array_push(parms_arr, nullparm);
			fz_array_push(parms_arr, parms);
			parms = parms_arr;
		}
	}
	else if (fz_is_array(filter))
	{
		/* Existing filter chain: extend the arrays in place. */
		fz_array_insert(filter, hexname);
		if (fz_is_array(parms))
			fz_array_insert(parms, nullparm);
	}
	else
	{
		filter = hexname;
	}

	fz_dict_puts(dict, "Filter", filter);
	if (parms != NULL)
		fz_dict_puts(dict, "DecodeParms", parms);

	/* Release the local references; the temporary arrays exist only in
	 * the single-name case. */
	fz_drop_obj(hexname);
	fz_drop_obj(nullparm);
	if (filter_arr != NULL)
		fz_drop_obj(filter_arr);
	if (parms_arr != NULL)
		fz_drop_obj(parms_arr);
}
Beispiel #5
0
/*
 * Write the cross-reference table, the trailer dictionary and the
 * startxref/%%EOF footer to 'out'.
 *
 * One table line is written per object number below xref->len, taken from
 * ofslist and genlist; objects flagged in uselist are written as in-use
 * ("n"), all others as free ("f"). The trailer contains /Size plus the
 * /Info, /Root and /ID entries of the original trailer when present.
 */
static void writexref(void)
{
	fz_obj *trailer;
	fz_obj *obj;
	long startxref; /* ftell() returns long; an int would narrow the offset */
	int num;

	/* Remember where the table starts; the footer refers back to it. */
	startxref = ftell(out);

	fprintf(out, "xref\n0 %d\n", xref->len);
	for (num = 0; num < xref->len; num++)
	{
		if (uselist[num])
			fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]);
		else
			fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]);
	}
	fprintf(out, "\n");

	trailer = fz_new_dict(ctx, 5);

	obj = fz_new_int(ctx, xref->len);
	fz_dict_puts(trailer, "Size", obj);
	fz_drop_obj(obj);

	/* The entries below are looked up in the original trailer and are
	 * not dropped here. */
	obj = fz_dict_gets(xref->trailer, "Info");
	if (obj)
		fz_dict_puts(trailer, "Info", obj);

	obj = fz_dict_gets(xref->trailer, "Root");
	if (obj)
		fz_dict_puts(trailer, "Root", obj);

	obj = fz_dict_gets(xref->trailer, "ID");
	if (obj)
		fz_dict_puts(trailer, "ID", obj);

	fprintf(out, "trailer\n");
	fz_fprint_obj(out, trailer, doexpand == 0);
	fprintf(out, "\n");

	fz_drop_obj(trailer);

	fprintf(out, "startxref\n%ld\n%%%%EOF\n", startxref);
}
Beispiel #6
0
/*
 * Build an annotation with a generated appearance stream from the
 * annotation dictionary 'obj'.
 *
 * Reads /DA (default appearance string), /Contents (text), /Rect and /Q
 * (alignment value passed to pdf_append_line) from 'obj', builds a content
 * stream that clips to the rectangle and lays the text out line by line,
 * and passes the result to pdf_create_annot() together with a resource
 * dictionary created from ANNOT_FREETEXT_AP_RESOURCES.
 *
 * NOTE(review): 'content' and 'res' are not released here; presumably
 * pdf_create_annot() takes ownership of them - confirm.
 */
static pdf_annot *
pdf_create_freetext_annot(pdf_xref *xref, fz_obj *obj)
{
	fz_context *ctx = xref->ctx;
	/* content: the appearance stream being built;
	 * base_ap: a prefix (default appearance + font) handed to pdf_append_line */
	fz_buffer *content = fz_new_buffer(ctx, 256);
	fz_buffer *base_ap = fz_new_buffer(ctx, 256);
	fz_obj *ap = fz_dict_gets(ctx, obj, "DA");
	fz_obj *value = fz_dict_gets(ctx, obj, "Contents");
	fz_rect rect = pdf_to_rect(ctx, fz_dict_gets(ctx, obj, "Rect"));
	int align = fz_to_int(ctx, fz_dict_gets(ctx, obj, "Q"));
	fz_obj *res = pdf_dict_from_string(xref, ANNOT_FREETEXT_AP_RESOURCES);
	unsigned short *ucs2, *rest;
	float x;

	char *font_name = NULL;
	float font_size = pdf_extract_font_size(xref, fz_to_str_buf(ctx, ap), &font_name);
	/* Fall back to a size of 10 when no (or a zero) size was extracted */
	if (!font_size)
		font_size = 10;
	/* TODO: what resource dictionary does this font name refer to? */
	if (font_name)
	{
		/* Register the extracted font name as an alias of the "Default"
		 * font entry in the generated resources */
		fz_obj *font = fz_dict_gets(ctx, res, "Font");
		fz_dict_puts(ctx, font, font_name, fz_dict_gets(ctx, font, "Default"));
		fz_free(ctx, font_name);
	}

	/* Clip to the annotation rectangle inset by one unit on each side,
	 * then start a text object with the default appearance string */
	fz_buffer_printf(ctx, content, "q 1 1 %.4f %.4f re W n BT %s ",
		rect.x1 - rect.x0 - 2.0f, rect.y1 - rect.y0 - 2.0f, fz_to_str_buf(ctx, ap));
	fz_buffer_printf(ctx, base_ap, "q BT %s ", fz_to_str_buf(ctx, ap));
	/* Select the "Default" font at the chosen size in both buffers */
	fz_buffer_printf(ctx, content, "/Default %.4f Tf ", font_size);
	fz_buffer_printf(ctx, base_ap, "/Default %.4f Tf ", font_size);
	/* Position the text matrix 2 units in from the left, 2 below the top */
	fz_buffer_printf(ctx, content, "1 0 0 1 2 %.4f Tm ", rect.y1 - rect.y0 - 2);

	/* Adobe Reader seems to consider "[1 0 0] r" and "1 0 0 rg" to mean the same(?) */
	/* NOTE(review): strchr/sscanf treat base_ap->data as a C string; this
	 * assumes the buffer contents are NUL-terminated - confirm */
	if (strchr(base_ap->data, '['))
	{
		float r, g, b;
		if (sscanf(strchr(base_ap->data, '['), "[%f %f %f] r", &r, &g, &b) == 3)
			fz_buffer_printf(ctx, content, "%.4f %.4f %.4f rg ", r, g, b);
	}

	/* Replace every code unit above 0xFF by '?' */
	ucs2 = pdf_to_ucs2(ctx, value);
	for (rest = ucs2; *rest; rest++)
		if (*rest > 0xFF)
			*rest = '?';

	/* Append the text line by line: each pdf_append_line call returns the
	 * position from which the next call continues, until the terminator
	 * is reached. The usable width is the rectangle width minus 4. */
	x = 0;
	rest = ucs2;
	while (*rest)
		rest = pdf_append_line(xref, res, content, base_ap, rest, font_size, align, rect.x1 - rect.x0 - 4.0f, 1, &x);

	fz_free(ctx, ucs2);
	/* Close the text object and restore the graphics state */
	fz_buffer_printf(ctx, content, "ET Q");
	fz_drop_buffer(ctx, base_ap);

	return pdf_create_annot(ctx, rect, fz_keep_obj(obj), content, res, 0);
}
Beispiel #7
0
/*
 * Return a copy of the dictionary 'obj' (resolved if indirect) whose /OC
 * entry is set to the dictionary built from ANNOT_OC_VIEW_ONLY.
 * The original object is not modified; the copy is returned to the caller.
 */
static fz_obj *
pdf_clone_for_view_only(pdf_xref *xref, fz_obj *obj)
{
	fz_obj *view_only = pdf_dict_from_string(xref, ANNOT_OC_VIEW_ONLY);
	fz_obj *clone = fz_copy_dict(xref->ctx, pdf_resolve_indirect(obj));

	fz_dict_puts(xref->ctx, clone, "OC", view_only);
	/* Release the local reference now that the entry has been stored. */
	fz_drop_obj(xref->ctx, view_only);

	return clone;
}
Beispiel #8
0
/*
 * Decode a fax-compressed TIFF strip from 'chain' into the buffer 'wp' of
 * 'wlen' bytes.
 *
 * A parameter dictionary for fz_open_faxd() is built from the TIFF header:
 *   Columns          = image width
 *   Rows             = image length
 *   BlackIs1         = true when photometric is 0
 *   K                = -1 for compression 4, otherwise 0
 *   EncodedByteAlign = true for compression 2
 *
 * Returns fz_okay, or an annotated error when fz_read() reports a negative
 * count.
 */
static int
xps_decode_tiff_fax(struct tiff *tiff, int comp, fz_stream *chain, byte *wp, int wlen)
{
	fz_context *ctx = tiff->ctx;
	fz_obj *params = fz_new_dict(ctx, 5);
	fz_obj *val;
	fz_stream *stm;
	int n;

	/* Each value is created, stored, and its local reference released. */
	val = fz_new_int(ctx, tiff->imagewidth);
	fz_dict_puts(ctx, params, "Columns", val);
	fz_drop_obj(ctx, val);

	val = fz_new_int(ctx, tiff->imagelength);
	fz_dict_puts(ctx, params, "Rows", val);
	fz_drop_obj(ctx, val);

	val = fz_new_bool(ctx, tiff->photometric == 0);
	fz_dict_puts(ctx, params, "BlackIs1", val);
	fz_drop_obj(ctx, val);

	val = fz_new_int(ctx, comp == 4 ? -1 : 0);
	fz_dict_puts(ctx, params, "K", val);
	fz_drop_obj(ctx, val);

	val = fz_new_bool(ctx, comp == 2);
	fz_dict_puts(ctx, params, "EncodedByteAlign", val);
	fz_drop_obj(ctx, val);

	stm = fz_open_faxd(chain, params);
	n = fz_read(stm, wp, wlen);
	fz_close(stm);
	fz_drop_obj(ctx, params);

	if (n < 0)
		return fz_error_note(ctx, n, "cannot read fax strip");

	return fz_okay;
}
Beispiel #9
0
/*
 * Extract the text between <tag_name ...> and </tag_name> from 'data' by
 * plain string searching (no XML parsing) and store it, with surrounding
 * whitespace trimmed, as a string under key 'name' in 'dict'.
 *
 * Nothing is stored unless the first occurrence of tag_name is directly
 * preceded by '<', a second occurrence follows that is directly preceded by
 * "</", and the opening tag's '>' lies before that second occurrence.
 */
static void
xps_hacky_get_prop(fz_context *ctx, char *data, fz_obj *dict, char *name, char *tag_name)
{
	char *open_name, *close_name;
	char *first, *last;
	fz_obj *value;

	/* The opening tag name must be preceded by '<'. */
	open_name = strstr(data, tag_name);
	if (!open_name || open_name == data || open_name[-1] != '<')
		return;

	close_name = strstr(open_name + 1, tag_name);
	first = strchr(open_name, '>');
	if (!first || !close_name || first >= close_name)
		return;
	/* The closing tag name must be preceded by "</". */
	if (close_name[-2] != '<' || close_name[-1] != '/')
		return;

	/* Step past '>' and any leading whitespace. */
	first++;
	while (iswhite(*first))
		first++;

	/* Start just before "</" and back up over trailing whitespace. */
	last = close_name - 3;
	while (iswhite(*last) && last > first)
		last--;

	value = fz_new_string(ctx, first, last - first + 1);
	fz_dict_puts(ctx, dict, name, value);
	fz_drop_obj(ctx, value);
}
Beispiel #10
0
/*
 * Reduce the document to the pages named on the command line.
 *
 * Every argument from argv[fz_optind] onwards is a comma-separated list of
 * page specifications (1-based):
 *   "N"    page N
 *   "N-M"  pages N through M (a reversed range is swapped, not reversed)
 *   "N-"   page N through the last page
 *   "-M"   page M through the last page (the start defaults to the last
 *          page and the bounds are then swapped)
 *   "-"    the last page only
 * Ranges are clamped to [1, page count]. fz_optind is advanced past all
 * consumed arguments.
 *
 * The catalog is replaced by a dictionary holding only /Type and /Pages,
 * the page tree root gets a new /Kids array and /Count, every retained page
 * has its /Parent pointed at the page tree root, and - when the document
 * has a Dests name tree - a reduced /Names /Dests entry is written that
 * keeps only destinations pointing at retained pages.
 */
static void retainpages(int argc, char **argv)
{
	fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = fz_dict_gets(xref->trailer, "Root");
	pages = fz_dict_gets(oldroot, "Pages");
	/* Loaded before the catalog is replaced below */
	olddests = pdf_load_name_tree(xref, "Dests");

	root = fz_new_dict(ctx, 2);
	fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type"));
	fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages"));

	/* Replace the catalog object in the xref with the reduced dictionary */
	pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root);

	fz_drop_obj(root);

	/* Create a new kids array with only the pages we want to keep */
	parent = fz_new_indirect(ctx, fz_to_num(pages), fz_to_gen(pages), xref);
	kids = fz_new_array(ctx, 1);

	/* Retain pages specified */
	while (argc - fz_optind)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[fz_optind];

		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			/* A leading dash means no start page was given */
			if (dash == spec)
				spage = epage = pdf_count_pages(xref);
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				/* Anything after the dash is the end page; a bare
				 * trailing dash means "through the last page" */
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pdf_count_pages(xref);
			}

			/* Normalise to an ascending range */
			if (spage > epage)
				page = spage, spage = epage, epage = page;

			/* Clamp to the pages that exist */
			if (spage < 1)
				spage = 1;
			if (epage > pdf_count_pages(xref))
				epage = pdf_count_pages(xref);

			for (page = spage; page <= epage; page++)
			{
				/* Page numbers are 1-based, the arrays 0-based */
				fz_obj *pageobj = xref->page_objs[page-1];
				fz_obj *pageref = xref->page_refs[page-1];

				/* Re-parent the page directly under the page tree root */
				fz_dict_puts(pageobj, "Parent", parent);

				/* Store page object in new kids array */
				fz_array_push(kids, pageref);
			}

			spec = fz_strsep(&pagelist, ",");
		}

		fz_optind++;
	}

	fz_drop_obj(parent);

	/* Update page count and kids array */
	countobj = fz_new_int(ctx, fz_array_len(kids));
	fz_dict_puts(pages, "Count", countobj);
	fz_drop_obj(countobj);
	fz_dict_puts(pages, "Kids", kids);
	fz_drop_obj(kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		int i;
		fz_obj *names = fz_new_dict(ctx, 1);
		fz_obj *dests = fz_new_dict(ctx, 1);
		fz_obj *names_list = fz_new_array(ctx, 32);

		for (i = 0; i < fz_dict_len(olddests); i++)
		{
			fz_obj *key = fz_dict_get_key(olddests, i);
			fz_obj *val = fz_dict_get_val(olddests, i);
			/* The flat /Names array stores its keys as strings */
			fz_obj *key_str = fz_new_string(ctx, fz_to_name(key), strlen(fz_to_name(key)));
			fz_obj *dest = fz_dict_gets(val, "D");

			/* The destination array is either the /D entry or the value
			 * itself; its first element is compared against the kids */
			dest = fz_array_get(dest ? dest : val, 0);
			/* Keep the entry only if it points at a retained page */
			if (fz_array_contains(fz_dict_gets(pages, "Kids"), dest))
			{
				fz_array_push(names_list, key_str);
				fz_array_push(names_list, val);
			}
			fz_drop_obj(key_str);
		}

		/* Re-read the catalog (it was replaced above) and attach
		 * /Names << /Dests << /Names [...] >> >> to it */
		root = fz_dict_gets(xref->trailer, "Root");
		fz_dict_puts(dests, "Names", names_list);
		fz_dict_puts(names, "Dests", dests);
		fz_dict_puts(root, "Names", names);

		fz_drop_obj(names);
		fz_drop_obj(dests);
		fz_drop_obj(names_list);
		fz_drop_obj(olddests);
	}
}
Beispiel #11
0
/*
 * Load one level of the document outline starting at the item 'dict'.
 *
 * Siblings are followed iteratively through /Next and linked via ->next;
 * the children of each item are loaded recursively through /First and
 * attached via ->down. Returns the first node of the level, or NULL when
 * 'dict' is null or has already been visited.
 *
 * Cycle protection: every item visited by this invocation is tagged with a
 * temporary ".seen" entry, and an already tagged item ends the level. On
 * return this invocation removes exactly the tags it added itself. Tags
 * set by an enclosing invocation are left in place, so that invocation
 * keeps its protection and can clean up its own chain completely.
 */
static fz_outline *
pdf_load_outline_imp(pdf_xref *xref, fz_obj *dict)
{
    pdf_link *link;
    fz_outline *node;
    fz_obj *obj;
    /* SumatraPDF: prevent potential stack overflow */
    fz_outline *prev = NULL, *root = NULL;
    fz_obj *origDict = dict;
    fz_context *ctx = xref->ctx;
    /* number of items tagged with ".seen" by this invocation */
    int marked = 0;

    if (fz_is_null(ctx, dict))
        return NULL;

    /* SumatraPDF: prevent cyclic outlines */
    do
    {
        if (fz_dict_gets(ctx, dict, ".seen"))
            break;
        obj = fz_new_null(ctx);
        fz_dict_puts(ctx, dict, ".seen", obj);
        fz_drop_obj(ctx, obj);
        marked++;

        node = fz_malloc(ctx, sizeof(fz_outline));
        node->title = NULL;
        node->page = -1;
        node->down = NULL;
        node->next = NULL;

        obj = fz_dict_gets(ctx, dict, "Title");
        if (obj)
            node->title = pdf_to_utf8(ctx, obj);

        /* SumatraPDF: support expansion states */
        node->is_open = fz_to_int(ctx, fz_dict_gets(ctx, dict, "Count")) >= 0;
        /* SumatraPDF: extended outline actions */
        node->data = node->free_data = NULL;

        if (fz_dict_gets(ctx, dict, "Dest") || fz_dict_gets(ctx, dict, "A"))
        {
            link = pdf_load_link(xref, dict);
            if (link) /* SumatraPDF: don't crash if it's no link after all */
            {
                if (link->kind == PDF_LINK_GOTO)
                    node->page = pdf_find_page_number(xref, fz_array_get(ctx, link->dest, 0));
                /* SumatraPDF: extended outline actions */
                node->data = link;
                node->free_data = pdf_free_link;
            }
        }

        /* Children are loaded while the tags of this level are in place. */
        obj = fz_dict_gets(ctx, dict, "First");
        if (obj)
            node->down = pdf_load_outline_imp(xref, obj);

        /* SumatraPDF: prevent potential stack overflow */
        if (!root)
            prev = root = node;
        else
            prev = prev->next = node;

        dict = fz_dict_gets(ctx, dict, "Next");
    } while (dict && !fz_is_null(ctx, dict));

    /* SumatraPDF: prevent cyclic outlines */
    /* The tagged items are the first 'marked' entries of the /Next chain
     * starting at origDict; untag those and nothing else. */
    for (dict = origDict; marked > 0; marked--)
    {
        fz_dict_dels(ctx, dict, ".seen");
        dict = fz_dict_gets(ctx, dict, "Next");
    }

    return root;
}
Beispiel #12
0
/*
 * Open a PDF document from 'file' and return its xref in *xrefp.
 *
 * Steps:
 *  1. Load the xref table; on failure discard any partial table/trailer
 *     and rebuild it with pdf_repair_xref().
 *  2. If the trailer has an /Encrypt dictionary, set up decryption.
 *  3. If a password is required and one was supplied, authenticate with it.
 *     When no password is supplied the document is still returned.
 *  4. After a repair, repair object streams and scan all objects for a
 *     missing /Root (an object with /Type /Catalog) and a missing /Info (an
 *     object with /Creator or /Producer).
 *  5. Read the optional content configuration.
 *
 * Returns fz_okay on success. On every error path the partially built xref
 * is freed with pdf_free_xref(), *xrefp is left untouched and an error is
 * returned.
 */
fz_error
pdf_open_xref_with_stream(pdf_xref **xrefp, fz_stream *file, char *password)
{
	pdf_xref *xref;
	fz_error error;
	fz_obj *encrypt, *id;
	fz_obj *dict, *obj;
	int i, repaired = 0;

	/* install pdf specific callback */
	fz_resolve_indirect = pdf_resolve_indirect;

	xref = fz_malloc(sizeof(pdf_xref));

	memset(xref, 0, sizeof(pdf_xref));

	/* The xref holds its own reference to the stream */
	xref->file = fz_keep_stream(file);

	error = pdf_load_xref(xref, xref->scratch, sizeof xref->scratch);
	if (error)
	{
		fz_catch(error, "trying to repair");
		/* Throw away whatever the failed load left behind so the
		 * repair starts from a clean state */
		if (xref->table)
		{
			fz_free(xref->table);
			xref->table = NULL;
			xref->len = 0;
		}
		if (xref->trailer)
		{
			fz_drop_obj(xref->trailer);
			xref->trailer = NULL;
		}
		error = pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot repair document");
		}
		repaired = 1;
	}

	encrypt = fz_dict_gets(xref->trailer, "Encrypt");
	id = fz_dict_gets(xref->trailer, "ID");
	if (fz_is_dict(encrypt))
	{
		error = pdf_new_crypt(&xref->crypt, encrypt, id);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot decrypt document");
		}
	}

	if (pdf_needs_password(xref))
	{
		/* Only care if we have a password */
		if (password)
		{
			int okay = pdf_authenticate_password(xref, password);
			if (!okay)
			{
				pdf_free_xref(xref);
				return fz_throw("invalid password");
			}
		}
	}

	if (repaired)
	{
		int hasroot, hasinfo;

		error = pdf_repair_obj_stms(xref);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot repair document");
		}

		/* Evaluated once before the scan: when an entry is missing,
		 * every matching object overwrites it, so the last match wins */
		hasroot = fz_dict_gets(xref->trailer, "Root") != NULL;
		hasinfo = fz_dict_gets(xref->trailer, "Info") != NULL;

		/* Object 0 is skipped; every object is loaded with generation 0 */
		for (i = 1; i < xref->len; i++)
		{
			/* Skip unused and free entries */
			if (xref->table[i].type == 0 || xref->table[i].type == 'f')
				continue;

			error = pdf_load_object(&dict, xref, i, 0);
			if (error)
			{
				fz_catch(error, "ignoring broken object (%d 0 R)", i);
				continue;
			}

			if (!hasroot)
			{
				/* A /Type /Catalog object is taken as the document root */
				obj = fz_dict_gets(dict, "Type");
				if (fz_is_name(obj) && !strcmp(fz_to_name(obj), "Catalog"))
				{
					obj = fz_new_indirect(i, 0, xref);
					fz_dict_puts(xref->trailer, "Root", obj);
					fz_drop_obj(obj);
				}
			}

			if (!hasinfo)
			{
				/* An object with /Creator or /Producer is taken as the
				 * document information dictionary */
				if (fz_dict_gets(dict, "Creator") || fz_dict_gets(dict, "Producer"))
				{
					obj = fz_new_indirect(i, 0, xref);
					fz_dict_puts(xref->trailer, "Info", obj);
					fz_drop_obj(obj);
				}
			}

			fz_drop_obj(dict);
		}
	}

	error = pdf_read_ocg(xref);
	if (error)
	{
		pdf_free_xref(xref);
		return fz_rethrow(error, "Broken Optional Content");
	}

	/* Hand the finished xref to the caller only on full success */
	*xrefp = xref;
	return fz_okay;
}