Пример #1
0
static void copystream(fz_obj *obj, int num, int gen)
{
	fz_error error;
	fz_buffer *buf, *tmp;
	fz_obj *newlen;

	error = pdf_load_raw_stream(&buf, xref, num, gen);
	if (error)
		die(error);

	if (doascii && isbinarystream(buf))
	{
		tmp = hexbuf(buf->data, buf->len);
		fz_drop_buffer(ctx, buf);
		buf = tmp;

		addhexfilter(obj);

		newlen = fz_new_int(ctx, buf->len);
		fz_dict_puts(ctx, obj, "Length", newlen);
		fz_drop_obj(ctx, newlen);
	}

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(ctx, out, obj, !doexpand);
	fprintf(out, "stream\n");
	fwrite(buf->data, 1, buf->len, out);
	fprintf(out, "endstream\nendobj\n\n");

	fz_drop_buffer(ctx, buf);
}
Пример #2
0
static void expandstream(fz_obj *obj, int num, int gen)
{
	fz_buffer *buf, *tmp;
	fz_obj *newlen;

	buf = pdf_load_stream(xref, num, gen);

	fz_dict_dels(obj, "Filter");
	fz_dict_dels(obj, "DecodeParms");

	if (doascii && isbinarystream(buf))
	{
		tmp = hexbuf(buf->data, buf->len);
		fz_drop_buffer(ctx, buf);
		buf = tmp;

		addhexfilter(obj);
	}

	newlen = fz_new_int(ctx, buf->len);
	fz_dict_puts(obj, "Length", newlen);
	fz_drop_obj(newlen);

	fprintf(out, "%d %d obj\n", num, gen);
	fz_fprint_obj(out, obj, doexpand == 0);
	fprintf(out, "stream\n");
	fwrite(buf->data, 1, buf->len, out);
	fprintf(out, "endstream\nendobj\n\n");

	fz_drop_buffer(ctx, buf);
}
Пример #3
0
fz_error
pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error;
	int tok;
	int len;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse token in object stream");

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_NAME: *op = fz_new_name(buf); break;
	case PDF_TOK_REAL: *op = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: *op = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: *op = fz_new_bool(1); break;
	case PDF_TOK_FALSE: *op = fz_new_bool(0); break;
	case PDF_TOK_NULL: *op = fz_new_null(); break;
	case PDF_TOK_INT: *op = fz_new_int(atoi(buf)); break;
	default: return fz_throw("unknown token in object stream");
	}

	return fz_okay;
}
Пример #4
0
static int
xps_decode_tiff_fax(struct tiff *tiff, int comp, fz_stream *chain, byte *wp, int wlen)
{
	fz_stream *stm;
	fz_obj *params;
	fz_obj *columns, *rows, *black_is_1, *k, *encoded_byte_align;
	int n;
	fz_context *ctx = tiff->ctx;

	columns = fz_new_int(ctx, tiff->imagewidth);
	rows = fz_new_int(ctx, tiff->imagelength);
	black_is_1 = fz_new_bool(ctx, tiff->photometric == 0);
	k = fz_new_int(ctx, comp == 4 ? -1 : 0);
	encoded_byte_align = fz_new_bool(ctx, comp == 2);

	params = fz_new_dict(ctx, 5);
	fz_dict_puts(ctx, params, "Columns", columns);
	fz_dict_puts(ctx, params, "Rows", rows);
	fz_dict_puts(ctx, params, "BlackIs1", black_is_1);
	fz_dict_puts(ctx, params, "K", k);
	fz_dict_puts(ctx, params, "EncodedByteAlign", encoded_byte_align);

	fz_drop_obj(ctx, columns);
	fz_drop_obj(ctx, rows);
	fz_drop_obj(ctx, black_is_1);
	fz_drop_obj(ctx, k);
	fz_drop_obj(ctx, encoded_byte_align);

	stm = fz_open_faxd(chain, params);
	n = fz_read(stm, wp, wlen);
	fz_close(stm);
	fz_drop_obj(ctx, params);

	if (n < 0)
		return fz_error_note(ctx, n, "cannot read fax strip");
	return fz_okay;
}
Пример #5
0
static void writexref(void)
{
	fz_obj *trailer;
	fz_obj *obj;
	int startxref;
	int num;

	startxref = ftell(out);

	fprintf(out, "xref\n0 %d\n", xref->len);
	for (num = 0; num < xref->len; num++)
	{
		if (uselist[num])
			fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]);
		else
			fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]);
	}
	fprintf(out, "\n");

	trailer = fz_new_dict(ctx, 5);

	obj = fz_new_int(ctx, xref->len);
	fz_dict_puts(trailer, "Size", obj);
	fz_drop_obj(obj);

	obj = fz_dict_gets(xref->trailer, "Info");
	if (obj)
		fz_dict_puts(trailer, "Info", obj);

	obj = fz_dict_gets(xref->trailer, "Root");
	if (obj)
		fz_dict_puts(trailer, "Root", obj);

	obj = fz_dict_gets(xref->trailer, "ID");
	if (obj)
		fz_dict_puts(trailer, "ID", obj);

	fprintf(out, "trailer\n");
	fz_fprint_obj(out, trailer, doexpand == 0);
	fprintf(out, "\n");

	fz_drop_obj(trailer);

	fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
}
Пример #6
0
static void retainpages(int argc, char **argv)
{
	fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = fz_dict_gets(xref->trailer, "Root");
	pages = fz_dict_gets(oldroot, "Pages");
	olddests = pdf_load_name_tree(xref, "Dests");

	root = fz_new_dict(ctx, 2);
	fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type"));
	fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages"));

	pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root);

	fz_drop_obj(root);

	/* Create a new kids array with only the pages we want to keep */
	parent = fz_new_indirect(ctx, fz_to_num(pages), fz_to_gen(pages), xref);
	kids = fz_new_array(ctx, 1);

	/* Retain pages specified */
	while (argc - fz_optind)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[fz_optind];

		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pdf_count_pages(xref);
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pdf_count_pages(xref);
			}

			if (spage > epage)
				page = spage, spage = epage, epage = page;

			if (spage < 1)
				spage = 1;
			if (epage > pdf_count_pages(xref))
				epage = pdf_count_pages(xref);

			for (page = spage; page <= epage; page++)
			{
				fz_obj *pageobj = xref->page_objs[page-1];
				fz_obj *pageref = xref->page_refs[page-1];

				fz_dict_puts(pageobj, "Parent", parent);

				/* Store page object in new kids array */
				fz_array_push(kids, pageref);
			}

			spec = fz_strsep(&pagelist, ",");
		}

		fz_optind++;
	}

	fz_drop_obj(parent);

	/* Update page count and kids array */
	countobj = fz_new_int(ctx, fz_array_len(kids));
	fz_dict_puts(pages, "Count", countobj);
	fz_drop_obj(countobj);
	fz_dict_puts(pages, "Kids", kids);
	fz_drop_obj(kids);

	/* Also preserve the (partial) Dests name tree */
	if (olddests)
	{
		int i;
		fz_obj *names = fz_new_dict(ctx, 1);
		fz_obj *dests = fz_new_dict(ctx, 1);
		fz_obj *names_list = fz_new_array(ctx, 32);

		for (i = 0; i < fz_dict_len(olddests); i++)
		{
			fz_obj *key = fz_dict_get_key(olddests, i);
			fz_obj *val = fz_dict_get_val(olddests, i);
			fz_obj *key_str = fz_new_string(ctx, fz_to_name(key), strlen(fz_to_name(key)));
			fz_obj *dest = fz_dict_gets(val, "D");

			dest = fz_array_get(dest ? dest : val, 0);
			if (fz_array_contains(fz_dict_gets(pages, "Kids"), dest))
			{
				fz_array_push(names_list, key_str);
				fz_array_push(names_list, val);
			}
			fz_drop_obj(key_str);
		}

		root = fz_dict_gets(xref->trailer, "Root");
		fz_dict_puts(dests, "Names", names_list);
		fz_dict_puts(names, "Dests", dests);
		fz_dict_puts(root, "Names", names);

		fz_drop_obj(names);
		fz_drop_obj(dests);
		fz_drop_obj(names_list);
		fz_drop_obj(olddests);
	}
}
Пример #7
0
fz_error
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = NULL;
	int num = 0, gen = 0, stm_ofs;
	int tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_NAME: obj = fz_new_name(buf); break;
	case PDF_TOK_REAL: obj = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: obj = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: obj = fz_new_bool(1); break;
	case PDF_TOK_FALSE: obj = fz_new_bool(0); break;
	case PDF_TOK_NULL: obj = fz_new_null(); break;

	case PDF_TOK_INT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			obj = fz_new_int(a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TOK_R)
			{
				obj = fz_new_indirect(a, b, xref);
				break;
			}
		}
		return fz_throw("expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		obj = fz_new_null();
		goto skip;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_drop_obj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c = fz_read_byte(file);
		while (c == ' ')
			c = fz_read_byte(file);
		if (c == '\r')
		{
			c = fz_peek_byte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(file);
		}
		stm_ofs = fz_tell(file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	*op = obj;
	return fz_okay;
}
Пример #8
0
fz_error
pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = NULL;
	fz_obj *key = NULL;
	fz_obj *val = NULL;
	int tok;
	int len;
	int a, b;

	dict = fz_new_dict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

skip:
		if (tok == PDF_TOK_CLOSE_DICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TOK_NAME)
		{
			fz_drop_obj(dict);
			return fz_throw("invalid key in dict");
		}

		key = fz_new_name(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&val, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TOK_NAME: val = fz_new_name(buf); break;
		case PDF_TOK_REAL: val = fz_new_real(fz_atof(buf)); break;
		case PDF_TOK_STRING: val = fz_new_string(buf, len); break;
		case PDF_TOK_TRUE: val = fz_new_bool(1); break;
		case PDF_TOK_FALSE: val = fz_new_bool(0); break;
		case PDF_TOK_NULL: val = fz_new_null(); break;

		case PDF_TOK_INT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
				(tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
			{
				val = fz_new_int(a);
				fz_dict_put(dict, key, val);
				fz_drop_obj(val);
				fz_drop_obj(key);
				goto skip;
			}
			if (tok == PDF_TOK_INT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_drop_obj(key);
					fz_drop_obj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TOK_R)
				{
					val = fz_new_indirect(a, b, xref);
					break;
				}
			}
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("unknown token in dict");
		}

		fz_dict_put(dict, key, val);
		fz_drop_obj(val);
		fz_drop_obj(key);
	}
}
Пример #9
0
fz_error
pdf_parse_array(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = NULL;
	fz_obj *obj = NULL;
	int a = 0, b = 0, n = 0;
	int tok;
	int len;

	ary = fz_new_array(4);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
		{
			if (n > 0)
			{
				obj = fz_new_int(a);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			if (n > 1)
			{
				obj = fz_new_int(b);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			n = 0;
		}

		if (tok == PDF_TOK_INT && n == 2)
		{
			obj = fz_new_int(a);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TOK_CLOSE_ARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TOK_INT:
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;

		case PDF_TOK_R:
			if (n != 2)
			{
				fz_drop_obj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			obj = fz_new_indirect(a, b, xref);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			n = 0;
			break;

		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		case PDF_TOK_NAME:
			obj = fz_new_name(buf);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_REAL:
			obj = fz_new_real(fz_atof(buf));
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_STRING:
			obj = fz_new_string(buf, len);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_TRUE:
			obj = fz_new_bool(1);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_FALSE:
			obj = fz_new_bool(0);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_NULL:
			obj = fz_new_null();
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		default:
			fz_drop_obj(ary);
			return fz_throw("cannot parse token in array");
		}
	}
}