/*
 * Load uncompressed contents of a stream into buf.
 */
fz_error
pdf_load_stream(fz_buffer **bufp, pdf_xref *xref, int num, int gen)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *dict, *obj;
	int i, len;

	error = pdf_open_stream(&stm, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot open stream (%d %d R)", num, gen);

	error = pdf_load_object(&dict, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load stream dictionary (%d %d R)", num, gen);

	len = fz_to_int(fz_dict_gets(dict, "Length"));
	obj = fz_dict_gets(dict, "Filter");
	len = pdf_guess_filter_length(len, fz_to_name(obj));
	for (i = 0; i < fz_array_len(obj); i++)
		len = pdf_guess_filter_length(len, fz_to_name(fz_array_get(obj, i)));

	fz_drop_obj(dict);

	error = fz_read_all(bufp, stm, len);
	if (error)
	{
		fz_close(stm);
		return fz_rethrow(error, "cannot read raw stream (%d %d R)", num, gen);
	}

	fz_close(stm);
	return fz_okay;
}
Beispiel #2
0
fz_buffer *
pdf_load_raw_renumbered_stream(fz_context *ctx, pdf_document *doc, int num, int gen, int orig_num, int orig_gen)
{
	fz_stream *stm;
	pdf_obj *dict;
	int len;
	fz_buffer *buf;

	if (num > 0 && num < pdf_xref_len(ctx, doc))
	{
		pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
		if (entry->stm_buf)
			return fz_keep_buffer(ctx, entry->stm_buf);
	}

	dict = pdf_load_object(ctx, doc, num, gen);

	len = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Length));

	pdf_drop_obj(ctx, dict);

	stm = pdf_open_raw_renumbered_stream(ctx, doc, num, gen, orig_num, orig_gen);

	buf = fz_read_all(ctx, stm, len);

	fz_drop_stream(ctx, stm);
	return buf;
}
/*
 * Load raw (compressed but decrypted) contents of a stream into buf.
 */
fz_error
pdf_load_raw_stream(fz_buffer **bufp, pdf_xref *xref, int num, int gen)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *dict;
	int len;

	error = pdf_load_object(&dict, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load stream dictionary (%d %d R)", num, gen);

	len = fz_to_int(fz_dict_gets(dict, "Length"));

	fz_drop_obj(dict);

	error = pdf_open_raw_stream(&stm, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot open raw stream (%d %d R)", num, gen);

	error = fz_read_all(bufp, stm, len);
	if (error)
	{
		fz_close(stm);
		return fz_rethrow(error, "cannot read raw stream (%d %d R)", num, gen);
	}

	fz_close(stm);
	return fz_okay;
}
Beispiel #4
0
fz_buffer *
pdf_load_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
{
	fz_stream *stm;
	pdf_obj *dict;
	int len;
	fz_buffer *buf;

	if (num > 0 && num < xref->len && xref->table[num].stm_buf)
		return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf);

	dict = pdf_load_object(xref, num, gen);
	/* RJW: "cannot load stream dictionary (%d %d R)", num, gen */

	len = pdf_to_int(pdf_dict_gets(dict, "Length"));

	pdf_drop_obj(dict);

	stm = pdf_open_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen);
	/* RJW: "cannot open raw stream (%d %d R)", num, gen */

	buf = fz_read_all(stm, len);
	/* RJW: "cannot read raw stream (%d %d R)", num, gen */

	fz_close(stm);
	return buf;
}
Beispiel #5
0
/* Image specific methods */
static void
res_image_init(fz_context *ctx, pdf_document *doc, pdf_res_table *table)
{
	int len, k;
	pdf_obj *obj;
	pdf_obj *type;
	pdf_obj *res = NULL;
	fz_image *image = NULL;
	unsigned char digest[16];

	fz_var(obj);
	fz_var(image);
	fz_var(res);

	fz_try(ctx)
	{
		table->hash = fz_new_hash_table(ctx, 4096, 16, -1);
		len = pdf_count_objects(ctx, doc);
		for (k = 1; k < len; k++)
		{
			obj = pdf_load_object(ctx, doc, k, 0);
			type = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);
			if (pdf_name_eq(ctx, type, PDF_NAME_Image))
			{
				image = pdf_load_image(ctx, doc, obj);
				res_image_get_md5(ctx, image, digest);
				fz_drop_image(ctx, image);
				image = NULL;

				/* Don't allow overwrites. */
				if (fz_hash_find(ctx, table->hash, digest) == NULL)
					fz_hash_insert(ctx, table->hash, digest, obj);
			}
			else
			{
				pdf_drop_obj(ctx, obj);
			}
			obj = NULL;
		}
	}
	fz_always(ctx)
	{
		fz_drop_image(ctx, image);
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		res_table_free(ctx, table);
		fz_rethrow(ctx);
	}
}
Beispiel #6
0
static fz_buffer *
pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params, int *truncated)
{
	fz_stream *stm = NULL;
	pdf_obj *dict, *obj;
	int i, len, n;
	fz_buffer *buf;

	fz_var(buf);

	if (num > 0 && num < pdf_xref_len(ctx, doc))
	{
		pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
		/* Return ref to existing buffer, but only if uncompressed,
		 * or shortstoppable */
		if (can_reuse_buffer(ctx, entry, params))
			return fz_keep_buffer(ctx, entry->stm_buf);
	}

	dict = pdf_load_object(ctx, doc, num, gen);

	len = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Length));
	obj = pdf_dict_get(ctx, dict, PDF_NAME_Filter);
	len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj));
	n = pdf_array_len(ctx, obj);
	for (i = 0; i < n; i++)
		len = pdf_guess_filter_length(len, pdf_to_name(ctx, pdf_array_get(ctx, obj, i)));

	pdf_drop_obj(ctx, dict);

	stm = pdf_open_image_stream(ctx, doc, num, gen, orig_num, orig_gen, params);

	fz_try(ctx)
	{
		if (truncated)
			buf = fz_read_best(ctx, stm, len, truncated);
		else
			buf = fz_read_all(ctx, stm, len);
	}
	fz_always(ctx)
	{
		fz_drop_stream(ctx, stm);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "cannot read raw stream (%d %d R)", num, gen);
	}

	return buf;
}
Beispiel #7
0
static void preloadobjstms(void)
{
	fz_obj *obj;
	int num;

	for (num = 0; num < xref->len; num++)
	{
		if (xref->table[num].type == 'o')
		{
			obj = pdf_load_object(xref, num, 0);
			fz_drop_obj(obj);
		}
	}
}
Beispiel #8
0
static void preloadobjstms(void)
{
	fz_error error;
	fz_obj *obj;
	int num;

	for (num = 0; num < xref->len; num++)
	{
		if (xref->table[num].type == 'o')
		{
			error = pdf_load_object(&obj, xref, num, 0);
			if (error)
				die(error);
			fz_drop_obj(ctx, obj);
		}
	}
}
Beispiel #9
0
static void writeobject(int num, int gen)
{
	fz_error error;
	fz_obj *obj;
	fz_obj *type;

	error = pdf_load_object(&obj, xref, num, gen);
	if (error)
		die(error);

	/* skip ObjStm and XRef objects */
	if (fz_is_dict(ctx, obj))
	{
		type = fz_dict_gets(ctx, obj, "Type");
		if (fz_is_name(ctx, type) && !strcmp(fz_to_name(ctx, type), "ObjStm"))
		{
			uselist[num] = 0;
			fz_drop_obj(ctx, obj);
			return;
		}
		if (fz_is_name(ctx, type) && !strcmp(fz_to_name(ctx, type), "XRef"))
		{
			uselist[num] = 0;
			fz_drop_obj(ctx, obj);
			return;
		}
	}

	if (!pdf_is_stream(xref, num, gen))
	{
		fprintf(out, "%d %d obj\n", num, gen);
		fz_fprint_obj(ctx, out, obj, !doexpand);
		fprintf(out, "endobj\n\n");
	}
	else
	{
		if (doexpand && !pdf_is_jpx_image(ctx, obj))
			expandstream(obj, num, gen);
		else
			copystream(obj, num, gen);
	}

	fz_drop_obj(ctx, obj);
}
Beispiel #10
0
fz_buffer *
pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params)
{
	fz_context *ctx = xref->ctx;
	fz_stream *stm = NULL;
	pdf_obj *dict, *obj;
	int i, len, n;
	fz_buffer *buf;

	fz_var(buf);

	if (num > 0 && num < xref->len && xref->table[num].stm_buf)
		return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf);

	dict = pdf_load_object(xref, num, gen);
	/* RJW: "cannot load stream dictionary (%d %d R)", num, gen */

	len = pdf_to_int(pdf_dict_gets(dict, "Length"));
	obj = pdf_dict_gets(dict, "Filter");
	len = pdf_guess_filter_length(len, pdf_to_name(obj));
	n = pdf_array_len(obj);
	for (i = 0; i < n; i++)
		len = pdf_guess_filter_length(len, pdf_to_name(pdf_array_get(obj, i)));

	pdf_drop_obj(dict);

	stm = pdf_open_image_stream(xref, num, gen, orig_num, orig_gen, params);
	/* RJW: "cannot open stream (%d %d R)", num, gen */

	fz_try(ctx)
	{
		buf = fz_read_all(stm, len);
	}
	fz_always(ctx)
	{
		fz_close(stm);
	}
	fz_catch(ctx)
	{
		fz_throw(ctx, "cannot read raw stream (%d %d R)", num, gen);
	}

	return buf;
}
Beispiel #11
0
static void showobject(int num)
{
	fz_error error;
	fz_obj *obj;

	if (!xref)
		die(fz_error_make(ctx, "no file specified"));

	error = pdf_load_object(&obj, xref, num, 0);
	if (error)
		die(error);

	if (isimage(obj))
		saveimage(num);
	else if (isfontdesc(obj))
		savefont(obj, num);

	fz_drop_obj(ctx, obj);
}
Beispiel #12
0
static void showobject(int num)
{
	pdf_obj *obj;

	if (!doc)
		fz_throw(ctx, FZ_ERROR_GENERIC, "no file specified");

	fz_try(ctx)
	{
		obj = pdf_load_object(ctx, doc, num);

		if (isimage(obj))
			saveimage(num);
		else if (isfontdesc(obj))
			savefont(obj, num);

		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_warn(ctx, "ignoring object %d", num);
	}
}
Beispiel #13
0
/*
 * Load raw (compressed but decrypted) contents of a stream into buf.
 */
fz_buffer *
pdf_load_raw_stream(pdf_document *xref, int num, int gen)
{
    fz_stream *stm;
    pdf_obj *dict;
    int len;
    fz_buffer *buf;

    dict = pdf_load_object(xref, num, gen);
    /* RJW: "cannot load stream dictionary (%d %d R)", num, gen */

    len = pdf_to_int(pdf_dict_gets(dict, "Length"));

    pdf_drop_obj(dict);

    stm = pdf_open_raw_stream(xref, num, gen);
    /* RJW: "cannot open raw stream (%d %d R)", num, gen */

    buf = fz_read_all(stm, len);
    /* RJW: "cannot read raw stream (%d %d R)", num, gen */

    fz_close(stm);
    return buf;
}
Beispiel #14
0
static void writeobject(int num, int gen)
{
	fz_obj *obj;
	fz_obj *type;

	obj = pdf_load_object(xref, num, gen);

	/* skip ObjStm and XRef objects */
	if (fz_is_dict(obj))
	{
		type = fz_dict_gets(obj, "Type");
		if (fz_is_name(type) && !strcmp(fz_to_name(type), "ObjStm"))
		{
			uselist[num] = 0;
			fz_drop_obj(obj);
			return;
		}
		if (fz_is_name(type) && !strcmp(fz_to_name(type), "XRef"))
		{
			uselist[num] = 0;
			fz_drop_obj(obj);
			return;
		}
	}

	if (!pdf_is_stream(xref, num, gen))
	{
		fprintf(out, "%d %d obj\n", num, gen);
		fz_fprint_obj(out, obj, doexpand == 0);
		fprintf(out, "endobj\n\n");
	}
	else
	{
		int dontexpand = 0;
		if (doexpand != 0 && doexpand != expand_all)
		{
			fz_obj *o;

			if ((o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "XObject")) &&
				(o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Image")))
				dontexpand = !(doexpand & expand_images);
			if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "Font"))
				dontexpand = !(doexpand & expand_fonts);
			if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "FontDescriptor"))
				dontexpand = !(doexpand & expand_fonts);
			if ((o = fz_dict_gets(obj, "Length1")) != NULL)
				dontexpand = !(doexpand & expand_fonts);
			if ((o = fz_dict_gets(obj, "Length2")) != NULL)
				dontexpand = !(doexpand & expand_fonts);
			if ((o = fz_dict_gets(obj, "Length3")) != NULL)
				dontexpand = !(doexpand & expand_fonts);
			if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Type1C"))
				dontexpand = !(doexpand & expand_fonts);
			if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "CIDFontType0C"))
				dontexpand = !(doexpand & expand_fonts);
		}
		if (doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
			expandstream(obj, num, gen);
		else
			copystream(obj, num, gen);
	}

	fz_drop_obj(obj);
}
Beispiel #15
0
static fz_error
pdf_load_obj_stm(pdf_xref *xref, int num, int gen, char *buf, int cap)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *objstm;
	int *numbuf;
	int *ofsbuf;

	fz_obj *obj;
	int first;
	int count;
	int i, n;
	int tok;

	error = pdf_load_object(&objstm, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen);

	count = fz_to_int(fz_dict_gets(objstm, "N"));
	first = fz_to_int(fz_dict_gets(objstm, "First"));

	numbuf = fz_calloc(count, sizeof(int));
	ofsbuf = fz_calloc(count, sizeof(int));

	error = pdf_open_stream(&stm, xref, num, gen);
	if (error)
	{
		error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen);
		goto cleanupbuf;
	}

	for (i = 0; i < count; i++)
	{
		error = pdf_lex(&tok, stm, buf, cap, &n);
		if (error || tok != PDF_TOK_INT)
		{
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
			goto cleanupstm;
		}
		numbuf[i] = atoi(buf);

		error = pdf_lex(&tok, stm, buf, cap, &n);
		if (error || tok != PDF_TOK_INT)
		{
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
			goto cleanupstm;
		}
		ofsbuf[i] = atoi(buf);
	}

	fz_seek(stm, first, 0);

	for (i = 0; i < count; i++)
	{
		fz_seek(stm, first + ofsbuf[i], 0);

		error = pdf_parse_stm_obj(&obj, xref, stm, buf, cap);
		if (error)
		{
			error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen);
			goto cleanupstm;
		}

		if (numbuf[i] < 1 || numbuf[i] >= xref->len)
		{
			fz_drop_obj(obj);
			error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1);
			goto cleanupstm;
		}

		if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num)
		{
			if (xref->table[numbuf[i]].obj)
				fz_drop_obj(xref->table[numbuf[i]].obj);
			xref->table[numbuf[i]].obj = obj;
		}
		else
		{
			fz_drop_obj(obj);
		}
	}

	fz_close(stm);
	fz_free(ofsbuf);
	fz_free(numbuf);
	fz_drop_obj(objstm);
	return fz_okay;

cleanupstm:
	fz_close(stm);
cleanupbuf:
	fz_free(ofsbuf);
	fz_free(numbuf);
	fz_drop_obj(objstm);
	return error; /* already rethrown */
}
Beispiel #16
0
fz_error
pdf_open_xref_with_stream(pdf_xref **xrefp, fz_stream *file, char *password)
{
	pdf_xref *xref;
	fz_error error;
	fz_obj *encrypt, *id;
	fz_obj *dict, *obj;
	int i, repaired = 0;

	/* install pdf specific callback */
	fz_resolve_indirect = pdf_resolve_indirect;

	xref = fz_malloc(sizeof(pdf_xref));

	memset(xref, 0, sizeof(pdf_xref));

	xref->file = fz_keep_stream(file);

	error = pdf_load_xref(xref, xref->scratch, sizeof xref->scratch);
	if (error)
	{
		fz_catch(error, "trying to repair");
		if (xref->table)
		{
			fz_free(xref->table);
			xref->table = NULL;
			xref->len = 0;
		}
		if (xref->trailer)
		{
			fz_drop_obj(xref->trailer);
			xref->trailer = NULL;
		}
		error = pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot repair document");
		}
		repaired = 1;
	}

	encrypt = fz_dict_gets(xref->trailer, "Encrypt");
	id = fz_dict_gets(xref->trailer, "ID");
	if (fz_is_dict(encrypt))
	{
		error = pdf_new_crypt(&xref->crypt, encrypt, id);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot decrypt document");
		}
	}

	if (pdf_needs_password(xref))
	{
		/* Only care if we have a password */
		if (password)
		{
			int okay = pdf_authenticate_password(xref, password);
			if (!okay)
			{
				pdf_free_xref(xref);
				return fz_throw("invalid password");
			}
		}
	}

	if (repaired)
	{
		int hasroot, hasinfo;

		error = pdf_repair_obj_stms(xref);
		if (error)
		{
			pdf_free_xref(xref);
			return fz_rethrow(error, "cannot repair document");
		}

		hasroot = fz_dict_gets(xref->trailer, "Root") != NULL;
		hasinfo = fz_dict_gets(xref->trailer, "Info") != NULL;

		for (i = 1; i < xref->len; i++)
		{
			if (xref->table[i].type == 0 || xref->table[i].type == 'f')
				continue;

			error = pdf_load_object(&dict, xref, i, 0);
			if (error)
			{
				fz_catch(error, "ignoring broken object (%d 0 R)", i);
				continue;
			}

			if (!hasroot)
			{
				obj = fz_dict_gets(dict, "Type");
				if (fz_is_name(obj) && !strcmp(fz_to_name(obj), "Catalog"))
				{
					obj = fz_new_indirect(i, 0, xref);
					fz_dict_puts(xref->trailer, "Root", obj);
					fz_drop_obj(obj);
				}
			}

			if (!hasinfo)
			{
				if (fz_dict_gets(dict, "Creator") || fz_dict_gets(dict, "Producer"))
				{
					obj = fz_new_indirect(i, 0, xref);
					fz_dict_puts(xref->trailer, "Info", obj);
					fz_drop_obj(obj);
				}
			}

			fz_drop_obj(dict);
		}
	}

	error = pdf_read_ocg(xref);
	if (error)
	{
		pdf_free_xref(xref);
		return fz_rethrow(error, "Broken Optional Content");
	}

	*xrefp = xref;
	return fz_okay;
}