Ejemplo n.º 1
0
/*
 * Used while reading the individual xref sections from a file.
 *
 * Returns a pointer to entry `num` in the last element of
 * doc->xref_sections (the section currently being populated). If the
 * document has no xref sections yet, a single zero-initialised one is
 * allocated first.
 *
 * NOTE(review): `num` is not checked for being negative before it is used
 * as a table index; presumably callers validate it - confirm.
 */
pdf_xref_entry *pdf_get_populating_xref_entry(pdf_document *doc, int num)
{
	int idx;

	/* Lazily create the first section so there is something to populate */
	if (doc->num_xref_sections == 0)
	{
		doc->xref_sections = fz_calloc(doc->ctx, 1, sizeof(pdf_xref));
		doc->num_xref_sections = 1;
	}

	/*
	 * Walk from the newest section back towards the oldest, growing each
	 * one that is too short to hold `num`. Stop at the first section that
	 * is already long enough; earlier ones are then left untouched.
	 */
	idx = doc->num_xref_sections;
	while (idx-- > 0)
	{
		pdf_xref *section = &doc->xref_sections[idx];

		if (num < section->len)
			break;
		pdf_resize_xref(doc->ctx, section, num + 1);
	}

	/* The populating section is always the last one in the array */
	return &doc->xref_sections[doc->num_xref_sections - 1].table[num];
}
Ejemplo n.º 2
0
/*
 * Load and sanity-check the cross-reference table.
 *
 * Reads the version marker, the startxref offset and the trailer, sizes the
 * table from the trailer's Size entry, reads the xref sections starting at
 * xref->startxref and then validates the resulting table.
 *
 * buf/bufsize: scratch buffer handed on to the trailer and section readers.
 *
 * Returns fz_okay on success, otherwise an error describing the first step
 * or validation that failed.
 */
static fz_error
pdf_load_xref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *size;
	int i;

	error = pdf_load_version(xref);
	if (error)
		return fz_rethrow(error, "cannot read version marker");

	error = pdf_read_start_xref(xref);
	if (error)
		return fz_rethrow(error, "cannot read startxref");

	error = pdf_read_trailer(xref, buf, bufsize);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	size = fz_dict_gets(xref->trailer, "Size");
	if (!size)
		return fz_throw("trailer missing Size entry");

	pdf_resize_xref(xref, fz_to_int(size));

	error = pdf_read_xref_sections(xref, xref->startxref, buf, bufsize);
	if (error)
		return fz_rethrow(error, "cannot read xref");

	/*
	 * A Size of zero (or less) combined with no entries read leaves the
	 * table empty; entry 0 must not be inspected in that case.
	 */
	if (xref->len <= 0)
		return fz_throw("found xref was empty");

	/* broken pdfs where first object is not free */
	if (xref->table[0].type != 'f')
		return fz_throw("first object in xref is not free");

	/* broken pdfs where object offsets are out of range */
	for (i = 0; i < xref->len; i++)
	{
		if (xref->table[i].type == 'n')
		{
			/* 'n' entries: ofs must lie strictly inside the file */
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->file_size)
				return fz_throw("object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i);
		}
		else if (xref->table[i].type == 'o')
		{
			/* 'o' entries: ofs is the index of another entry, which must itself be of type 'n' */
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->len || xref->table[xref->table[i].ofs].type != 'n')
				return fz_throw("invalid reference to an objstm that does not exist: %d (%d 0 R)", xref->table[i].ofs, i);
		}
	}

	return fz_okay;
}
Ejemplo n.º 3
0
/*
 * Read a cross-reference stream.
 *
 * Parses an indirect object from xref->file, treats its dictionary as the
 * trailer, grows the xref table to the dictionary's Size if needed, and
 * then decodes the stream contents into the table, either as one section
 * covering 0..Size or as the (start, count) pairs listed in Index.
 *
 * On success the parsed dictionary is stored in *trailerp (it is not
 * dropped here, so the caller presumably takes over the reference) and
 * fz_okay is returned. On failure the dictionary is dropped and an error
 * is returned.
 */
static fz_error
pdf_read_new_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *dict;
	fz_obj *entry;
	fz_obj *ranges;
	int num, gen, stm_ofs;
	int count;
	int width[3];
	int k;

	error = pdf_parse_ind_obj(&dict, xref, xref->file, buf, cap, &num, &gen, &stm_ofs);
	if (error)
		return fz_rethrow(error, "cannot parse compressed xref stream object");

	/* Size is mandatory; it gives the number of entries to make room for */
	entry = fz_dict_gets(dict, "Size");
	if (!entry)
	{
		fz_drop_obj(dict);
		return fz_throw("xref stream missing Size entry (%d %d R)", num, gen);
	}

	count = fz_to_int(entry);
	if (xref->len < count)
		pdf_resize_xref(xref, count);

	/* The stream object's own number has to fit in the (resized) table */
	if (!(num >= 0 && num < xref->len))
	{
		fz_drop_obj(dict);
		return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1);
	}

	/* W holds the three values passed on to the section reader */
	entry = fz_dict_gets(dict, "W");
	if (!entry)
	{
		fz_drop_obj(dict);
		return fz_throw("xref stream missing W entry (%d %d R)", num, gen);
	}
	for (k = 0; k < 3; k++)
		width[k] = fz_to_int(fz_array_get(entry, k));

	/* Index is optional; looked up before the stream is opened */
	ranges = fz_dict_gets(dict, "Index");

	error = pdf_open_stream_at(&stm, xref, num, gen, dict, stm_ofs);
	if (error)
	{
		fz_drop_obj(dict);
		return fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen);
	}

	if (ranges)
	{
		/* Index is consumed as consecutive (first, count) pairs */
		k = 0;
		while (k < fz_array_len(ranges))
		{
			int first = fz_to_int(fz_array_get(ranges, k + 0));
			int span = fz_to_int(fz_array_get(ranges, k + 1));

			error = pdf_read_new_xref_section(xref, stm, first, span, width[0], width[1], width[2]);
			if (error)
			{
				fz_close(stm);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen);
			}
			k += 2;
		}
	}
	else
	{
		/* No Index: a single section starting at 0 with Size entries */
		error = pdf_read_new_xref_section(xref, stm, 0, count, width[0], width[1], width[2]);
		if (error)
		{
			fz_close(stm);
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen);
		}
	}

	fz_close(stm);

	*trailerp = dict;

	return fz_okay;
}
Ejemplo n.º 4
0
/*
 * Read a classic (plain-text) cross-reference table and its trailer.
 *
 * Expects xref->file to be positioned at the "xref" keyword. Each
 * subsection header ("first count") is followed by `count` entries that
 * are read 20 bytes at a time; entries whose slot in xref->table already
 * has a type are read but not overwritten. After the last subsection the
 * "trailer" keyword and the trailer dictionary are parsed into *trailerp.
 *
 * buf/cap: scratch buffer used for header lines, entries and lexing.
 *
 * Returns fz_okay on success, otherwise an error.
 */
static fz_error
pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	/* Bytes per entry, and the index of the type character within one */
	enum { ENTRY_SIZE = 20, TYPE_INDEX = 17 };
	/* Largest int value, derived without relying on <limits.h> */
	const long int_max = (long)(~0u >> 1);
	fz_error error;
	int ofs, len;
	long ofs_val, len_val;
	char *ofs_str, *len_str;
	char *s;
	int n;
	int tok;
	int i;
	int c;

	fz_read_line(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* A subsection header starts with a digit; anything else ends the table */
		c = fz_peek_byte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_read_line(xref->file, buf, cap);
		s = buf;
		ofs_str = fz_strsep(&s, " ");
		len_str = fz_strsep(&s, " ");

		/*
		 * fz_strsep presumably follows strsep(3) and yields NULL once the
		 * line is exhausted (the `s &&` test below already allows for a
		 * NULL `s`), so a header without a count must not reach the parser.
		 */
		if (!ofs_str || !len_str)
			return fz_throw("xref subsection length missing");

		/* strtol saturates instead of overflowing, so the range test is reliable */
		ofs_val = strtol(ofs_str, NULL, 10);
		len_val = strtol(len_str, NULL, 10);
		if (ofs_val < 0 || len_val < 0 || ofs_val > int_max || len_val > int_max - ofs_val)
			return fz_throw("xref subsection object numbers are out of range");
		ofs = (int)ofs_val;
		len = (int)len_val;

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (ofs + len > xref->len)
		{
			fz_warn("broken xref section, proceeding anyway.");
			pdf_resize_xref(xref, ofs + len);
		}

		for (i = ofs; i < ofs + len; i++)
		{
			n = fz_read(xref->file, (unsigned char *) buf, ENTRY_SIZE);
			if (n < 0)
				return fz_rethrow(n, "cannot read xref table");
			if (!xref->table[i].type)
			{
				/* Terminate what was read so atoi cannot run into stale bytes */
				if (n < cap)
					buf[n] = '\0';

				s = buf;

				/* broken pdfs where line start with white space */
				while (s < buf + n && *s != '\0' && iswhite(*s))
					s++;

				/* The type character must lie within the bytes actually read */
				if (n - (int)(s - buf) <= TYPE_INDEX)
					return fz_throw("truncated xref entry (%d 0 R)", i);

				xref->table[i].ofs = atoi(s);
				xref->table[i].gen = atoi(s + 11);
				xref->table[i].type = s[TYPE_INDEX];
				if (s[TYPE_INDEX] != 'f' && s[TYPE_INDEX] != 'n' && s[TYPE_INDEX] != 'o')
					return fz_throw("unexpected xref type: %#x (%d %d R)", s[TYPE_INDEX], i, xref->table[i].gen);
			}
		}
	}

	/* The table must be followed by "trailer" and a dictionary */
	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_TRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_OPEN_DICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	return fz_okay;
}