Ejemplo n.º 1
0
/*
 * Recursively walk the page tree starting at 'node' and append every
 * leaf that is reached to xref->pagerefs / xref->pageobjs.
 *
 * xref: document whose page arrays (pagerefs, pageobjs, pagelen, pagecap)
 *       are filled in.
 * node: the tree node to visit.
 * info: the Resources / MediaBox / CropBox / Rotate values collected from
 *       the ancestors so far. It is passed by value, so overrides picked
 *       up in one subtree are not seen by sibling subtrees.
 *
 * A node is treated as an interior node when it has an array "Kids" and
 * an integer "Count"; anything else is treated as a page.
 */
void
pdf_loadpagetreenode(pdf_xref *xref, fz_obj *node, struct info info)
{
	fz_obj *kids, *count;
	fz_obj *page, *val, *marker;
	int nkids, k, slot;

	/* a node that still carries the ".seen" marker is being visited further up the call chain */
	if (fz_dictgets(node, ".seen"))
		return;

	kids = fz_dictgets(node, "Kids");
	count = fz_dictgets(node, "Count");

	if (!fz_isarray(kids) || !fz_isint(count))
	{
		/* leaf: fill in every attribute the page does not define itself */
		page = fz_resolveindirect(node);

		if (info.resources)
		{
			if (!fz_dictgets(page, "Resources"))
				fz_dictputs(page, "Resources", info.resources);
		}
		if (info.mediabox)
		{
			if (!fz_dictgets(page, "MediaBox"))
				fz_dictputs(page, "MediaBox", info.mediabox);
		}
		if (info.cropbox)
		{
			if (!fz_dictgets(page, "CropBox"))
				fz_dictputs(page, "CropBox", info.cropbox);
		}
		if (info.rotate)
		{
			if (!fz_dictgets(page, "Rotate"))
				fz_dictputs(page, "Rotate", info.rotate);
		}

		/* both arrays are full: make room for exactly one more page */
		if (xref->pagecap == xref->pagelen)
		{
			fz_warn("found more pages than expected");
			xref->pagecap ++;
			xref->pagerefs = fz_realloc(xref->pagerefs, sizeof(fz_obj*) * xref->pagecap);
			xref->pageobjs = fz_realloc(xref->pageobjs, sizeof(fz_obj*) * xref->pagecap);
		}

		/* record the node as given and its resolved form side by side */
		slot = xref->pagelen;
		xref->pagerefs[slot] = fz_keepobj(node);
		xref->pageobjs[slot] = fz_keepobj(page);
		xref->pagelen = slot + 1;
		return;
	}

	/* interior node: values defined here replace the ones handed down */
	val = fz_dictgets(node, "Resources");
	if (val)
		info.resources = val;
	val = fz_dictgets(node, "MediaBox");
	if (val)
		info.mediabox = val;
	val = fz_dictgets(node, "CropBox");
	if (val)
		info.cropbox = val;
	val = fz_dictgets(node, "Rotate");
	if (val)
		info.rotate = val;

	/* tag the node for the duration of the descent; our own reference is released right away */
	marker = fz_newnull();
	fz_dictputs(node, ".seen", marker);
	fz_dropobj(marker);

	nkids = fz_arraylen(kids);
	k = 0;
	while (k < nkids)
	{
		pdf_loadpagetreenode(xref, fz_arrayget(kids, k), info);
		k++;
	}

	/* descent finished: remove the tag again */
	fz_dictdels(node, ".seen");
}
Ejemplo n.º 2
0
/*
 * Write the document skeleton for the pages collected in 'editpagelist'
 * into 'editxref':
 *
 *   1. a /Pages object whose Kids is editpagelist,
 *   2. a /Parent back-link to that object in every listed page,
 *   3. a /Catalog object pointing at the /Pages object,
 *   4. a trailer dictionary whose Root is the catalog.
 *
 * Takes no arguments and returns nothing; it works on the file-level
 * 'editxref' and 'editpagelist'. Any error is handed to die().
 */
void
editflushcatalog(void)
{
	fz_error *error;
	int rootnum, rootgen;
	int listnum, listgen;
	fz_obj *listref;
	fz_obj *pageref;
	fz_obj *obj;
	int i, n;

	/* Create page tree and add back-links */

	error = pdf_allocobject(editxref, &listnum, &listgen);
	if (error)
		die(error);

	error = fz_packobj(&obj, "<</Type/Pages/Count %i/Kids %o>>",
			fz_arraylen(editpagelist),
			editpagelist);
	if (error)
		die(error);

	pdf_updateobject(editxref, listnum, listgen, obj);

	fz_dropobj(obj);

	/* one shared reference to the page tree root, stored as Parent in every page */
	error = fz_newindirect(&listref, listnum, listgen);
	if (error)
		die(error);

	/* the loop only edits the page objects, never the list itself */
	n = fz_arraylen(editpagelist);
	for (i = 0; i < n; i++)
	{
		int num, gen;

		pageref = fz_arrayget(editpagelist, i);
		num = fz_tonum(pageref);
		gen = fz_togen(pageref);

		error = pdf_loadobject(&obj, editxref, num, gen);
		if (error)
			die(error);

		error = fz_dictputs(obj, "Parent", listref);
		if (error)
			die(error);

		pdf_updateobject(editxref, num, gen, obj);

		fz_dropobj(obj);
	}

	/*
	 * Release the reference obtained from fz_newindirect. Every other
	 * temporary in this function is dropped after being handed off;
	 * without this drop listref was leaked on each call.
	 */
	fz_dropobj(listref);

	/* Create catalog */

	error = pdf_allocobject(editxref, &rootnum, &rootgen);
	if (error)
		die(error);

	error = fz_packobj(&obj, "<</Type/Catalog/Pages %r>>", listnum, listgen);
	if (error)
		die(error);

	pdf_updateobject(editxref, rootnum, rootgen, obj);

	fz_dropobj(obj);

	/* Create trailer */

	/* NOTE(review): assumes editxref->trailer holds no object yet; confirm against the caller */
	error = fz_packobj(&editxref->trailer, "<</Root %r>>", rootnum, rootgen);
	if (error)
		die(error);
}
Ejemplo n.º 3
0
/*
 * Rebuild the cross-reference table of a file by scanning it from the
 * start for "N G obj" sequences and trailer-like dictionaries.
 *
 * xref:    document to repair; its file is read and its table and trailer
 *          are (re)written.
 * buf:     scratch buffer used for the initial read and by the lexer.
 * bufsize: size of buf in bytes.
 *
 * Returns fz_okay on success, otherwise the error raised while reading,
 * parsing or loading an object. All locally held objects and the entry
 * list are released on both paths.
 */
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/*
	 * look for '%PDF' version marker within first kilobyte of file;
	 * never read more than the buffer can hold
	 */
	n = fz_read(xref->file, (unsigned char *)buf, bufsize < 1024 ? bufsize : 1024);
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	/* i + 4 <= n also tests the last position where the marker still fits */
	for (i = 0; i + 4 <= n; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		/*
		 * remember the two most recent integers and where they started,
		 * so that on 'obj' num/gen are the two integers preceding it and
		 * numofs is the offset of the first of them
		 */
		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			/*
			 * num comes straight from the file and is used as an index
			 * into xref->table below; a negative value would write
			 * before the start of the table, so skip such objects
			 */
			if (num < 0)
			{
				pdf_logxref("ignoring object with invalid number: (%d %d R)\n", num, gen);
				continue;
			}

			pdf_logxref("found object: (%d %d R)\n", num, gen);

			if (listlen + 1 == listcap)
			{
				listcap = (listcap * 3) / 2;
				list = fz_realloc(list, listcap, sizeof(struct entry));
			}

			list[listlen].num = num;
			list[listlen].gen = gen;
			list[listlen].ofs = numofs;
			list[listlen].stmofs = stmofs;
			list[listlen].stmlen = stmlen;
			listlen ++;

			if (num > maxnum)
				maxnum = num;
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			/* for each key the value from the dictionary seen last wins */
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		/* consume one byte after an error token before lexing again */
		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}

	}

	/* entry 0 is always a free entry with generation 65535 */
	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	/* walk from the top so each free entry's ofs names the next higher free entry (0 for the last) */
	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/*
	 * create a repaired trailer; Root, Info, Encrypt and ID are only
	 * present when the scan above found them
	 */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}