Ejemplo n.º 1
0
static void writexref(void)
{
	fz_obj *trailer;
	fz_obj *obj;
	int startxref;
	int num;

	startxref = ftell(out);

	fprintf(out, "xref\n0 %d\n", xref->len);
	for (num = 0; num < xref->len; num++)
	{
		if (uselist[num])
			fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]);
		else
			fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]);
	}
	fprintf(out, "\n");

	trailer = fz_newdict(5);

	obj = fz_newint(xref->len);
	fz_dictputs(trailer, "Size", obj);
	fz_dropobj(obj);

	obj = fz_dictgets(xref->trailer, "Info");
	if (obj)
		fz_dictputs(trailer, "Info", obj);

	obj = fz_dictgets(xref->trailer, "Root");
	if (obj)
		fz_dictputs(trailer, "Root", obj);

	obj = fz_dictgets(xref->trailer, "ID");
	if (obj)
		fz_dictputs(trailer, "ID", obj);

	fprintf(out, "trailer\n");
	fz_fprintobj(out, trailer, !doexpand);
	fprintf(out, "\n");

	fz_dropobj(trailer);

	fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
}
Ejemplo n.º 2
0
fz_error *
pdf_loadnametree(fz_obj **dictp, pdf_xref *xref, fz_obj *root)
{
	fz_error *error;
	fz_obj *tree;

	error = fz_newdict(&tree, 128);
	if (error)
		return error;

	error = loadnametreenode(tree, xref, root);
	if (error)
	{
		fz_dropobj(tree);
		return error;
	}

	fz_sortdict(tree);

	*dictp = tree;
	return nil;
}
Ejemplo n.º 3
0
static void retainpages(int argc, char **argv)
{
	fz_error error;
	fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent;

	/* Load the old page tree */
	error = pdf_loadpagetree(xref);
	if (error)
		die(fz_rethrow(error, "cannot load page tree"));

	/* Keep only pages/type entry to avoid references to unretained pages */
	oldroot = fz_dictgets(xref->trailer, "Root");
	pages = fz_dictgets(oldroot, "Pages");

	root = fz_newdict(2);
	fz_dictputs(root, "Type", fz_dictgets(oldroot, "Type"));
	fz_dictputs(root, "Pages", fz_dictgets(oldroot, "Pages"));

	pdf_updateobject(xref, fz_tonum(oldroot), fz_togen(oldroot), root);

	fz_dropobj(root);

	/* Create a new kids array with only the pages we want to keep */
	parent = fz_newindirect(fz_tonum(pages), fz_togen(pages), xref);
	kids = fz_newarray(1);

	/* Retain pages specified */
	while (argc - fz_optind)
	{
		int page, spage, epage;
		char *spec, *dash;
		char *pagelist = argv[fz_optind];

		spec = fz_strsep(&pagelist, ",");
		while (spec)
		{
			dash = strchr(spec, '-');

			if (dash == spec)
				spage = epage = pdf_getpagecount(xref);
			else
				spage = epage = atoi(spec);

			if (dash)
			{
				if (strlen(dash) > 1)
					epage = atoi(dash + 1);
				else
					epage = pdf_getpagecount(xref);
			}

			if (spage > epage)
				page = spage, spage = epage, epage = page;

			if (spage < 1)
				spage = 1;
			if (epage > pdf_getpagecount(xref))
				epage = pdf_getpagecount(xref);

			for (page = spage; page <= epage; page++)
			{
				fz_obj *pageobj = pdf_getpageobject(xref, page);
				fz_obj *pageref = pdf_getpageref(xref, page);

				fz_dictputs(pageobj, "Parent", parent);

				/* Store page object in new kids array */
				fz_arraypush(kids, pageref);
			}

			spec = fz_strsep(&pagelist, ",");
		}

		fz_optind++;
	}

	fz_dropobj(parent);

	/* Update page count and kids array */
	countobj = fz_newint(fz_arraylen(kids));
	fz_dictputs(pages, "Count", countobj);
	fz_dropobj(countobj);
	fz_dictputs(pages, "Kids", kids);
	fz_dropobj(kids);
}
fz_error
pdf_parsedict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	pdf_token_e tok;
	int len;
	int a, b;

	dict = fz_newdict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

skip:
		if (tok == PDF_TCDICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TNAME)
		{
			fz_dropobj(dict);
			return fz_throw("invalid key in dict");;
		}

		key = fz_newname(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOARRAY:
			error = pdf_parsearray(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TNAME: val = fz_newname(buf); break;
		case PDF_TREAL: val = fz_newreal(atof(buf)); break;
		case PDF_TSTRING: val = fz_newstring(buf, len); break;
		case PDF_TTRUE: val = fz_newbool(1); break;
		case PDF_TFALSE: val = fz_newbool(0); break;
		case PDF_TNULL: val = fz_newnull(); break;

		case PDF_TINT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				val = fz_newint(a);
				fz_dictput(dict, key, val);
				fz_dropobj(val);
				fz_dropobj(key);
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_dropobj(key);
					fz_dropobj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TR)
				{
					val = fz_newindirect(a, b, xref);
					break;
				}
			}
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			return fz_throw("unknown token in dict");
		}

		fz_dictput(dict, key, val);
		fz_dropobj(val);
		fz_dropobj(key);
	}
}
static fz_error parsedict(fz_obj **obj, pdf_xref *xref, char **sp, struct vap *v)
{
	fz_error error = fz_okay;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	char *s = *sp;

	error = fz_newdict(&dict, 8);
	if (error)
		return fz_rethrow(error, "cannot create dict");

	s += 2;	/* skip "<<" */

	while (*s)
	{
		skipwhite(&s);

		/* end-of-dict marker >> */
		if (*s == '>')
		{
			s ++;
			if (*s == '>')
			{
				s ++;
				break;
			}
			error = fz_throw("malformed >> marker");
			goto cleanup;
		}

		/* non-name as key, bail */
		if (*s != '/')
		{
			error = fz_throw("key is not a name");
			goto cleanup;
		}

		error = parsename(&key, &s);
		if (error)
		{
			error = fz_rethrow(error, "cannot parse key");
			goto cleanup;
		}

		skipwhite(&s);

		error = parseobj(&val, xref, &s, v);
		if (error)
		{
			error = fz_rethrow(error, "cannot parse value");
			goto cleanup;
		}

		error = fz_dictput(dict, key, val);
		if (error)
		{
			error = fz_rethrow(error, "cannot insert dict entry");
			goto cleanup;
		}

		fz_dropobj(val); val = nil;
		fz_dropobj(key); key = nil;
	}

	*obj = dict;
	*sp = s;
	return fz_okay;

cleanup:
	if (val) fz_dropobj(val);
	if (key) fz_dropobj(key);
	if (dict) fz_dropobj(dict);
	*obj = nil;
	*sp = s;
	return error; /* already rethrown */
}
Ejemplo n.º 6
0
fz_error *
pdf_parsedict(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	int tok, len;
	int a, b;

	error = fz_newdict(op, 8);
	if (error) return error;
	dict = *op;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

skip:
		if (tok == PDF_TCDICT)
			return nil;

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
			return nil;

		if (tok != PDF_TNAME)
			goto cleanup;

		error = fz_newname(&key, buf);
		if (error) goto cleanup;

		tok = pdf_lex(file, buf, cap, &len);

		switch (tok)
		{
		case PDF_TOARRAY:	error = pdf_parsearray(&val, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&val, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&val, buf); break;
		case PDF_TREAL:		error = fz_newreal(&val, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&val, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&val, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&val, 0); break;
		case PDF_TNULL:		error = fz_newnull(&val); break;
		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				error = fz_newint(&val, a);
				if (error) goto cleanup;
				error = fz_dictput(dict, key, val);
				if (error) goto cleanup;
				fz_dropobj(val);
				fz_dropobj(key);
				key = val = nil;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&val, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
		}

		if (error) goto cleanup;

		error = fz_dictput(dict, key, val);
		if (error) goto cleanup;

		fz_dropobj(val);
		fz_dropobj(key);
		key = val = nil;
	}

cleanup:
	if (key) fz_dropobj(key);
	if (val) fz_dropobj(val);
	if (dict) fz_dropobj(dict);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt dictionary");
}
Ejemplo n.º 7
0
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/* look for '%PDF' version marker within first kilobyte of file */
	n = fz_read(xref->file, (unsigned char *)buf, MAX(bufsize, 1024));
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	for (i = 0; i < n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			pdf_logxref("found object: (%d %d R)\n", num, gen);

			if (listlen + 1 == listcap)
			{
				listcap = (listcap * 3) / 2;
				list = fz_realloc(list, listcap, sizeof(struct entry));
			}

			list[listlen].num = num;
			list[listlen].gen = gen;
			list[listlen].ofs = numofs;
			list[listlen].stmofs = stmofs;
			list[listlen].stmlen = stmlen;
			listlen ++;

			if (num > maxnum)
				maxnum = num;
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}

	}

	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer, Root will be added later */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}