fz_error
pdf_parsestmobj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error;
	pdf_token_e tok;
	int len;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse token in object stream");

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TODICT:
		error = pdf_parsedict(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TNAME: *op = fz_newname(buf); break;
	case PDF_TREAL: *op = fz_newreal(atof(buf)); break;
	case PDF_TSTRING: *op = fz_newstring(buf, len); break;
	case PDF_TTRUE: *op = fz_newbool(1); break;
	case PDF_TFALSE: *op = fz_newbool(0); break;
	case PDF_TNULL: *op = fz_newnull(); break;
	case PDF_TINT: *op = fz_newint(atoi(buf)); break;
	default: return fz_throw("unknown token in object stream");
	}

	return fz_okay;
}
示例#2
0
static void addhexfilter(fz_obj *dict)
{
	fz_obj *f, *dp, *newf, *newdp;
	fz_obj *ahx, *nullobj;

	ahx = fz_newname("ASCIIHexDecode");
	nullobj = fz_newnull();
	newf = newdp = nil;

	f = fz_dictgets(dict, "Filter");
	dp = fz_dictgets(dict, "DecodeParms");

	if (fz_isname(f))
	{
		newf = fz_newarray(2);
		fz_arraypush(newf, ahx);
		fz_arraypush(newf, f);
		f = newf;
		if (fz_isdict(dp))
		{
			newdp = fz_newarray(2);
			fz_arraypush(newdp, nullobj);
			fz_arraypush(newdp, dp);
			dp = newdp;
		}
	}
	else if (fz_isarray(f))
	{
		fz_arrayinsert(f, ahx);
		if (fz_isarray(dp))
			fz_arrayinsert(dp, nullobj);
	}
	else
		f = ahx;

	fz_dictputs(dict, "Filter", f);
	if (dp)
		fz_dictputs(dict, "DecodeParms", dp);

	fz_dropobj(ahx);
	fz_dropobj(nullobj);
	if (newf)
		fz_dropobj(newf);
	if (newdp)
		fz_dropobj(newdp);
}
static fz_error parsename(fz_obj **obj, char **sp)
{
	fz_error error;
	char buf[64];
	char *s = *sp;
	char *p = buf;

	s ++;		/* skip '/' */
	while (p < buf + sizeof buf - 1 && isregular(*s))
		*p++ = *s++;
	*p++ = 0;
	*sp = s;

	error = fz_newname(obj, buf);
	if (error)
		return fz_rethrow(error, "cannot create name");
	return fz_okay;
}
示例#4
0
fz_error *
pdf_parsestmobj(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	int tok, len;

	tok = pdf_lex(file, buf, cap, &len);

	switch (tok)
	{
		case PDF_TOARRAY:	return pdf_parsearray(op, file, buf, cap);
		case PDF_TODICT:	return pdf_parsedict(op, file, buf, cap);
		case PDF_TNAME:		return fz_newname(op, buf);
		case PDF_TREAL:		return fz_newreal(op, atof(buf));
		case PDF_TSTRING:	return fz_newstring(op, buf, len);
		case PDF_TTRUE:		return fz_newbool(op, 1);
		case PDF_TFALSE:	return fz_newbool(op, 0);
		case PDF_TNULL:		return fz_newnull(op);
		case PDF_TINT:		return fz_newint(op, atoi(buf));
	}

	return fz_throw("syntaxerror: corrupt object stream");
}
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0, stmofs;
	pdf_token_e tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TNAME: obj = fz_newname(buf); break;
	case PDF_TREAL: obj = fz_newreal(atof(buf)); break;
	case PDF_TSTRING: obj = fz_newstring(buf, len); break;
	case PDF_TTRUE: obj = fz_newbool(1); break;
	case PDF_TFALSE: obj = fz_newbool(0); break;
	case PDF_TNULL: obj = fz_newnull(); break;

	case PDF_TINT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			obj = fz_newint(a);
			goto skip;
		}
		if (tok == PDF_TINT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TR)
			{
				obj = fz_newindirect(a, b, xref);
				break;
			}
		}
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	case PDF_TENDOBJ:
		obj = fz_newnull();
		goto skip;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_dropobj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		while (c == ' ')
			c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
	{
		stmofs = 0;
	}
	else
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
		stmofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return fz_okay;
}
fz_error
pdf_parsedict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	pdf_token_e tok;
	int len;
	int a, b;

	dict = fz_newdict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

skip:
		if (tok == PDF_TCDICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TNAME)
		{
			fz_dropobj(dict);
			return fz_throw("invalid key in dict");;
		}

		key = fz_newname(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOARRAY:
			error = pdf_parsearray(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TNAME: val = fz_newname(buf); break;
		case PDF_TREAL: val = fz_newreal(atof(buf)); break;
		case PDF_TSTRING: val = fz_newstring(buf, len); break;
		case PDF_TTRUE: val = fz_newbool(1); break;
		case PDF_TFALSE: val = fz_newbool(0); break;
		case PDF_TNULL: val = fz_newnull(); break;

		case PDF_TINT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				val = fz_newint(a);
				fz_dictput(dict, key, val);
				fz_dropobj(val);
				fz_dropobj(key);
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_dropobj(key);
					fz_dropobj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TR)
				{
					val = fz_newindirect(a, b, xref);
					break;
				}
			}
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			return fz_throw("unknown token in dict");
		}

		fz_dictput(dict, key, val);
		fz_dropobj(val);
		fz_dropobj(key);
	}
}
fz_error
pdf_parsearray(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = nil;
	fz_obj *obj = nil;
	int a = 0, b = 0, n = 0;
	pdf_token_e tok;
	int len;

	ary = fz_newarray(4);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		if (tok != PDF_TINT && tok != PDF_TR)
		{
			if (n > 0)
			{
				obj = fz_newint(a);
				fz_arraypush(ary, obj);
				fz_dropobj(obj);
			}
			if (n > 1)
			{
				obj = fz_newint(b);
				fz_arraypush(ary, obj);
				fz_dropobj(obj);
			}
			n = 0;
		}

		if (tok == PDF_TINT && n == 2)
		{
			obj = fz_newint(a);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TINT:
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;

		case PDF_TR:
			if (n != 2)
			{
				fz_dropobj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			obj = fz_newindirect(a, b, xref);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			n = 0;
			break;

		case PDF_TOARRAY:
			error = pdf_parsearray(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;

		case PDF_TNAME:
			obj = fz_newname(buf);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TREAL:
			obj = fz_newreal(atof(buf));
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TSTRING:
			obj = fz_newstring(buf, len);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TTRUE:
			obj = fz_newbool(1);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TFALSE:
			obj = fz_newbool(0);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TNULL:
			obj = fz_newnull();
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;

		default:
			fz_dropobj(ary);
			return fz_throw("cannot parse token in array");
		}
	}
}
示例#8
0
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap,
		int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0, stmofs;
	int tok, len;
	int a, b;

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
			{
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&obj, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
	}
	if (error) goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
			else
				c = fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
		stmofs = 0;
	else
		goto cleanup;

	if (ooid) *ooid = oid;
	if (ogid) *ogid = gid;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return nil;

cleanup:
	if (obj) fz_dropobj(obj);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}
示例#9
0
fz_error *
pdf_parsedict(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	int tok, len;
	int a, b;

	error = fz_newdict(op, 8);
	if (error) return error;
	dict = *op;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

skip:
		if (tok == PDF_TCDICT)
			return nil;

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
			return nil;

		if (tok != PDF_TNAME)
			goto cleanup;

		error = fz_newname(&key, buf);
		if (error) goto cleanup;

		tok = pdf_lex(file, buf, cap, &len);

		switch (tok)
		{
		case PDF_TOARRAY:	error = pdf_parsearray(&val, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&val, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&val, buf); break;
		case PDF_TREAL:		error = fz_newreal(&val, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&val, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&val, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&val, 0); break;
		case PDF_TNULL:		error = fz_newnull(&val); break;
		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				error = fz_newint(&val, a);
				if (error) goto cleanup;
				error = fz_dictput(dict, key, val);
				if (error) goto cleanup;
				fz_dropobj(val);
				fz_dropobj(key);
				key = val = nil;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&val, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
		}

		if (error) goto cleanup;

		error = fz_dictput(dict, key, val);
		if (error) goto cleanup;

		fz_dropobj(val);
		fz_dropobj(key);
		key = val = nil;
	}

cleanup:
	if (key) fz_dropobj(key);
	if (val) fz_dropobj(val);
	if (dict) fz_dropobj(dict);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt dictionary");
}
示例#10
0
fz_error *
pdf_parsearray(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *ary = nil;
	fz_obj *obj = nil;
	int a = 0, b = 0, n = 0;
	int tok, len;

	error = fz_newarray(op, 4);
	if (error) return error;
	ary = *op;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

		if (tok != PDF_TINT && tok != PDF_TR)
		{
			if (n > 0)
			{
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error) goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			if (n > 1)
			{
				error = fz_newint(&obj, b);
				if (error) goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error) goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			n = 0;
		}

		if (tok == PDF_TINT && n == 2)
		{
			error = fz_newint(&obj, a);
			if (error) goto cleanup;
			error = fz_arraypush(ary, obj);
			if (error) goto cleanup;
			fz_dropobj(obj);
			obj = nil;
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			return nil;
		case PDF_TINT:
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;
		case PDF_TR:
			if (n != 2)
				goto cleanup;
			error = fz_newindirect(&obj, a, b);
			if (error) goto cleanup;
			n = 0;
			break;
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		default:		goto cleanup;
		}
		if (error) goto cleanup;

		if (obj)
		{
			error = fz_arraypush(ary, obj);
			if (error) goto cleanup;
			fz_dropobj(obj);
		}

		obj = nil;
	}

cleanup:
	if (obj) fz_dropobj(obj);
	if (ary) fz_dropobj(ary);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt array");
}