示例#1
0
/*
 * Load one subsection of a cross-reference stream into xref->table.
 *
 * The subsection describes the objects numbered i0 .. i0+i1-1. Every entry
 * in 'stm' consists of three fields, w0, w1 and w2 bytes wide, each stored
 * most significant byte first: the entry type, the offset and the
 * generation. A zero-width type field means type 1; zero-width offset or
 * generation fields mean 0. Types 0, 1 and 2 are stored as 'f', 'n' and
 * 'o'; any other value is stored as 0. Table slots whose type is already
 * set are not overwritten.
 *
 * Returns fz_okay on success, or an error when the range does not fit in
 * the table or the stream ends or fails while entries are still expected.
 */
static fz_error
readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
{
	fz_error error;
	int i, n;

	/*
	 * Mathematically the same test as "i0 + i1 > xref->len", but written
	 * as a subtraction: i0 and i1 come from the file, and adding two large
	 * values would overflow a signed int (undefined behaviour) and could
	 * let an out-of-range section slip past the check. Since i0 >= 0 is
	 * tested first, xref->len - i0 cannot overflow (assumes xref->len is
	 * never negative).
	 */
	if (i0 < 0 || i1 > xref->len - i0)
		return fz_throw("xref stream has too many entries");

	/* i0 + i1 <= xref->len holds from here on, so the sum is safe */
	for (i = i0; i < i0 + i1; i++)
	{
		int a = 0;
		int b = 0;
		int c = 0;

		/* the stream must still hold data for this entry */
		if (fz_peekbyte(stm) == EOF)
		{
			error = fz_readerror(stm);
			if (error)
				return fz_rethrow(error, "truncated xref stream");
			return fz_throw("truncated xref stream");
		}

		/* assemble the three fields, most significant byte first */
		for (n = 0; n < w0; n++)
			a = (a << 8) + fz_readbyte(stm);
		for (n = 0; n < w1; n++)
			b = (b << 8) + fz_readbyte(stm);
		for (n = 0; n < w2; n++)
			c = (c << 8) + fz_readbyte(stm);

		error = fz_readerror(stm);
		if (error)
			return fz_rethrow(error, "truncated xref stream");

		/* keep entries that were filled in earlier */
		if (!xref->table[i].type)
		{
			int t = w0 ? a : 1;
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
			xref->table[i].ofs = w1 ? b : 0;
			xref->table[i].gen = w2 ? c : 0;
		}
	}

	return fz_okay;
}
示例#2
0
/*
 * Read the cross-reference section that starts at byte offset 'ofs'.
 *
 * After seeking to 'ofs' and skipping whitespace, the first remaining
 * byte selects the reader: 'x' hands over to readoldxref, a decimal digit
 * hands over to readnewxref. 'trailerp', 'buf' and 'cap' are passed
 * through unchanged to whichever reader is chosen.
 *
 * Returns fz_okay on success, otherwise an error describing which step
 * failed.
 */
static fz_error
readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error error;
	int first;

	error = fz_seek(xref->file, ofs, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to xref");

	/* consume any whitespace in front of the section */
	for (;;)
	{
		if (!iswhite(fz_peekbyte(xref->file)))
			break;
		fz_readbyte(xref->file);
	}

	first = fz_peekbyte(xref->file);
	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	/* pick the reader from the first significant byte */
	if (first == 'x')
		error = readoldxref(trailerp, xref, buf, cap);
	else if (first >= '0' && first <= '9')
		error = readnewxref(trailerp, xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format");

	if (error)
		return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);

	return fz_okay;
}
示例#3
0
/*
 * Read the trailer that belongs to the xref section at xref->startxref.
 *
 * Seeks to xref->startxref, skips whitespace and dispatches on the first
 * remaining byte: 'x' calls readoldtrailer, a decimal digit calls
 * readnewtrailer. Any other byte is reported as an unrecognized format.
 * 'buf' and 'cap' are passed through to the chosen reader.
 */
static fz_error
readtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int first;

	error = fz_seek(xref->file, xref->startxref, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to startxref");

	/* consume any whitespace in front of the section */
	for (;;)
	{
		if (!iswhite(fz_peekbyte(xref->file)))
			break;
		fz_readbyte(xref->file);
	}

	first = fz_peekbyte(xref->file);
	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	switch (first)
	{
	case 'x':
		error = readoldtrailer(xref, buf, cap);
		break;

	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		error = readnewtrailer(xref, buf, cap);
		break;

	default:
		return fz_throw("cannot recognize xref format: '%c'", first);
	}

	if (error)
		return fz_rethrow(error, "cannot read trailer");

	return fz_okay;
}
示例#4
0
/*
 * Move the read position of a stream.
 *
 * A 'whence' of 1 (relative to the current position) is first converted
 * into an absolute seek using fz_tell. What happens next depends on the
 * kind of stream:
 *
 *   FZ_SFILE    'offset' and 'whence' are handed to lseek and the buffer
 *               is emptied.
 *   FZ_SFILTER  only absolute, forward seeks are possible; they are done
 *               by reading and discarding bytes and stop quietly when the
 *               data runs out.
 *   FZ_SBUFFER  'whence' 0 positions relative to buf->bp, anything else
 *               relative to buf->ep; the result is clamped to the range
 *               bp .. ep.
 *
 * Returns fz_okay on success. A failed lseek and an unsupported filter
 * seek mark the stream as dead before the error is returned.
 */
fz_error
fz_seek(fz_stream *stm, int offset, int whence)
{
	fz_error error;
	fz_buffer *buf = stm->buffer;
	int c;

	if (stm->dead)
		return fz_throw("assert: seek in dead stream");

	/* turn a relative seek into an absolute one */
	if (whence == 1)
	{
		int cur = fz_tell(stm);
		if (cur < 0)
			return fz_throw("cannot tell current position");
		offset = cur + offset;
		whence = 0;
	}

	buf->eof = 0;

	switch (stm->kind)
	{
	case FZ_SFILE:
		/*
		 * Test the result of lseek directly. Storing it in an int first
		 * would truncate positions beyond INT_MAX (possible when seeking
		 * relative to the end of a large file), and the truncated value
		 * could look negative and be mistaken for a failure.
		 */
		if (lseek(stm->file, offset, whence) < 0)
		{
			stm->dead = 1;
			return fz_throw("syserr: lseek: %s", strerror(errno));
		}

		/* whatever was buffered belongs to the old position */
		buf->rp = buf->bp;
		buf->wp = buf->bp;

		return fz_okay;

	case FZ_SFILTER:
		if (whence == 0)
		{
			if (offset < fz_tell(stm))
			{
				stm->dead = 1;
				return fz_throw("assert: seek backwards in filter");
			}
			/* skip forward by reading; running out of data is not an error */
			while (fz_tell(stm) < offset)
			{
				c = fz_readbyte(stm);
				if (c == EOF)
				{
					error = fz_readerror(stm);
					if (error)
						return fz_rethrow(error, "cannot seek forward in filter");
					break;
				}
			}
			return fz_okay;
		}

		stm->dead = 1;
		return fz_throw("assert: relative seek in filter");

	case FZ_SBUFFER:
		/*
		 * Clamp the offset before applying it to a pointer. Computing
		 * bp + offset or ep + offset first and clamping afterwards forms
		 * pointers outside the buffer, which is undefined behaviour and
		 * can wrap around for large offsets.
		 */
		if (whence == 0)
		{
			if (offset <= 0)
				buf->rp = buf->bp;
			else if (offset >= buf->ep - buf->bp)
				buf->rp = buf->ep;
			else
				buf->rp = buf->bp + offset;
		}
		else
		{
			if (offset >= 0)
				buf->rp = buf->ep;
			else if (offset <= buf->bp - buf->ep)
				buf->rp = buf->bp;
			else
				buf->rp = buf->ep + offset;
		}
		return fz_okay;

	default:
		return fz_throw("unknown stream type");
	}
}
/*
 * Parse one indirect object definition from 'file':
 *
 *     <num> <gen> obj <value> followed by "stream" or "endobj"
 *
 * 'buf' / 'cap' is the scratch buffer handed to the lexer. On success the
 * parsed value is stored in *op. The remaining output pointers are
 * optional and are only written when non-null:
 *
 *   onum     the object number
 *   ogen     the generation number
 *   ostmofs  the fz_tell position reached after the "stream" keyword, the
 *            byte following it and, if that byte was a carriage return, a
 *            directly following line feed; 0 when the object is not
 *            followed by "stream"
 *
 * An object with no value before "endobj" yields a null object. If the
 * value is followed by something other than "stream" or "endobj" a
 * warning is issued and parsing still succeeds.
 */
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
                fz_stream *file, char *buf, int cap,
                int *onum, int *ogen, int *ostmofs)
{
    fz_error error = fz_okay;
    fz_obj *value = nil;
    int num = 0, gen = 0;
    int streamstart = 0;
    int refnum, refgen;
    int slen;
    pdf_token_e token;

    /* object number */
    error = pdf_lex(&token, file, buf, cap, &slen);
    if (error)
        return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
    if (token != PDF_TINT)
        return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
    num = atoi(buf);

    /* generation number */
    error = pdf_lex(&token, file, buf, cap, &slen);
    if (error)
        return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
    if (token != PDF_TINT)
        return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
    gen = atoi(buf);

    /* the "obj" keyword */
    error = pdf_lex(&token, file, buf, cap, &slen);
    if (error)
        return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
    if (token != PDF_TOBJ)
        return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

    /* first token of the value */
    error = pdf_lex(&token, file, buf, cap, &slen);
    if (error)
        return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

    switch (token)
    {
    case PDF_TENDOBJ:
        /* no value at all */
        value = fz_newnull();
        goto finish;

    case PDF_TNULL:
        value = fz_newnull();
        break;

    case PDF_TTRUE:
        value = fz_newbool(1);
        break;

    case PDF_TFALSE:
        value = fz_newbool(0);
        break;

    case PDF_TNAME:
        value = fz_newname(buf);
        break;

    case PDF_TREAL:
        value = fz_newreal(atof(buf));
        break;

    case PDF_TSTRING:
        value = fz_newstring(buf, slen);
        break;

    case PDF_TOARRAY:
        error = pdf_parsearray(&value, xref, file, buf, cap);
        if (error)
            return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
        break;

    case PDF_TODICT:
        error = pdf_parsedict(&value, xref, file, buf, cap);
        if (error)
            return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
        break;

    case PDF_TINT:
        /* either a plain integer or the start of an "a b R" reference */
        refnum = atoi(buf);
        error = pdf_lex(&token, file, buf, cap, &slen);
        if (error)
            return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

        if (token == PDF_TSTREAM || token == PDF_TENDOBJ)
        {
            /* the token after the value has already been read */
            value = fz_newint(refnum);
            goto finish;
        }

        if (token != PDF_TINT)
            return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

        refgen = atoi(buf);
        error = pdf_lex(&token, file, buf, cap, &slen);
        if (error)
            return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

        if (token != PDF_TR)
            return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

        value = fz_newindirect(refnum, refgen, xref);
        break;

    default:
        return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
    }

    /* read the token that follows the value */
    error = pdf_lex(&token, file, buf, cap, &slen);
    if (error)
    {
        fz_dropobj(value);
        return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
    }

finish:
    if (token == PDF_TSTREAM)
    {
        /* step over the byte after the keyword, and a CR LF pair if present */
        if (fz_readbyte(file) == '\r')
        {
            if (fz_peekbyte(file) == '\n')
                fz_readbyte(file);
            else
                fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
        }

        error = fz_readerror(file);
        if (error)
        {
            fz_dropobj(value);
            return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
        }

        streamstart = fz_tell(file);
    }
    else if (token != PDF_TENDOBJ)
    {
        fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
    }

    if (onum)
        *onum = num;
    if (ogen)
        *ogen = gen;
    if (ostmofs)
        *ostmofs = streamstart;
    *op = value;

    return fz_okay;
}
示例#6
0
/*
 * Locate and parse the trailer dictionary that follows an old-style
 * ("xref" keyword) cross-reference table.
 *
 * The table itself is not interpreted here: for each subsection only the
 * header line is read, and the entries are skipped by seeking forward
 * 20 bytes per entry. Once a line no longer starts with a digit, the
 * "trailer" keyword and a dictionary are expected; the dictionary is
 * parsed into xref->trailer.
 *
 * 'buf' / 'cap' is scratch space for reading lines and tokens.
 * Returns fz_okay on success, otherwise an error naming the failed step.
 */
static fz_error
readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int len;
	char *s;
	int n;
	int t;
	pdf_token_e tok;
	int c;

	pdf_logxref("load old xref format trailer\n");

	error = fz_readline(xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot read xref marker");
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* a subsection header starts with a digit; anything else ends the table */
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		error = fz_readline(xref->file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot read xref count");

		/* header is "<first object> <count>"; only the count is needed */
		s = buf;
		(void) strsep(&s, " ");
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(strsep(&s, " "));

		/*
		 * A negative count would make the skip below seek backwards over
		 * data that has already been read, so reject it.
		 */
		if (len < 0)
			return fz_throw("invalid range marker in xref");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			/*
			 * Convert the length to int before negating it: negating
			 * the size_t result of strlen gives a huge unsigned value
			 * whose conversion to int is implementation-defined.
			 * NOTE(review): the extra 2 presumably accounts for
			 * separator / line-end bytes consumed with the line --
			 * confirm against fz_readline.
			 */
			error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
			if (error)
				return fz_rethrow(error, "cannot seek in file");
		}

		t = fz_tell(xref->file);
		if (t < 0)
			return fz_throw("cannot tell in file");

		/* skip the subsection: 'len' entries of 20 bytes each */
		error = fz_seek(xref->file, t + 20 * len, 0);
		if (error)
			return fz_rethrow(error, "cannot seek in file");
	}

	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read from file");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TTRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TODICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	return fz_okay;
}
示例#7
0
/*
 * Read the next token from stream 'f'.
 *
 * Whitespace and '%' comments are skipped. The kind of token found is
 * stored in *tok. For names, strings, hex strings, numbers and keywords
 * the token text is placed in 'buf' (capacity 'n') and its length in *sl;
 * for the other tokens 'buf' and *sl are not written. A number is
 * reported as PDF_TREAL when its text contains a '.', otherwise as
 * PDF_TINT. End of input yields PDF_TEOF.
 *
 * Returns fz_okay on success. If the stream reports a read error, or the
 * input cannot start a token, *tok is set to PDF_TERROR and an error is
 * returned.
 */
fz_error
pdf_lex(pdf_token_e *tok, fz_stream *f, char *buf, int n, int *sl)
{
	fz_error error;
	int lexfailed = 0;
	int c;

	for (;;)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
		{
			*tok = PDF_TEOF;
			break;
		}

		/* nothing to report for whitespace and comments; look again */
		if (iswhite(c))
		{
			lexwhite(f);
			continue;
		}

		if (c == '%')
		{
			lexcomment(f);
			continue;
		}

		if (c == '/')
		{
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			break;
		}

		if (c == '(')
		{
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			break;
		}

		if (c == '<')
		{
			/* "<<" opens a dictionary, a single '<' opens a hex string */
			fz_readbyte(f);
			if (fz_peekbyte(f) == '<')
			{
				fz_readbyte(f);
				*tok = PDF_TODICT;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				*tok = PDF_TSTRING;
			}
			break;
		}

		if (c == '>')
		{
			/* only ">>" is valid here */
			fz_readbyte(f);
			if (fz_readbyte(f) == '>')
			{
				*tok = PDF_TCDICT;
			}
			else
			{
				*tok = PDF_TERROR;
				lexfailed = 1;
			}
			break;
		}

		if (c == '[' || c == ']' || c == '{' || c == '}')
		{
			/* single-character delimiters */
			fz_readbyte(f);
			if (c == '[')
				*tok = PDF_TOARRAY;
			else if (c == ']')
				*tok = PDF_TCARRAY;
			else if (c == '{')
				*tok = PDF_TOBRACE;
			else
				*tok = PDF_TCBRACE;
			break;
		}

		if (isnumber(c))
		{
			lexnumber(f, buf, n);
			*sl = strlen(buf);
			if (strchr(buf, '.'))
				*tok = PDF_TREAL;
			else
				*tok = PDF_TINT;
			break;
		}

		if (isregular(c))
		{
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			break;
		}

		/* nothing that can start a token */
		*tok = PDF_TERROR;
		lexfailed = 1;
		break;
	}

	/* a stream read error takes precedence over a lexical error */
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}

	if (lexfailed)
	{
		*tok = PDF_TERROR;
		return fz_throw("lexical error");
	}

	return fz_okay;
}