static fz_error
pdf_readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
{
    int i, n;

    if (i0 < 0 || i0 + i1 > xref->len)
        return fz_throw("xref stream has too many entries");

    for (i = i0; i < i0 + i1; i++)
    {
        int a = 0;
        int b = 0;
        int c = 0;

        if (fz_peekbyte(stm) == EOF)
            return fz_throw("truncated xref stream");

        for (n = 0; n < w0; n++)
            a = (a << 8) + fz_readbyte(stm);
        for (n = 0; n < w1; n++)
            b = (b << 8) + fz_readbyte(stm);
        for (n = 0; n < w2; n++)
            c = (c << 8) + fz_readbyte(stm);

        if (!xref->table[i].type)
        {
            int t = w0 ? a : 1;
            xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
            xref->table[i].ofs = w1 ? b : 0;
            xref->table[i].gen = w2 ? c : 0;
        }
    }

    return fz_okay;
}
Ejemplo n.º 2
0
static void
lexname(fz_stream *f, unsigned char *s, int n)
{
	unsigned char *p = s;
	unsigned char *q = s;

	while (n > 1)
	{
		if (!isregular(fz_peekbyte(f)))
			break;
		*s++ = fz_readbyte(f);
		n--;
	}
	*s = '\0';

	while (*p)
	{
		if (p[0] == '#' && p[1] != 0 && p[2] != 0)
		{
			*q++ = fromhex(p[1]) * 16 + fromhex(p[2]);
			p += 3;
		}
		else
			*q++ = *p++;
	}
	*q = '\0';
}
static fz_error
pdf_readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
    fz_error error;
    int c;

    fz_seek(xref->file, ofs, 0);

    while (iswhite(fz_peekbyte(xref->file)))
        fz_readbyte(xref->file);

    c = fz_peekbyte(xref->file);
    if (c == 'x')
    {
        error = pdf_readoldxref(trailerp, xref, buf, cap);
        if (error)
            return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);
    }
    else if (c >= '0' && c <= '9')
    {
        error = pdf_readnewxref(trailerp, xref, buf, cap);
        if (error)
            return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);
    }
    else
    {
        return fz_throw("cannot recognize xref format");
    }

    return fz_okay;
}
Ejemplo n.º 4
0
static int
lexhexstring(fz_stream *f, char *buf, int n)
{
    char *s = buf;
    char *e = buf + n;
    int a = 0, x = 0;
    int c;

    while (s < e)
    {
        c = fz_readbyte(f);
        switch (c)
        {
        case ISWHITE:
            break;
        case ISHEX:
            if (x)
            {
                *s++ = a * 16 + fromhex(c);
                x = !x;
            }
            else
            {
                a = fromhex(c);
                x = !x;
            }
            break;
        case '>':
        default:
            goto end;
        }
    }
end:
    return s - buf;
}
static fz_error
pdf_readtrailer(pdf_xref *xref, char *buf, int cap)
{
    fz_error error;
    int c;

    fz_seek(xref->file, xref->startxref, 0);

    while (iswhite(fz_peekbyte(xref->file)))
        fz_readbyte(xref->file);

    c = fz_peekbyte(xref->file);
    if (c == 'x')
    {
        error = pdf_readoldtrailer(xref, buf, cap);
        if (error)
            return fz_rethrow(error, "cannot read trailer");
    }
    else if (c >= '0' && c <= '9')
    {
        error = pdf_readnewtrailer(xref, buf, cap);
        if (error)
            return fz_rethrow(error, "cannot read trailer");
    }
    else
    {
        return fz_throw("cannot recognize xref format: '%c'", c);
    }

    return fz_okay;
}
Ejemplo n.º 6
0
static int
lexhexstring(fz_stream *f, unsigned char *buf, int n)
{
	unsigned char *s = buf;
	unsigned char *e = buf + n;
	int a = 0, x = 0;
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);
		if (c == '>')
			break;
		else if (iswhite(c))
			continue;
		else if (ishex(c))
		{
			if (x)
			{
				*s++ = a * 16 + fromhex(c);
				x = !x;
			}
			else
			{
				a = fromhex(c);
				x = !x;
			}
		}
		else
			break;
	}

	return s - buf;
}
static fz_error
loadpostscriptfunc(pdf_function *func, pdf_xref *xref, fz_obj *dict, int oid, int gen)
{
	fz_error error;
	fz_stream *stream;
	int codeptr;

	pdf_logrsrc("load postscript function (%d %d R)\n", oid, gen);

	error = pdf_openstream(&stream, xref, oid, gen);
	if (error)
		return fz_rethrow(error, "cannot open calculator function stream");

	if (fz_readbyte(stream) != '{')
	{
		fz_dropstream(stream);
		return fz_throw("stream is not a calculator function");
	}

	func->u.p.code = nil;
	func->u.p.cap = 0;

	codeptr = 0;
	error = parsecode(func, stream, &codeptr);
	if (error)
	{
		fz_dropstream(stream);
		return fz_rethrow(error, "cannot parse calculator function");
	}

	fz_dropstream(stream);
	return fz_okay;
}
Ejemplo n.º 8
0
static inline void
lexcomment(fz_stream *f)
{
    int c;
    do
    {
        c = fz_readbyte(f);
    }
    while ((c != '\012') && (c != '\015') && (c != EOF));
}
Ejemplo n.º 9
0
static void
lexnumber(fz_stream *f, unsigned char *s, int n)
{
	while (n > 1)
	{
		if (!isnumber(fz_peekbyte(f)))
			break;
		*s++ = fz_readbyte(f);
		n--;
	}
	*s = '\0';
}
Ejemplo n.º 10
0
static inline void
lexcomment(fz_stream *f)
{
	int c;
	while (1)
	{
		c = fz_readbyte(f);
		if (c == '\012') break;
		if (c == '\015') break;
		if (c == EOF) break;
	}
}
Ejemplo n.º 11
0
static inline void
lexwhite(fz_stream *f)
{
	int c;
	while (1)
	{
		c = fz_peekbyte(f);
		if (!iswhite(c))
			break;
		fz_readbyte(f);
	}
}
Ejemplo n.º 12
0
static inline void
lexwhite(fz_stream *f)
{
    int c;
    do
    {
        c = fz_readbyte(f);
    }
    while ((c <= 32) && (iswhite(c)));
    if (c != EOF)
        fz_unreadbyte(f);
}
Ejemplo n.º 13
0
static inline int fillbits(fz_lzwd *lzw)
{
	while (lzw->bidx >= 8)
	{
		int c = fz_readbyte(lzw->chain);
		if (c == EOF)
			return EOF;
		lzw->bidx -= 8;
		lzw->word |= c << lzw->bidx;
	}
	return 0;
}
Ejemplo n.º 14
0
int fz_readline(fz_stream *stm, char *mem, int n)
{
	char *s = mem;
	int c = EOF;
	while (n > 1)
	{
		c = fz_readbyte(stm);
		if (c == EOF)
			break;
		if (c == '\r') {
			c = fz_peekbyte(stm);
			if (c == '\n')
				c = fz_readbyte(stm);
			break;
		}
		if (c == '\n')
			break;
		*s++ = c;
		n--;
	}
	if (n)
		*s = '\0';
	return s - mem;
}
Ejemplo n.º 15
0
static int
readahxd(fz_stream *stm, unsigned char *buf, int len)
{
    fz_ahxd *state = stm->state;
    unsigned char *p = buf;
    unsigned char *ep = buf + len;
    int a, b, c, odd;

    odd = 0;

    while (p < ep)
    {
        if (state->eod)
            return p - buf;

        c = fz_readbyte(state->chain);
        if (c < 0)
            return p - buf;

        if (ishex(c))
        {
            if (!odd)
            {
                a = fromhex(c);
                odd = 1;
            }
            else
            {
                b = fromhex(c);
                *p++ = (a << 4) | b;
                odd = 0;
            }
        }
        else if (c == '>')
        {
            if (odd)
                *p++ = (a << 4);
            state->eod = 1;
        }
        else if (!iswhite(c))
        {
            return fz_throw("bad data in ahxd: '%c'", c);
        }
    }

    return p - buf;
}
Ejemplo n.º 16
0
static int
getdata(fz_stream *stream, int bps)
{
	unsigned int bitmask = (1 << bps) - 1;
	unsigned int buf = 0;
	int bits = 0;
	int s;

	while (bits < bps)
	{
		buf = (buf << 8) | (fz_readbyte(stream) & 0xff);
		bits += 8;
	}
	s = buf >> (bits - bps);
	if (bps < 32)
		s = s & bitmask;
	bits -= bps;

	return s;
}
Ejemplo n.º 17
0
static int
lexhexstring(fz_stream *f, char *buf, int n)
{
	char *s = buf;
	char *e = buf + n;
	int a = 0, x = 0;
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);
		if (c == '>')
			break;
		else if (iswhite(c))
			continue;
		else if (ishex(c))
		{
			if (x)
			{
				*s++ = a * 16 + fromhex(c);
				x = !x;
			}
			else
			{
				a = fromhex(c);
				x = !x;
			}
		}
		else {
			if (c == EOF) {
				break;
			}
			// cf. http://code.google.com/p/sumatrapdf/issues/detail?id=624
			fz_warn("Ignoring invalid character in hexstring: %c", c);
		}
	}

	return s - buf;
}
Ejemplo n.º 18
0
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/* look for '%PDF' version marker within first kilobyte of file */
	n = fz_read(xref->file, (unsigned char *)buf, MAX(bufsize, 1024));
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	for (i = 0; i < n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			pdf_logxref("found object: (%d %d R)\n", num, gen);

			if (listlen + 1 == listcap)
			{
				listcap = (listcap * 3) / 2;
				list = fz_realloc(list, listcap, sizeof(struct entry));
			}

			list[listlen].num = num;
			list[listlen].gen = gen;
			list[listlen].ofs = numofs;
			list[listlen].stmofs = stmofs;
			list[listlen].stmlen = stmlen;
			listlen ++;

			if (num > maxnum)
				maxnum = num;
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}

	}

	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer, Root will be added later */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}
Ejemplo n.º 19
0
static fz_error
fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
	fz_error error;
	int tok;
	int stmlen;
	int len;
	int n;

	*stmofsp = 0;
	*stmlenp = -1;

	stmlen = 0;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse object");
	if (tok == PDF_TODICT)
	{
		fz_obj *dict, *obj;

		/* Send nil xref so we don't try to resolve references */
		error = pdf_parsedict(&dict, nil, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");

		obj = fz_dictgets(dict, "Type");
		if (fz_isname(obj) && !strcmp(fz_toname(obj), "XRef"))
		{
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (*encrypt)
					fz_dropobj(*encrypt);
				*encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (*id)
					fz_dropobj(*id);
				*id = fz_keepobj(obj);
			}
		}

		obj = fz_dictgets(dict, "Length");
		if (fz_isint(obj))
			stmlen = fz_toint(obj);

		fz_dropobj(dict);
	}

	while ( tok != PDF_TSTREAM &&
		tok != PDF_TENDOBJ &&
		tok != PDF_TERROR &&
		tok != PDF_TEOF )
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj or stream token");
	}

	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r') {
			c = fz_peekbyte(file);
			if (c == '\n')
				fz_readbyte(file);
		}

		*stmofsp = fz_tell(file);
		if (*stmofsp < 0)
			return fz_throw("cannot seek in file");

		if (stmlen > 0)
		{
			fz_seek(file, *stmofsp + stmlen, 0);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				fz_catch(error, "cannot find endstream token, falling back to scanning");
			if (tok == PDF_TENDSTREAM)
				goto atobjend;
			fz_seek(file, *stmofsp, 0);
		}

		n = fz_read(file, (unsigned char *) buf, 9);
		if (n < 0)
			return fz_rethrow(n, "cannot read from file");

		while (memcmp(buf, "endstream", 9) != 0)
		{
			c = fz_readbyte(file);
			if (c == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = c;
		}

		*stmlenp = fz_tell(file) - *stmofsp - 9;

atobjend:
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj token");
		if (tok != PDF_TENDOBJ)
			fz_warn("object missing 'endobj' token");
	}

	return fz_okay;
}
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0, stmofs;
	pdf_token_e tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TNAME: obj = fz_newname(buf); break;
	case PDF_TREAL: obj = fz_newreal(atof(buf)); break;
	case PDF_TSTRING: obj = fz_newstring(buf, len); break;
	case PDF_TTRUE: obj = fz_newbool(1); break;
	case PDF_TFALSE: obj = fz_newbool(0); break;
	case PDF_TNULL: obj = fz_newnull(); break;

	case PDF_TINT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			obj = fz_newint(a);
			goto skip;
		}
		if (tok == PDF_TINT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TR)
			{
				obj = fz_newindirect(a, b, xref);
				break;
			}
		}
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	case PDF_TENDOBJ:
		obj = fz_newnull();
		goto skip;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_dropobj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		while (c == ' ')
			c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
	{
		stmofs = 0;
	}
	else
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
		stmofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return fz_okay;
}
Ejemplo n.º 21
0
static int
lexnumber(fz_stream *f, char *s, int n, int *tok)
{
    char *buf = s;
    *tok = PDF_TINT;

    /* Initially we might have +, -, . or a digit */
    if (n > 1)
    {
        int c = fz_readbyte(f);
        switch (c)
        {
        case '.':
            *tok = PDF_TREAL;
            *s++ = c;
            n--;
            goto loop_after_dot;
        case '+':
        case '-':
        case RANGE_0_9:
            *s++ = c;
            n--;
            goto loop_after_sign;
        default:
            fz_unreadbyte(f);
            goto end;
        case EOF:
            goto end;
        }
    }

    /* We can't accept a sign from here on in, just . or a digit */
loop_after_sign:
    while (n > 1)
    {
        int c = fz_readbyte(f);
        switch (c)
        {
        case '.':
            *tok = PDF_TREAL;
            *s++ = c;
            n--;
            goto loop_after_dot;
        case RANGE_0_9:
            *s++ = c;
            break;
        default:
            fz_unreadbyte(f);
            goto end;
        case EOF:
            goto end;
        }
        n--;
    }

    /* In here, we've seen a dot, so can accept just digits */
loop_after_dot:
    while (n > 1)
    {
        int c = fz_readbyte(f);
        switch (c)
        {
        case RANGE_0_9:
            *s++ = c;
            break;
        default:
            fz_unreadbyte(f);
            goto end;
        case EOF:
            goto end;
        }
        n--;
    }

end:
    *s = '\0';
    return s-buf;
}
Ejemplo n.º 22
0
fz_error
pdf_lex(pdf_token_e *tok, fz_stream *f, char *buf, int n, int *sl)
{
	fz_error error;
	int c;

	while (1)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
		{
			*tok = PDF_TEOF;
			goto cleanupokay;
		}

		else if (iswhite(c))
			lexwhite(f);

		else if (c == '%')
			lexcomment(f);

		else if (c == '/')
		{
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			goto cleanupokay;
		}

		else if (c == '(')
		{
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			goto cleanupokay;
		}

		else if (c == '<')
		{
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<')
			{
				fz_readbyte(f);
				*tok = PDF_TODICT;
				goto cleanupokay;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				*tok = PDF_TSTRING;
				goto cleanupokay;
			}
		}

		else if (c == '>')
		{
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>')
			{
				*tok = PDF_TCDICT;
				goto cleanupokay;
			}
			*tok = PDF_TERROR;
			goto cleanuperror;
		}

		else if (c == '[')
		{
			fz_readbyte(f);
			*tok = PDF_TOARRAY;
			goto cleanupokay;
		}

		else if (c == ']')
		{
			fz_readbyte(f);
			*tok = PDF_TCARRAY;
			goto cleanupokay;
		}

		else if (c == '{')
		{
			fz_readbyte(f);
			*tok = PDF_TOBRACE;
			goto cleanupokay;
		}

		else if (c == '}')
		{
			fz_readbyte(f);
			*tok = PDF_TCBRACE;
			goto cleanupokay;
		}

		else if (isnumber(c))
		{
			lexnumber(f, buf, n);
			*sl = strlen(buf);
			if (strchr(buf, '.'))
			{
				*tok = PDF_TREAL;
				goto cleanupokay;
			}
			*tok = PDF_TINT;
			goto cleanupokay;
		}

		else if (isregular(c))
		{
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			goto cleanupokay;
		}

		else
		{
			*tok = PDF_TERROR;
			goto cleanuperror;
		}
	}

cleanupokay:
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}
	return fz_okay;

cleanuperror:
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}
	*tok = PDF_TERROR;
	return fz_throw("lexical error");
}
Ejemplo n.º 23
0
static int
lexstring(fz_stream *f, unsigned char *buf, int n)
{
	unsigned char *s = buf;
	unsigned char *e = buf + n;
	int bal = 1;
	int oct;
	int c;

	while (s < e)
	{
		c = fz_readbyte(f);
		if (c == '(')
		{
			bal++;
			*s++ = c;
		}
		else if (c == ')')
		{
			bal --;
			if (bal == 0)
				break;
			*s++ = c;
		}
		else if (c == '\\')
		{
			c = fz_readbyte(f);
			if (c == 'n') *s++ = '\n';
			else if (c == 'r') *s++ = '\r';
			else if (c == 't') *s++ = '\t';
			else if (c == 'b') *s++ = '\b';
			else if (c == 'f') *s++ = '\f';
			else if (c == '(') *s++ = '(';
			else if (c == ')') *s++ = ')';
			else if (c == '\\') *s++ = '\\';

			else if (c >= '0' && c <= '9')
			{
				oct = c - '0';
				c = fz_peekbyte(f);
				if (c >= '0' && c <= '9')
				{
					fz_readbyte(f);
					oct = oct * 8 + (c - '0');
					c = fz_peekbyte(f);
					if (c >= '0' && c <= '9')
					{
						fz_readbyte(f);
						oct = oct * 8 + (c - '0');
					}
				}
				*s++ = oct;
			}

			else if (c == '\n')
				;
			else if (c == '\r')
			{
				c = fz_peekbyte(f);
				if (c == '\n')
					fz_readbyte(f);
			}
			else *s++ = c;
		}
		else
		{
			*s++ = c;
		}
	}

	return s - buf;
}
Ejemplo n.º 24
0
fz_error
pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
    while (1)
    {
        int c = fz_readbyte(f);
        switch (c)
        {
        case EOF:
            *tok = PDF_TEOF;
            return fz_okay;
        case ISWHITE:
            lexwhite(f);
            break;
        case '%':
            lexcomment(f);
            break;
        case '/':
            lexname(f, buf, n);
            *sl = strlen(buf);
            *tok = PDF_TNAME;
            return fz_okay;
        case '(':
            *sl = lexstring(f, buf, n);
            *tok = PDF_TSTRING;
            return fz_okay;
        case ')':
            *tok = PDF_TERROR;
            goto cleanuperror;
        case '<':
            c = fz_readbyte(f);
            if (c == '<')
            {
                *tok = PDF_TODICT;
            }
            else
            {
                fz_unreadbyte(f);
                *sl = lexhexstring(f, buf, n);
                *tok = PDF_TSTRING;
            }
            return fz_okay;
        case '>':
            c = fz_readbyte(f);
            if (c == '>')
            {
                *tok = PDF_TCDICT;
                return fz_okay;
            }
            *tok = PDF_TERROR;
            goto cleanuperror;
        case '[':
            *tok = PDF_TOARRAY;
            return fz_okay;
        case ']':
            *tok = PDF_TCARRAY;
            return fz_okay;
        case '{':
            *tok = PDF_TOBRACE;
            return fz_okay;
        case '}':
            *tok = PDF_TCBRACE;
            return fz_okay;
        case ISNUMBER:
            fz_unreadbyte(f);
            *sl = lexnumber(f, buf, n, tok);
            return fz_okay;
        default: /* isregular: !isdelim && !iswhite && c != EOF */
            fz_unreadbyte(f);
            lexname(f, buf, n);
            *sl = strlen(buf);
            *tok = pdf_tokenfromkeyword(buf);
            return fz_okay;
        }
    }

cleanuperror:
    *tok = PDF_TERROR;
    return fz_throw("lexical error");
}
Ejemplo n.º 25
0
int
pdf_lex(fz_stream *f, unsigned char *buf, int n, int *sl)
{
	int c;

	while (1)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
			return PDF_TEOF;

		else if (iswhite(c))
			lexwhite(f);

		else if (c == '%')
			lexcomment(f);


		else if (c == '/')
		{
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			return PDF_TNAME;
		}

		else if (c == '(')
		{
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			return PDF_TSTRING;
		}

		else if (c == '<')
		{
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<')
			{
				fz_readbyte(f);
				return PDF_TODICT;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				return PDF_TSTRING;
			}
		}

		else if (c == '>')
		{
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>')
				return PDF_TCDICT;
			return PDF_TERROR;
		}

		else if (c == '[')
		{
			fz_readbyte(f);
			return PDF_TOARRAY;
		}

		else if (c == ']')
		{
			fz_readbyte(f);
			return PDF_TCARRAY;
		}

		else if (c == '{')
		{
			fz_readbyte(f);
			return PDF_TOBRACE;
		}

		else if (c ==  '}')
		{
			fz_readbyte(f);
			return PDF_TCBRACE;
		}

		else if (isnumber(c))
		{
			lexnumber(f, buf, n);
			*sl = strlen(buf);
			if (strchr(buf, '.'))
				return PDF_TREAL;
			return PDF_TINT;
		}

		else if (isregular(c))
		{
			lexname(f, buf, n);
			*sl = strlen(buf);
			return tokenfromkeyword(buf);
		}

		else
			return PDF_TERROR;
	}
}
Ejemplo n.º 26
0
static int
lexstring(fz_stream *f, char *buf, int n)
{
    char *s = buf;
    char *e = buf + n;
    int bal = 1;
    int oct;
    int c;

    while (s < e)
    {
        c = fz_readbyte(f);
        switch (c)
        {
        case EOF:
            goto end;
        case '(':
            bal++;
            *s++ = c;
            break;
        case ')':
            bal --;
            if (bal == 0)
                goto end;
            *s++ = c;
            break;
        case '\\':
            c = fz_readbyte(f);
            switch (c)
            {
            case EOF:
                goto end;
            case 'n':
                *s++ = '\n';
                break;
            case 'r':
                *s++ = '\r';
                break;
            case 't':
                *s++ = '\t';
                break;
            case 'b':
                *s++ = '\b';
                break;
            case 'f':
                *s++ = '\f';
                break;
            case '(':
                *s++ = '(';
                break;
            case ')':
                *s++ = ')';
                break;
            case '\\':
                *s++ = '\\';
                break;
            case RANGE_0_9:
                oct = c - '0';
                c = fz_readbyte(f);
                if (c >= '0' && c <= '9')
                {
                    oct = oct * 8 + (c - '0');
                    c = fz_readbyte(f);
                    if (c >= '0' && c <= '9')
                        oct = oct * 8 + (c - '0');
                    else if (c != EOF)
                        fz_unreadbyte(f);
                }
                else if (c != EOF)
                    fz_unreadbyte(f);
                *s++ = oct;
                break;
            case '\n':
                break;
            case '\r':
                c = fz_readbyte(f);
                if ((c != '\n') && (c != EOF))
                    fz_unreadbyte(f);
                break;
            default:
                *s++ = c;
            }
            break;
        default:
            *s++ = c;
            break;
        }
    }
end:
    return s - buf;
}
Ejemplo n.º 27
0
static void
lexname(fz_stream *f, char *s, int n)
{
    while (n > 1)
    {
        int c = fz_readbyte(f);
        switch (c)
        {
        case ISWHITE:
        case ISDELIM:
            fz_unreadbyte(f);
            goto end;
        case EOF:
            goto end;
        case '#':
        {
            int d;
            c = fz_readbyte(f);
            switch (c)
            {
            case RANGE_0_9:
                d = (c - '0') << 4;
                break;
            case RANGE_a_f:
                d = (c - 'a' + 10) << 4;
                break;
            case RANGE_A_F:
                d = (c - 'A' + 10) << 4;
                break;
            default:
                fz_unreadbyte(f);
            /* fallthrough */
            case EOF:
                goto end;
            }
            c = fz_readbyte(f);
            switch (c)
            {
            case RANGE_0_9:
                c -= '0';
                break;
            case RANGE_a_f:
                c -= 'a' - 10;
                break;
            case RANGE_A_F:
                c -= 'A' - 10;
                break;
            default:
                fz_unreadbyte(f);
            /* fallthrough */
            case EOF:
                *s++ = d;
                n--;
                goto end;
            }
            *s++ = d + c;
            n--;
            break;
        }
        default:
            *s++ = c;
            n--;
            break;
        }
    }
end:
    *s = '\0';
}
Ejemplo n.º 28
0
fz_error
fz_seek(fz_stream *stm, int offset, int whence)
{
	fz_error error;
	fz_buffer *buf = stm->buffer;
	int t, c;

	if (stm->dead)
		return fz_throw("assert: seek in dead stream");

	if (whence == 1)
	{
		int cur = fz_tell(stm);
		if (cur < 0)
			return fz_throw("cannot tell current position");
		offset = cur + offset;
		whence = 0;
	}

	buf->eof = 0;

	switch (stm->kind)
	{
	case FZ_SFILE:
		t = lseek(stm->file, offset, whence);
		if (t < 0)
		{
			stm->dead = 1;
			return fz_throw("syserr: lseek: %s", strerror(errno));
		}

		buf->rp = buf->bp;
		buf->wp = buf->bp;

		return fz_okay;

	case FZ_SFILTER:
		if (whence == 0)
		{
			if (offset < fz_tell(stm))
			{
				stm->dead = 1;
				return fz_throw("assert: seek backwards in filter");
			}
			while (fz_tell(stm) < offset)
			{
				c = fz_readbyte(stm);
				if (c == EOF)
				{
					error = fz_readerror(stm);
					if (error)
						return fz_rethrow(error, "cannot seek forward in filter");
					break;
				}
			}
			return fz_okay;
		}

		stm->dead = 1;
		return fz_throw("assert: relative seek in filter");

	case FZ_SBUFFER:
		if (whence == 0)
			buf->rp = CLAMP(buf->bp + offset, buf->bp, buf->ep);
		else
			buf->rp = CLAMP(buf->ep + offset, buf->bp, buf->ep);
		return fz_okay;

	default:
		return fz_throw("unknown stream type");
	}
}
Ejemplo n.º 29
0
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap,
		int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0, stmofs;
	int tok, len;
	int a, b;

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
			{
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&obj, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
	}
	if (error) goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
			else
				c = fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
		stmofs = 0;
	else
		goto cleanup;

	if (ooid) *ooid = oid;
	if (ogid) *ogid = gid;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return nil;

cleanup:
	if (obj) fz_dropobj(obj);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}