static fz_error
pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
    fz_error error;
    int len;
    char *s;
    int n;
    int t;
    pdf_token_e tok;
    int c;

    pdf_logxref("load old xref format trailer\n");

    fz_readline(xref->file, buf, cap);
    if (strncmp(buf, "xref", 4) != 0)
        return fz_throw("cannot find xref marker");

    while (1)
    {
        c = fz_peekbyte(xref->file);
        if (!(c >= '0' && c <= '9'))
            break;

        fz_readline(xref->file, buf, cap);
        s = buf;
        fz_strsep(&s, " "); /* ignore ofs */
        if (!s)
            return fz_throw("invalid range marker in xref");
        len = atoi(fz_strsep(&s, " "));

        /* broken pdfs where the section is not on a separate line */
        if (s && *s != '\0')
            fz_seek(xref->file, -(2 + (int)strlen(s)), 1);

        t = fz_tell(xref->file);
        if (t < 0)
            return fz_throw("cannot tell in file");

        fz_seek(xref->file, t + 20 * len, 0);
    }

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TTRAILER)
        return fz_throw("expected trailer marker");

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TODICT)
        return fz_throw("expected trailer dictionary");

    error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    return fz_okay;
}
Exemple #2
0
static fz_error
pdf_repairobjstm(pdf_xref *xref, int num, int gen)
{
	fz_error error;
	fz_obj *obj;
	fz_stream *stm;
	int tok;
	int i, n, count;
	char buf[256];

	error = pdf_loadobject(&obj, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen);

	count = fz_toint(fz_dictgets(obj, "N"));

	fz_dropobj(obj);

	error = pdf_openstream(&stm, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot open object stream object (%d %d R)", num, gen);

	for (i = 0; i < count; i++)
	{
		error = pdf_lex(&tok, stm, buf, sizeof buf, &n);
		if (error || tok != PDF_TINT)
		{
			fz_close(stm);
			return fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
		}

		n = atoi(buf);
		if (n >= xref->len)
			pdf_resizexref(xref, n + 1);

		xref->table[n].ofs = num;
		xref->table[n].gen = i;
		xref->table[n].stmofs = 0;
		xref->table[n].obj = nil;
		xref->table[n].type = 'o';

		error = pdf_lex(&tok, stm, buf, sizeof buf, &n);
		if (error || tok != PDF_TINT)
		{
			fz_close(stm);
			return fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
		}
	}

	fz_close(stm);
	return fz_okay;
}
Exemple #3
0
static float
pdf_extract_font_size(pdf_xref *xref, char *appearance, char **font_name)
{
	fz_context *ctx = xref->ctx;
	fz_stream *stream = fz_open_memory(ctx, appearance, strlen(appearance));
	float font_size = 0;
	int tok, len;

	*font_name = NULL;
	do
	{
		fz_error error = pdf_lex(&tok, stream, xref->scratch, sizeof(xref->scratch), &len);
		if (error || tok == PDF_TOK_EOF)
		{
			fz_free(ctx, *font_name);
			*font_name = NULL;
			break;
		}
		if (tok == PDF_TOK_NAME)
		{
			fz_free(ctx, *font_name);
			*font_name = fz_strdup(ctx, xref->scratch);
		}
		else if (tok == PDF_TOK_REAL || tok == PDF_TOK_INT)
		{
			font_size = fz_atof(xref->scratch);
		}
	} while (tok != PDF_TOK_KEYWORD || strcmp(xref->scratch, "Tf") != 0);
	fz_close(stream);
	return font_size;
}
fz_error
pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error;
	int tok;
	int len;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse token in object stream");

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_NAME: *op = fz_new_name(buf); break;
	case PDF_TOK_REAL: *op = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: *op = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: *op = fz_new_bool(1); break;
	case PDF_TOK_FALSE: *op = fz_new_bool(0); break;
	case PDF_TOK_NULL: *op = fz_new_null(); break;
	case PDF_TOK_INT: *op = fz_new_int(atoi(buf)); break;
	default: return fz_throw("unknown token in object stream");
	}

	return fz_okay;
}
pdf_obj *
pdf_parse_stm_obj(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{
	pdf_token tok;
	fz_context *ctx = file->ctx;

	tok = pdf_lex(file, buf);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		return pdf_parse_array(doc, file, buf);
	case PDF_TOK_OPEN_DICT:
		return pdf_parse_dict(doc, file, buf);
	case PDF_TOK_NAME: return pdf_new_name(doc, buf->scratch); break;
	case PDF_TOK_REAL: return pdf_new_real(doc, buf->f); break;
	case PDF_TOK_STRING: return pdf_new_string(doc, buf->scratch, buf->len); break;
	case PDF_TOK_TRUE: return pdf_new_bool(doc, 1); break;
	case PDF_TOK_FALSE: return pdf_new_bool(doc, 0); break;
	case PDF_TOK_NULL: return pdf_new_null(doc); break;
	case PDF_TOK_INT: return pdf_new_int(doc, buf->i); break;
	default: fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in object stream");
	}
	return NULL; /* Stupid MSVC */
}
fz_error
pdf_parsestmobj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error;
	pdf_token_e tok;
	int len;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse token in object stream");

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TODICT:
		error = pdf_parsedict(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TNAME: *op = fz_newname(buf); break;
	case PDF_TREAL: *op = fz_newreal(atof(buf)); break;
	case PDF_TSTRING: *op = fz_newstring(buf, len); break;
	case PDF_TTRUE: *op = fz_newbool(1); break;
	case PDF_TFALSE: *op = fz_newbool(0); break;
	case PDF_TNULL: *op = fz_newnull(); break;
	case PDF_TINT: *op = fz_newint(atoi(buf)); break;
	default: return fz_throw("unknown token in object stream");
	}

	return fz_okay;
}
Exemple #7
0
static int
pdf_lex_cmap(fz_stream *file, pdf_lexbuf *buf)
{
	int tok = pdf_lex(file, buf);

	/* RJW: Lost debugging here: "cannot parse cmap token" */

	if (tok == PDF_TOK_KEYWORD)
		tok = pdf_cmap_token_from_keyword(buf->scratch);

	return tok;
}
Exemple #8
0
static int
pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl)
{
	int tok = pdf_lex(file, buf, n, sl);

	/* RJW: Lost debugging here: "cannot parse cmap token" */

	if (tok == PDF_TOK_KEYWORD)
		tok = pdf_cmap_token_from_keyword(buf);

	return tok;
}
static fz_error lexcmap(pdf_token_e *tok, fz_stream *file, char *buf, int n, int *sl)
{
	fz_error error;

	error = pdf_lex(tok, file, buf, n, sl);
	if (error)
		return fz_rethrow(error, "cannot parse cmap token");

	if (*tok == PDF_TKEYWORD)
		*tok = pdf_cmaptokenfromkeyword(buf);

	return fz_okay;
}
Exemple #10
0
pdf_obj *
pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
    int tok;
    fz_context *ctx = file->ctx;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse token in object stream") */

    switch (tok)
    {
    case PDF_TOK_OPEN_ARRAY:
        return pdf_parse_array(xref, file, buf);
    /* RJW: "cannot parse object stream" */
    case PDF_TOK_OPEN_DICT:
        return pdf_parse_dict(xref, file, buf);
    /* RJW: "cannot parse object stream" */
    case PDF_TOK_NAME:
        return fz_new_name(ctx, buf->scratch);
        break;
    case PDF_TOK_REAL:
        return pdf_new_real(ctx, buf->f);
        break;
    case PDF_TOK_STRING:
        return pdf_new_string(ctx, buf->scratch, buf->len);
        break;
    case PDF_TOK_TRUE:
        return pdf_new_bool(ctx, 1);
        break;
    case PDF_TOK_FALSE:
        return pdf_new_bool(ctx, 0);
        break;
    case PDF_TOK_NULL:
        return pdf_new_null(ctx);
        break;
    case PDF_TOK_INT:
        return pdf_new_int(ctx, buf->i);
        break;
    default:
        fz_throw(ctx, "unknown token in object stream");
    }
    return NULL; /* Stupid MSVC */
}
fz_error *
pdf_parsestmobj(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	int tok, len;

	tok = pdf_lex(file, buf, cap, &len);

	switch (tok)
	{
		case PDF_TOARRAY:	return pdf_parsearray(op, file, buf, cap);
		case PDF_TODICT:	return pdf_parsedict(op, file, buf, cap);
		case PDF_TNAME:		return fz_newname(op, buf);
		case PDF_TREAL:		return fz_newreal(op, atof(buf));
		case PDF_TSTRING:	return fz_newstring(op, buf, len);
		case PDF_TTRUE:		return fz_newbool(op, 1);
		case PDF_TFALSE:	return fz_newbool(op, 0);
		case PDF_TNULL:		return fz_newnull(op);
		case PDF_TINT:		return fz_newint(op, atoi(buf));
	}

	return fz_throw("syntaxerror: corrupt object stream");
}
static fz_error
pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
    fz_error error;
    int ofs, len;
    char *s;
    int n;
    pdf_token_e tok;
    int i;
    int c;

    pdf_logxref("load old xref format\n");

    fz_readline(xref->file, buf, cap);
    if (strncmp(buf, "xref", 4) != 0)
        return fz_throw("cannot find xref marker");

    while (1)
    {
        c = fz_peekbyte(xref->file);
        if (!(c >= '0' && c <= '9'))
            break;

        fz_readline(xref->file, buf, cap);
        s = buf;
        ofs = atoi(fz_strsep(&s, " "));
        len = atoi(fz_strsep(&s, " "));

        /* broken pdfs where the section is not on a separate line */
        if (s && *s != '\0')
        {
            fz_warn("broken xref section. proceeding anyway.");
            fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
        }

        /* broken pdfs where size in trailer undershoots entries in xref sections */
        if (ofs + len > xref->cap)
        {
            fz_warn("broken xref section, proceeding anyway.");
            xref->cap = ofs + len;
            xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry));
        }

        if ((ofs + len) > xref->len)
        {
            for (i = xref->len; i < (ofs + len); i++)
            {
                xref->table[i].ofs = 0;
                xref->table[i].gen = 0;
                xref->table[i].stmofs = 0;
                xref->table[i].obj = nil;
                xref->table[i].type = 0;
            }
            xref->len = ofs + len;
        }

        for (i = ofs; i < ofs + len; i++)
        {
            n = fz_read(xref->file, (unsigned char *) buf, 20);
            if (n < 0)
                return fz_rethrow(n, "cannot read xref table");
            if (!xref->table[i].type)
            {
                s = buf;

                /* broken pdfs where line start with white space */
                while (*s != '\0' && iswhite(*s))
                    s++;

                xref->table[i].ofs = atoi(s);
                xref->table[i].gen = atoi(s + 11);
                xref->table[i].type = s[17];
            }
        }
    }

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TTRAILER)
        return fz_throw("expected trailer marker");

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TODICT)
        return fz_throw("expected trailer dictionary");

    error = pdf_parsedict(trailerp, xref, xref->file, buf, cap);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    return fz_okay;
}
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0, stmofs;
	pdf_token_e tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TNAME: obj = fz_newname(buf); break;
	case PDF_TREAL: obj = fz_newreal(atof(buf)); break;
	case PDF_TSTRING: obj = fz_newstring(buf, len); break;
	case PDF_TTRUE: obj = fz_newbool(1); break;
	case PDF_TFALSE: obj = fz_newbool(0); break;
	case PDF_TNULL: obj = fz_newnull(); break;

	case PDF_TINT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			obj = fz_newint(a);
			goto skip;
		}
		if (tok == PDF_TINT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TR)
			{
				obj = fz_newindirect(a, b, xref);
				break;
			}
		}
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	case PDF_TENDOBJ:
		obj = fz_newnull();
		goto skip;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_dropobj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		while (c == ' ')
			c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
	{
		stmofs = 0;
	}
	else
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
		stmofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return fz_okay;
}
Exemple #14
0
static fz_error
fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
	fz_error error;
	int tok;
	int stmlen;
	int len;
	int n;

	*stmofsp = 0;
	*stmlenp = -1;

	stmlen = 0;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse object");
	if (tok == PDF_TODICT)
	{
		fz_obj *dict, *obj;

		/* Send nil xref so we don't try to resolve references */
		error = pdf_parsedict(&dict, nil, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");

		obj = fz_dictgets(dict, "Type");
		if (fz_isname(obj) && !strcmp(fz_toname(obj), "XRef"))
		{
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (*encrypt)
					fz_dropobj(*encrypt);
				*encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (*id)
					fz_dropobj(*id);
				*id = fz_keepobj(obj);
			}
		}

		obj = fz_dictgets(dict, "Length");
		if (fz_isint(obj))
			stmlen = fz_toint(obj);

		fz_dropobj(dict);
	}

	while ( tok != PDF_TSTREAM &&
		tok != PDF_TENDOBJ &&
		tok != PDF_TERROR &&
		tok != PDF_TEOF )
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj or stream token");
	}

	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r') {
			c = fz_peekbyte(file);
			if (c == '\n')
				fz_readbyte(file);
		}

		*stmofsp = fz_tell(file);
		if (*stmofsp < 0)
			return fz_throw("cannot seek in file");

		if (stmlen > 0)
		{
			fz_seek(file, *stmofsp + stmlen, 0);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				fz_catch(error, "cannot find endstream token, falling back to scanning");
			if (tok == PDF_TENDSTREAM)
				goto atobjend;
			fz_seek(file, *stmofsp, 0);
		}

		n = fz_read(file, (unsigned char *) buf, 9);
		if (n < 0)
			return fz_rethrow(n, "cannot read from file");

		while (memcmp(buf, "endstream", 9) != 0)
		{
			c = fz_readbyte(file);
			if (c == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = c;
		}

		*stmlenp = fz_tell(file) - *stmofsp - 9;

atobjend:
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj token");
		if (tok != PDF_TENDOBJ)
			fz_warn("object missing 'endobj' token");
	}

	return fz_okay;
}
fz_error
pdf_parsedict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	pdf_token_e tok;
	int len;
	int a, b;

	dict = fz_newdict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

skip:
		if (tok == PDF_TCDICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TNAME)
		{
			fz_dropobj(dict);
			return fz_throw("invalid key in dict");;
		}

		key = fz_newname(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOARRAY:
			error = pdf_parsearray(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TNAME: val = fz_newname(buf); break;
		case PDF_TREAL: val = fz_newreal(atof(buf)); break;
		case PDF_TSTRING: val = fz_newstring(buf, len); break;
		case PDF_TTRUE: val = fz_newbool(1); break;
		case PDF_TFALSE: val = fz_newbool(0); break;
		case PDF_TNULL: val = fz_newnull(); break;

		case PDF_TINT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				val = fz_newint(a);
				fz_dictput(dict, key, val);
				fz_dropobj(val);
				fz_dropobj(key);
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_dropobj(key);
					fz_dropobj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TR)
				{
					val = fz_newindirect(a, b, xref);
					break;
				}
			}
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			return fz_throw("unknown token in dict");
		}

		fz_dictput(dict, key, val);
		fz_dropobj(val);
		fz_dropobj(key);
	}
}
fz_error
pdf_parsearray(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = nil;
	fz_obj *obj = nil;
	int a = 0, b = 0, n = 0;
	pdf_token_e tok;
	int len;

	ary = fz_newarray(4);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		if (tok != PDF_TINT && tok != PDF_TR)
		{
			if (n > 0)
			{
				obj = fz_newint(a);
				fz_arraypush(ary, obj);
				fz_dropobj(obj);
			}
			if (n > 1)
			{
				obj = fz_newint(b);
				fz_arraypush(ary, obj);
				fz_dropobj(obj);
			}
			n = 0;
		}

		if (tok == PDF_TINT && n == 2)
		{
			obj = fz_newint(a);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TINT:
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;

		case PDF_TR:
			if (n != 2)
			{
				fz_dropobj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			obj = fz_newindirect(a, b, xref);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			n = 0;
			break;

		case PDF_TOARRAY:
			error = pdf_parsearray(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;

		case PDF_TNAME:
			obj = fz_newname(buf);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TREAL:
			obj = fz_newreal(atof(buf));
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TSTRING:
			obj = fz_newstring(buf, len);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TTRUE:
			obj = fz_newbool(1);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TFALSE:
			obj = fz_newbool(0);
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;
		case PDF_TNULL:
			obj = fz_newnull();
			fz_arraypush(ary, obj);
			fz_dropobj(obj);
			break;

		default:
			fz_dropobj(ary);
			return fz_throw("cannot parse token in array");
		}
	}
}
Exemple #17
0
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/* look for '%PDF' version marker within first kilobyte of file */
	n = fz_read(xref->file, (unsigned char *)buf, MAX(bufsize, 1024));
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	for (i = 0; i < n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			pdf_logxref("found object: (%d %d R)\n", num, gen);

			if (listlen + 1 == listcap)
			{
				listcap = (listcap * 3) / 2;
				list = fz_realloc(list, listcap, sizeof(struct entry));
			}

			list[listlen].num = num;
			list[listlen].gen = gen;
			list[listlen].ofs = numofs;
			list[listlen].stmofs = stmofs;
			list[listlen].stmlen = stmlen;
			listlen ++;

			if (num > maxnum)
				maxnum = num;
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}

	}

	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer, Root will be added later */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}
Exemple #18
0
pdf_obj *
pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
	fz_stream *file, pdf_lexbuf *buf,
	int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair)
{
	pdf_obj *obj = NULL;
	int num = 0, gen = 0;
	fz_off_t stm_ofs;
	pdf_token tok;
	fz_off_t a, b;

	fz_var(obj);

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number");
	}
	num = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num);
	}
	gen = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_OBJ)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen);
	}

	tok = pdf_lex(ctx, file, buf);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		obj = pdf_parse_array(ctx, doc, file, buf);
		break;

	case PDF_TOK_OPEN_DICT:
		obj = pdf_parse_dict(ctx, doc, file, buf);
		break;

	case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break;
	case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break;
	case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break;
	case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break;
	case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break;
	case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break;

	case PDF_TOK_INT:
		a = buf->i;
		tok = pdf_lex(ctx, file, buf);

		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			obj = pdf_new_int_offset(ctx, doc, a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = buf->i;
			tok = pdf_lex(ctx, file, buf);
			if (tok == PDF_TOK_R)
			{
				obj = pdf_new_indirect(ctx, doc, a, b);
				break;
			}
		}
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		obj = pdf_new_null(ctx, doc);
		goto skip;

	default:
		fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen);
	}

	fz_try(ctx)
	{
		tok = pdf_lex(ctx, file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, obj);
		fz_rethrow(ctx);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c = fz_read_byte(ctx, file);
		while (c == ' ')
			c = fz_read_byte(ctx, file);
		if (c == '\r')
		{
			c = fz_peek_byte(ctx, file);
			if (c != '\n')
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(ctx, file);
		}
		stm_ofs = fz_tell(ctx, file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	return obj;
}
fz_error
pdf_loadobjstm(pdf_xref *xref, int oid, int gen, char *buf, int cap)
{
    fz_error error;
    fz_stream *stm;
    fz_obj *objstm;
    int *oidbuf;
    int *ofsbuf;

    fz_obj *obj;
    int first;
    int count;
    int i, n;
    pdf_token_e tok;

    pdf_logxref("loadobjstm (%d %d R)\n", oid, gen);

    error = pdf_loadobject(&objstm, xref, oid, gen);
    if (error)
        return fz_rethrow(error, "cannot load object stream object");

    count = fz_toint(fz_dictgets(objstm, "N"));
    first = fz_toint(fz_dictgets(objstm, "First"));

    pdf_logxref("  count %d\n", count);

    oidbuf = fz_malloc(count * sizeof(int));
    if (!oidbuf)
    {
        error = fz_rethrow(-1, "out of memory: object id buffer");
        goto cleanupobj;
    }

    ofsbuf = fz_malloc(count * sizeof(int));
    if (!ofsbuf)
    {
        error = fz_rethrow(-1, "out of memory: offset buffer");
        goto cleanupoid;
    }

    error = pdf_openstream(&stm, xref, oid, gen);
    if (error)
    {
        error = fz_rethrow(error, "cannot open object stream");
        goto cleanupofs;
    }

    for (i = 0; i < count; i++)
    {
        error = pdf_lex(&tok, stm, buf, cap, &n);
        if (error || tok != PDF_TINT)
        {
            error = fz_rethrow(error, "corrupt object stream");
            goto cleanupstm;
        }
        oidbuf[i] = atoi(buf);

        error = pdf_lex(&tok, stm, buf, cap, &n);
        if (error || tok != PDF_TINT)
        {
            error = fz_rethrow(error, "corrupt object stream");
            goto cleanupstm;
        }
        ofsbuf[i] = atoi(buf);
    }

    error = fz_seek(stm, first, 0);
    if (error)
    {
        error = fz_rethrow(error, "cannot seek in object stream");
        goto cleanupstm;
    }

    for (i = 0; i < count; i++)
    {
        /* FIXME: seek to first + ofsbuf[i] */

        error = pdf_parsestmobj(&obj, xref, stm, buf, cap);
        if (error)
        {
            error = fz_rethrow(error, "cannot parse object %d in stream", i);
            goto cleanupstm;
        }

        if (oidbuf[i] < 1 || oidbuf[i] >= xref->len)
        {
            fz_dropobj(obj);
            error = fz_throw("object id (%d 0 R) out of range (0..%d)", oidbuf[i], xref->len - 1);
            goto cleanupstm;
        }

        if (xref->table[oidbuf[i]].obj)
            fz_dropobj(xref->table[oidbuf[i]].obj);
        xref->table[oidbuf[i]].obj = obj;
    }

    fz_dropstream(stm);
    fz_free(ofsbuf);
    fz_free(oidbuf);
    fz_dropobj(objstm);
    return fz_okay;

cleanupstm:
    fz_dropstream(stm);
cleanupofs:
    fz_free(ofsbuf);
cleanupoid:
    fz_free(oidbuf);
cleanupobj:
    fz_dropobj(objstm);
    return error; /* already rethrown */
}
Exemple #20
0
pdf_obj *
pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
    pdf_obj *ary = NULL;
    pdf_obj *obj = NULL;
    int a = 0, b = 0, n = 0;
    int tok;
    fz_context *ctx = file->ctx;
    pdf_obj *op;

    fz_var(obj);

    ary = pdf_new_array(ctx, 4);

    fz_try(ctx)
    {
        while (1)
        {
            tok = pdf_lex(file, buf);

            if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
            {
                if (n > 0)
                {
                    obj = pdf_new_int(ctx, a);
                    pdf_array_push(ary, obj);
                    pdf_drop_obj(obj);
                    obj = NULL;
                }
                if (n > 1)
                {
                    obj = pdf_new_int(ctx, b);
                    pdf_array_push(ary, obj);
                    pdf_drop_obj(obj);
                    obj = NULL;
                }
                n = 0;
            }

            if (tok == PDF_TOK_INT && n == 2)
            {
                obj = pdf_new_int(ctx, a);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                a = b;
                n --;
            }

            switch (tok)
            {
            case PDF_TOK_CLOSE_ARRAY:
                op = ary;
                goto end;

            case PDF_TOK_INT:
                if (n == 0)
                    a = buf->i;
                if (n == 1)
                    b = buf->i;
                n ++;
                break;

            case PDF_TOK_R:
                if (n != 2)
                    fz_throw(ctx, "cannot parse indirect reference in array");
                obj = pdf_new_indirect(ctx, a, b, xref);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                n = 0;
                break;

            case PDF_TOK_OPEN_ARRAY:
                obj = pdf_parse_array(xref, file, buf);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;

            case PDF_TOK_OPEN_DICT:
                obj = pdf_parse_dict(xref, file, buf);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;

            case PDF_TOK_NAME:
                obj = fz_new_name(ctx, buf->scratch);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_REAL:
                obj = pdf_new_real(ctx, buf->f);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_STRING:
                obj = pdf_new_string(ctx, buf->scratch, buf->len);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_TRUE:
                obj = pdf_new_bool(ctx, 1);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_FALSE:
                obj = pdf_new_bool(ctx, 0);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_NULL:
                obj = pdf_new_null(ctx);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;

            default:
                fz_throw(ctx, "cannot parse token in array");
            }
        }
end:
        {}
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(obj);
        pdf_drop_obj(ary);
        fz_throw(ctx, "cannot parse array");
    }
    return op;
}
fz_error *
pdf_parsearray(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *ary = nil;
	fz_obj *obj = nil;
	int a = 0, b = 0, n = 0;
	int tok, len;

	error = fz_newarray(op, 4);
	if (error) return error;
	ary = *op;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

		if (tok != PDF_TINT && tok != PDF_TR)
		{
			if (n > 0)
			{
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error) goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			if (n > 1)
			{
				error = fz_newint(&obj, b);
				if (error) goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error) goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			n = 0;
		}

		if (tok == PDF_TINT && n == 2)
		{
			error = fz_newint(&obj, a);
			if (error) goto cleanup;
			error = fz_arraypush(ary, obj);
			if (error) goto cleanup;
			fz_dropobj(obj);
			obj = nil;
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			return nil;
		case PDF_TINT:
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;
		case PDF_TR:
			if (n != 2)
				goto cleanup;
			error = fz_newindirect(&obj, a, b);
			if (error) goto cleanup;
			n = 0;
			break;
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		default:		goto cleanup;
		}
		if (error) goto cleanup;

		if (obj)
		{
			error = fz_arraypush(ary, obj);
			if (error) goto cleanup;
			fz_dropobj(obj);
		}

		obj = nil;
	}

cleanup:
	if (obj) fz_dropobj(obj);
	if (ary) fz_dropobj(ary);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt array");
}
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap,
		int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0, stmofs;
	int tok, len;
	int a, b;

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
			{
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&obj, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
	}
	if (error) goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
			else
				c = fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
		stmofs = 0;
	else
		goto cleanup;

	if (ooid) *ooid = oid;
	if (ogid) *ogid = gid;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return nil;

cleanup:
	if (obj) fz_dropobj(obj);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}
static fz_error
pdf_loadobjstm(pdf_xref *xref, int num, int gen, char *buf, int cap)
{
    fz_error error;
    fz_stream *stm;
    fz_obj *objstm;
    int *numbuf;
    int *ofsbuf;

    fz_obj *obj;
    int first;
    int count;
    int i, n;
    pdf_token_e tok;

    pdf_logxref("loadobjstm (%d %d R)\n", num, gen);

    error = pdf_loadobject(&objstm, xref, num, gen);
    if (error)
        return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen);

    count = fz_toint(fz_dictgets(objstm, "N"));
    first = fz_toint(fz_dictgets(objstm, "First"));

    pdf_logxref("\tcount %d\n", count);

    numbuf = fz_malloc(count * sizeof(int));
    ofsbuf = fz_malloc(count * sizeof(int));

    error = pdf_openstream(&stm, xref, num, gen);
    if (error)
    {
        error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen);
        goto cleanupbuf;
    }

    for (i = 0; i < count; i++)
    {
        error = pdf_lex(&tok, stm, buf, cap, &n);
        if (error || tok != PDF_TINT)
        {
            error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
            goto cleanupstm;
        }
        numbuf[i] = atoi(buf);

        error = pdf_lex(&tok, stm, buf, cap, &n);
        if (error || tok != PDF_TINT)
        {
            error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
            goto cleanupstm;
        }
        ofsbuf[i] = atoi(buf);
    }

    fz_seek(stm, first, 0);

    for (i = 0; i < count; i++)
    {
        fz_seek(stm, first + ofsbuf[i], 0);

        error = pdf_parsestmobj(&obj, xref, stm, buf, cap);
        if (error)
        {
            error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen);
            goto cleanupstm;
        }

        if (numbuf[i] < 1 || numbuf[i] >= xref->len)
        {
            fz_dropobj(obj);
            error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1);
            goto cleanupstm;
        }

        if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num)
        {
            if (xref->table[numbuf[i]].obj)
                fz_dropobj(xref->table[numbuf[i]].obj);
            xref->table[numbuf[i]].obj = obj;
        }
        else
        {
            fz_dropobj(obj);
        }
    }

    fz_close(stm);
    fz_free(ofsbuf);
    fz_free(numbuf);
    fz_dropobj(objstm);
    return fz_okay;

cleanupstm:
    fz_close(stm);
cleanupbuf:
    fz_free(ofsbuf);
    fz_free(numbuf);
    fz_dropobj(objstm);
    return error; /* already rethrown */
}
pdf_obj *
pdf_parse_dict(pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
{
	pdf_obj *dict;
	pdf_obj *key = NULL;
	pdf_obj *val = NULL;
	pdf_token tok;
	int a, b;
	fz_context *ctx = file->ctx;

	dict = pdf_new_dict(doc, 8);

	fz_var(key);
	fz_var(val);

	fz_try(ctx)
	{
		while (1)
		{
			tok = pdf_lex(file, buf);
	skip:
			if (tok == PDF_TOK_CLOSE_DICT)
				break;

			/* for BI .. ID .. EI in content streams */
			if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
				break;

			if (tok != PDF_TOK_NAME)
				fz_throw(ctx, FZ_ERROR_GENERIC, "invalid key in dict");

			key = pdf_new_name(doc, buf->scratch);

			tok = pdf_lex(file, buf);

			switch (tok)
			{
			case PDF_TOK_OPEN_ARRAY:
				val = pdf_parse_array(doc, file, buf);
				break;

			case PDF_TOK_OPEN_DICT:
				val = pdf_parse_dict(doc, file, buf);
				break;

			case PDF_TOK_NAME: val = pdf_new_name(doc, buf->scratch); break;
			case PDF_TOK_REAL: val = pdf_new_real(doc, buf->f); break;
			case PDF_TOK_STRING: val = pdf_new_string(doc, buf->scratch, buf->len); break;
			case PDF_TOK_TRUE: val = pdf_new_bool(doc, 1); break;
			case PDF_TOK_FALSE: val = pdf_new_bool(doc, 0); break;
			case PDF_TOK_NULL: val = pdf_new_null(doc); break;

			case PDF_TOK_INT:
				/* 64-bit to allow for numbers > INT_MAX and overflow */
				a = buf->i;
				tok = pdf_lex(file, buf);
				if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
					(tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
				{
					val = pdf_new_int(doc, a);
					pdf_dict_put(dict, key, val);
					pdf_drop_obj(val);
					val = NULL;
					pdf_drop_obj(key);
					key = NULL;
					goto skip;
				}
				if (tok == PDF_TOK_INT)
				{
					b = buf->i;
					tok = pdf_lex(file, buf);
					if (tok == PDF_TOK_R)
					{
						val = pdf_new_indirect(doc, a, b);
						break;
					}
				}
				fz_throw(ctx, FZ_ERROR_GENERIC, "invalid indirect reference in dict");

			default:
				fz_throw(ctx, FZ_ERROR_GENERIC, "unknown token in dict");
			}

			pdf_dict_put(dict, key, val);
			pdf_drop_obj(val);
			val = NULL;
			pdf_drop_obj(key);
			key = NULL;
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(dict);
		pdf_drop_obj(key);
		pdf_drop_obj(val);
		fz_rethrow_message(ctx, "cannot parse dict");
	}
	return dict;
}
Exemple #25
0
static fz_error
pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int ofs, len;
	char *s;
	int n;
	int tok;
	int i;
	int c;

	fz_read_line(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		c = fz_peek_byte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_read_line(xref->file, buf, cap);
		s = buf;
		ofs = atoi(fz_strsep(&s, " "));
		len = atoi(fz_strsep(&s, " "));

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (ofs + len > xref->len)
		{
			fz_warn("broken xref section, proceeding anyway.");
			pdf_resize_xref(xref, ofs + len);
		}

		for (i = ofs; i < ofs + len; i++)
		{
			n = fz_read(xref->file, (unsigned char *) buf, 20);
			if (n < 0)
				return fz_rethrow(n, "cannot read xref table");
			if (!xref->table[i].type)
			{
				s = buf;

				/* broken pdfs where line start with white space */
				while (*s != '\0' && iswhite(*s))
					s++;

				xref->table[i].ofs = atoi(s);
				xref->table[i].gen = atoi(s + 11);
				xref->table[i].type = s[17];
				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
					return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen);
			}
		}
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_TRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_OPEN_DICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	return fz_okay;
}
fz_error *
pdf_parsedict(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	int tok, len;
	int a, b;

	error = fz_newdict(op, 8);
	if (error) return error;
	dict = *op;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

skip:
		if (tok == PDF_TCDICT)
			return nil;

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
			return nil;

		if (tok != PDF_TNAME)
			goto cleanup;

		error = fz_newname(&key, buf);
		if (error) goto cleanup;

		tok = pdf_lex(file, buf, cap, &len);

		switch (tok)
		{
		case PDF_TOARRAY:	error = pdf_parsearray(&val, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&val, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&val, buf); break;
		case PDF_TREAL:		error = fz_newreal(&val, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&val, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&val, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&val, 0); break;
		case PDF_TNULL:		error = fz_newnull(&val); break;
		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				error = fz_newint(&val, a);
				if (error) goto cleanup;
				error = fz_dictput(dict, key, val);
				if (error) goto cleanup;
				fz_dropobj(val);
				fz_dropobj(key);
				key = val = nil;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&val, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
		}

		if (error) goto cleanup;

		error = fz_dictput(dict, key, val);
		if (error) goto cleanup;

		fz_dropobj(val);
		fz_dropobj(key);
		key = val = nil;
	}

cleanup:
	if (key) fz_dropobj(key);
	if (val) fz_dropobj(val);
	if (dict) fz_dropobj(dict);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt dictionary");
}
Exemple #27
0
pdf_obj *
pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
    pdf_obj *dict;
    pdf_obj *key = NULL;
    pdf_obj *val = NULL;
    int tok;
    int a, b;
    fz_context *ctx = file->ctx;

    dict = pdf_new_dict(ctx, 8);

    fz_var(key);
    fz_var(val);

    fz_try(ctx)
    {
        while (1)
        {
            tok = pdf_lex(file, buf);
skip:
            if (tok == PDF_TOK_CLOSE_DICT)
                break;

            /* for BI .. ID .. EI in content streams */
            if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
                break;

            if (tok != PDF_TOK_NAME)
                fz_throw(ctx, "invalid key in dict");

            key = fz_new_name(ctx, buf->scratch);

            tok = pdf_lex(file, buf);

            switch (tok)
            {
            case PDF_TOK_OPEN_ARRAY:
                /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1643 */
                fz_try(ctx)
                {
                    val = pdf_parse_array(xref, file, buf);
                }
                fz_catch(ctx)
                {
                    fz_warn(ctx, "ignoring broken array for '%s'", pdf_to_name(key));
                    pdf_drop_obj(key);
                    val = key = NULL;
                    do
                        tok = pdf_lex(file, buf);
                    while (tok != PDF_TOK_CLOSE_DICT && tok != PDF_TOK_CLOSE_ARRAY &&
                            tok != PDF_TOK_EOF && tok != PDF_TOK_OPEN_ARRAY && tok != PDF_TOK_OPEN_DICT);
                    if (tok == PDF_TOK_CLOSE_DICT)
                        goto skip;
                    if (tok == PDF_TOK_CLOSE_ARRAY)
                        continue;
                    fz_throw(ctx, "cannot make sense of broken array after all");
                }
                break;

            case PDF_TOK_OPEN_DICT:
                val = pdf_parse_dict(xref, file, buf);
                break;

            case PDF_TOK_NAME:
                val = fz_new_name(ctx, buf->scratch);
                break;
            case PDF_TOK_REAL:
                val = pdf_new_real(ctx, buf->f);
                break;
            case PDF_TOK_STRING:
                val = pdf_new_string(ctx, buf->scratch, buf->len);
                break;
            case PDF_TOK_TRUE:
                val = pdf_new_bool(ctx, 1);
                break;
            case PDF_TOK_FALSE:
                val = pdf_new_bool(ctx, 0);
                break;
            case PDF_TOK_NULL:
                val = pdf_new_null(ctx);
                break;

            case PDF_TOK_INT:
                /* 64-bit to allow for numbers > INT_MAX and overflow */
                a = buf->i;
                tok = pdf_lex(file, buf);
                if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
                        (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
                {
                    val = pdf_new_int(ctx, a);
                    fz_dict_put(dict, key, val);
                    pdf_drop_obj(val);
                    val = NULL;
                    pdf_drop_obj(key);
                    key = NULL;
                    goto skip;
                }
                if (tok == PDF_TOK_INT)
                {
                    b = buf->i;
                    tok = pdf_lex(file, buf);
                    if (tok == PDF_TOK_R)
                    {
                        val = pdf_new_indirect(ctx, a, b, xref);
                        break;
                    }
                }
                fz_throw(ctx, "invalid indirect reference in dict");

            default:
                fz_throw(ctx, "unknown token in dict");
            }

            fz_dict_put(dict, key, val);
            pdf_drop_obj(val);
            val = NULL;
            pdf_drop_obj(key);
            key = NULL;
        }
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(dict);
        pdf_drop_obj(key);
        pdf_drop_obj(val);
        fz_throw(ctx, "cannot parse dict");
    }
    return dict;
}
static fz_error
parsecode(pdf_function *func, fz_stream *stream, int *codeptr)
{
	fz_error error;
	char buf[64];
	int len;
	pdf_token_e tok;
	int opptr, elseptr;
	int a, b, mid, cmp;

	memset(buf, 0, sizeof(buf));

	while (1)
	{
		error = pdf_lex(&tok, stream, buf, sizeof buf, &len);
		if (error)
			return fz_rethrow(error, "calculator function lexical error");

		switch(tok)
		{
		case PDF_TEOF:
			return fz_throw("truncated calculator function");

		case PDF_TINT:
			error = resizecode(func, *codeptr);
			if (error)
				return fz_rethrow(error, "resize calculator function code");
			func->u.p.code[*codeptr].type = PSINT;
			func->u.p.code[*codeptr].u.i = atoi(buf);
			++*codeptr;
			break;

		case PDF_TREAL:
			error = resizecode(func, *codeptr);
			if (error)
				return fz_rethrow(error, "resize calculator function code");
			func->u.p.code[*codeptr].type = PSREAL;
			func->u.p.code[*codeptr].u.f = atof(buf);
			++*codeptr;
			break;

		case PDF_TOBRACE:
			opptr = *codeptr;
			*codeptr += 3;

			error = resizecode(func, opptr + 2);
			if (error)
				return fz_rethrow(error, "resize calculator function code");

			error = parsecode(func, stream, codeptr);
			if (error)
				return fz_rethrow(error, "error in 'if' branch");

			error = pdf_lex(&tok, stream, buf, sizeof buf, &len);
			if (error)
				return fz_rethrow(error, "calculator function syntax error");

			if (tok == PDF_TOBRACE)
			{
				elseptr = *codeptr;
				error = parsecode(func, stream, codeptr);
				if (error)
					return fz_rethrow(error, "error in 'else' branch");

				error = pdf_lex(&tok, stream, buf, sizeof buf, &len);
				if (error)
					return fz_rethrow(error, "calculator function syntax error");
			}
			else
			{
				elseptr = -1;
			}

			if (tok == PDF_TKEYWORD)
			{
				if (!strcmp(buf, "if"))
				{
					if (elseptr >= 0)
						return fz_throw("too many branches for 'if'");
					func->u.p.code[opptr].type = PSOPERATOR;
					func->u.p.code[opptr].u.op = PSOIF;
					func->u.p.code[opptr+2].type = PSBLOCK;
					func->u.p.code[opptr+2].u.block = *codeptr;
				}
				else if (!strcmp(buf, "ifelse"))
				{
					if (elseptr < 0)
						return fz_throw("not enough branches for 'ifelse'");
					func->u.p.code[opptr].type = PSOPERATOR;
					func->u.p.code[opptr].u.op = PSOIFELSE;
					func->u.p.code[opptr+1].type = PSBLOCK;
					func->u.p.code[opptr+1].u.block = elseptr;
					func->u.p.code[opptr+2].type = PSBLOCK;
					func->u.p.code[opptr+2].u.block = *codeptr;
				}
				else
				{
					return fz_throw("unknown keyword in 'if-else' context: '%s'", buf);
				}
			}
			else
			{
				return fz_throw("missing keyword in 'if-else' context");
			}
			break;

		case PDF_TCBRACE:
			error = resizecode(func, *codeptr);
			if (error)
				return fz_rethrow(error, "resize calculator function code");
			func->u.p.code[*codeptr].type = PSOPERATOR;
			func->u.p.code[*codeptr].u.op = PSORETURN;
			++*codeptr;
			return fz_okay;

		case PDF_TKEYWORD:
			cmp = -1;
			a = -1;
			b = sizeof(psopnames) / sizeof(psopnames[0]);
			while (b - a > 1)
			{
				mid = (a + b) / 2;
				cmp = strcmp(buf, psopnames[mid]);
				if (cmp > 0)
					a = mid;
				else if (cmp < 0)
					b = mid;
				else
					a = b = mid;
			}
			if (cmp != 0)
				return fz_throw("unknown operator: '%s'", buf);

			error = resizecode(func, *codeptr);
			if (error)
				return fz_rethrow(error, "resize calculator function code");

			func->u.p.code[*codeptr].type = PSOPERATOR;
			func->u.p.code[*codeptr].u.op = a;
			++*codeptr;
			break;

		default:
			return fz_throw("calculator function syntax error");
		}
	}
}
Exemple #29
0
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref,
                  fz_stream *file, pdf_lexbuf *buf,
                  int *onum, int *ogen, int *ostmofs)
{
    pdf_obj *obj = NULL;
    int num = 0, gen = 0, stm_ofs;
    int tok;
    int a, b;
    fz_context *ctx = file->ctx;

    fz_var(obj);

    tok = pdf_lex(file, buf);
    /* RJW: cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected object number (%d %d R)", num, gen);
    num = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
    gen = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_OBJ)
        fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */

    switch (tok)
    {
    case PDF_TOK_OPEN_ARRAY:
        obj = pdf_parse_array(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_OPEN_DICT:
        obj = pdf_parse_dict(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_NAME:
        obj = fz_new_name(ctx, buf->scratch);
        break;
    case PDF_TOK_REAL:
        obj = pdf_new_real(ctx, buf->f);
        break;
    case PDF_TOK_STRING:
        obj = pdf_new_string(ctx, buf->scratch, buf->len);
        break;
    case PDF_TOK_TRUE:
        obj = pdf_new_bool(ctx, 1);
        break;
    case PDF_TOK_FALSE:
        obj = pdf_new_bool(ctx, 0);
        break;
    case PDF_TOK_NULL:
        obj = pdf_new_null(ctx);
        break;

    case PDF_TOK_INT:
        a = buf->i;
        tok = pdf_lex(file, buf);
        /* "cannot parse indirect object (%d %d R)", num, gen */
        if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
        {
            obj = pdf_new_int(ctx, a);
            goto skip;
        }
        if (tok == PDF_TOK_INT)
        {
            b = buf->i;
            tok = pdf_lex(file, buf);
            /* RJW: "cannot parse indirect object (%d %d R)", num, gen); */
            if (tok == PDF_TOK_R)
            {
                obj = pdf_new_indirect(ctx, a, b, xref);
                break;
            }
        }
        fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);

    case PDF_TOK_ENDOBJ:
        obj = pdf_new_null(ctx);
        goto skip;

    default:
        fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
    }

    fz_try(ctx)
    {
        tok = pdf_lex(file, buf);
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(obj);
        fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
    }

skip:
    if (tok == PDF_TOK_STREAM)
    {
        int c = fz_read_byte(file);
        while (c == ' ')
            c = fz_read_byte(file);
        if (c == '\r')
        {
            c = fz_peek_byte(file);
            if (c != '\n')
                fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
            else
                fz_read_byte(file);
        }
        stm_ofs = fz_tell(file);
    }
    else if (tok == PDF_TOK_ENDOBJ)
    {
        stm_ofs = 0;
    }
    else
    {
        fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
        stm_ofs = 0;
    }

    if (onum) *onum = num;
    if (ogen) *ogen = gen;
    if (ostmofs) *ostmofs = stm_ofs;
    return obj;
}
Exemple #30
0
int
pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs, pdf_obj **root)
{
	fz_stream *file = doc->file;
	pdf_token tok;
	int stm_len;

	*stmofsp = 0;
	if (stmlenp)
		*stmlenp = -1;

	stm_len = 0;

	/* On entry to this function, we know that we've just seen
	 * '<int> <int> obj'. We expect the next thing we see to be a
	 * pdf object. Regardless of the type of thing we meet next
	 * we only need to fully parse it if it is a dictionary. */
	tok = pdf_lex(ctx, file, buf);

	if (tok == PDF_TOK_OPEN_DICT)
	{
		pdf_obj *dict, *obj;

		fz_try(ctx)
		{
			dict = pdf_parse_dict(ctx, doc, file, buf);
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			/* Don't let a broken object at EOF overwrite a good one */
			if (file->eof)
				fz_rethrow(ctx);
			/* Silently swallow the error */
			dict = pdf_new_dict(ctx, NULL, 2);
		}

		/* We must be careful not to try to resolve any indirections
		 * here. We have just read dict, so we know it to be a non
		 * indirected dictionary. Before we look at any values that
		 * we get back from looking up in it, we need to check they
		 * aren't indirected. */

		if (encrypt || id || root)
		{
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_XRef))
			{
				if (encrypt)
				{
					obj = pdf_dict_get(ctx, dict, PDF_NAME_Encrypt);
					if (obj)
					{
						pdf_drop_obj(ctx, *encrypt);
						*encrypt = pdf_keep_obj(ctx, obj);
					}
				}

				if (id)
				{
					obj = pdf_dict_get(ctx, dict, PDF_NAME_ID);
					if (obj)
					{
						pdf_drop_obj(ctx, *id);
						*id = pdf_keep_obj(ctx, obj);
					}
				}

				if (root)
					*root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Root));
			}
		}

		obj = pdf_dict_get(ctx, dict, PDF_NAME_Length);
		if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj))
			stm_len = pdf_to_int(ctx, obj);

		if (doc->file_reading_linearly && page)
		{
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_Page))
			{
				pdf_drop_obj(ctx, *page);
				*page = pdf_keep_obj(ctx, dict);
			}
		}

		pdf_drop_obj(ctx, dict);
	}