コード例 #1
0
fz_error
pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error;
	int tok;
	int len;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse token in object stream");

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_NAME: *op = fz_new_name(buf); break;
	case PDF_TOK_REAL: *op = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: *op = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: *op = fz_new_bool(1); break;
	case PDF_TOK_FALSE: *op = fz_new_bool(0); break;
	case PDF_TOK_NULL: *op = fz_new_null(); break;
	case PDF_TOK_INT: *op = fz_new_int(atoi(buf)); break;
	default: return fz_throw("unknown token in object stream");
	}

	return fz_okay;
}
コード例 #2
0
pdf_obj *
pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
	int tok;
	fz_context *ctx = file->ctx;

	tok = pdf_lex(file, buf);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		return pdf_parse_array(xref, file, buf);
	case PDF_TOK_OPEN_DICT:
		return pdf_parse_dict(xref, file, buf);
	case PDF_TOK_NAME: return fz_new_name(ctx, buf->scratch); break;
	case PDF_TOK_REAL: return pdf_new_real(ctx, buf->f); break;
	case PDF_TOK_STRING: return pdf_new_string(ctx, buf->scratch, buf->len); break;
	case PDF_TOK_TRUE: return pdf_new_bool(ctx, 1); break;
	case PDF_TOK_FALSE: return pdf_new_bool(ctx, 0); break;
	case PDF_TOK_NULL: return pdf_new_null(ctx); break;
	case PDF_TOK_INT: return pdf_new_int(ctx, buf->i); break;
	default: fz_throw(ctx, "unknown token in object stream");
	}
	return NULL; /* Stupid MSVC */
}
コード例 #3
0
ファイル: pdf_parse.c プロジェクト: kzmkv/SumatraPDF
pdf_obj *
pdf_to_utf8_name(fz_context *ctx, pdf_obj *src)
{
    char *buf = pdf_to_utf8(ctx, src);
    pdf_obj *dst = fz_new_name(ctx, buf);
    fz_free(ctx, buf);
    return dst;
}
コード例 #4
0
fz_obj *
pdf_to_utf8_name(fz_obj *src)
{
	char *buf = pdf_to_utf8(src);
	fz_obj *dst = fz_new_name(buf);
	fz_free(buf);
	return dst;
}
コード例 #5
0
ファイル: mupdfclean.c プロジェクト: plotnick/mupdf
static void addhexfilter(fz_obj *dict)
{
	fz_obj *f, *dp, *newf, *newdp;
	fz_obj *ahx, *nullobj;

	ahx = fz_new_name(ctx, "ASCIIHexDecode");
	nullobj = fz_new_null(ctx);
	newf = newdp = NULL;

	f = fz_dict_gets(dict, "Filter");
	dp = fz_dict_gets(dict, "DecodeParms");

	if (fz_is_name(f))
	{
		newf = fz_new_array(ctx, 2);
		fz_array_push(newf, ahx);
		fz_array_push(newf, f);
		f = newf;
		if (fz_is_dict(dp))
		{
			newdp = fz_new_array(ctx, 2);
			fz_array_push(newdp, nullobj);
			fz_array_push(newdp, dp);
			dp = newdp;
		}
	}
	else if (fz_is_array(f))
	{
		fz_array_insert(f, ahx);
		if (fz_is_array(dp))
			fz_array_insert(dp, nullobj);
	}
	else
		f = ahx;

	fz_dict_puts(dict, "Filter", f);
	if (dp)
		fz_dict_puts(dict, "DecodeParms", dp);

	fz_drop_obj(ahx);
	fz_drop_obj(nullobj);
	if (newf)
		fz_drop_obj(newf);
	if (newdp)
		fz_drop_obj(newdp);
}
コード例 #6
0
ファイル: pdf_parse.c プロジェクト: kzmkv/SumatraPDF
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref,
                  fz_stream *file, pdf_lexbuf *buf,
                  int *onum, int *ogen, int *ostmofs)
{
    pdf_obj *obj = NULL;
    int num = 0, gen = 0, stm_ofs;
    int tok;
    int a, b;
    fz_context *ctx = file->ctx;

    fz_var(obj);

    tok = pdf_lex(file, buf);
    /* RJW: cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected object number (%d %d R)", num, gen);
    num = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
    gen = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_OBJ)
        fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */

    switch (tok)
    {
    case PDF_TOK_OPEN_ARRAY:
        obj = pdf_parse_array(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_OPEN_DICT:
        obj = pdf_parse_dict(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_NAME:
        obj = fz_new_name(ctx, buf->scratch);
        break;
    case PDF_TOK_REAL:
        obj = pdf_new_real(ctx, buf->f);
        break;
    case PDF_TOK_STRING:
        obj = pdf_new_string(ctx, buf->scratch, buf->len);
        break;
    case PDF_TOK_TRUE:
        obj = pdf_new_bool(ctx, 1);
        break;
    case PDF_TOK_FALSE:
        obj = pdf_new_bool(ctx, 0);
        break;
    case PDF_TOK_NULL:
        obj = pdf_new_null(ctx);
        break;

    case PDF_TOK_INT:
        a = buf->i;
        tok = pdf_lex(file, buf);
        /* "cannot parse indirect object (%d %d R)", num, gen */
        if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
        {
            obj = pdf_new_int(ctx, a);
            goto skip;
        }
        if (tok == PDF_TOK_INT)
        {
            b = buf->i;
            tok = pdf_lex(file, buf);
            /* RJW: "cannot parse indirect object (%d %d R)", num, gen); */
            if (tok == PDF_TOK_R)
            {
                obj = pdf_new_indirect(ctx, a, b, xref);
                break;
            }
        }
        fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);

    case PDF_TOK_ENDOBJ:
        obj = pdf_new_null(ctx);
        goto skip;

    default:
        fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
    }

    fz_try(ctx)
    {
        tok = pdf_lex(file, buf);
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(obj);
        fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
    }

skip:
    if (tok == PDF_TOK_STREAM)
    {
        int c = fz_read_byte(file);
        while (c == ' ')
            c = fz_read_byte(file);
        if (c == '\r')
        {
            c = fz_peek_byte(file);
            if (c != '\n')
                fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
            else
                fz_read_byte(file);
        }
        stm_ofs = fz_tell(file);
    }
    else if (tok == PDF_TOK_ENDOBJ)
    {
        stm_ofs = 0;
    }
    else
    {
        fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
        stm_ofs = 0;
    }

    if (onum) *onum = num;
    if (ogen) *ogen = gen;
    if (ostmofs) *ostmofs = stm_ofs;
    return obj;
}
コード例 #7
0
ファイル: pdf_parse.c プロジェクト: kzmkv/SumatraPDF
pdf_obj *
pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
    pdf_obj *dict;
    pdf_obj *key = NULL;
    pdf_obj *val = NULL;
    int tok;
    int a, b;
    fz_context *ctx = file->ctx;

    dict = pdf_new_dict(ctx, 8);

    fz_var(key);
    fz_var(val);

    fz_try(ctx)
    {
        while (1)
        {
            tok = pdf_lex(file, buf);
skip:
            if (tok == PDF_TOK_CLOSE_DICT)
                break;

            /* for BI .. ID .. EI in content streams */
            if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
                break;

            if (tok != PDF_TOK_NAME)
                fz_throw(ctx, "invalid key in dict");

            key = fz_new_name(ctx, buf->scratch);

            tok = pdf_lex(file, buf);

            switch (tok)
            {
            case PDF_TOK_OPEN_ARRAY:
                /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1643 */
                fz_try(ctx)
                {
                    val = pdf_parse_array(xref, file, buf);
                }
                fz_catch(ctx)
                {
                    fz_warn(ctx, "ignoring broken array for '%s'", pdf_to_name(key));
                    pdf_drop_obj(key);
                    val = key = NULL;
                    do
                        tok = pdf_lex(file, buf);
                    while (tok != PDF_TOK_CLOSE_DICT && tok != PDF_TOK_CLOSE_ARRAY &&
                            tok != PDF_TOK_EOF && tok != PDF_TOK_OPEN_ARRAY && tok != PDF_TOK_OPEN_DICT);
                    if (tok == PDF_TOK_CLOSE_DICT)
                        goto skip;
                    if (tok == PDF_TOK_CLOSE_ARRAY)
                        continue;
                    fz_throw(ctx, "cannot make sense of broken array after all");
                }
                break;

            case PDF_TOK_OPEN_DICT:
                val = pdf_parse_dict(xref, file, buf);
                break;

            case PDF_TOK_NAME:
                val = fz_new_name(ctx, buf->scratch);
                break;
            case PDF_TOK_REAL:
                val = pdf_new_real(ctx, buf->f);
                break;
            case PDF_TOK_STRING:
                val = pdf_new_string(ctx, buf->scratch, buf->len);
                break;
            case PDF_TOK_TRUE:
                val = pdf_new_bool(ctx, 1);
                break;
            case PDF_TOK_FALSE:
                val = pdf_new_bool(ctx, 0);
                break;
            case PDF_TOK_NULL:
                val = pdf_new_null(ctx);
                break;

            case PDF_TOK_INT:
                /* 64-bit to allow for numbers > INT_MAX and overflow */
                a = buf->i;
                tok = pdf_lex(file, buf);
                if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
                        (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
                {
                    val = pdf_new_int(ctx, a);
                    fz_dict_put(dict, key, val);
                    pdf_drop_obj(val);
                    val = NULL;
                    pdf_drop_obj(key);
                    key = NULL;
                    goto skip;
                }
                if (tok == PDF_TOK_INT)
                {
                    b = buf->i;
                    tok = pdf_lex(file, buf);
                    if (tok == PDF_TOK_R)
                    {
                        val = pdf_new_indirect(ctx, a, b, xref);
                        break;
                    }
                }
                fz_throw(ctx, "invalid indirect reference in dict");

            default:
                fz_throw(ctx, "unknown token in dict");
            }

            fz_dict_put(dict, key, val);
            pdf_drop_obj(val);
            val = NULL;
            pdf_drop_obj(key);
            key = NULL;
        }
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(dict);
        pdf_drop_obj(key);
        pdf_drop_obj(val);
        fz_throw(ctx, "cannot parse dict");
    }
    return dict;
}
コード例 #8
0
ファイル: pdf_parse.c プロジェクト: kzmkv/SumatraPDF
pdf_obj *
pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
    pdf_obj *ary = NULL;
    pdf_obj *obj = NULL;
    int a = 0, b = 0, n = 0;
    int tok;
    fz_context *ctx = file->ctx;
    pdf_obj *op;

    fz_var(obj);

    ary = pdf_new_array(ctx, 4);

    fz_try(ctx)
    {
        while (1)
        {
            tok = pdf_lex(file, buf);

            if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
            {
                if (n > 0)
                {
                    obj = pdf_new_int(ctx, a);
                    pdf_array_push(ary, obj);
                    pdf_drop_obj(obj);
                    obj = NULL;
                }
                if (n > 1)
                {
                    obj = pdf_new_int(ctx, b);
                    pdf_array_push(ary, obj);
                    pdf_drop_obj(obj);
                    obj = NULL;
                }
                n = 0;
            }

            if (tok == PDF_TOK_INT && n == 2)
            {
                obj = pdf_new_int(ctx, a);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                a = b;
                n --;
            }

            switch (tok)
            {
            case PDF_TOK_CLOSE_ARRAY:
                op = ary;
                goto end;

            case PDF_TOK_INT:
                if (n == 0)
                    a = buf->i;
                if (n == 1)
                    b = buf->i;
                n ++;
                break;

            case PDF_TOK_R:
                if (n != 2)
                    fz_throw(ctx, "cannot parse indirect reference in array");
                obj = pdf_new_indirect(ctx, a, b, xref);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                n = 0;
                break;

            case PDF_TOK_OPEN_ARRAY:
                obj = pdf_parse_array(xref, file, buf);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;

            case PDF_TOK_OPEN_DICT:
                obj = pdf_parse_dict(xref, file, buf);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;

            case PDF_TOK_NAME:
                obj = fz_new_name(ctx, buf->scratch);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_REAL:
                obj = pdf_new_real(ctx, buf->f);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_STRING:
                obj = pdf_new_string(ctx, buf->scratch, buf->len);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_TRUE:
                obj = pdf_new_bool(ctx, 1);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_FALSE:
                obj = pdf_new_bool(ctx, 0);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;
            case PDF_TOK_NULL:
                obj = pdf_new_null(ctx);
                pdf_array_push(ary, obj);
                pdf_drop_obj(obj);
                obj = NULL;
                break;

            default:
                fz_throw(ctx, "cannot parse token in array");
            }
        }
end:
        {}
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(obj);
        pdf_drop_obj(ary);
        fz_throw(ctx, "cannot parse array");
    }
    return op;
}
コード例 #9
0
pdf_obj *
pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
	pdf_obj *dict;
	pdf_obj *key = NULL;
	pdf_obj *val = NULL;
	int tok;
	int a, b;
	fz_context *ctx = file->ctx;

	dict = pdf_new_dict(ctx, 8);

	fz_var(key);
	fz_var(val);

	fz_try(ctx)
	{
		while (1)
		{
			tok = pdf_lex(file, buf);
	skip:
			if (tok == PDF_TOK_CLOSE_DICT)
				break;

			/* for BI .. ID .. EI in content streams */
			if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
				break;

			if (tok != PDF_TOK_NAME)
				fz_throw(ctx, "invalid key in dict");

			key = fz_new_name(ctx, buf->scratch);

			tok = pdf_lex(file, buf);

			switch (tok)
			{
			case PDF_TOK_OPEN_ARRAY:
				val = pdf_parse_array(xref, file, buf);
				break;

			case PDF_TOK_OPEN_DICT:
				val = pdf_parse_dict(xref, file, buf);
				break;

			case PDF_TOK_NAME: val = fz_new_name(ctx, buf->scratch); break;
			case PDF_TOK_REAL: val = pdf_new_real(ctx, buf->f); break;
			case PDF_TOK_STRING: val = pdf_new_string(ctx, buf->scratch, buf->len); break;
			case PDF_TOK_TRUE: val = pdf_new_bool(ctx, 1); break;
			case PDF_TOK_FALSE: val = pdf_new_bool(ctx, 0); break;
			case PDF_TOK_NULL: val = pdf_new_null(ctx); break;

			case PDF_TOK_INT:
				/* 64-bit to allow for numbers > INT_MAX and overflow */
				a = buf->i;
				tok = pdf_lex(file, buf);
				if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
					(tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
				{
					val = pdf_new_int(ctx, a);
					pdf_dict_put(dict, key, val);
					pdf_drop_obj(val);
					val = NULL;
					pdf_drop_obj(key);
					key = NULL;
					goto skip;
				}
				if (tok == PDF_TOK_INT)
				{
					b = buf->i;
					tok = pdf_lex(file, buf);
					if (tok == PDF_TOK_R)
					{
						val = pdf_new_indirect(ctx, a, b, xref);
						break;
					}
				}
				fz_throw(ctx, "invalid indirect reference in dict");

			default:
				fz_throw(ctx, "unknown token in dict");
			}

			pdf_dict_put(dict, key, val);
			pdf_drop_obj(val);
			val = NULL;
			pdf_drop_obj(key);
			key = NULL;
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(dict);
		pdf_drop_obj(key);
		pdf_drop_obj(val);
		fz_throw(ctx, "cannot parse dict");
	}
	return dict;
}
コード例 #10
0
fz_error
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = NULL;
	int num = 0, gen = 0, stm_ofs;
	int tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_NAME: obj = fz_new_name(buf); break;
	case PDF_TOK_REAL: obj = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: obj = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: obj = fz_new_bool(1); break;
	case PDF_TOK_FALSE: obj = fz_new_bool(0); break;
	case PDF_TOK_NULL: obj = fz_new_null(); break;

	case PDF_TOK_INT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			obj = fz_new_int(a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TOK_R)
			{
				obj = fz_new_indirect(a, b, xref);
				break;
			}
		}
		return fz_throw("expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		obj = fz_new_null();
		goto skip;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_drop_obj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c = fz_read_byte(file);
		while (c == ' ')
			c = fz_read_byte(file);
		if (c == '\r')
		{
			c = fz_peek_byte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(file);
		}
		stm_ofs = fz_tell(file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	*op = obj;
	return fz_okay;
}
コード例 #11
0
fz_error
pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = NULL;
	fz_obj *key = NULL;
	fz_obj *val = NULL;
	int tok;
	int len;
	int a, b;

	dict = fz_new_dict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

skip:
		if (tok == PDF_TOK_CLOSE_DICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TOK_NAME)
		{
			fz_drop_obj(dict);
			return fz_throw("invalid key in dict");
		}

		key = fz_new_name(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&val, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TOK_NAME: val = fz_new_name(buf); break;
		case PDF_TOK_REAL: val = fz_new_real(fz_atof(buf)); break;
		case PDF_TOK_STRING: val = fz_new_string(buf, len); break;
		case PDF_TOK_TRUE: val = fz_new_bool(1); break;
		case PDF_TOK_FALSE: val = fz_new_bool(0); break;
		case PDF_TOK_NULL: val = fz_new_null(); break;

		case PDF_TOK_INT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
				(tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
			{
				val = fz_new_int(a);
				fz_dict_put(dict, key, val);
				fz_drop_obj(val);
				fz_drop_obj(key);
				goto skip;
			}
			if (tok == PDF_TOK_INT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_drop_obj(key);
					fz_drop_obj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TOK_R)
				{
					val = fz_new_indirect(a, b, xref);
					break;
				}
			}
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("unknown token in dict");
		}

		fz_dict_put(dict, key, val);
		fz_drop_obj(val);
		fz_drop_obj(key);
	}
}
コード例 #12
0
fz_error
pdf_parse_array(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = NULL;
	fz_obj *obj = NULL;
	int a = 0, b = 0, n = 0;
	int tok;
	int len;

	ary = fz_new_array(4);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
		{
			if (n > 0)
			{
				obj = fz_new_int(a);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			if (n > 1)
			{
				obj = fz_new_int(b);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			n = 0;
		}

		if (tok == PDF_TOK_INT && n == 2)
		{
			obj = fz_new_int(a);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TOK_CLOSE_ARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TOK_INT:
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;

		case PDF_TOK_R:
			if (n != 2)
			{
				fz_drop_obj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			obj = fz_new_indirect(a, b, xref);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			n = 0;
			break;

		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		case PDF_TOK_NAME:
			obj = fz_new_name(buf);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_REAL:
			obj = fz_new_real(fz_atof(buf));
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_STRING:
			obj = fz_new_string(buf, len);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_TRUE:
			obj = fz_new_bool(1);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_FALSE:
			obj = fz_new_bool(0);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_NULL:
			obj = fz_new_null();
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		default:
			fz_drop_obj(ary);
			return fz_throw("cannot parse token in array");
		}
	}
}