Beispiel #1
0
fz_error
pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error;
	int tok;
	int len;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse token in object stream");

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(op, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object stream");
		break;
	case PDF_TOK_NAME: *op = fz_new_name(buf); break;
	case PDF_TOK_REAL: *op = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: *op = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: *op = fz_new_bool(1); break;
	case PDF_TOK_FALSE: *op = fz_new_bool(0); break;
	case PDF_TOK_NULL: *op = fz_new_null(); break;
	case PDF_TOK_INT: *op = fz_new_int(atoi(buf)); break;
	default: return fz_throw("unknown token in object stream");
	}

	return fz_okay;
}
Beispiel #2
0
fz_error
pdf_cache_object(pdf_xref *xref, int num, int gen)
{
	fz_error error;
	pdf_xref_entry *x;
	int rnum, rgen;

	if (num < 0 || num >= xref->len)
		return fz_throw("object out of range (%d %d R); xref size %d", num, gen, xref->len);

	x = &xref->table[num];

	if (x->obj)
		return fz_okay;

	if (x->type == 'f')
	{
		x->obj = fz_new_null();
		return fz_okay;
	}
	else if (x->type == 'n')
	{
		fz_seek(xref->file, x->ofs, 0);

		error = pdf_parse_ind_obj(&x->obj, xref, xref->file, xref->scratch, sizeof xref->scratch,
			&rnum, &rgen, &x->stm_ofs);
		if (error)
			return fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);

		if (rnum != num)
			return fz_throw("found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);

		if (xref->crypt)
			pdf_crypt_obj(xref->crypt, x->obj, num, gen);
	}
	else if (x->type == 'o')
	{
		if (!x->obj)
		{
			error = pdf_load_obj_stm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch);
			if (error)
				return fz_rethrow(error, "cannot load object stream containing object (%d %d R)", num, gen);
			if (!x->obj)
				return fz_throw("object (%d %d R) was not found in its object stream", num, gen);
		}
	}
	else
	{
		return fz_throw("assert: corrupt xref struct");
	}

	return fz_okay;
}
Beispiel #3
0
static void addhexfilter(fz_obj *dict)
{
	fz_obj *f, *dp, *newf, *newdp;
	fz_obj *ahx, *nullobj;

	ahx = fz_new_name(ctx, "ASCIIHexDecode");
	nullobj = fz_new_null(ctx);
	newf = newdp = NULL;

	f = fz_dict_gets(dict, "Filter");
	dp = fz_dict_gets(dict, "DecodeParms");

	if (fz_is_name(f))
	{
		newf = fz_new_array(ctx, 2);
		fz_array_push(newf, ahx);
		fz_array_push(newf, f);
		f = newf;
		if (fz_is_dict(dp))
		{
			newdp = fz_new_array(ctx, 2);
			fz_array_push(newdp, nullobj);
			fz_array_push(newdp, dp);
			dp = newdp;
		}
	}
	else if (fz_is_array(f))
	{
		fz_array_insert(f, ahx);
		if (fz_is_array(dp))
			fz_array_insert(dp, nullobj);
	}
	else
		f = ahx;

	fz_dict_puts(dict, "Filter", f);
	if (dp)
		fz_dict_puts(dict, "DecodeParms", dp);

	fz_drop_obj(ahx);
	fz_drop_obj(nullobj);
	if (newf)
		fz_drop_obj(newf);
	if (newdp)
		fz_drop_obj(newdp);
}
Beispiel #4
0
static fz_outline *
pdf_load_outline_imp(pdf_xref *xref, fz_obj *dict)
{
    pdf_link *link;
    fz_outline *node;
    fz_obj *obj;
    /* SumatraPDF: prevent potential stack overflow */
    fz_outline *prev, *root = NULL;
    fz_obj *origDict = dict;
    fz_context *ctx = xref->ctx;

    if (fz_is_null(ctx, dict))
        return NULL;

    /* SumatraPDF: prevent cyclic outlines */
    do
    {
        if (fz_dict_gets(ctx, dict, ".seen"))
            break;
        obj = fz_new_null(ctx);
        fz_dict_puts(ctx, dict, ".seen", obj);
        fz_drop_obj(ctx, obj);

        node = fz_malloc(ctx, sizeof(fz_outline));
        node->title = NULL;
        node->page = -1;
        node->down = NULL;
        node->next = NULL;

        obj = fz_dict_gets(ctx, dict, "Title");
        if (obj)
            node->title = pdf_to_utf8(ctx, obj);

        /* SumatraPDF: support expansion states */
        node->is_open = fz_to_int(ctx, fz_dict_gets(ctx, dict, "Count")) >= 0;
        /* SumatraPDF: extended outline actions */
        node->data = node->free_data = NULL;

        if (fz_dict_gets(ctx, dict, "Dest") || fz_dict_gets(ctx, dict, "A"))
        {
            link = pdf_load_link(xref, dict);
            if (link) /* SumatraPDF: don't crash if it's no link after all */
            {
                if (link->kind == PDF_LINK_GOTO)
                    node->page = pdf_find_page_number(xref, fz_array_get(ctx, link->dest, 0));
                /* SumatraPDF: extended outline actions */
                node->data = link;
                node->free_data = pdf_free_link;
            }
        }

        obj = fz_dict_gets(ctx, dict, "First");
        if (obj)
            node->down = pdf_load_outline_imp(xref, obj);

        /* SumatraPDF: prevent potential stack overflow */
        if (!root)
            prev = root = node;
        else
            prev = prev->next = node;

        dict = fz_dict_gets(ctx, dict, "Next");
    } while (dict && !fz_is_null(ctx, dict));
    node = root;
    /* SumatraPDF: prevent cyclic outlines */
    for (dict = origDict; dict && fz_dict_gets(ctx, dict, ".seen"); dict = fz_dict_gets(ctx, dict, "Next"))
        fz_dict_dels(ctx, dict, ".seen");

    return node;
}
Beispiel #5
0
fz_error
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = NULL;
	int num = 0, gen = 0, stm_ofs;
	int tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_NAME: obj = fz_new_name(buf); break;
	case PDF_TOK_REAL: obj = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: obj = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: obj = fz_new_bool(1); break;
	case PDF_TOK_FALSE: obj = fz_new_bool(0); break;
	case PDF_TOK_NULL: obj = fz_new_null(); break;

	case PDF_TOK_INT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			obj = fz_new_int(a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TOK_R)
			{
				obj = fz_new_indirect(a, b, xref);
				break;
			}
		}
		return fz_throw("expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		obj = fz_new_null();
		goto skip;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_drop_obj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c = fz_read_byte(file);
		while (c == ' ')
			c = fz_read_byte(file);
		if (c == '\r')
		{
			c = fz_peek_byte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(file);
		}
		stm_ofs = fz_tell(file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	*op = obj;
	return fz_okay;
}
Beispiel #6
0
fz_error
pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = NULL;
	fz_obj *key = NULL;
	fz_obj *val = NULL;
	int tok;
	int len;
	int a, b;

	dict = fz_new_dict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

skip:
		if (tok == PDF_TOK_CLOSE_DICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TOK_NAME)
		{
			fz_drop_obj(dict);
			return fz_throw("invalid key in dict");
		}

		key = fz_new_name(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&val, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TOK_NAME: val = fz_new_name(buf); break;
		case PDF_TOK_REAL: val = fz_new_real(fz_atof(buf)); break;
		case PDF_TOK_STRING: val = fz_new_string(buf, len); break;
		case PDF_TOK_TRUE: val = fz_new_bool(1); break;
		case PDF_TOK_FALSE: val = fz_new_bool(0); break;
		case PDF_TOK_NULL: val = fz_new_null(); break;

		case PDF_TOK_INT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_drop_obj(key);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
				(tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
			{
				val = fz_new_int(a);
				fz_dict_put(dict, key, val);
				fz_drop_obj(val);
				fz_drop_obj(key);
				goto skip;
			}
			if (tok == PDF_TOK_INT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_drop_obj(key);
					fz_drop_obj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TOK_R)
				{
					val = fz_new_indirect(a, b, xref);
					break;
				}
			}
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("unknown token in dict");
		}

		fz_dict_put(dict, key, val);
		fz_drop_obj(val);
		fz_drop_obj(key);
	}
}
Beispiel #7
0
fz_error
pdf_parse_array(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = NULL;
	fz_obj *obj = NULL;
	int a = 0, b = 0, n = 0;
	int tok;
	int len;

	ary = fz_new_array(4);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_drop_obj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
		{
			if (n > 0)
			{
				obj = fz_new_int(a);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			if (n > 1)
			{
				obj = fz_new_int(b);
				fz_array_push(ary, obj);
				fz_drop_obj(obj);
			}
			n = 0;
		}

		if (tok == PDF_TOK_INT && n == 2)
		{
			obj = fz_new_int(a);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TOK_CLOSE_ARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TOK_INT:
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;

		case PDF_TOK_R:
			if (n != 2)
			{
				fz_drop_obj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			obj = fz_new_indirect(a, b, xref);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			n = 0;
			break;

		case PDF_TOK_OPEN_ARRAY:
			error = pdf_parse_array(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		case PDF_TOK_OPEN_DICT:
			error = pdf_parse_dict(&obj, xref, file, buf, cap);
			if (error)
			{
				fz_drop_obj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		case PDF_TOK_NAME:
			obj = fz_new_name(buf);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_REAL:
			obj = fz_new_real(fz_atof(buf));
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_STRING:
			obj = fz_new_string(buf, len);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_TRUE:
			obj = fz_new_bool(1);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_FALSE:
			obj = fz_new_bool(0);
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;
		case PDF_TOK_NULL:
			obj = fz_new_null();
			fz_array_push(ary, obj);
			fz_drop_obj(obj);
			break;

		default:
			fz_drop_obj(ary);
			return fz_throw("cannot parse token in array");
		}
	}
}