Exemple #1
0
/*
 * Read one line of text from stm into mem.
 *
 * At most n-1 bytes are stored, and the result is NUL terminated whenever
 * n is positive. A line ends at LF, CR or CRLF; the terminator is consumed
 * but not stored. A line longer than the buffer is returned in pieces over
 * successive calls.
 *
 * Returns mem, or NULL when end of file was reached before anything at all
 * (data or a line terminator) could be consumed.
 */
char *
fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, int n)
{
	char *s = mem;
	int c = EOF;
	while (n > 1)
	{
		c = fz_read_byte(ctx, stm);
		if (c == EOF)
			break;
		if (c == '\r') {
			/* Swallow the LF of a CRLF pair. The peeked value is not
			 * stored in c: c must keep recording that a terminator (not
			 * EOF) ended this line, otherwise an empty line ending in a
			 * bare CR right before EOF would be reported as NULL. */
			if (fz_peek_byte(ctx, stm) == '\n')
				fz_read_byte(ctx, stm);
			break;
		}
		if (c == '\n')
			break;
		*s++ = c;
		n--;
	}
	/* Only terminate when the caller really gave us room (n may be <= 0). */
	if (n > 0)
		*s = '\0';
	return (s == mem && c == EOF) ? NULL : mem;
}
/*
 * Parse an inline image from stm: the image dictionary, the whitespace
 * following the ID keyword, the image data itself and finally the closing
 * EI marker.
 *
 * Returns the loaded image. The parsed dictionary is always dropped; if
 * anything throws, the partially loaded image is dropped and the error is
 * rethrown.
 */
static fz_image *
parse_inline_image(fz_context *ctx, pdf_csi *csi, fz_stream *stm)
{
	pdf_document *doc = csi->doc;
	pdf_obj *rdb = csi->rdb;
	pdf_obj *dict = NULL;
	fz_image *image = NULL;
	int ch, found;

	fz_var(dict);
	fz_var(image);

	fz_try(ctx)
	{
		dict = pdf_parse_dict(ctx, doc, stm, &doc->lexbuf.base);

		/* consume the whitespace byte after the ID keyword (CRLF counts as one) */
		ch = fz_read_byte(ctx, stm);
		if (ch == '\r' && fz_peek_byte(ctx, stm) == '\n')
			fz_read_byte(ctx, stm);

		image = pdf_load_inline_image(ctx, doc, rdb, dict, stm);

		/* scan forward for "EI" followed by whitespace, EOF, '<' or '/' */
		found = 0;
		ch = fz_read_byte(ctx, stm);
		for (;;)
		{
			/* advance to the next candidate 'E' */
			while (ch != EOF && ch != 'E')
				ch = fz_read_byte(ctx, stm);
			if (ch == EOF)
				break;

			ch = fz_read_byte(ctx, stm);
			if (ch == 'I')
			{
				/* the byte after "EI" is only peeked, never consumed here */
				ch = fz_peek_byte(ctx, stm);
				if (ch == EOF || ch <= 32 || ch == ' ' || ch == '/' || ch == '<')
				{
					found = 1;
					break;
				}
			}
			if (ch == EOF)
				break;
		}
		if (!found)
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error after inline image");
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, dict);
	}
	fz_catch(ctx)
	{
		fz_drop_image(ctx, image);
		fz_rethrow(ctx);
	}

	return image;
}
Exemple #3
0
/*
 * Read one subsection of a cross reference stream.
 *
 * i0 is the first object number and i1 the number of entries. w0, w1 and
 * w2 are the byte widths of the three big-endian fields of every entry
 * (type, offset, generation). Table slots that already have a type are
 * left untouched; their bytes are still consumed from stm.
 *
 * Returns fz_okay, or an error if the range does not fit the xref table
 * or the stream ends before an entry starts.
 */
static fz_error
pdf_read_new_xref_section(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
{
	int i, n;

	/* Range check written without computing i0 + i1, so that hostile
	 * values cannot overflow and slip past the test. Negative counts are
	 * rejected as well. NOTE(review): assumes xref->len is never negative. */
	if (i0 < 0 || i1 < 0 || i0 > xref->len - i1)
		return fz_throw("xref stream has too many entries");

	for (i = i0; i < i0 + i1; i++)
	{
		int a = 0;
		int b = 0;
		int c = 0;

		if (fz_is_eof(stm))
			return fz_throw("truncated xref stream");

		for (n = 0; n < w0; n++)
			a = (a << 8) + fz_read_byte(stm);
		for (n = 0; n < w1; n++)
			b = (b << 8) + fz_read_byte(stm);
		for (n = 0; n < w2; n++)
			c = (c << 8) + fz_read_byte(stm);

		if (!xref->table[i].type)
		{
			/* a missing type field (w0 == 0) defaults to type 1 */
			int t = w0 ? a : 1;
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
			xref->table[i].ofs = w1 ? b : 0;
			xref->table[i].gen = w2 ? c : 0;
		}
	}

	return fz_okay;
}
Exemple #4
0
/*
 * Read the next token from f.
 *
 * Whitespace and comments are skipped. Names, strings and numbers leave
 * their value in buf. A stray ')' or '>' produces a warning and lexing
 * continues. Returns the token code, or PDF_TOK_EOF at end of input.
 */
int
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
				return PDF_TOK_OPEN_DICT;
			/* Not "<<": give the byte back to the hex string lexer. EOF is
			 * not a byte that was read, so there is nothing to push back. */
			if (c != EOF)
				fz_unread_byte(f);
			return lex_hex_string(f, buf);
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
				return PDF_TOK_CLOSE_DICT;
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
				return PDF_TOK_EOF;
			/* Only the stray '>' is discarded; the byte that followed it
			 * starts the next token and must not be swallowed. */
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			return lex_number(f, buf, c);
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
Exemple #5
0
/*
 * Scan the decoded samples of an image with 8 bits per component, read
 * from stream, looking for a pixel that counts as colored under the
 * device threshold.
 *
 * When such a pixel is found the device is flagged (is_color, resolved)
 * and, unless a passthrough device is installed, interpretation is
 * aborted by throwing FZ_ERROR_ABORT.
 */
static void fz_test_fill_compressed_8bpc_image(fz_context *ctx, fz_test_device *dev, fz_image *image, fz_stream *stream, const fz_color_params *color_params)
{
	unsigned int npixels = (unsigned int)image->w * (unsigned int)image->h;
	unsigned int px;

	if (image->colorspace != fz_device_rgb(ctx))
	{
		/* Other colorspaces: convert every pixel to RGB before testing it. */
		fz_color_converter cc;
		unsigned int ncomp = (unsigned int)image->n;

		fz_init_cached_color_converter(ctx, &cc, NULL, fz_device_rgb(ctx), image->colorspace, color_params);

		fz_try(ctx)
		{
			for (px = 0; px < npixels; px++)
			{
				float src[FZ_MAX_COLORS];
				float rgb[FZ_MAX_COLORS];
				unsigned int comp = 0;

				while (comp < ncomp)
				{
					src[comp] = fz_read_byte(ctx, stream) / 255.0f;
					comp++;
				}

				cc.convert(ctx, &cc, rgb, src);

				if (is_rgb_color(dev->threshold, rgb[0], rgb[1], rgb[2]))
				{
					*dev->is_color = 1;
					dev->resolved = 1;
					if (dev->passthrough == NULL)
						fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
					break;
				}
			}
		}
		fz_always(ctx)
			fz_fin_cached_color_converter(ctx, &cc);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}
	else
	{
		/* Device RGB: the three bytes of a pixel can be tested directly. */
		int threshold_u8 = dev->threshold * 255;

		px = 0;
		while (px < npixels)
		{
			int r = fz_read_byte(ctx, stream);
			int g = fz_read_byte(ctx, stream);
			int b = fz_read_byte(ctx, stream);
			if (is_rgb_color_u8(threshold_u8, r, g, b))
			{
				*dev->is_color = 1;
				dev->resolved = 1;
				if (dev->passthrough == NULL)
					fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
				break;
			}
			px++;
		}
	}
}
Exemple #6
0
/*
 * Read a 32-bit little-endian value from file.
 *
 * The bytes are combined in unsigned arithmetic: shifting a byte >= 128
 * left by 24 in a signed int, or shifting the negative EOF value, would be
 * undefined behavior. End of file is not reported separately.
 */
static inline int getlong(fz_stream *file)
{
	unsigned int a = (unsigned int)fz_read_byte(file);
	unsigned int b = (unsigned int)fz_read_byte(file);
	unsigned int c = (unsigned int)fz_read_byte(file);
	unsigned int d = (unsigned int)fz_read_byte(file);
	return (int)(a | b << 8 | c << 16 | d << 24);
}
Exemple #7
0
/*
 * Read a 16-bit little-endian unsigned integer from stm.
 *
 * Both bytes are consumed before end of file is checked; a premature end
 * of file throws FZ_ERROR_GENERIC.
 */
uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm)
{
	int lo = fz_read_byte(ctx, stm);
	int hi = fz_read_byte(ctx, stm);

	if (lo == EOF || hi == EOF)
		fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of file in int16");

	return (uint16_t)((uint32_t)lo | ((uint32_t)hi << 8));
}
Exemple #8
0
/*
 * Read a 32-bit little-endian unsigned integer from stm.
 *
 * All four bytes are consumed before end of file is checked; a premature
 * end of file throws FZ_ERROR_GENERIC.
 */
uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm)
{
	uint32_t value = 0;
	int truncated = 0;
	int shift;

	/* least significant byte first */
	for (shift = 0; shift < 32; shift += 8)
	{
		int byte = fz_read_byte(ctx, stm);
		if (byte == EOF)
			truncated = 1;
		value |= (uint32_t)byte << shift;
	}

	if (truncated)
		fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of file in int32");
	return value;
}
Exemple #9
0
/*
 * Reposition stm.
 *
 * whence 0 is an absolute offset and whence 1 is relative to the current
 * position; whence 2 is only supported when the stream provides its own
 * seek callback. Streams without a callback are "seeked" forwards by
 * reading and discarding bytes; seeking backwards only produces a warning.
 */
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	stm->avail = 0; /* Reset bit reading */

	if (stm->seek)
	{
		/* hand the callback an absolute position instead of a relative one */
		if (whence == 1)
		{
			offset += fz_tell(stm);
			whence = 0;
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
		return;
	}

	if (whence == 2)
	{
		fz_warn(stm->ctx, "cannot seek");
		return;
	}

	/* turn the request into a byte count to skip from here */
	if (whence == 0)
		offset -= fz_tell(stm);
	if (offset < 0)
		fz_warn(stm->ctx, "cannot seek backwards");

	/* dog slow, but rare enough */
	for (; offset > 0; offset--)
	{
		if (fz_read_byte(stm) == EOF)
		{
			fz_warn(stm->ctx, "seek failed");
			break;
		}
	}
}
Exemple #10
0
/*
 * Read the cross reference section located at byte offset ofs.
 *
 * After skipping leading whitespace the first byte selects the reader:
 * 'x' uses pdf_read_old_xref and a digit uses pdf_read_new_xref. Any other
 * byte is an error. Reader failures are rethrown with the offset attached.
 */
static fz_error
pdf_read_xref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error error;
	int c;

	fz_seek(xref->file, ofs, 0);

	while (iswhite(fz_peek_byte(xref->file)))
		fz_read_byte(xref->file);

	c = fz_peek_byte(xref->file);

	if (c == 'x')
		error = pdf_read_old_xref(trailerp, xref, buf, cap);
	else if (c >= '0' && c <= '9')
		error = pdf_read_new_xref(trailerp, xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format");

	if (error)
		return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);

	return fz_okay;
}
Exemple #11
0
/*
 * Read the trailer belonging to the cross reference section at
 * xref->startxref.
 *
 * After skipping leading whitespace the first byte selects the reader:
 * 'x' uses pdf_read_old_trailer and a digit uses pdf_read_new_trailer.
 * Any other byte is reported as an unrecognized format.
 */
static fz_error
pdf_read_trailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int c;

	fz_seek(xref->file, xref->startxref, 0);

	while (iswhite(fz_peek_byte(xref->file)))
		fz_read_byte(xref->file);

	c = fz_peek_byte(xref->file);

	if (c == 'x')
		error = pdf_read_old_trailer(xref, buf, cap);
	else if (c >= '0' && c <= '9')
		error = pdf_read_new_trailer(xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format: '%c'", c);

	if (error)
		return fz_rethrow(error, "cannot read trailer");

	return fz_okay;
}
Exemple #12
0
/*
 * Skip the rest of a comment: consume bytes up to and including the first
 * line feed or carriage return, or until end of file.
 */
static void
lex_comment(fz_stream *f)
{
	for (;;)
	{
		int c = fz_read_byte(f);
		if (c == '\012' || c == '\015' || c == EOF)
			return;
	}
}
Exemple #13
0
/*
 * Refill the stream buffer with decoded ASCII hex data.
 *
 * Pairs of hex digits read from the chained stream become one output byte
 * each; whitespace is skipped; '>' ends the data, a pending odd digit
 * being emitted as the high nibble of a final byte. Any other byte throws.
 * At most max bytes (capped to the size of the state buffer) are produced.
 *
 * Returns the first decoded byte, or EOF when nothing was produced.
 */
static int
next_ahxd(fz_stream *stm, int max)
{
	fz_ahxd *state = stm->state;
	unsigned char *out = state->buffer;
	unsigned char *limit;
	int hi = 0, lo, c, have_hi;

	if (max > sizeof(state->buffer))
		max = sizeof(state->buffer);
	limit = out + max;

	have_hi = 0;

	while (out < limit && !state->eod)
	{
		c = fz_read_byte(state->chain);
		if (c < 0)
			break;

		if (ishex(c))
		{
			if (have_hi)
			{
				/* second digit completes the byte */
				lo = unhex(c);
				*out++ = (hi << 4) | lo;
				have_hi = 0;
			}
			else
			{
				hi = unhex(c);
				have_hi = 1;
			}
		}
		else if (c == '>')
		{
			/* end of data: flush a dangling high nibble */
			if (have_hi)
				*out++ = (hi << 4);
			state->eod = 1;
			break;
		}
		else if (!iswhite(c))
		{
			fz_throw(stm->ctx, FZ_ERROR_GENERIC, "bad data in ahxd: '%c'", c);
		}
	}

	stm->rp = state->buffer;
	stm->wp = out;
	stm->pos += out - state->buffer;

	if (stm->rp == out)
		return EOF;
	return *stm->rp++;
}
Exemple #14
0
/*
 * Skip a run of whitespace. The first byte that is not whitespace is
 * pushed back so that the caller sees it again; nothing is pushed back at
 * end of file.
 */
static void
lex_white(fz_stream *f)
{
	int c = fz_read_byte(f);

	while (c <= 32 && iswhite(c))
		c = fz_read_byte(f);

	if (c != EOF)
		fz_unread_byte(f);
}
Exemple #15
0
/*
 * Read a 64-bit little-endian unsigned integer from stm.
 *
 * All eight bytes are consumed before end of file is checked; a premature
 * end of file throws FZ_ERROR_GENERIC.
 */
uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm)
{
	uint64_t value = 0;
	int truncated = 0;
	int shift;

	/* least significant byte first */
	for (shift = 0; shift < 64; shift += 8)
	{
		int byte = fz_read_byte(ctx, stm);
		if (byte == EOF)
			truncated = 1;
		value |= (uint64_t)byte << shift;
	}

	if (truncated)
		fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of file in int64");
	return value;
}
Exemple #16
0
/*
 * Read one line of text from stm into mem.
 *
 * At most n-1 bytes are stored, and the result is NUL terminated whenever
 * n is positive. A line ends at LF, CR or CRLF; the terminator is consumed
 * but not stored. Reading also stops at end of file.
 */
void
fz_read_line(fz_stream *stm, char *mem, int n)
{
	char *s = mem;
	int c = EOF;
	while (n > 1)
	{
		c = fz_read_byte(stm);
		if (c == EOF)
			break;
		if (c == '\r') {
			/* swallow the LF of a CRLF pair */
			c = fz_peek_byte(stm);
			if (c == '\n')
				fz_read_byte(stm);
			break;
		}
		if (c == '\n')
			break;
		*s++ = c;
		n--;
	}
	/* Only terminate when the caller really gave us room (n may be <= 0). */
	if (n > 0)
		*s = '\0';
}
Exemple #17
0
/*
 * Read a NUL terminated string from stm into buffer, which can hold len
 * bytes including the terminator.
 *
 * Throws FZ_ERROR_GENERIC if the buffer fills up before the terminator is
 * seen, or if end of file is reached first.
 */
void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len)
{
	char *out = buffer;
	int remaining = len;

	for (;;)
	{
		int c;

		if (remaining <= 0)
			fz_throw(ctx, FZ_ERROR_GENERIC, "Buffer overrun reading null terminated string");

		c = fz_read_byte(ctx, stm);
		if (c == EOF)
			fz_throw(ctx, FZ_ERROR_GENERIC, "EOF reading null terminated string");

		*out++ = c;
		remaining--;

		/* the terminator itself has been stored */
		if (c == 0)
			return;
	}
}
Exemple #18
0
/*
 * Decode ASCII hex data from the chained stream into buf.
 *
 * Pairs of hex digits become one output byte each; whitespace is skipped;
 * '>' marks the end of data, a pending odd digit being emitted as the high
 * nibble of a final byte. Any other byte throws.
 *
 * Returns the number of bytes stored, which is less than len once the end
 * of data or the end of the chained stream has been reached.
 */
static int
read_ahxd(fz_stream *stm, unsigned char *buf, int len)
{
	fz_ahxd *state = stm->state;
	unsigned char *out = buf;
	unsigned char *limit = buf + len;
	int hi = 0, lo, c, have_hi;

	have_hi = 0;

	while (out < limit && !state->eod)
	{
		c = fz_read_byte(state->chain);
		if (c < 0)
			break;

		if (ishex(c))
		{
			if (have_hi)
			{
				/* second digit completes the byte */
				lo = unhex(c);
				*out++ = (hi << 4) | lo;
				have_hi = 0;
			}
			else
			{
				hi = unhex(c);
				have_hi = 1;
			}
		}
		else if (c == '>')
		{
			/* end of data: flush a dangling high nibble; the loop
			 * condition stops the decoding on the next pass */
			if (have_hi)
				*out++ = (hi << 4);
			state->eod = 1;
		}
		else if (!iswhite(c))
		{
			fz_throw(stm->ctx, "bad data in ahxd: '%c'", c);
		}
	}

	return out - buf;
}
Exemple #19
0
/*
 * Lex the body of a hex string into the scratch buffer of lb, growing the
 * buffer whenever it is full.
 *
 * Whitespace is skipped and other non-hex bytes are ignored with a
 * warning. Lexing stops at '>' or end of file; a trailing unpaired digit
 * is dropped. The decoded length is stored in lb->len.
 *
 * Always returns PDF_TOK_STRING.
 */
static int
lex_hex_string(fz_stream *f, pdf_lexbuf *lb)
{
	char *out = lb->scratch;
	char *limit = out + lb->size;
	int high = 0, have_high = 0;
	int c;

	for (;;)
	{
		if (out == limit)
		{
			out += pdf_lexbuf_grow(lb);
			limit = lb->scratch + lb->size;
		}
		c = fz_read_byte(f);
		switch (c)
		{
		case '>':
		case EOF:
			lb->len = out - lb->scratch;
			return PDF_TOK_STRING;
		case IS_WHITE:
			break;
		case IS_HEX:
			/* digits alternate between high and low nibble */
			if (have_high)
				*out++ = high * 16 + unhex(c);
			else
				high = unhex(c);
			have_high = !have_high;
			break;
		default:
			fz_warn(f->ctx, "ignoring invalid character in hex string");
			break;
		}
	}
}
/*
 * Lex the body of a hex string into buf, which holds n bytes.
 *
 * Whitespace is skipped and other non-hex bytes are ignored with a
 * warning. Lexing stops at '>', at end of file, or when buf is full; a
 * trailing unpaired digit is dropped.
 *
 * Returns the number of decoded bytes.
 */
static int
lex_hex_string(fz_stream *f, char *buf, int n)
{
	char *out = buf;
	char *limit = buf + n;
	int high = 0, have_high = 0;
	int c;

	while (out < limit)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case '>':
		case EOF:
			return out - buf;
		case IS_WHITE:
			break;
		case IS_HEX:
			/* digits alternate between high and low nibble */
			if (have_high)
				*out++ = high * 16 + unhex(c);
			else
				high = unhex(c);
			have_high = !have_high;
			break;
		default:
			fz_warn("ignoring invalid character in hex string: '%c'", c);
			break;
		}
	}

	return out - buf;
}
Exemple #21
0
/*
 * Reposition stm.
 *
 * whence 0 is an absolute offset and whence 1 is relative to the current
 * position; whence 2 is only supported when the stream provides its own
 * seek callback. An absolute target that lies inside the currently
 * buffered data is reached by moving the read pointer alone. Streams
 * without a callback are "seeked" forwards by reading and discarding
 * bytes; seeking backwards only prints a message.
 */
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	if (stm->seek)
	{
		if (whence == 1)
		{
			offset = fz_tell(stm) + offset;
			whence = 0;
		}
		if (whence == 0)
		{
			/* distance from the target back to the end of the buffer */
			int dist = stm->pos - offset;
			if (dist >= 0 && dist <= stm->wp - stm->bp)
			{
				stm->rp = stm->wp - dist;
				stm->eof = 0;
				return;
			}
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
	}
	else if (whence != 2)
	{
		if (whence == 0)
			offset -= fz_tell(stm);
		if (offset < 0)
			printf("cannot seek backwards\n");
		/* dog slow, but rare enough */
		while (offset-- > 0)
		{
			/* Stop at end of data instead of spinning through the rest
			 * of the requested distance on a stream with nothing left. */
			if (fz_read_byte(stm) == EOF)
			{
				printf("seek failed\n");
				break;
			}
		}
	}
	else
		printf("cannot seek\n");
}
Exemple #22
0
/*
 * Test device image callback: decide whether image contains a pixel that
 * counts as colored under the device threshold.
 *
 * Nothing is done if color was already detected, or if the image has no
 * colorspace or is device gray. Images with a compressed buffer and 8 bits
 * per component are scanned straight from the decoded stream; everything
 * else is rendered to a pixmap first, where fully transparent pixels are
 * ignored.
 *
 * On detection *is_color is set and FZ_IGNORE_IMAGE is added to the device
 * hints. Three of the four scanning paths then throw FZ_ERROR_ABORT to
 * stop interpretation; the stream path for non-RGB images only stops
 * scanning.
 */
static void
fz_test_fill_image(fz_context *ctx, fz_device *dev, fz_image *image, const fz_matrix *ctm, float alpha)
{
	fz_test_device *t = (fz_test_device*)dev;

	fz_pixmap *pix;
	unsigned int count, i, k;
	unsigned char *s;

	if (*t->is_color || !image->colorspace || image->colorspace == fz_device_gray(ctx))
		return;

	if (image->buffer && image->bpc == 8)
	{
		/* NOTE(review): if fz_read_byte or the converter throws in this
		 * branch, nothing visible here drops the stream - confirm. */
		fz_stream *stream = fz_open_compressed_buffer(ctx, image->buffer);
		count = (unsigned int)image->w * (unsigned int)image->h;
		if (image->colorspace == fz_device_rgb(ctx))
		{
			int threshold_u8 = t->threshold * 255;
			for (i = 0; i < count; i++)
			{
				int r = fz_read_byte(ctx, stream);
				int g = fz_read_byte(ctx, stream);
				int b = fz_read_byte(ctx, stream);
				if (is_rgb_color_u8(threshold_u8, r, g, b))
				{
					*t->is_color = 1;
					dev->hints |= FZ_IGNORE_IMAGE;
					fz_drop_stream(ctx, stream);
					fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
					break;
				}
			}
		}
		else
		{
			fz_color_converter cc;
			unsigned int n = (unsigned int)image->n;

			fz_init_cached_color_converter(ctx, &cc, fz_device_rgb(ctx), image->colorspace);
			for (i = 0; i < count; i++)
			{
				float cs[FZ_MAX_COLORS];
				float ds[FZ_MAX_COLORS];

				for (k = 0; k < n; k++)
					cs[k] = fz_read_byte(ctx, stream) / 255.0f;

				cc.convert(ctx, &cc, ds, cs);

				if (is_rgb_color(t->threshold, ds[0], ds[1], ds[2]))
				{
					*t->is_color = 1;
					dev->hints |= FZ_IGNORE_IMAGE;
					break;
				}
			}
			fz_fin_cached_color_converter(ctx, &cc);
		}
		fz_drop_stream(ctx, stream);
		return;
	}

	pix = fz_new_pixmap_from_image(ctx, image, 0, 0);
	if (pix == NULL) /* Should never happen really, but... */
		return;

	count = (unsigned int)pix->w * (unsigned int)pix->h;
	s = pix->samples;

	if (pix->colorspace == fz_device_rgb(ctx))
	{
		int threshold_u8 = t->threshold * 255;
		for (i = 0; i < count; i++)
		{
			/* s[3] is the alpha sample: skip fully transparent pixels */
			if (s[3] != 0 && is_rgb_color_u8(threshold_u8, s[0], s[1], s[2]))
			{
				*t->is_color = 1;
				dev->hints |= FZ_IGNORE_IMAGE;
				fz_drop_pixmap(ctx, pix);
				fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
				break;
			}
			s += 4;
		}
	}
	else
	{
		fz_color_converter cc;
		unsigned int n = (unsigned int)pix->n-1;

		fz_init_cached_color_converter(ctx, &cc, fz_device_rgb(ctx), pix->colorspace);
		for (i = 0; i < count; i++)
		{
			float cs[FZ_MAX_COLORS];
			float ds[FZ_MAX_COLORS];

			for (k = 0; k < n; k++)
				cs[k] = (*s++) / 255.0f;
			/* the sample after the color components is alpha */
			if (*s++ == 0)
				continue;

			cc.convert(ctx, &cc, ds, cs);

			if (is_rgb_color(t->threshold, ds[0], ds[1], ds[2]))
			{
				*t->is_color = 1;
				dev->hints |= FZ_IGNORE_IMAGE;
				/* The throw below skips the cleanup after the loop, so
				 * the cached converter has to be finalised here as well
				 * or it is leaked. */
				fz_fin_cached_color_converter(ctx, &cc);
				fz_drop_pixmap(ctx, pix);
				fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
				break;
			}
		}
		fz_fin_cached_color_converter(ctx, &cc);
	}

	fz_drop_pixmap(ctx, pix);
}
Exemple #23
0
/*
 * Parse an indirect object definition ("num gen obj ...") from file.
 *
 * The header tokens must be two integers followed by the obj keyword,
 * otherwise an error is thrown. The object body may be an array, a
 * dictionary, a name, a real, a string, a boolean, null, an integer or an
 * indirect reference ("a b R"). An immediate endobj yields a null object.
 *
 * After the body a stream keyword or an endobj keyword is expected;
 * anything else only produces a warning.
 *
 * onum, ogen, ostmofs: optional outputs (each may be NULL) receiving the
 * object number, the generation number and the file position just after
 * the stream keyword and its end of line (0 when there is no stream).
 *
 * Returns the parsed object.
 */
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref,
                  fz_stream *file, pdf_lexbuf *buf,
                  int *onum, int *ogen, int *ostmofs)
{
    pdf_obj *obj = NULL;
    int num = 0, gen = 0, stm_ofs;
    int tok;
    int a, b;
    fz_context *ctx = file->ctx;

    fz_var(obj);

    tok = pdf_lex(file, buf);
    /* RJW: cannot parse indirect object (%d %d R)", num, gen */
    /* num and gen are still 0 here, so this message always prints "0 0" */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected object number (%d %d R)", num, gen);
    num = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
    gen = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_OBJ)
        fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

    /* first token of the object body decides how it is parsed */
    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */

    switch (tok)
    {
    case PDF_TOK_OPEN_ARRAY:
        obj = pdf_parse_array(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_OPEN_DICT:
        obj = pdf_parse_dict(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_NAME:
        obj = fz_new_name(ctx, buf->scratch);
        break;
    case PDF_TOK_REAL:
        obj = pdf_new_real(ctx, buf->f);
        break;
    case PDF_TOK_STRING:
        obj = pdf_new_string(ctx, buf->scratch, buf->len);
        break;
    case PDF_TOK_TRUE:
        obj = pdf_new_bool(ctx, 1);
        break;
    case PDF_TOK_FALSE:
        obj = pdf_new_bool(ctx, 0);
        break;
    case PDF_TOK_NULL:
        obj = pdf_new_null(ctx);
        break;

    case PDF_TOK_INT:
        /* Either a plain integer, or the start of a reference "a b R".
         * One token of lookahead is needed to tell the two apart. */
        a = buf->i;
        tok = pdf_lex(file, buf);
        /* "cannot parse indirect object (%d %d R)", num, gen */
        if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
        {
            /* the lookahead already is the trailing keyword: skip the
             * extra pdf_lex call below */
            obj = pdf_new_int(ctx, a);
            goto skip;
        }
        if (tok == PDF_TOK_INT)
        {
            b = buf->i;
            tok = pdf_lex(file, buf);
            /* RJW: "cannot parse indirect object (%d %d R)", num, gen); */
            if (tok == PDF_TOK_R)
            {
                obj = pdf_new_indirect(ctx, a, b, xref);
                break;
            }
        }
        /* NOTE(review): the next case label is only safe if fz_throw
         * never returns - confirm */
        fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);

    case PDF_TOK_ENDOBJ:
        /* empty object body */
        obj = pdf_new_null(ctx);
        goto skip;

    default:
        fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
    }

    /* read the keyword following the body; obj must not leak if this fails */
    fz_try(ctx)
    {
        tok = pdf_lex(file, buf);
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(obj);
        fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
    }

skip:
    if (tok == PDF_TOK_STREAM)
    {
        /* skip spaces and the end of line after the stream keyword; the
         * stream data starts at the position reached afterwards */
        int c = fz_read_byte(file);
        while (c == ' ')
            c = fz_read_byte(file);
        if (c == '\r')
        {
            c = fz_peek_byte(file);
            if (c != '\n')
                fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
            else
                fz_read_byte(file);
        }
        stm_ofs = fz_tell(file);
    }
    else if (tok == PDF_TOK_ENDOBJ)
    {
        stm_ofs = 0;
    }
    else
    {
        fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
        stm_ofs = 0;
    }

    if (onum) *onum = num;
    if (ogen) *ogen = gen;
    if (ostmofs) *ostmofs = stm_ofs;
    return obj;
}
Exemple #24
0
/*
 * Read a 16-bit little-endian value from file.
 *
 * The bytes are combined in unsigned arithmetic, because left-shifting the
 * negative EOF value in a signed int would be undefined behavior. End of
 * file is not reported separately.
 */
static inline int getshort(fz_stream *file)
{
	unsigned int a = (unsigned int)fz_read_byte(file);
	unsigned int b = (unsigned int)fz_read_byte(file);
	return (int)(a | b << 8);
}
Exemple #25
0
/*
 * Parse an indirect object definition ("num gen obj ...") from file.
 *
 * The header tokens must be two integers followed by the obj keyword. If
 * one of them is missing, *try_repair (when given) is set to 1 and an
 * error is thrown. The object body may be an array, a dictionary, a name,
 * a real, a string, a boolean, null, an integer or an indirect reference
 * ("a b R"). An immediate endobj yields a null object.
 *
 * After the body a stream keyword or an endobj keyword is expected;
 * anything else only produces a warning.
 *
 * onum, ogen, ostmofs: optional outputs (each may be NULL) receiving the
 * object number, the generation number and the file position just after
 * the stream keyword and its end of line (0 when there is no stream).
 *
 * Returns the parsed object.
 */
pdf_obj *
pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
	fz_stream *file, pdf_lexbuf *buf,
	int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair)
{
	pdf_obj *obj = NULL;
	int num = 0, gen = 0;
	fz_off_t stm_ofs;
	pdf_token tok;
	fz_off_t a, b;

	fz_var(obj);

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		/* a broken object header is flagged to the caller before throwing */
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number");
	}
	num = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num);
	}
	gen = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_OBJ)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen);
	}

	/* first token of the object body decides how it is parsed */
	tok = pdf_lex(ctx, file, buf);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		obj = pdf_parse_array(ctx, doc, file, buf);
		break;

	case PDF_TOK_OPEN_DICT:
		obj = pdf_parse_dict(ctx, doc, file, buf);
		break;

	case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break;
	case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break;
	case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break;
	case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break;
	case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break;
	case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break;

	case PDF_TOK_INT:
		/* Either a plain integer, or the start of a reference "a b R".
		 * One token of lookahead is needed to tell the two apart. */
		a = buf->i;
		tok = pdf_lex(ctx, file, buf);

		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* the lookahead already is the trailing keyword: skip the
			 * extra pdf_lex call below */
			obj = pdf_new_int_offset(ctx, doc, a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = buf->i;
			tok = pdf_lex(ctx, file, buf);
			if (tok == PDF_TOK_R)
			{
				obj = pdf_new_indirect(ctx, doc, a, b);
				break;
			}
		}
		/* NOTE(review): the next case label is only safe if fz_throw
		 * never returns - confirm */
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		/* empty object body */
		obj = pdf_new_null(ctx, doc);
		goto skip;

	default:
		fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen);
	}

	/* read the keyword following the body; obj must not leak if this fails */
	fz_try(ctx)
	{
		tok = pdf_lex(ctx, file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, obj);
		fz_rethrow(ctx);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		/* skip spaces and the end of line after the stream keyword; the
		 * stream data starts at the position reached afterwards */
		int c = fz_read_byte(ctx, file);
		while (c == ' ')
			c = fz_read_byte(ctx, file);
		if (c == '\r')
		{
			c = fz_peek_byte(ctx, file);
			if (c != '\n')
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(ctx, file);
		}
		stm_ofs = fz_tell(ctx, file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	return obj;
}
Exemple #26
0
/*
 * Lex a number whose first character c has already been read.
 *
 * c is '+', '-', '.' or a digit. Digits before a dot accumulate in i.
 * Digits after a dot accumulate in n, with d the matching power of ten.
 * The first byte that does not belong to the number is pushed back,
 * except at end of file.
 *
 * Returns PDF_TOK_INT with the value in buf->i when no dot was seen,
 * otherwise PDF_TOK_REAL with the value in buf->f.
 */
static int
lex_number(fz_stream *f, pdf_lexbuf *buf, int c)
{
	int neg = 0;
	int i = 0;
	int n;
	int d;
	float v;

	/* Initially we might have +, -, . or a digit */
	switch (c)
	{
	case '.':
		goto loop_after_dot;
	case '-':
		neg = 1;
		break;
	case '+':
		break;
	default: /* Must be a digit */
		i = c - '0';
		break;
	}

	/* integer part: digits until a dot or the end of the number */
	while (1)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case '.':
			goto loop_after_dot;
		case RANGE_0_9:
			i = 10*i + c - '0';
			/* FIXME: Need overflow check here; do we care? */
			break;
		default:
			/* not part of the number: give it back to the caller */
			fz_unread_byte(f);
			/* Fallthrough */
		case EOF:
			if (neg)
				i = -i;
			buf->i = i;
			return PDF_TOK_INT;
		}
	}

	/* In here, we've seen a dot, so can accept just digits */
loop_after_dot:
	n = 0;
	d = 1;
	while (1)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case RANGE_0_9:
			/* stop accumulating before d *= 10 could overflow */
			if (d >= INT_MAX/10)
				goto underflow;
			n = n*10 + (c - '0');
			d *= 10;
			break;
		default:
			fz_unread_byte(f);
			/* Fallthrough */
		case EOF:
			v = (float)i + ((float)n / (float)d);
			if (neg)
				v = -v;
			buf->f = v;
			return PDF_TOK_REAL;
		}
	}

underflow:
	/* Ignore any digits after here, because they are too small */
	while (1)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case RANGE_0_9:
			break;
		default:
			fz_unread_byte(f);
			/* Fallthrough */
		case EOF:
			v = (float)i + ((float)n / (float)d);
			if (neg)
				v = -v;
			buf->f = v;
			return PDF_TOK_REAL;
		}
	}
}
Exemple #27
0
/*
 * Lex a literal string (the opening parenthesis has already been consumed
 * by the caller) into the scratch buffer of lb.
 *
 * Nested parentheses are balanced and kept in the output. Backslash
 * escapes are decoded: the single letter escapes, escaped parentheses and
 * backslash, octal escapes of up to three digits, and a backslash followed
 * by an end of line, which produces no output. Lexing stops at the
 * matching ')' or at end of file. The decoded length is stored in lb->len.
 *
 * Always returns PDF_TOK_STRING.
 */
static int
lex_string(fz_stream *f, pdf_lexbuf *lb)
{
	char *s = lb->scratch;
	char *e = s + lb->size;
	int bal = 1;
	int oct;
	int c;

	while (1)
	{
		/* every pass stores at most one byte, so one check per pass is
		 * enough to keep s inside the buffer */
		if (s == e)
		{
			s += pdf_lexbuf_grow(lb);
			e = lb->scratch + lb->size;
		}
		c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			goto end;
		case '(':
			bal++;
			*s++ = c;
			break;
		case ')':
			bal --;
			/* the parenthesis matching the opening one ends the string */
			if (bal == 0)
				goto end;
			*s++ = c;
			break;
		case '\\':
			c = fz_read_byte(f);
			switch (c)
			{
			case EOF:
				goto end;
			case 'n':
				*s++ = '\n';
				break;
			case 'r':
				*s++ = '\r';
				break;
			case 't':
				*s++ = '\t';
				break;
			case 'b':
				*s++ = '\b';
				break;
			case 'f':
				*s++ = '\f';
				break;
			case '(':
				*s++ = '(';
				break;
			case ')':
				*s++ = ')';
				break;
			case '\\':
				*s++ = '\\';
				break;
			case RANGE_0_7:
				/* octal escape: one to three digits; the first byte that
				 * is not an octal digit is pushed back */
				oct = c - '0';
				c = fz_read_byte(f);
				if (c >= '0' && c <= '7')
				{
					oct = oct * 8 + (c - '0');
					c = fz_read_byte(f);
					if (c >= '0' && c <= '7')
						oct = oct * 8 + (c - '0');
					else if (c != EOF)
						fz_unread_byte(f);
				}
				else if (c != EOF)
					fz_unread_byte(f);
				*s++ = oct;
				break;
			case '\n':
				/* backslash + LF: line continuation, nothing stored */
				break;
			case '\r':
				/* backslash + CR or CRLF: line continuation */
				c = fz_read_byte(f);
				if ((c != '\n') && (c != EOF))
					fz_unread_byte(f);
				break;
			default:
				/* unknown escape: keep the character, drop the backslash */
				*s++ = c;
			}
			break;
		default:
			*s++ = c;
			break;
		}
	}
end:
	lb->len = s - lb->scratch;
	return PDF_TOK_STRING;
}
Exemple #28
0
/*
 * Refill the stream buffer with decoded JPEG (DCT) sample data.
 *
 * The first call sets up the libjpeg decompressor: error handling, the
 * source manager callbacks, optional separate JPEG tables, the color
 * transform and the output scaling. It also allocates a buffer for one
 * scanline. Every call then copies leftover bytes of a partially consumed
 * scanline and decodes further scanlines until the buffer holds max bytes
 * (capped to the size of the state buffer) or the image is complete.
 *
 * Returns the first byte of the refilled buffer, or EOF if no data was
 * produced.
 */
static int
next_dctd(fz_stream *stm, int max)
{
	fz_dctd *state = stm->state;
	j_decompress_ptr cinfo = &state->cinfo;
	unsigned char *p = state->buffer;
	unsigned char *ep;

	if (max > sizeof(state->buffer))
		max = sizeof(state->buffer);
	ep = state->buffer + max;

	/* Error landing point. NOTE(review): presumably error_exit longjmps
	 * here with the message stored in state->msg - confirm. */
	if (setjmp(state->jb))
	{
		/* give the bytes libjpeg has not consumed back to the source stream */
		if (cinfo->src)
			state->curr_stm->rp = state->curr_stm->wp - cinfo->src->bytes_in_buffer;
		fz_throw(stm->ctx, FZ_ERROR_GENERIC, "jpeg error: %s", state->msg);
	}

	/* one-time decoder setup on the first call */
	if (!state->init)
	{
		int c;
		cinfo->client_data = state;
		cinfo->err = &state->errmgr;
		jpeg_std_error(cinfo->err);
		cinfo->err->error_exit = error_exit;

		fz_dct_mem_init(state);

		jpeg_create_decompress(cinfo);
		state->init = 1;

		/* Skip over any stray returns at the start of the stream */
		while ((c = fz_peek_byte(state->chain)) == '\n' || c == '\r')
			(void)fz_read_byte(state->chain);

		cinfo->src = &state->srcmgr;
		cinfo->src->init_source = init_source;
		cinfo->src->fill_input_buffer = fill_input_buffer;
		cinfo->src->skip_input_data = skip_input_data;
		cinfo->src->resync_to_restart = jpeg_resync_to_restart;
		cinfo->src->term_source = term_source;

		/* optionally load additional JPEG tables first */
		if (state->jpegtables)
		{
			state->curr_stm = state->jpegtables;
			cinfo->src->next_input_byte = state->curr_stm->rp;
			cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
			jpeg_read_header(cinfo, 0);
			state->curr_stm->rp = state->curr_stm->wp - state->cinfo.src->bytes_in_buffer;
			state->curr_stm = state->chain;
		}

		/* point libjpeg at whatever the current stream has buffered */
		cinfo->src->next_input_byte = state->curr_stm->rp;
		cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;

		jpeg_read_header(cinfo, 1);

		/* default value if ColorTransform is not set */
		if (state->color_transform == -1)
		{
			if (state->cinfo.num_components == 3)
				state->color_transform = 1;
			else
				state->color_transform = 0;
		}

		/* an Adobe marker in the data overrides the value chosen above */
		if (cinfo->saw_Adobe_marker)
			state->color_transform = cinfo->Adobe_transform;

		/* Guess the input colorspace, and set output colorspace accordingly */
		switch (cinfo->num_components)
		{
		case 3:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCbCr;
			else
				cinfo->jpeg_color_space = JCS_RGB;
			break;
		case 4:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCCK;
			else
				cinfo->jpeg_color_space = JCS_CMYK;
			break;
		}

		/* request output scaled down by a factor of 2^l2factor */
		cinfo->scale_num = 8/(1<<state->l2factor);
		cinfo->scale_denom = 8;

		jpeg_start_decompress(cinfo);

		state->stride = cinfo->output_width * cinfo->output_components;
		state->scanline = fz_malloc(state->ctx, state->stride);
		state->rp = state->scanline;
		state->wp = state->scanline;
	}

	/* first hand out what is left of a partially consumed scanline */
	while (state->rp < state->wp && p < ep)
		*p++ = *state->rp++;

	while (p < ep)
	{
		if (cinfo->output_scanline == cinfo->output_height)
			break;

		if (p + state->stride <= ep)
		{
			/* a whole scanline fits: decode straight into the output */
			jpeg_read_scanlines(cinfo, &p, 1);
			p += state->stride;
		}
		else
		{
			/* otherwise decode into the scanline buffer and copy what fits */
			jpeg_read_scanlines(cinfo, &state->scanline, 1);
			state->rp = state->scanline;
			state->wp = state->scanline + state->stride;
		}

		while (state->rp < state->wp && p < ep)
			*p++ = *state->rp++;
	}
	stm->rp = state->buffer;
	stm->wp = p;
	stm->pos += (p - state->buffer);
	if (p == stm->rp)
		return EOF;

	return *stm->rp++;
}
Exemple #29
0
/*
 * Decode JPEG (DCT) sample data into buf, storing at most len bytes.
 *
 * The first call sets up the libjpeg decompressor: error handling, the
 * source manager callbacks, optional separate JPEG tables, the color
 * transform and the output scaling. It also allocates a buffer for one
 * scanline. Every call then copies leftover bytes of a partially consumed
 * scanline and decodes further scanlines until buf is full or the image
 * is complete.
 *
 * Returns the number of bytes stored in buf.
 */
static int
read_dctd(fz_stream *stm, unsigned char *buf, int len)
{
	fz_dctd *state = stm->state;
	j_decompress_ptr cinfo = &state->cinfo;
	unsigned char *p = buf;
	unsigned char *ep = buf + len;

	/* Error landing point. NOTE(review): presumably error_exit longjmps
	 * here with the message stored in state->msg - confirm. */
	if (setjmp(state->jb))
	{
		/* give the bytes libjpeg has not consumed back to the source stream */
		if (cinfo->src)
			state->curr_stm->rp = state->curr_stm->wp - cinfo->src->bytes_in_buffer;
		fz_throw(stm->ctx, FZ_ERROR_GENERIC, "jpeg error: %s", state->msg);
	}

	/* one-time decoder setup on the first call */
	if (!state->init)
	{
		int c;
		cinfo->client_data = state;
		cinfo->err = &state->errmgr;
		jpeg_std_error(cinfo->err);
		cinfo->err->error_exit = error_exit;
		jpeg_create_decompress(cinfo);
		state->init = 1;

		/* Skip over any stray returns at the start of the stream */
		while ((c = fz_peek_byte(state->chain)) == '\n' || c == '\r')
			(void)fz_read_byte(state->chain);

		cinfo->src = &state->srcmgr;
		cinfo->src->init_source = init_source;
		cinfo->src->fill_input_buffer = fill_input_buffer;
		cinfo->src->skip_input_data = skip_input_data;
		cinfo->src->resync_to_restart = jpeg_resync_to_restart;
		cinfo->src->term_source = term_source;

		/* optionally load additional JPEG tables first */
		if (state->jpegtables)
		{
			state->curr_stm = state->jpegtables;
			cinfo->src->next_input_byte = state->curr_stm->rp;
			cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
			jpeg_read_header(cinfo, 0);
			state->curr_stm->rp = state->curr_stm->wp - state->cinfo.src->bytes_in_buffer;
			state->curr_stm = state->chain;
		}

		/* point libjpeg at whatever the current stream has buffered */
		cinfo->src->next_input_byte = state->curr_stm->rp;
		cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;

		jpeg_read_header(cinfo, 1);

		/* Disabled speed-up for jpeg decoding. The two settings below are
		 * part of this comment and therefore not compiled.
		 * cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1960
		cinfo->dct_method = JDCT_FASTEST;
		cinfo->do_fancy_upsampling = FALSE;
		*/

		/* default value if ColorTransform is not set */
		if (state->color_transform == -1)
		{
			if (state->cinfo.num_components == 3)
				state->color_transform = 1;
			else
				state->color_transform = 0;
		}

		/* an Adobe marker in the data overrides the value chosen above */
		if (cinfo->saw_Adobe_marker)
			state->color_transform = cinfo->Adobe_transform;

		/* Guess the input colorspace, and set output colorspace accordingly */
		switch (cinfo->num_components)
		{
		case 3:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCbCr;
			else
				cinfo->jpeg_color_space = JCS_RGB;
			break;
		case 4:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCCK;
			else
				cinfo->jpeg_color_space = JCS_CMYK;
			break;
		}

		/* request output scaled down by a factor of 2^l2factor */
		cinfo->scale_num = 8/(1<<state->l2factor);
		cinfo->scale_denom = 8;

		jpeg_start_decompress(cinfo);

		state->stride = cinfo->output_width * cinfo->output_components;
		state->scanline = fz_malloc(state->ctx, state->stride);
		state->rp = state->scanline;
		state->wp = state->scanline;
	}

	/* first hand out what is left of a partially consumed scanline */
	while (state->rp < state->wp && p < ep)
		*p++ = *state->rp++;

	while (p < ep)
	{
		if (cinfo->output_scanline == cinfo->output_height)
			break;

		if (p + state->stride <= ep)
		{
			/* a whole scanline fits: decode straight into the output */
			jpeg_read_scanlines(cinfo, &p, 1);
			p += state->stride;
		}
		else
		{
			/* otherwise decode into the scanline buffer and copy what fits */
			jpeg_read_scanlines(cinfo, &state->scanline, 1);
			state->rp = state->scanline;
			state->wp = state->scanline + state->stride;
		}

		while (state->rp < state->wp && p < ep)
			*p++ = *state->rp++;
	}

	return p - buf;
}
Exemple #30
0
/*
 * Read the next token from f.
 *
 * Whitespace and comments are skipped. Names, strings and numbers leave
 * their value in buf. A stray ')' or '>' produces a warning and lexing
 * continues. Number-like characters glued to the end of a number are
 * dropped with a warning. Returns the token, or PDF_TOK_EOF at end of
 * input.
 */
pdf_token
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
			{
				return PDF_TOK_OPEN_DICT;
			}
			/* Not "<<": give the byte back to the hex string lexer. EOF is
			 * not a byte that was read, so there is nothing to push back. */
			if (c != EOF)
				fz_unread_byte(f);
			return lex_hex_string(f, buf);
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
			{
				return PDF_TOK_EOF;
			}
			/* only the stray '>' is discarded, not the byte after it */
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			/* cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2231 */
			{
				int tok = lex_number(f, buf, c);
				/* swallow number characters glued to the end of the number */
				while (1)
				{
					c = fz_peek_byte(f);
					switch (c)
					{
					case IS_NUMBER:
						fz_warn(f->ctx, "ignoring invalid character after number: '%c'", c);
						fz_read_byte(f);
						continue;
					default:
						return tok;
					}
				}
			}
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}