/*
 * Read one line of text from stm into mem, which has room for n bytes
 * including the terminating NUL.
 *
 * A line ends at "\n", "\r", "\r\n" or end of file. The terminator is consumed
 * but not stored. At most n-1 bytes are stored; reading stops early when the
 * buffer is full, leaving the rest of the line in the stream.
 *
 * Returns mem, or NULL when end of file is reached before any data or line
 * terminator has been read.
 */
char *
fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, int n)
{
	char *s = mem;
	int c = EOF;

	while (n > 1)
	{
		c = fz_read_byte(ctx, stm);
		if (c == EOF)
			break;
		if (c == '\r')
		{
			/* Swallow the LF of a CRLF pair. The peeked value is deliberately
			 * not stored in c: a final empty line terminated by a bare CR at
			 * end of file must still be reported as a line, not as EOF. */
			if (fz_peek_byte(ctx, stm) == '\n')
				fz_read_byte(ctx, stm);
			break;
		}
		if (c == '\n')
			break;
		*s++ = c;
		n--;
	}

	/* Only terminate when the caller really gave us a buffer. */
	if (n > 0)
		*s = '\0';

	return (s == mem && c == EOF) ? NULL : mem;
}
/*
 * Parse an inline image: its dictionary, the image data that follows, and
 * the closing "EI" keyword.
 *
 * The dictionary is parsed from stm, one byte after it is consumed (plus the
 * LF of a CRLF pair), and the image is loaded with pdf_load_inline_image.
 * The stream is then scanned forward for an "EI" that is followed by end of
 * file, a byte <= 32, '<' or '/'. That following byte is only peeked.
 *
 * Returns the loaded image. Throws if no terminating "EI" is found; the
 * dictionary is always dropped, and the image is dropped on error.
 */
static fz_image *
parse_inline_image(fz_context *ctx, pdf_csi *csi, fz_stream *stm)
{
	pdf_document *doc = csi->doc;
	pdf_obj *rdb = csi->rdb;
	pdf_obj *obj = NULL;
	fz_image *img = NULL;
	int c;
	int saw_ei;

	fz_var(obj);
	fz_var(img);

	fz_try(ctx)
	{
		obj = pdf_parse_dict(ctx, doc, stm, &doc->lexbuf.base);

		/* Consume the single byte after the ID keyword; treat CRLF as one. */
		c = fz_read_byte(ctx, stm);
		if (c == '\r' && fz_peek_byte(ctx, stm) == '\n')
			fz_read_byte(ctx, stm);

		img = pdf_load_inline_image(ctx, doc, rdb, obj, stm);

		/* Hunt for the EI keyword that closes the image. */
		saw_ei = 0;
		c = fz_read_byte(ctx, stm);
		for (;;)
		{
			/* Skip ahead to the next candidate 'E'. */
			while (c != EOF && c != 'E')
				c = fz_read_byte(ctx, stm);
			if (c == EOF)
				break;

			c = fz_read_byte(ctx, stm);
			if (c == 'I')
			{
				/* "EI" only counts when followed by a terminator. */
				c = fz_peek_byte(ctx, stm);
				if (c == EOF || c <= 32 || c == '<' || c == '/')
				{
					saw_ei = 1;
					break;
				}
			}
			if (c == EOF)
				break;
		}

		if (!saw_ei)
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error after inline image");
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_drop_image(ctx, img);
		fz_rethrow(ctx);
	}

	return img;
}
/*
 * Read entries i0 .. i0+i1-1 of a cross-reference stream section from stm
 * into xref->table.
 *
 * Each entry is three big-endian fields of w0, w1 and w2 bytes. A field of
 * width zero takes a default: type 1 for the first field, 0 for the other
 * two. The first field selects the entry type ('f', 'n', 'o', or 0 for any
 * other value); the second is stored in ofs and the third in gen. Slots whose
 * type is already set are left untouched, but their bytes are still consumed.
 *
 * Returns fz_okay on success, or an error if the requested range is negative,
 * does not fit in the table, or the stream is at EOF when an entry starts.
 */
static fz_error
pdf_read_new_xref_section(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
{
	int i, n;

	if (i0 < 0 || i1 < 0)
		return fz_throw("negative xref stream entry index");

	/* Written as subtractions so that hostile values cannot overflow the
	 * way "i0 + i1 > xref->len" could. */
	if (i0 > xref->len || i1 > xref->len - i0)
		return fz_throw("xref stream has too many entries");

	for (i = i0; i < i0 + i1; i++)
	{
		int a = 0;
		int b = 0;
		int c = 0;

		if (fz_is_eof(stm))
			return fz_throw("truncated xref stream");

		for (n = 0; n < w0; n++)
			a = (a << 8) + fz_read_byte(stm);
		for (n = 0; n < w1; n++)
			b = (b << 8) + fz_read_byte(stm);
		for (n = 0; n < w2; n++)
			c = (c << 8) + fz_read_byte(stm);

		/* Never overwrite an entry that has already been filled in. */
		if (!xref->table[i].type)
		{
			int t = w0 ? a : 1;
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
			xref->table[i].ofs = w1 ? b : 0;
			xref->table[i].gen = w2 ? c : 0;
		}
	}

	return fz_okay;
}
/*
 * Read the next token from f.
 *
 * Whitespace and comments are skipped. Names, strings, hex strings and
 * numbers are lexed by their helpers, which leave the token value in buf.
 * Any other run of characters is read with lex_name and mapped through
 * pdf_token_from_keyword. A stray ')' or '>' produces a warning and is
 * skipped.
 *
 * Returns a PDF_TOK_* value; PDF_TOK_EOF at end of input.
 */
int
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
			{
				return PDF_TOK_OPEN_DICT;
			}
			else
			{
				/* Not "<<": the byte belongs to the hex string body. */
				fz_unread_byte(f);
				return lex_hex_string(f, buf);
			}
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
			{
				return PDF_TOK_EOF;
			}
			/* Only the stray '>' is discarded; the byte after it starts
			 * the next token and must not be swallowed. */
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			return lex_number(f, buf, c);
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
/*
 * Scan the samples of an 8 bits-per-component image, read from stream, for
 * any pixel that counts as colour.
 *
 * image->w * image->h pixels are examined. RGB images are tested directly
 * against the device threshold scaled to 0..255; other colourspaces are
 * converted to RGB one pixel at a time through a cached colour converter.
 *
 * On the first colour pixel *dev->is_color and dev->resolved are set and
 * scanning stops; if the device has no passthrough, FZ_ERROR_ABORT is thrown.
 * The colour converter is finalised on every path.
 */
static void
fz_test_fill_compressed_8bpc_image(fz_context *ctx, fz_test_device *dev, fz_image *image, fz_stream *stream, const fz_color_params *color_params)
{
	unsigned int npixels = (unsigned int)image->w * (unsigned int)image->h;
	unsigned int px;

	if (image->colorspace != fz_device_rgb(ctx))
	{
		fz_color_converter cc;
		unsigned int ncomp = (unsigned int)image->n;

		fz_init_cached_color_converter(ctx, &cc, NULL, fz_device_rgb(ctx), image->colorspace, color_params);

		fz_try(ctx)
		{
			for (px = 0; px < npixels; px++)
			{
				float src[FZ_MAX_COLORS];
				float rgb[FZ_MAX_COLORS];
				unsigned int comp;

				for (comp = 0; comp < ncomp; comp++)
					src[comp] = fz_read_byte(ctx, stream) / 255.0f;

				cc.convert(ctx, &cc, rgb, src);
				if (!is_rgb_color(dev->threshold, rgb[0], rgb[1], rgb[2]))
					continue;

				*dev->is_color = 1;
				dev->resolved = 1;
				if (dev->passthrough == NULL)
					fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
				break;
			}
		}
		fz_always(ctx)
			fz_fin_cached_color_converter(ctx, &cc);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}
	else
	{
		int threshold_u8 = dev->threshold * 255;

		for (px = 0; px < npixels; px++)
		{
			int red = fz_read_byte(ctx, stream);
			int green = fz_read_byte(ctx, stream);
			int blue = fz_read_byte(ctx, stream);

			if (!is_rgb_color_u8(threshold_u8, red, green, blue))
				continue;

			*dev->is_color = 1;
			dev->resolved = 1;
			if (dev->passthrough == NULL)
				fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
			break;
		}
	}
}
/*
 * Read a 32-bit little-endian value from file as four consecutive bytes.
 *
 * The bytes are combined in unsigned arithmetic: shifting a byte >= 0x80 left
 * by 24 bits, or shifting an EOF (-1) result at all, is undefined behaviour on
 * a signed int. End of file is not reported separately; an EOF result from
 * fz_read_byte simply contributes all-ones bits, as before.
 */
static inline int
getlong(fz_stream *file)
{
	unsigned int a = (unsigned int)fz_read_byte(file);
	unsigned int b = (unsigned int)fz_read_byte(file);
	unsigned int c = (unsigned int)fz_read_byte(file);
	unsigned int d = (unsigned int)fz_read_byte(file);

	return (int)(a | (b << 8) | (c << 16) | (d << 24));
}
/*
 * Read an unsigned 16-bit little-endian integer from stm.
 *
 * Both bytes are always requested from the stream; if either read reports
 * end of file an FZ_ERROR_GENERIC exception is thrown.
 */
uint16_t
fz_read_uint16_le(fz_context *ctx, fz_stream *stm)
{
	int lo = fz_read_byte(ctx, stm);
	int hi = fz_read_byte(ctx, stm);

	if (lo == EOF || hi == EOF)
		fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of file in int16");

	return (uint16_t)((uint32_t)lo | ((uint32_t)hi << 8));
}
/*
 * Read an unsigned 32-bit little-endian integer from stm.
 *
 * All four bytes are always requested from the stream; if any read reports
 * end of file an FZ_ERROR_GENERIC exception is thrown.
 */
uint32_t
fz_read_uint32_le(fz_context *ctx, fz_stream *stm)
{
	int b0 = fz_read_byte(ctx, stm);
	int b1 = fz_read_byte(ctx, stm);
	int b2 = fz_read_byte(ctx, stm);
	int b3 = fz_read_byte(ctx, stm);

	if (b0 == EOF || b1 == EOF || b2 == EOF || b3 == EOF)
		fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of file in int32");

	/* Least significant byte first. */
	return (uint32_t)b0
		| ((uint32_t)b1 << 8)
		| ((uint32_t)b2 << 16)
		| ((uint32_t)b3 << 24);
}
/*
 * Reposition stm. whence is 0 for an absolute offset, 1 for an offset
 * relative to the current position, and 2 for a request that is only passed
 * on to a seekable stream's own seek callback.
 *
 * Seekable streams (stm->seek set) delegate to that callback, with relative
 * seeks first converted to absolute ones, and have their eof flag cleared.
 * Other streams can only move forwards, which is done by reading and
 * discarding bytes; anything else just produces a warning.
 */
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	/* Reset bit reading */
	stm->avail = 0;

	if (stm->seek)
	{
		if (whence == 1)
		{
			offset += fz_tell(stm);
			whence = 0;
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
		return;
	}

	if (whence == 2)
	{
		fz_warn(stm->ctx, "cannot seek");
		return;
	}

	/* Turn an absolute target into a distance from the current position. */
	if (whence == 0)
		offset -= fz_tell(stm);
	if (offset < 0)
		fz_warn(stm->ctx, "cannot seek backwards");

	/* dog slow, but rare enough */
	for (; offset > 0; offset--)
	{
		if (fz_read_byte(stm) == EOF)
		{
			fz_warn(stm->ctx, "seek failed");
			break;
		}
	}
}
/*
 * Read the cross-reference section located at byte offset ofs of xref->file.
 *
 * Leading whitespace is skipped. The first remaining byte selects the reader:
 * 'x' uses pdf_read_old_xref and a decimal digit uses pdf_read_new_xref.
 * Anything else is an error.
 *
 * Returns fz_okay, or an error wrapping the reader's failure.
 */
static fz_error
pdf_read_xref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error error;
	int c;

	fz_seek(xref->file, ofs, 0);

	while (iswhite(fz_peek_byte(xref->file)))
		fz_read_byte(xref->file);

	c = fz_peek_byte(xref->file);
	if (c == 'x')
		error = pdf_read_old_xref(trailerp, xref, buf, cap);
	else if (c >= '0' && c <= '9')
		error = pdf_read_new_xref(trailerp, xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format");

	if (error)
		return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);
	return fz_okay;
}
/*
 * Read the trailer belonging to the cross-reference section at
 * xref->startxref.
 *
 * Leading whitespace is skipped. The first remaining byte selects the reader:
 * 'x' uses pdf_read_old_trailer and a decimal digit uses
 * pdf_read_new_trailer. Anything else is an error.
 *
 * Returns fz_okay, or an error wrapping the reader's failure.
 */
static fz_error
pdf_read_trailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int c;

	fz_seek(xref->file, xref->startxref, 0);

	while (iswhite(fz_peek_byte(xref->file)))
		fz_read_byte(xref->file);

	c = fz_peek_byte(xref->file);
	if (c == 'x')
		error = pdf_read_old_trailer(xref, buf, cap);
	else if (c >= '0' && c <= '9')
		error = pdf_read_new_trailer(xref, buf, cap);
	else
		return fz_throw("cannot recognize xref format: '%c'", c);

	if (error)
		return fz_rethrow(error, "cannot read trailer");
	return fz_okay;
}
/*
 * Skip the remainder of a comment: consume bytes from f up to and including
 * the first line feed (octal 012) or carriage return (octal 015), or until
 * end of file.
 */
static void
lex_comment(fz_stream *f)
{
	for (;;)
	{
		int c = fz_read_byte(f);
		if (c == '\012' || c == '\015' || c == EOF)
			return;
	}
}
/*
 * Refill callback for the ASCII hex decode filter.
 *
 * Decodes up to max bytes (capped at the size of state->buffer) from the
 * chained stream into state->buffer. Pairs of hex digits form one byte,
 * whitespace is ignored, and '>' ends the data: a pending single digit is
 * emitted as the high nibble and state->eod is set. Any other character
 * throws.
 *
 * On return stm->rp/wp delimit the decoded bytes and stm->pos has advanced by
 * their count. Returns the first decoded byte (already consumed), or EOF if
 * nothing was produced.
 */
static int
next_ahxd(fz_stream *stm, int max)
{
	fz_ahxd *state = stm->state;
	unsigned char *out = state->buffer;
	unsigned char *limit;
	int hi = 0;
	int have_hi = 0;

	if (max > sizeof(state->buffer))
		max = sizeof(state->buffer);
	limit = out + max;

	while (out < limit && !state->eod)
	{
		int c = fz_read_byte(state->chain);
		if (c < 0)
			break;

		if (ishex(c))
		{
			if (have_hi)
			{
				*out++ = (hi << 4) | unhex(c);
				have_hi = 0;
			}
			else
			{
				hi = unhex(c);
				have_hi = 1;
			}
		}
		else if (c == '>')
		{
			/* Odd digit count: the missing low nibble is zero. */
			if (have_hi)
				*out++ = (hi << 4);
			state->eod = 1;
			break;
		}
		else if (!iswhite(c))
		{
			fz_throw(stm->ctx, FZ_ERROR_GENERIC, "bad data in ahxd: '%c'", c);
		}
	}

	stm->rp = state->buffer;
	stm->wp = out;
	stm->pos += out - state->buffer;

	if (stm->rp == out)
		return EOF;
	return *stm->rp++;
}
/*
 * Skip a run of whitespace in f. The first byte that is not whitespace is
 * pushed back, unless it is EOF.
 */
static void
lex_white(fz_stream *f)
{
	int c = fz_read_byte(f);

	while (c <= 32 && iswhite(c))
		c = fz_read_byte(f);

	if (c != EOF)
		fz_unread_byte(f);
}
/*
 * Read an unsigned 64-bit little-endian integer from stm.
 *
 * All eight bytes are always requested from the stream; if any read reports
 * end of file an FZ_ERROR_GENERIC exception is thrown.
 */
uint64_t
fz_read_uint64_le(fz_context *ctx, fz_stream *stm)
{
	uint64_t value = 0;
	int hit_eof = 0;
	int k;

	/* Byte k supplies bits 8k .. 8k+7. */
	for (k = 0; k < 8; k++)
	{
		int byte = fz_read_byte(ctx, stm);
		if (byte == EOF)
			hit_eof = 1;
		else
			value |= (uint64_t)byte << (8 * k);
	}

	if (hit_eof)
		fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of file in int64");

	return value;
}
/*
 * Read one line of text from stm into mem, which has room for n bytes
 * including the terminating NUL.
 *
 * A line ends at "\n", "\r", "\r\n" or end of file. The terminator is consumed
 * but not stored. At most n-1 bytes are stored; reading stops early when the
 * buffer is full.
 */
void
fz_read_line(fz_stream *stm, char *mem, int n)
{
	char *out = mem;
	int room;

	/* room counts the bytes still free, including the one kept for the NUL. */
	for (room = n; room > 1; room--)
	{
		int c = fz_read_byte(stm);

		if (c == EOF || c == '\n')
			break;
		if (c == '\r')
		{
			/* Treat CRLF as a single terminator. */
			if (fz_peek_byte(stm) == '\n')
				fz_read_byte(stm);
			break;
		}
		*out++ = c;
	}

	if (room)
		*out = '\0';
}
/*
 * Read a NUL-terminated string from stm into buffer, which holds len bytes.
 * The terminating NUL is stored too.
 *
 * Throws FZ_ERROR_GENERIC if the buffer fills before the terminator is seen,
 * or if the stream ends first.
 */
void
fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len)
{
	char *out = buffer;
	int remaining = len;

	for (;;)
	{
		int c;

		if (remaining <= 0)
			fz_throw(ctx, FZ_ERROR_GENERIC, "Buffer overrun reading null terminated string");

		c = fz_read_byte(ctx, stm);
		if (c == EOF)
			fz_throw(ctx, FZ_ERROR_GENERIC, "EOF reading null terminated string");

		*out++ = c;
		remaining--;

		if (c == 0)
			return;
	}
}
/*
 * Read callback for the ASCII hex decode filter.
 *
 * Decodes up to len bytes from the chained stream into buf. Pairs of hex
 * digits form one byte, whitespace is ignored, and '>' ends the data: a
 * pending single digit is emitted as the high nibble and state->eod is set.
 * Any other character throws.
 *
 * Returns the number of bytes written to buf.
 */
static int
read_ahxd(fz_stream *stm, unsigned char *buf, int len)
{
	fz_ahxd *state = stm->state;
	unsigned char *out = buf;
	unsigned char *limit = buf + len;
	int hi = 0;
	int have_hi = 0;

	while (out < limit && !state->eod)
	{
		int c = fz_read_byte(state->chain);
		if (c < 0)
			break;

		if (ishex(c))
		{
			if (have_hi)
			{
				*out++ = (hi << 4) | unhex(c);
				have_hi = 0;
			}
			else
			{
				hi = unhex(c);
				have_hi = 1;
			}
		}
		else if (c == '>')
		{
			/* Odd digit count: the missing low nibble is zero. */
			if (have_hi)
				*out++ = (hi << 4);
			state->eod = 1;
		}
		else if (!iswhite(c))
		{
			fz_throw(stm->ctx, "bad data in ahxd: '%c'", c);
		}
	}

	return out - buf;
}
/*
 * Lex the body of a hexadecimal string (the opening '<' has already been
 * consumed) into lb->scratch, growing the buffer as needed.
 *
 * Whitespace is ignored and other non-hex characters are skipped with a
 * warning. The string ends at '>' or end of file. If '>' arrives after an odd
 * number of digits, the missing final digit is taken to be 0, as the PDF
 * specification requires.
 *
 * Sets lb->len to the decoded length and returns PDF_TOK_STRING.
 */
static int
lex_hex_string(fz_stream *f, pdf_lexbuf *lb)
{
	char *s = lb->scratch;
	char *e = s + lb->size;
	int a = 0, x = 0;
	int c;

	while (1)
	{
		/* Guarantee room for one output byte before reading anything. */
		if (s == e)
		{
			s += pdf_lexbuf_grow(lb);
			e = lb->scratch + lb->size;
		}
		c = fz_read_byte(f);
		switch (c)
		{
		case IS_WHITE:
			break;
		case IS_HEX:
			if (x)
			{
				*s++ = a * 16 + unhex(c);
				x = !x;
			}
			else
			{
				a = unhex(c);
				x = !x;
			}
			break;
		case '>':
			/* Pad a dangling high nibble with a zero low nibble. */
			if (x)
				*s++ = a * 16;
			goto end;
		case EOF:
			goto end;
		default:
			fz_warn(f->ctx, "ignoring invalid character in hex string");
		}
	}
end:
	lb->len = s - lb->scratch;
	return PDF_TOK_STRING;
}
/*
 * Lex the body of a hexadecimal string (the opening '<' has already been
 * consumed) into buf, which holds n bytes.
 *
 * Whitespace is ignored and other non-hex characters are skipped with a
 * warning. Lexing stops at '>', at end of file, or when buf is full. If '>'
 * arrives after an odd number of digits, the missing final digit is taken to
 * be 0, as the PDF specification requires.
 *
 * Returns the number of decoded bytes stored in buf.
 */
static int
lex_hex_string(fz_stream *f, char *buf, int n)
{
	char *s = buf;
	char *e = buf + n;
	int a = 0, x = 0;
	int c;

	/* s < e on entry to each iteration, so one byte may always be stored. */
	while (s < e)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case IS_WHITE:
			break;
		case IS_HEX:
			if (x)
			{
				*s++ = a * 16 + unhex(c);
				x = !x;
			}
			else
			{
				a = unhex(c);
				x = !x;
			}
			break;
		case '>':
			/* Pad a dangling high nibble with a zero low nibble. */
			if (x)
				*s++ = a * 16;
			goto end;
		case EOF:
			goto end;
		default:
			fz_warn("ignoring invalid character in hex string: '%c'", c);
		}
	}
end:
	return s - buf;
}
/*
 * Reposition stm. whence is 0 for an absolute offset, 1 for an offset
 * relative to the current position, and 2 for a request that is only passed
 * on to a seekable stream's own seek callback.
 *
 * Seekable streams (stm->seek set) first try to satisfy an absolute seek from
 * the data already buffered, and otherwise delegate to the callback. The eof
 * flag is cleared either way. Other streams can only move forwards, which is
 * done by reading and discarding bytes; anything else prints a diagnostic.
 */
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	if (stm->seek)
	{
		if (whence == 1)
		{
			offset = fz_tell(stm) + offset;
			whence = 0;
		}
		if (whence == 0)
		{
			/* stm->pos corresponds to wp, so dist is how far before the end
			 * of the buffered data the target lies. */
			int dist = stm->pos - offset;
			if (dist >= 0 && dist <= stm->wp - stm->bp)
			{
				stm->rp = stm->wp - dist;
				stm->eof = 0;
				return;
			}
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
	}
	else if (whence != 2)
	{
		if (whence == 0)
			offset -= fz_tell(stm);
		if (offset < 0)
			printf("cannot seek backwards\n");
		/* dog slow, but rare enough */
		while (offset-- > 0)
		{
			/* Stop at end of data: a bogus, huge offset must not make us
			 * spin through billions of failed reads. */
			if (fz_read_byte(stm) == EOF)
			{
				printf("seek failed\n");
				break;
			}
		}
	}
	else
		printf("cannot seek\n");
}
/*
 * Test-device image callback: decide whether image contains any pixel that
 * counts as colour.
 *
 * Nothing is done if colour has already been found, or if the image has no
 * colourspace or is device gray. 8 bits-per-component images with a
 * compressed buffer are scanned straight from the decompressed stream; all
 * others are rendered to a pixmap first, where pixels whose alpha is zero are
 * ignored.
 *
 * When a colour pixel is found, *t->is_color is set and FZ_IGNORE_IMAGE is
 * added to the device hints. Every path except the compressed non-RGB one
 * then throws FZ_ERROR_ABORT to stop interpretation; that path just stops
 * scanning. Resources owned here (stream, pixmap, colour converter) are
 * released before any of these throws.
 */
static void
fz_test_fill_image(fz_context *ctx, fz_device *dev, fz_image *image, const fz_matrix *ctm, float alpha)
{
	fz_test_device *t = (fz_test_device*)dev;
	fz_pixmap *pix;
	unsigned int count, i, k;
	unsigned char *s;

	if (*t->is_color || !image->colorspace || image->colorspace == fz_device_gray(ctx))
		return;

	if (image->buffer && image->bpc == 8)
	{
		fz_stream *stream = fz_open_compressed_buffer(ctx, image->buffer);
		count = (unsigned int)image->w * (unsigned int)image->h;
		if (image->colorspace == fz_device_rgb(ctx))
		{
			int threshold_u8 = t->threshold * 255;
			for (i = 0; i < count; i++)
			{
				int r = fz_read_byte(ctx, stream);
				int g = fz_read_byte(ctx, stream);
				int b = fz_read_byte(ctx, stream);
				if (is_rgb_color_u8(threshold_u8, r, g, b))
				{
					*t->is_color = 1;
					dev->hints |= FZ_IGNORE_IMAGE;
					fz_drop_stream(ctx, stream);
					fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
					break;
				}
			}
		}
		else
		{
			fz_color_converter cc;
			unsigned int n = (unsigned int)image->n;

			fz_init_cached_color_converter(ctx, &cc, fz_device_rgb(ctx), image->colorspace);
			for (i = 0; i < count; i++)
			{
				float cs[FZ_MAX_COLORS];
				float ds[FZ_MAX_COLORS];

				for (k = 0; k < n; k++)
					cs[k] = fz_read_byte(ctx, stream) / 255.0f;
				cc.convert(ctx, &cc, ds, cs);
				if (is_rgb_color(t->threshold, ds[0], ds[1], ds[2]))
				{
					*t->is_color = 1;
					dev->hints |= FZ_IGNORE_IMAGE;
					break;
				}
			}
			fz_fin_cached_color_converter(ctx, &cc);
		}
		fz_drop_stream(ctx, stream);
		return;
	}

	pix = fz_new_pixmap_from_image(ctx, image, 0, 0);
	if (pix == NULL) /* Should never happen really, but... */
		return;

	count = (unsigned int)pix->w * (unsigned int)pix->h;
	s = pix->samples;

	if (pix->colorspace == fz_device_rgb(ctx))
	{
		int threshold_u8 = t->threshold * 255;
		for (i = 0; i < count; i++)
		{
			/* s[3] is the alpha sample; fully transparent pixels are skipped. */
			if (s[3] != 0 && is_rgb_color_u8(threshold_u8, s[0], s[1], s[2]))
			{
				*t->is_color = 1;
				dev->hints |= FZ_IGNORE_IMAGE;
				fz_drop_pixmap(ctx, pix);
				fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
				break;
			}
			s += 4;
		}
	}
	else
	{
		fz_color_converter cc;
		unsigned int n = (unsigned int)pix->n-1;

		fz_init_cached_color_converter(ctx, &cc, fz_device_rgb(ctx), pix->colorspace);
		for (i = 0; i < count; i++)
		{
			float cs[FZ_MAX_COLORS];
			float ds[FZ_MAX_COLORS];

			for (k = 0; k < n; k++)
				cs[k] = (*s++) / 255.0f;
			/* The sample after the colour components is alpha. */
			if (*s++ == 0)
				continue;
			cc.convert(ctx, &cc, ds, cs);
			if (is_rgb_color(t->threshold, ds[0], ds[1], ds[2]))
			{
				*t->is_color = 1;
				dev->hints |= FZ_IGNORE_IMAGE;
				/* Finalise the converter before throwing, or it leaks. */
				fz_fin_cached_color_converter(ctx, &cc);
				fz_drop_pixmap(ctx, pix);
				fz_throw(ctx, FZ_ERROR_ABORT, "Page found as color; stopping interpretation");
				break;
			}
		}
		fz_fin_cached_color_converter(ctx, &cc);
	}

	fz_drop_pixmap(ctx, pix);
}
/*
 * Parse an indirect object definition of the form
 * "<num> <gen> obj <value> [stream | endobj]" from file.
 *
 * The value may be an array, dictionary, name, real, string, boolean, null,
 * integer, or an indirect reference "<a> <b> R". An immediate "endobj"
 * produces a null object.
 *
 * Outputs (each pointer may be NULL to ignore it):
 *   onum    - object number
 *   ogen    - generation number
 *   ostmofs - fz_tell() position just after the "stream" keyword and its
 *             line ending, or 0 when the object has no stream
 *
 * Returns the parsed object. Throws on a malformed header or value; a
 * missing "endobj"/"stream" keyword only produces a warning.
 */
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, int *ostmofs)
{
	pdf_obj *obj = NULL;
	int num = 0, gen = 0, stm_ofs;
	int tok;
	int a, b;
	fz_context *ctx = file->ctx;

	fz_var(obj);

	/* Header: object number, generation number, "obj" keyword. num and gen
	 * are still 0 in a message until the corresponding token has been read. */
	tok = pdf_lex(file, buf);
	if (tok != PDF_TOK_INT)
		fz_throw(ctx, "expected object number (%d %d R)", num, gen);
	num = buf->i;

	tok = pdf_lex(file, buf);
	if (tok != PDF_TOK_INT)
		fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
	gen = buf->i;

	tok = pdf_lex(file, buf);
	if (tok != PDF_TOK_OBJ)
		fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

	/* The object's value. */
	tok = pdf_lex(file, buf);
	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		obj = pdf_parse_array(xref, file, buf);
		break;
	case PDF_TOK_OPEN_DICT:
		obj = pdf_parse_dict(xref, file, buf);
		break;
	case PDF_TOK_NAME: obj = fz_new_name(ctx, buf->scratch); break;
	case PDF_TOK_REAL: obj = pdf_new_real(ctx, buf->f); break;
	case PDF_TOK_STRING: obj = pdf_new_string(ctx, buf->scratch, buf->len); break;
	case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, 1); break;
	case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, 0); break;
	case PDF_TOK_NULL: obj = pdf_new_null(ctx); break;

	case PDF_TOK_INT:
		/* Either a plain integer or the start of "<a> <b> R"; one more
		 * token is needed to tell which. */
		a = buf->i;
		tok = pdf_lex(file, buf);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* Plain integer. The trailing keyword is already in tok, so
			 * jump past the code that would read it again. */
			obj = pdf_new_int(ctx, a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = buf->i;
			tok = pdf_lex(file, buf);
			if (tok == PDF_TOK_R)
			{
				obj = pdf_new_indirect(ctx, a, b, xref);
				break;
			}
		}
		/* Reached for any other token sequence after the first integer. */
		fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		/* Empty object body: yield null; tok already holds the keyword. */
		obj = pdf_new_null(ctx);
		goto skip;

	default:
		fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
	}

	/* Read the keyword that follows the value, dropping the object if the
	 * lexer throws. */
	fz_try(ctx)
	{
		tok = pdf_lex(file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(obj);
		fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		/* Consume spaces after "stream" and then exactly one more byte,
		 * whatever it is. If that byte is CR, also consume a following LF,
		 * warning when it is absent. */
		int c = fz_read_byte(file);
		while (c == ' ')
			c = fz_read_byte(file);
		if (c == '\r')
		{
			c = fz_peek_byte(file);
			if (c != '\n')
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(file);
		}
		stm_ofs = fz_tell(file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	return obj;
}
/*
 * Read a 16-bit little-endian value from file as two consecutive bytes.
 *
 * The bytes are combined in unsigned arithmetic because left-shifting an EOF
 * (-1) result from fz_read_byte is undefined behaviour on a signed int. End
 * of file is not reported separately; an EOF result simply contributes
 * all-ones bits, as before.
 */
static inline int
getshort(fz_stream *file)
{
	unsigned int a = (unsigned int)fz_read_byte(file);
	unsigned int b = (unsigned int)fz_read_byte(file);

	return (int)(a | (b << 8));
}
/*
 * Parse an indirect object definition of the form
 * "<num> <gen> obj <value> [stream | endobj]" from file.
 *
 * The value may be an array, dictionary, name, real, string, boolean, null,
 * integer, or an indirect reference "<a> <b> R". An immediate "endobj"
 * produces a null object.
 *
 * Outputs (each pointer may be NULL to ignore it):
 *   onum       - object number
 *   ogen       - generation number
 *   ostmofs    - fz_tell() position just after the "stream" keyword and its
 *                line ending, or 0 when the object has no stream
 *   try_repair - set to 1 when the "<num> <gen> obj" header itself is
 *                malformed; left untouched otherwise
 *
 * Returns the parsed object. Throws FZ_ERROR_GENERIC on a malformed header
 * or value; a missing "endobj"/"stream" keyword only produces a warning.
 */
pdf_obj *
pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair)
{
	pdf_obj *obj = NULL;
	int num = 0, gen = 0;
	fz_off_t stm_ofs;
	pdf_token tok;
	fz_off_t a, b;

	fz_var(obj);

	/* Header: object number, generation number, "obj" keyword. Each failure
	 * here flags the caller (when it asked) before throwing. */
	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number");
	}
	num = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num);
	}
	gen = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_OBJ)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen);
	}

	/* The object's value. */
	tok = pdf_lex(ctx, file, buf);
	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		obj = pdf_parse_array(ctx, doc, file, buf);
		break;
	case PDF_TOK_OPEN_DICT:
		obj = pdf_parse_dict(ctx, doc, file, buf);
		break;
	case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break;
	case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break;
	case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break;
	case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break;
	case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break;
	case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break;

	case PDF_TOK_INT:
		/* Either a plain integer or the start of "<a> <b> R"; one more
		 * token is needed to tell which. */
		a = buf->i;
		tok = pdf_lex(ctx, file, buf);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* Plain integer. The trailing keyword is already in tok, so
			 * jump past the code that would read it again. */
			obj = pdf_new_int_offset(ctx, doc, a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = buf->i;
			tok = pdf_lex(ctx, file, buf);
			if (tok == PDF_TOK_R)
			{
				obj = pdf_new_indirect(ctx, doc, a, b);
				break;
			}
		}
		/* Reached for any other token sequence after the first integer. */
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		/* Empty object body: yield null; tok already holds the keyword. */
		obj = pdf_new_null(ctx, doc);
		goto skip;

	default:
		fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen);
	}

	/* Read the keyword that follows the value, dropping the object if the
	 * lexer throws. */
	fz_try(ctx)
	{
		tok = pdf_lex(ctx, file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, obj);
		fz_rethrow(ctx);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		/* Consume spaces after "stream" and then exactly one more byte,
		 * whatever it is. If that byte is CR, also consume a following LF,
		 * warning when it is absent. */
		int c = fz_read_byte(ctx, file);
		while (c == ' ')
			c = fz_read_byte(ctx, file);
		if (c == '\r')
		{
			c = fz_peek_byte(ctx, file);
			if (c != '\n')
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(ctx, file);
		}
		stm_ofs = fz_tell(ctx, file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	return obj;
}
/*
 * Lex a number whose first character, c, has already been read from f.
 *
 * Stores an integer in buf->i and returns PDF_TOK_INT, or stores a real in
 * buf->f and returns PDF_TOK_REAL once a '.' has been seen. The first
 * character that does not belong to the number is pushed back.
 *
 * Integers too large for an int wrap modulo 2^(width of int) instead of
 * triggering signed overflow. Fractional digits beyond what fits in an int
 * are consumed and ignored.
 */
static int
lex_number(fz_stream *f, pdf_lexbuf *buf, int c)
{
	int neg = 0;
	int i = 0;
	int n;
	int d;
	float v;

	/* Initially we might have +, -, . or a digit */
	switch (c)
	{
	case '.':
		goto loop_after_dot;
	case '-':
		neg = 1;
		break;
	case '+':
		break;
	default: /* Must be a digit */
		i = c - '0';
		break;
	}

	while (1)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case '.':
			goto loop_after_dot;
		case RANGE_0_9:
			/* Accumulate in unsigned arithmetic so that an over-long
			 * digit string wraps rather than invoking undefined
			 * behaviour. */
			i = (int)(10u * (unsigned int)i + (unsigned int)(c - '0'));
			break;
		default:
			fz_unread_byte(f);
			/* Fallthrough */
		case EOF:
			/* Negate in unsigned arithmetic too: -INT_MIN would overflow. */
			if (neg)
				i = (int)(0u - (unsigned int)i);
			buf->i = i;
			return PDF_TOK_INT;
		}
	}

	/* In here, we've seen a dot, so can accept just digits */
loop_after_dot:
	n = 0;
	d = 1;
	while (1)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case RANGE_0_9:
			/* Stop accumulating before n or d could overflow. */
			if (d >= INT_MAX/10)
				goto underflow;
			n = n*10 + (c - '0');
			d *= 10;
			break;
		default:
			fz_unread_byte(f);
			/* Fallthrough */
		case EOF:
			v = (float)i + ((float)n / (float)d);
			if (neg)
				v = -v;
			buf->f = v;
			return PDF_TOK_REAL;
		}
	}

underflow:
	/* Ignore any digits after here, because they are too small */
	while (1)
	{
		c = fz_read_byte(f);
		switch (c)
		{
		case RANGE_0_9:
			break;
		default:
			fz_unread_byte(f);
			/* Fallthrough */
		case EOF:
			v = (float)i + ((float)n / (float)d);
			if (neg)
				v = -v;
			buf->f = v;
			return PDF_TOK_REAL;
		}
	}
}
/*
 * Lex the body of a literal string (the opening '(' has already been
 * consumed) into lb->scratch, growing the buffer as needed.
 *
 * Nested, unescaped parentheses are kept and must balance; the string ends at
 * the matching ')' or at end of file. Backslash escapes are decoded: the
 * single-letter escapes, one to three octal digits, and a backslash followed
 * by a line ending (LF, CR or CRLF), which produces no output. Any other
 * escaped character stands for itself.
 *
 * Sets lb->len to the decoded length and returns PDF_TOK_STRING.
 */
static int
lex_string(fz_stream *f, pdf_lexbuf *lb)
{
	char *out = lb->scratch;
	char *limit = out + lb->size;
	int depth = 1;
	int finished = 0;
	int c;

	while (!finished)
	{
		/* At most one byte is stored per iteration; make room for it. */
		if (out == limit)
		{
			out += pdf_lexbuf_grow(lb);
			limit = lb->scratch + lb->size;
		}

		c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			finished = 1;
			break;

		case '(':
			depth++;
			*out++ = c;
			break;

		case ')':
			depth--;
			if (depth == 0)
				finished = 1;
			else
				*out++ = c;
			break;

		case '\\':
			c = fz_read_byte(f);
			switch (c)
			{
			case EOF:
				finished = 1;
				break;
			case 'n': *out++ = '\n'; break;
			case 'r': *out++ = '\r'; break;
			case 't': *out++ = '\t'; break;
			case 'b': *out++ = '\b'; break;
			case 'f': *out++ = '\f'; break;
			case '(': *out++ = '('; break;
			case ')': *out++ = ')'; break;
			case '\\': *out++ = '\\'; break;
			case RANGE_0_7:
			{
				/* Up to two further octal digits may follow the first. */
				int value = c - '0';
				int extra;

				for (extra = 0; extra < 2; extra++)
				{
					c = fz_read_byte(f);
					if (c < '0' || c > '7')
					{
						if (c != EOF)
							fz_unread_byte(f);
						break;
					}
					value = value * 8 + (c - '0');
				}
				*out++ = value;
				break;
			}
			case '\n':
				/* Escaped line feed: line continuation, no output. */
				break;
			case '\r':
				/* Escaped CR, optionally followed by LF: same thing. */
				c = fz_read_byte(f);
				if (c != '\n' && c != EOF)
					fz_unread_byte(f);
				break;
			default:
				*out++ = c;
			}
			break;

		default:
			*out++ = c;
			break;
		}
	}

	lb->len = out - lb->scratch;
	return PDF_TOK_STRING;
}
/*
 * Refill callback for the DCT (JPEG) decode filter.
 *
 * The first call sets up the libjpeg decompressor, optionally reads a
 * separate tables stream, reads the image header, chooses the input
 * colourspace and starts decompression. Every call then copies decoded
 * sample data into state->buffer, at most max bytes (capped at the buffer
 * size).
 *
 * On return stm->rp/wp delimit the decoded bytes and stm->pos has advanced by
 * their count. Returns the first decoded byte (already consumed), or EOF if
 * nothing was produced. Throws FZ_ERROR_GENERIC with state->msg if control
 * comes back through the setjmp below.
 */
static int
next_dctd(fz_stream *stm, int max)
{
	fz_dctd *state = stm->state;
	j_decompress_ptr cinfo = &state->cinfo;
	unsigned char *p = state->buffer;
	unsigned char *ep;

	if (max > sizeof(state->buffer))
		max = sizeof(state->buffer);
	ep = state->buffer + max;

	/* Error landing pad. NOTE(review): presumably error_exit longjmps to
	 * state->jb after filling state->msg - confirm against its definition. */
	if (setjmp(state->jb))
	{
		/* Give back to the source stream whatever libjpeg left unread. */
		if (cinfo->src)
			state->curr_stm->rp = state->curr_stm->wp - cinfo->src->bytes_in_buffer;
		fz_throw(stm->ctx, FZ_ERROR_GENERIC, "jpeg error: %s", state->msg);
	}

	/* One-time decoder setup. */
	if (!state->init)
	{
		int c;
		cinfo->client_data = state;
		cinfo->err = &state->errmgr;
		jpeg_std_error(cinfo->err);
		cinfo->err->error_exit = error_exit;

		fz_dct_mem_init(state);

		jpeg_create_decompress(cinfo);
		/* Set before anything else can fail, so setup is never repeated. */
		state->init = 1;

		/* Skip over any stray returns at the start of the stream */
		while ((c = fz_peek_byte(state->chain)) == '\n' || c == '\r')
			(void)fz_read_byte(state->chain);

		/* Install the source manager callbacks. */
		cinfo->src = &state->srcmgr;
		cinfo->src->init_source = init_source;
		cinfo->src->fill_input_buffer = fill_input_buffer;
		cinfo->src->skip_input_data = skip_input_data;
		cinfo->src->resync_to_restart = jpeg_resync_to_restart;
		cinfo->src->term_source = term_source;

		/* optionally load additional JPEG tables first */
		if (state->jpegtables)
		{
			state->curr_stm = state->jpegtables;
			cinfo->src->next_input_byte = state->curr_stm->rp;
			cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
			jpeg_read_header(cinfo, 0);
			/* Sync the tables stream's read pointer with what was consumed,
			 * then switch back to the image data. */
			state->curr_stm->rp = state->curr_stm->wp - state->cinfo.src->bytes_in_buffer;
			state->curr_stm = state->chain;
		}

		/* NOTE(review): when there are no tables, curr_stm is not assigned
		 * in this function; presumably it was set to the chained stream when
		 * the filter was opened - confirm. */
		cinfo->src->next_input_byte = state->curr_stm->rp;
		cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;

		jpeg_read_header(cinfo, 1);

		/* default value if ColorTransform is not set */
		if (state->color_transform == -1)
		{
			if (state->cinfo.num_components == 3)
				state->color_transform = 1;
			else
				state->color_transform = 0;
		}

		/* An Adobe marker in the data overrides the value chosen above. */
		if (cinfo->saw_Adobe_marker)
			state->color_transform = cinfo->Adobe_transform;

		/* Guess the input colorspace, and set output colorspace accordingly */
		switch (cinfo->num_components)
		{
		case 3:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCbCr;
			else
				cinfo->jpeg_color_space = JCS_RGB;
			break;
		case 4:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCCK;
			else
				cinfo->jpeg_color_space = JCS_CMYK;
			break;
		}

		/* Scale output by 1 / 2^l2factor, expressed in eighths. */
		cinfo->scale_num = 8/(1<<state->l2factor);
		cinfo->scale_denom = 8;

		jpeg_start_decompress(cinfo);

		/* One-scanline bounce buffer, initially empty (rp == wp). */
		state->stride = cinfo->output_width * cinfo->output_components;
		state->scanline = fz_malloc(state->ctx, state->stride);
		state->rp = state->scanline;
		state->wp = state->scanline;
	}

	/* First hand out whatever is left of a previously decoded scanline. */
	while (state->rp < state->wp && p < ep)
		*p++ = *state->rp++;

	while (p < ep)
	{
		if (cinfo->output_scanline == cinfo->output_height)
			break;

		if (p + state->stride <= ep)
		{
			/* A whole scanline fits: decode straight into the output. */
			jpeg_read_scanlines(cinfo, &p, 1);
			p += state->stride;
		}
		else
		{
			/* Otherwise decode into the bounce buffer and copy what fits;
			 * the remainder is delivered on the next call. */
			jpeg_read_scanlines(cinfo, &state->scanline, 1);
			state->rp = state->scanline;
			state->wp = state->scanline + state->stride;
		}

		while (state->rp < state->wp && p < ep)
			*p++ = *state->rp++;
	}

	stm->rp = state->buffer;
	stm->wp = p;
	stm->pos += (p - state->buffer);
	if (p == stm->rp)
		return EOF;

	return *stm->rp++;
}
/*
 * Read callback for the DCT (JPEG) decode filter.
 *
 * The first call sets up the libjpeg decompressor, optionally reads a
 * separate tables stream, reads the image header, chooses the input
 * colourspace and starts decompression. Every call then copies up to len
 * bytes of decoded sample data into buf.
 *
 * Returns the number of bytes written to buf. Throws FZ_ERROR_GENERIC with
 * state->msg if control comes back through the setjmp below.
 */
static int
read_dctd(fz_stream *stm, unsigned char *buf, int len)
{
	fz_dctd *state = stm->state;
	j_decompress_ptr cinfo = &state->cinfo;
	unsigned char *p = buf;
	unsigned char *ep = buf + len;

	/* Error landing pad. NOTE(review): presumably error_exit longjmps to
	 * state->jb after filling state->msg - confirm against its definition. */
	if (setjmp(state->jb))
	{
		/* Give back to the source stream whatever libjpeg left unread. */
		if (cinfo->src)
			state->curr_stm->rp = state->curr_stm->wp - cinfo->src->bytes_in_buffer;
		fz_throw(stm->ctx, FZ_ERROR_GENERIC, "jpeg error: %s", state->msg);
	}

	/* One-time decoder setup. */
	if (!state->init)
	{
		int c;
		cinfo->client_data = state;
		cinfo->err = &state->errmgr;
		jpeg_std_error(cinfo->err);
		cinfo->err->error_exit = error_exit;
		jpeg_create_decompress(cinfo);
		/* Set before anything else can fail, so setup is never repeated. */
		state->init = 1;

		/* Skip over any stray returns at the start of the stream */
		while ((c = fz_peek_byte(state->chain)) == '\n' || c == '\r')
			(void)fz_read_byte(state->chain);

		/* Install the source manager callbacks. */
		cinfo->src = &state->srcmgr;
		cinfo->src->init_source = init_source;
		cinfo->src->fill_input_buffer = fill_input_buffer;
		cinfo->src->skip_input_data = skip_input_data;
		cinfo->src->resync_to_restart = jpeg_resync_to_restart;
		cinfo->src->term_source = term_source;

		/* optionally load additional JPEG tables first */
		if (state->jpegtables)
		{
			state->curr_stm = state->jpegtables;
			cinfo->src->next_input_byte = state->curr_stm->rp;
			cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
			jpeg_read_header(cinfo, 0);
			/* Sync the tables stream's read pointer with what was consumed,
			 * then switch back to the image data. */
			state->curr_stm->rp = state->curr_stm->wp - state->cinfo.src->bytes_in_buffer;
			state->curr_stm = state->chain;
		}

		/* NOTE(review): when there are no tables, curr_stm is not assigned
		 * in this function; presumably it was set to the chained stream when
		 * the filter was opened - confirm. */
		cinfo->src->next_input_byte = state->curr_stm->rp;
		cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;

		jpeg_read_header(cinfo, 1);

		/* speed up jpeg decoding a bit */
		/* The two settings below are deliberately disabled (they sit inside
		 * this comment); see
		 * http://code.google.com/p/sumatrapdf/issues/detail?id=1960
		 *
		 *	cinfo->dct_method = JDCT_FASTEST;
		 *	cinfo->do_fancy_upsampling = FALSE;
		 */

		/* default value if ColorTransform is not set */
		if (state->color_transform == -1)
		{
			if (state->cinfo.num_components == 3)
				state->color_transform = 1;
			else
				state->color_transform = 0;
		}

		/* An Adobe marker in the data overrides the value chosen above. */
		if (cinfo->saw_Adobe_marker)
			state->color_transform = cinfo->Adobe_transform;

		/* Guess the input colorspace, and set output colorspace accordingly */
		switch (cinfo->num_components)
		{
		case 3:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCbCr;
			else
				cinfo->jpeg_color_space = JCS_RGB;
			break;
		case 4:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCCK;
			else
				cinfo->jpeg_color_space = JCS_CMYK;
			break;
		}

		/* Scale output by 1 / 2^l2factor, expressed in eighths. */
		cinfo->scale_num = 8/(1<<state->l2factor);
		cinfo->scale_denom = 8;

		jpeg_start_decompress(cinfo);

		/* One-scanline bounce buffer, initially empty (rp == wp). */
		state->stride = cinfo->output_width * cinfo->output_components;
		state->scanline = fz_malloc(state->ctx, state->stride);
		state->rp = state->scanline;
		state->wp = state->scanline;
	}

	/* First hand out whatever is left of a previously decoded scanline. */
	while (state->rp < state->wp && p < ep)
		*p++ = *state->rp++;

	while (p < ep)
	{
		if (cinfo->output_scanline == cinfo->output_height)
			break;

		if (p + state->stride <= ep)
		{
			/* A whole scanline fits: decode straight into the output. */
			jpeg_read_scanlines(cinfo, &p, 1);
			p += state->stride;
		}
		else
		{
			/* Otherwise decode into the bounce buffer and copy what fits;
			 * the remainder is delivered on the next call. */
			jpeg_read_scanlines(cinfo, &state->scanline, 1);
			state->rp = state->scanline;
			state->wp = state->scanline + state->stride;
		}

		while (state->rp < state->wp && p < ep)
			*p++ = *state->rp++;
	}

	return p - buf;
}
/*
 * Read the next token from f.
 *
 * Whitespace and comments are skipped. Names, strings, hex strings and
 * numbers are lexed by their helpers, which leave the token value in buf.
 * Any other run of characters is read with lex_name and mapped through
 * pdf_token_from_keyword. A stray ')' or '>' produces a warning and is
 * skipped, and number characters glued directly onto the end of a number are
 * discarded with a warning.
 *
 * Returns a PDF_TOK_* value; PDF_TOK_EOF at end of input.
 */
pdf_token
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	for (;;)
	{
		int ch = fz_read_byte(f);
		int next;

		switch (ch)
		{
		case EOF:
			return PDF_TOK_EOF;

		case IS_WHITE:
			lex_white(f);
			break;

		case '%':
			lex_comment(f);
			break;

		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;

		case '(':
			return lex_string(f, buf);

		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			break;

		case '<':
			/* "<<" opens a dictionary; a single '<' opens a hex string. */
			next = fz_read_byte(f);
			if (next == '<')
				return PDF_TOK_OPEN_DICT;
			fz_unread_byte(f);
			return lex_hex_string(f, buf);

		case '>':
			next = fz_read_byte(f);
			if (next == '>')
				return PDF_TOK_CLOSE_DICT;
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (next == EOF)
				return PDF_TOK_EOF;
			/* Drop only the stray '>'; the next byte starts a new token. */
			fz_unread_byte(f);
			break;

		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;

		case IS_NUMBER:
			/* cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2231 */
		{
			int number_tok = lex_number(f, buf, ch);

			/* Discard any number characters stuck to the end of it. */
			for (;;)
			{
				int trailing = fz_peek_byte(f);
				switch (trailing)
				{
				case IS_NUMBER:
					fz_warn(f->ctx, "ignoring invalid character after number: '%c'", trailing);
					fz_read_byte(f);
					break;
				default:
					return number_tok;
				}
			}
		}

		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}