/*
 * Read one line from stm into mem, storing at most n-1 bytes.
 *
 * A line ends at '\n', at '\r' (a directly following '\n' is consumed too),
 * at EOF, or when the buffer is full. The terminator is not stored. The
 * result is NUL-terminated whenever n is non-zero.
 *
 * Returns mem, or NULL when no byte was stored and the last value obtained
 * from the stream was EOF. Note that after a '\r' that "last value" is the
 * peeked byte, so a lone '\r' right before EOF also yields NULL.
 */
char *
fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, int n)
{
	char *out = mem;
	int ch = EOF;

	/* n is only decremented for bytes actually stored; a break skips it. */
	for (; n > 1; n--)
	{
		ch = fz_read_byte(ctx, stm);
		if (ch == EOF || ch == '\n')
			break;
		if (ch == '\r')
		{
			/* Remember the look-ahead in ch: it feeds the NULL test below. */
			ch = fz_peek_byte(ctx, stm);
			if (ch == '\n')
				fz_read_byte(ctx, stm);
			break;
		}
		*out++ = ch;
	}

	if (n != 0)
		*out = '\0';

	if (out == mem && ch == EOF)
		return NULL;
	return mem;
}
/*
 * Skip over a textual cross-reference table and parse the trailer
 * dictionary that follows it into xref->trailer.
 *
 * The function expects a line starting with "xref", then zero or more
 * subsections. Each subsection starts with a "<ofs> <len>" line and is
 * skipped by seeking 20 * len bytes forward (20 bytes per entry). After the
 * subsections it expects the 'trailer' keyword and an opening dictionary.
 *
 * buf/cap: scratch buffer and its capacity, used for lines and lexing.
 *
 * Returns fz_okay on success, otherwise an fz_error created by
 * fz_throw/fz_rethrow.
 */
static fz_error
pdf_read_old_trailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int len;
	char *s;
	int n;
	int t;
	int tok;
	int c;

	fz_read_line(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* A subsection header starts with a digit; anything else ends the table. */
		c = fz_peek_byte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_read_line(xref->file, buf, cap);
		s = buf;
		fz_strsep(&s, " "); /* ignore ofs */
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(fz_strsep(&s, " "));

		/*
		 * The length comes straight from the file. A negative value would
		 * make the seek below move backwards, so a crafted file could make
		 * this loop re-read the same range forever.
		 */
		if (len < 0)
			return fz_throw("xref range marker must be positive");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1); /* whence 1: presumably SEEK_CUR */

		t = fz_tell(xref->file);
		if (t < 0)
			return fz_throw("cannot tell in file");

		/*
		 * NOTE(review): t + 20 * len can still overflow int for a huge len;
		 * bounding it needs INT_MAX from <limits.h> - confirm it is in scope.
		 */
		fz_seek(xref->file, t + 20 * len, 0); /* whence 0: presumably SEEK_SET */
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_TRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_OPEN_DICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parse_dict(&xref->trailer, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}
/*
 * Read one line from stm into mem, storing at most n-1 bytes.
 *
 * Reading stops at EOF, at '\n', at '\r' (a directly following '\n' is
 * consumed as well), or when the buffer is full. The line terminator is
 * not stored. The result is NUL-terminated whenever n is non-zero.
 */
void
fz_read_line(fz_stream *stm, char *mem, int n)
{
	char *dst = mem;
	int ch;

	while (n > 1)
	{
		ch = fz_read_byte(stm);

		if (ch == '\r')
		{
			/* Swallow the LF of a CR LF pair. */
			if (fz_peek_byte(stm) == '\n')
				fz_read_byte(stm);
			break;
		}
		if (ch == EOF || ch == '\n')
			break;

		*dst++ = ch;
		n--;
	}

	if (n)
		*dst = '\0';
}
/*
 * Stream read callback for the DCT (JPEG) decode filter: fill buf with up
 * to len bytes of decoded scanline data.
 *
 * The first call (state->init == 0) sets up the libjpeg decompressor,
 * wires the custom source manager, optionally reads a separate tables
 * stream, reads the image header, chooses the JPEG colour space and
 * allocates a one-scanline bounce buffer.
 *
 * Returns the number of bytes written to buf. This can be less than len
 * once cinfo->output_scanline reaches cinfo->output_height.
 * Throws FZ_ERROR_GENERIC ("jpeg error: ...") when the setjmp below is
 * re-entered with a non-zero value.
 */
static int
read_dctd(fz_stream *stm, unsigned char *buf, int len)
{
	fz_dctd *state = stm->state;
	j_decompress_ptr cinfo = &state->cinfo;
	unsigned char *p = buf;
	unsigned char *ep = buf + len;

	/*
	 * Error recovery point. NOTE(review): presumably error_exit longjmps
	 * to state->jb - confirm against its definition.
	 */
	if (setjmp(state->jb))
	{
		/* Give back to the current input stream the bytes libjpeg has not consumed. */
		if (cinfo->src)
			state->curr_stm->rp = state->curr_stm->wp - cinfo->src->bytes_in_buffer;
		fz_throw(stm->ctx, FZ_ERROR_GENERIC, "jpeg error: %s", state->msg);
	}

	/* One-time decoder set-up. */
	if (!state->init)
	{
		int c;
		cinfo->client_data = state;
		cinfo->err = &state->errmgr;
		jpeg_std_error(cinfo->err);
		cinfo->err->error_exit = error_exit;
		jpeg_create_decompress(cinfo);
		state->init = 1;

		/* Skip over any stray returns at the start of the stream */
		while ((c = fz_peek_byte(state->chain)) == '\n' || c == '\r')
			(void)fz_read_byte(state->chain);

		/* Install the source manager callbacks defined elsewhere in this file. */
		cinfo->src = &state->srcmgr;
		cinfo->src->init_source = init_source;
		cinfo->src->fill_input_buffer = fill_input_buffer;
		cinfo->src->skip_input_data = skip_input_data;
		cinfo->src->resync_to_restart = jpeg_resync_to_restart;
		cinfo->src->term_source = term_source;

		/* optionally load additional JPEG tables first */
		if (state->jpegtables)
		{
			state->curr_stm = state->jpegtables;
			cinfo->src->next_input_byte = state->curr_stm->rp;
			cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
			/* Second argument 0: per libjpeg, a tables-only datastream is acceptable. */
			jpeg_read_header(cinfo, 0);
			state->curr_stm->rp = state->curr_stm->wp - state->cinfo.src->bytes_in_buffer;
			state->curr_stm = state->chain;
		}

		/* Point libjpeg at whatever is already buffered in the current input stream. */
		cinfo->src->next_input_byte = state->curr_stm->rp;
		cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
		jpeg_read_header(cinfo, 1);

		/* speed up jpeg decoding a bit */
		/*
		 * Disabled, kept for reference
		 * (cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1960):
		 *	cinfo->dct_method = JDCT_FASTEST;
		 *	cinfo->do_fancy_upsampling = FALSE;
		 */

		/* default value if ColorTransform is not set */
		if (state->color_transform == -1)
		{
			if (state->cinfo.num_components == 3)
				state->color_transform = 1;
			else
				state->color_transform = 0;
		}

		/* An Adobe marker in the data overrides the value chosen above. */
		if (cinfo->saw_Adobe_marker)
			state->color_transform = cinfo->Adobe_transform;

		/* Guess the input colorspace, and set output colorspace accordingly */
		switch (cinfo->num_components)
		{
		case 3:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCbCr;
			else
				cinfo->jpeg_color_space = JCS_RGB;
			break;
		case 4:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCCK;
			else
				cinfo->jpeg_color_space = JCS_CMYK;
			break;
		}

		/* Output scale is (8 / 2^l2factor) / 8. */
		cinfo->scale_num = 8/(1<<state->l2factor);
		cinfo->scale_denom = 8;

		jpeg_start_decompress(cinfo);

		/* Bounce buffer for one scanline; starts empty (rp == wp). */
		state->stride = cinfo->output_width * cinfo->output_components;
		state->scanline = fz_malloc(state->ctx, state->stride);
		state->rp = state->scanline;
		state->wp = state->scanline;
	}

	/* Hand out whatever is left of a previously decoded scanline. */
	while (state->rp < state->wp && p < ep)
		*p++ = *state->rp++;

	while (p < ep)
	{
		if (cinfo->output_scanline == cinfo->output_height)
			break;

		if (p + state->stride <= ep)
		{
			/* A whole scanline fits: decode straight into the caller's buffer. */
			jpeg_read_scanlines(cinfo, &p, 1);
			p += state->stride;
		}
		else
		{
			/* Not enough room: decode into the bounce buffer and copy a part. */
			jpeg_read_scanlines(cinfo, &state->scanline, 1);
			state->rp = state->scanline;
			state->wp = state->scanline + state->stride;
		}

		while (state->rp < state->wp && p < ep)
			*p++ = *state->rp++;
	}

	return p - buf;
}
/*
 * Stream "next" callback for the DCT (JPEG) decode filter: decode up to
 * max bytes of scanline data into state->buffer and publish them through
 * stm->rp / stm->wp.
 *
 * The first call (state->init == 0) sets up the libjpeg decompressor,
 * wires the custom source manager, optionally reads a separate tables
 * stream, reads the image header, chooses the JPEG colour space and
 * allocates a one-scanline bounce buffer.
 *
 * Returns the first decoded byte (and advances stm->rp past it), or EOF
 * when no byte was produced.
 * Throws FZ_ERROR_GENERIC ("jpeg error: ...") when the setjmp below is
 * re-entered with a non-zero value.
 */
static int
next_dctd(fz_stream *stm, int max)
{
	fz_dctd *state = stm->state;
	j_decompress_ptr cinfo = &state->cinfo;
	unsigned char *p = state->buffer;
	unsigned char *ep;

	/*
	 * Clamp the request to the size of the internal buffer. The comparison
	 * is done in an unsigned type, so a negative max is clamped as well.
	 */
	if (max > sizeof(state->buffer))
		max = sizeof(state->buffer);
	ep = state->buffer + max;

	/*
	 * Error recovery point. NOTE(review): presumably error_exit longjmps
	 * to state->jb - confirm against its definition.
	 */
	if (setjmp(state->jb))
	{
		/* Give back to the current input stream the bytes libjpeg has not consumed. */
		if (cinfo->src)
			state->curr_stm->rp = state->curr_stm->wp - cinfo->src->bytes_in_buffer;
		fz_throw(stm->ctx, FZ_ERROR_GENERIC, "jpeg error: %s", state->msg);
	}

	/* One-time decoder set-up. */
	if (!state->init)
	{
		int c;
		cinfo->client_data = state;
		cinfo->err = &state->errmgr;
		jpeg_std_error(cinfo->err);
		cinfo->err->error_exit = error_exit;

		/* Called before the decompressor is created; see fz_dct_mem_init for what it installs. */
		fz_dct_mem_init(state);

		jpeg_create_decompress(cinfo);
		state->init = 1;

		/* Skip over any stray returns at the start of the stream */
		while ((c = fz_peek_byte(state->chain)) == '\n' || c == '\r')
			(void)fz_read_byte(state->chain);

		/* Install the source manager callbacks defined elsewhere in this file. */
		cinfo->src = &state->srcmgr;
		cinfo->src->init_source = init_source;
		cinfo->src->fill_input_buffer = fill_input_buffer;
		cinfo->src->skip_input_data = skip_input_data;
		cinfo->src->resync_to_restart = jpeg_resync_to_restart;
		cinfo->src->term_source = term_source;

		/* optionally load additional JPEG tables first */
		if (state->jpegtables)
		{
			state->curr_stm = state->jpegtables;
			cinfo->src->next_input_byte = state->curr_stm->rp;
			cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
			/* Second argument 0: per libjpeg, a tables-only datastream is acceptable. */
			jpeg_read_header(cinfo, 0);
			state->curr_stm->rp = state->curr_stm->wp - state->cinfo.src->bytes_in_buffer;
			state->curr_stm = state->chain;
		}

		/* Point libjpeg at whatever is already buffered in the current input stream. */
		cinfo->src->next_input_byte = state->curr_stm->rp;
		cinfo->src->bytes_in_buffer = state->curr_stm->wp - state->curr_stm->rp;
		jpeg_read_header(cinfo, 1);

		/* default value if ColorTransform is not set */
		if (state->color_transform == -1)
		{
			if (state->cinfo.num_components == 3)
				state->color_transform = 1;
			else
				state->color_transform = 0;
		}

		/* An Adobe marker in the data overrides the value chosen above. */
		if (cinfo->saw_Adobe_marker)
			state->color_transform = cinfo->Adobe_transform;

		/* Guess the input colorspace, and set output colorspace accordingly */
		switch (cinfo->num_components)
		{
		case 3:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCbCr;
			else
				cinfo->jpeg_color_space = JCS_RGB;
			break;
		case 4:
			if (state->color_transform)
				cinfo->jpeg_color_space = JCS_YCCK;
			else
				cinfo->jpeg_color_space = JCS_CMYK;
			break;
		}

		/* Output scale is (8 / 2^l2factor) / 8. */
		cinfo->scale_num = 8/(1<<state->l2factor);
		cinfo->scale_denom = 8;

		jpeg_start_decompress(cinfo);

		/* Bounce buffer for one scanline; starts empty (rp == wp). */
		state->stride = cinfo->output_width * cinfo->output_components;
		state->scanline = fz_malloc(state->ctx, state->stride);
		state->rp = state->scanline;
		state->wp = state->scanline;
	}

	/* Hand out whatever is left of a previously decoded scanline. */
	while (state->rp < state->wp && p < ep)
		*p++ = *state->rp++;

	while (p < ep)
	{
		if (cinfo->output_scanline == cinfo->output_height)
			break;

		if (p + state->stride <= ep)
		{
			/* A whole scanline fits: decode straight into the output buffer. */
			jpeg_read_scanlines(cinfo, &p, 1);
			p += state->stride;
		}
		else
		{
			/* Not enough room: decode into the bounce buffer and copy a part. */
			jpeg_read_scanlines(cinfo, &state->scanline, 1);
			state->rp = state->scanline;
			state->wp = state->scanline + state->stride;
		}

		while (state->rp < state->wp && p < ep)
			*p++ = *state->rp++;
	}

	/* Publish the decoded window and account for it in the stream position. */
	stm->rp = state->buffer;
	stm->wp = p;
	stm->pos += (p - state->buffer);

	/* Nothing decoded means end of data. */
	if (p == stm->rp)
		return EOF;
	return *stm->rp++;
}
/*
 * Parse one indirect object definition of the form
 *	<num> <gen> obj <value> [stream | endobj]
 * from file.
 *
 * xref:    document handed to the array/dict parsers and to
 *          pdf_new_indirect.
 * buf:     lexer scratch buffer.
 * onum:    if non-NULL, receives the object number.
 * ogen:    if non-NULL, receives the generation number.
 * ostmofs: if non-NULL, receives the value of fz_tell() taken after the
 *          'stream' keyword and its end-of-line handling, or 0 when the
 *          object is not followed by 'stream'.
 *
 * Returns the parsed object. An object with no value before 'endobj'
 * yields a null object.
 * Throws on a malformed header or value. A missing 'endobj'/'stream'
 * keyword only produces a warning.
 */
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, int *ostmofs)
{
	fz_context *ctx = file->ctx;
	pdf_obj *result = NULL;
	int num = 0;
	int gen = 0;
	int stm_ofs;
	int tok;
	int first, second;

	fz_var(result);

	/* Header: "<num> <gen> obj". */
	if (pdf_lex(file, buf) != PDF_TOK_INT)
		fz_throw(ctx, "expected object number (%d %d R)", num, gen);
	num = buf->i;

	if (pdf_lex(file, buf) != PDF_TOK_INT)
		fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
	gen = buf->i;

	if (pdf_lex(file, buf) != PDF_TOK_OBJ)
		fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

	/* Value. */
	tok = pdf_lex(file, buf);
	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		result = pdf_parse_array(xref, file, buf);
		break;

	case PDF_TOK_OPEN_DICT:
		result = pdf_parse_dict(xref, file, buf);
		break;

	case PDF_TOK_NAME:
		result = fz_new_name(ctx, buf->scratch);
		break;

	case PDF_TOK_REAL:
		result = pdf_new_real(ctx, buf->f);
		break;

	case PDF_TOK_STRING:
		result = pdf_new_string(ctx, buf->scratch, buf->len);
		break;

	case PDF_TOK_TRUE:
		result = pdf_new_bool(ctx, 1);
		break;

	case PDF_TOK_FALSE:
		result = pdf_new_bool(ctx, 0);
		break;

	case PDF_TOK_NULL:
		result = pdf_new_null(ctx);
		break;

	case PDF_TOK_INT:
		/* Either a plain integer or the start of an "a b R" reference. */
		first = buf->i;
		tok = pdf_lex(file, buf);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* The closing keyword has already been consumed. */
			result = pdf_new_int(ctx, first);
			goto skip;
		}
		if (tok != PDF_TOK_INT)
			fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);
		second = buf->i;
		tok = pdf_lex(file, buf);
		if (tok != PDF_TOK_R)
			fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);
		result = pdf_new_indirect(ctx, first, second, xref);
		break;

	case PDF_TOK_ENDOBJ:
		/* Empty object body. */
		result = pdf_new_null(ctx);
		goto skip;

	default:
		fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
	}

	/* Fetch the keyword after the value; drop the value if lexing fails. */
	fz_try(ctx)
	{
		tok = pdf_lex(file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(result);
		fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c;

		/* Skip spaces after the keyword, then an optional CR LF. */
		do
			c = fz_read_byte(file);
		while (c == ' ');

		if (c == '\r')
		{
			if (fz_peek_byte(file) == '\n')
				fz_read_byte(file);
			else
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stm_ofs = fz_tell(file);
	}
	else
	{
		if (tok != PDF_TOK_ENDOBJ)
			fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stm_ofs;

	return result;
}
pdf_obj * pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair) { pdf_obj *obj = NULL; int num = 0, gen = 0; fz_off_t stm_ofs; pdf_token tok; fz_off_t a, b; fz_var(obj); tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number"); } num = buf->i; tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_INT) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num); } gen = buf->i; tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_OBJ) { if (try_repair) *try_repair = 1; fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen); } tok = pdf_lex(ctx, file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: obj = pdf_parse_array(ctx, doc, file, buf); break; case PDF_TOK_OPEN_DICT: obj = pdf_parse_dict(ctx, doc, file, buf); break; case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break; case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break; case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break; case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break; case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break; case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break; case PDF_TOK_INT: a = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) { obj = pdf_new_int_offset(ctx, doc, a); goto skip; } if (tok == PDF_TOK_INT) { b = buf->i; tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_R) { obj = pdf_new_indirect(ctx, doc, a, b); break; } } fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen); case PDF_TOK_ENDOBJ: obj = pdf_new_null(ctx, doc); goto skip; default: fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen); } fz_try(ctx) { tok = pdf_lex(ctx, file, buf); } fz_catch(ctx) { pdf_drop_obj(ctx, obj); 
fz_rethrow(ctx); } skip: if (tok == PDF_TOK_STREAM) { int c = fz_read_byte(ctx, file); while (c == ' ') c = fz_read_byte(ctx, file); if (c == '\r') { c = fz_peek_byte(ctx, file); if (c != '\n') fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen); else fz_read_byte(ctx, file); } stm_ofs = fz_tell(ctx, file); } else if (tok == PDF_TOK_ENDOBJ) { stm_ofs = 0; } else { fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen); stm_ofs = 0; } if (onum) *onum = num; if (ogen) *ogen = gen; if (ostmofs) *ostmofs = stm_ofs; return obj; }
/*
 * Read the next token from f.
 *
 * White space and comments are skipped. Stray ')' and '>' characters are
 * reported with a warning and skipped. For names, strings, hex strings,
 * numbers and keywords the token payload is left in buf by the lex_*
 * helpers.
 *
 * Returns the token kind, or PDF_TOK_EOF at end of input.
 *
 * IS_WHITE and IS_NUMBER are used as case labels; they are presumably
 * macros that expand to several 'case' constants (defined elsewhere).
 */
pdf_token
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
				return PDF_TOK_OPEN_DICT;
			/*
			 * Push the look-ahead byte back only if one was actually read.
			 * At EOF nothing was consumed, so an unread here would step
			 * back over the '<' itself (the '>' case below guards the
			 * same way).
			 */
			if (c != EOF)
				fz_unread_byte(f);
			return lex_hex_string(f, buf);
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
			{
				return PDF_TOK_EOF;
			}
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			/* cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2231 */
			{
				int tok = lex_number(f, buf, c);
				/* Swallow number characters glued to the end of the number. */
				while (1)
				{
					c = fz_peek_byte(f);
					switch (c)
					{
					case IS_NUMBER:
						fz_warn(f->ctx, "ignoring invalid character after number: '%c'", c);
						fz_read_byte(f);
						continue;
					default:
						return tok;
					}
				}
			}
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
/*
 * Read a textual cross-reference table into xref->table and parse the
 * trailer dictionary that follows it into *trailerp.
 *
 * The function expects a line starting with "xref", then zero or more
 * subsections. Each subsection starts with a "<ofs> <len>" line followed by
 * len entries of 20 bytes each. Entries whose table slot already has a
 * non-zero type are left untouched. After the subsections it expects the
 * 'trailer' keyword and an opening dictionary.
 *
 * buf/cap: scratch buffer and its capacity, used for lines, entries and
 *          lexing.
 *
 * Returns fz_okay on success, otherwise an fz_error created by
 * fz_throw/fz_rethrow.
 */
static fz_error
pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int ofs, len;
	char *s;
	int n;
	int tok;
	int i;
	int c;

	fz_read_line(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* A subsection header starts with a digit; anything else ends the table. */
		c = fz_peek_byte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_read_line(xref->file, buf, cap);
		s = buf;
		ofs = atoi(fz_strsep(&s, " "));
		/*
		 * Without a separator there is no second field, and the next
		 * fz_strsep result would be passed to atoi() as NULL
		 * (presumably fz_strsep leaves s NULL then, like strsep).
		 */
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(fz_strsep(&s, " "));

		/* Both values come from the file and are used to index xref->table. */
		if (ofs < 0)
			return fz_throw("out of range object num in xref: %d", ofs);
		if (len < 0)
			return fz_throw("xref range marker must be positive");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1); /* whence 1: presumably SEEK_CUR */
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (ofs + len > xref->len)
		{
			fz_warn("broken xref section, proceeding anyway.");
			pdf_resize_xref(xref, ofs + len);
		}

		for (i = ofs; i < ofs + len; i++)
		{
			/*
			 * NOTE(review): only n < 0 is treated as an error; a short
			 * read leaves stale bytes in buf, and buf is not
			 * NUL-terminated after the 20 bytes - confirm fz_read's
			 * contract.
			 */
			n = fz_read(xref->file, (unsigned char *) buf, 20);
			if (n < 0)
				return fz_rethrow(n, "cannot read xref table");
			if (!xref->table[i].type)
			{
				s = buf;

				/* broken pdfs where line start with white space */
				while (*s != '\0' && iswhite(*s))
					s++;

				/* Fixed columns: offset at 0, generation at 11, type letter at 17. */
				xref->table[i].ofs = atoi(s);
				xref->table[i].gen = atoi(s + 11);
				xref->table[i].type = s[17];
				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
					return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen);
			}
		}
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_TRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_OPEN_DICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}
/*
 * Parse one indirect object definition of the form
 *	<num> <gen> obj <value> [stream | endobj]
 * from file.
 *
 * op:      receives the parsed object on success.
 * xref:    passed to the array/dict parsers and to fz_new_indirect.
 * buf/cap: lexer scratch buffer and its capacity.
 * onum:    if non-NULL, receives the object number.
 * ogen:    if non-NULL, receives the generation number.
 * ostmofs: if non-NULL, receives the value of fz_tell() taken after the
 *          'stream' keyword and its end-of-line handling, or 0 when the
 *          object is not followed by 'stream'.
 *
 * Returns fz_okay on success, otherwise an fz_error created by
 * fz_throw/fz_rethrow; *op and the out parameters are not written then.
 * A missing 'endobj'/'stream' keyword only produces a warning.
 */
fz_error
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap, int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *result = NULL;
	int num = 0;
	int gen = 0;
	int stm_ofs;
	int tok;
	int len;
	int first, second;

	/* Header: object number. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	/* Header: generation number. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	/* Header: 'obj' keyword. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	/* Value. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(&result, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(&result, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_NAME:
		result = fz_new_name(buf);
		break;

	case PDF_TOK_REAL:
		result = fz_new_real(fz_atof(buf));
		break;

	case PDF_TOK_STRING:
		result = fz_new_string(buf, len);
		break;

	case PDF_TOK_TRUE:
		result = fz_new_bool(1);
		break;

	case PDF_TOK_FALSE:
		result = fz_new_bool(0);
		break;

	case PDF_TOK_NULL:
		result = fz_new_null();
		break;

	case PDF_TOK_INT:
		/* Either a plain integer or the start of an "a b R" reference. */
		first = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			/* The closing keyword has already been consumed. */
			result = fz_new_int(first);
			goto skip;
		}
		if (tok != PDF_TOK_INT)
			return fz_throw("expected 'R' keyword (%d %d R)", num, gen);
		second = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok != PDF_TOK_R)
			return fz_throw("expected 'R' keyword (%d %d R)", num, gen);
		result = fz_new_indirect(first, second, xref);
		break;

	case PDF_TOK_ENDOBJ:
		/* Empty object body. */
		result = fz_new_null();
		goto skip;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	/* Fetch the keyword after the value; drop the value if lexing fails. */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_drop_obj(result);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c;

		/* Skip spaces after the keyword, then an optional CR LF. */
		do
			c = fz_read_byte(file);
		while (c == ' ');

		if (c == '\r')
		{
			if (fz_peek_byte(file) == '\n')
				fz_read_byte(file);
			else
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stm_ofs = fz_tell(file);
	}
	else
	{
		if (tok != PDF_TOK_ENDOBJ)
			fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stm_ofs;

	*op = result;
	return fz_okay;
}