/*
 * Parse a single object from 'file' (used for objects stored inside an
 * object stream, per the error messages below).
 *
 * One token is lexed into 'buf' (capacity 'cap'). Arrays and dictionaries
 * are handed to pdf_parse_array / pdf_parse_dict; scalar tokens are turned
 * into the matching fz_obj directly. On success the new object is stored
 * in *op and fz_okay is returned; any other token is an error.
 */
fz_error
pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error err;
	int token;
	int slen;

	err = pdf_lex(&token, file, buf, cap, &slen);
	if (err)
		return fz_rethrow(err, "cannot parse token in object stream");

	/* Composite objects: delegate to the recursive parsers. */
	if (token == PDF_TOK_OPEN_ARRAY || token == PDF_TOK_OPEN_DICT)
	{
		if (token == PDF_TOK_OPEN_ARRAY)
			err = pdf_parse_array(op, xref, file, buf, cap);
		else
			err = pdf_parse_dict(op, xref, file, buf, cap);
		if (err)
			return fz_rethrow(err, "cannot parse object stream");
		return fz_okay;
	}

	/* Scalar objects: build them straight from the lexer buffer. */
	switch (token)
	{
	case PDF_TOK_NAME:
		*op = fz_new_name(buf);
		return fz_okay;
	case PDF_TOK_REAL:
		*op = fz_new_real(fz_atof(buf));
		return fz_okay;
	case PDF_TOK_STRING:
		*op = fz_new_string(buf, slen);
		return fz_okay;
	case PDF_TOK_TRUE:
		*op = fz_new_bool(1);
		return fz_okay;
	case PDF_TOK_FALSE:
		*op = fz_new_bool(0);
		return fz_okay;
	case PDF_TOK_NULL:
		*op = fz_new_null();
		return fz_okay;
	case PDF_TOK_INT:
		*op = fz_new_int(atoi(buf));
		return fz_okay;
	default:
		break;
	}

	return fz_throw("unknown token in object stream");
}
/*
 * Make sure the xref table entry for object 'num' has its object loaded.
 *
 * Returns fz_okay immediately when the entry already holds an object.
 * Otherwise, depending on the entry type:
 *   'f'  - the entry is given a null object;
 *   'n'  - the object is parsed from xref->file at the entry's offset
 *          (and passed through pdf_crypt_obj when xref->crypt is set);
 *   'o'  - pdf_load_obj_stm is called with the entry's 'ofs' value and is
 *          expected to fill in the entry.
 * Any other type, an out-of-range 'num', or a parse failure is an error.
 */
fz_error
pdf_cache_object(pdf_xref *xref, int num, int gen)
{
	fz_error error;
	pdf_xref_entry *x;
	int rnum, rgen;

	if (num < 0 || num >= xref->len)
		return fz_throw("object out of range (%d %d R); xref size %d", num, gen, xref->len);

	x = &xref->table[num];

	if (x->obj)
		return fz_okay;

	if (x->type == 'f')
	{
		x->obj = fz_new_null();
		return fz_okay;
	}
	else if (x->type == 'n')
	{
		fz_seek(xref->file, x->ofs, 0);

		error = pdf_parse_ind_obj(&x->obj, xref, xref->file, xref->scratch, sizeof xref->scratch,
			&rnum, &rgen, &x->stm_ofs);
		if (error)
			return fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);

		if (rnum != num)
		{
			/*
			 * Do not leave the mismatched object in the cache slot:
			 * the "already cached" shortcut above would otherwise
			 * hand it out as object 'num' on the next call.
			 */
			fz_drop_obj(x->obj);
			x->obj = NULL;
			return fz_throw("found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
		}

		if (xref->crypt)
			pdf_crypt_obj(xref->crypt, x->obj, num, gen);
	}
	else if (x->type == 'o')
	{
		/* x->obj is known to be NULL here (checked above). */
		error = pdf_load_obj_stm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch);
		if (error)
			return fz_rethrow(error, "cannot load object stream containing object (%d %d R)", num, gen);
		if (!x->obj)
			return fz_throw("object (%d %d R) was not found in its object stream", num, gen);
	}
	else
	{
		return fz_throw("assert: corrupt xref struct");
	}

	return fz_okay;
}
/*
 * Add an "ASCIIHexDecode" filter to the "Filter" entry of 'dict', and keep
 * "DecodeParms" in step by adding a null placeholder for it.
 *
 *  - Filter is a name:  it is replaced by a new two-element array holding
 *    ASCIIHexDecode followed by the old name; a dictionary DecodeParms is
 *    likewise wrapped as [null, old-parms].
 *  - Filter is an array: ASCIIHexDecode is added with fz_array_insert
 *    (presumably at the front - confirm against fz_array_insert), and the
 *    same is done with a null for an array DecodeParms.
 *  - Otherwise: Filter simply becomes the ASCIIHexDecode name.
 */
static void addhexfilter(fz_obj *dict)
{
	fz_obj *hex_name = fz_new_name(ctx, "ASCIIHexDecode");
	fz_obj *null_obj = fz_new_null(ctx);
	fz_obj *filter_chain = NULL;
	fz_obj *parms_chain = NULL;
	fz_obj *filter = fz_dict_gets(dict, "Filter");
	fz_obj *parms = fz_dict_gets(dict, "DecodeParms");

	if (fz_is_name(filter))
	{
		/* Single filter: promote it to a two-entry array. */
		filter_chain = fz_new_array(ctx, 2);
		fz_array_push(filter_chain, hex_name);
		fz_array_push(filter_chain, filter);
		filter = filter_chain;

		if (fz_is_dict(parms))
		{
			parms_chain = fz_new_array(ctx, 2);
			fz_array_push(parms_chain, null_obj);
			fz_array_push(parms_chain, parms);
			parms = parms_chain;
		}
	}
	else if (fz_is_array(filter))
	{
		/* Existing filter array: extend it in place. */
		fz_array_insert(filter, hex_name);
		if (fz_is_array(parms))
			fz_array_insert(parms, null_obj);
	}
	else
	{
		filter = hex_name;
	}

	fz_dict_puts(dict, "Filter", filter);
	if (parms)
		fz_dict_puts(dict, "DecodeParms", parms);

	/* Drop the local references created above. */
	fz_drop_obj(hex_name);
	fz_drop_obj(null_obj);
	if (filter_chain)
		fz_drop_obj(filter_chain);
	if (parms_chain)
		fz_drop_obj(parms_chain);
}
/*
 * Build a chain of fz_outline nodes for the outline item 'dict' and the
 * items reachable from it through "Next"; each item's "First" entry is
 * loaded recursively into node->down.
 *
 * Returns the first node of the chain, or NULL when 'dict' is a null
 * object or has already been visited.
 *
 * Cycle protection: every dictionary visited by this invocation is tagged
 * with a temporary ".seen" key, and the walk stops when a tagged
 * dictionary is reached. Before returning, this invocation removes only
 * the tags it added itself. Tags placed by an enclosing invocation must
 * survive: if a "First" entry points back at a dictionary an ancestor is
 * still walking, wiping the ancestor's tags would let a cyclic "Next"
 * chain be traversed again and again without end.
 */
static fz_outline *
pdf_load_outline_imp(pdf_xref *xref, fz_obj *dict)
{
	pdf_link *link;
	fz_outline *node;
	fz_obj *obj;
	/* SumatraPDF: prevent potential stack overflow */
	fz_outline *prev = NULL, *root = NULL;
	fz_obj *origDict = dict;
	fz_context *ctx = xref->ctx;
	int marked = 0; /* number of dicts tagged ".seen" by this invocation */

	if (fz_is_null(ctx, dict))
		return NULL;

	/* SumatraPDF: prevent cyclic outlines */
	do
	{
		if (fz_dict_gets(ctx, dict, ".seen"))
			break;
		obj = fz_new_null(ctx);
		fz_dict_puts(ctx, dict, ".seen", obj);
		fz_drop_obj(ctx, obj);
		marked++;

		node = fz_malloc(ctx, sizeof(fz_outline));
		node->title = NULL;
		node->page = -1;
		node->down = NULL;
		node->next = NULL;

		obj = fz_dict_gets(ctx, dict, "Title");
		if (obj)
			node->title = pdf_to_utf8(ctx, obj);

		/* SumatraPDF: support expansion states */
		node->is_open = fz_to_int(ctx, fz_dict_gets(ctx, dict, "Count")) >= 0;
		/* SumatraPDF: extended outline actions */
		node->data = node->free_data = NULL;

		if (fz_dict_gets(ctx, dict, "Dest") || fz_dict_gets(ctx, dict, "A"))
		{
			link = pdf_load_link(xref, dict);
			if (link) /* SumatraPDF: don't crash if it's no link after all */
			{
				if (link->kind == PDF_LINK_GOTO)
					node->page = pdf_find_page_number(xref, fz_array_get(ctx, link->dest, 0));
				/* SumatraPDF: extended outline actions */
				node->data = link;
				node->free_data = pdf_free_link;
			}
		}

		obj = fz_dict_gets(ctx, dict, "First");
		if (obj)
			node->down = pdf_load_outline_imp(xref, obj);

		/* SumatraPDF: prevent potential stack overflow */
		if (!root)
			prev = root = node;
		else
			prev = prev->next = node;

		dict = fz_dict_gets(ctx, dict, "Next");
	} while (dict && !fz_is_null(ctx, dict));

	/*
	 * SumatraPDF: prevent cyclic outlines
	 * Remove the temporary tags again, but only from the 'marked' dicts
	 * this invocation tagged (they are the first 'marked' entries of the
	 * "Next" chain starting at origDict).
	 */
	for (dict = origDict;
		marked > 0 && dict && fz_dict_gets(ctx, dict, ".seen");
		marked--, dict = fz_dict_gets(ctx, dict, "Next"))
	{
		fz_dict_dels(ctx, dict, ".seen");
	}

	return root;
}
/*
 * Parse an indirect object definition of the form
 *     <num> <gen> obj <value> [stream | endobj]
 * from 'file', using 'buf' (capacity 'cap') as the lexer buffer.
 *
 * On success the parsed value is stored in *op and fz_okay is returned.
 * The optional outputs, when non-NULL, receive:
 *   *onum    - the object number read from the header,
 *   *ogen    - the generation number read from the header,
 *   *ostmofs - the file position (fz_tell) just after the 'stream'
 *              keyword and its line ending, or 0 when no 'stream'
 *              keyword follows the value.
 *
 * A value consisting of a bare 'endobj' yields a null object. A missing
 * 'endobj'/'stream' keyword after the value only produces a warning.
 */
fz_error
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap, int *onum, int *ogen, int *ostmofs)
{
	fz_error err = fz_okay;
	fz_obj *parsed = NULL;
	int num = 0, gen = 0;
	int stream_start = 0;
	int token;
	int slen;
	int first, second;

	/* Header: object number. */
	err = pdf_lex(&token, file, buf, cap, &slen);
	if (err)
		return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
	if (token != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	/* Header: generation number. */
	err = pdf_lex(&token, file, buf, cap, &slen);
	if (err)
		return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
	if (token != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	/* Header: 'obj' keyword. */
	err = pdf_lex(&token, file, buf, cap, &slen);
	if (err)
		return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
	if (token != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	/* First token of the value. */
	err = pdf_lex(&token, file, buf, cap, &slen);
	if (err)
		return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);

	switch (token)
	{
	case PDF_TOK_OPEN_ARRAY:
		err = pdf_parse_array(&parsed, xref, file, buf, cap);
		if (err)
			return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_OPEN_DICT:
		err = pdf_parse_dict(&parsed, xref, file, buf, cap);
		if (err)
			return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_NAME:
		parsed = fz_new_name(buf);
		break;
	case PDF_TOK_REAL:
		parsed = fz_new_real(fz_atof(buf));
		break;
	case PDF_TOK_STRING:
		parsed = fz_new_string(buf, slen);
		break;
	case PDF_TOK_TRUE:
		parsed = fz_new_bool(1);
		break;
	case PDF_TOK_FALSE:
		parsed = fz_new_bool(0);
		break;
	case PDF_TOK_NULL:
		parsed = fz_new_null();
		break;

	case PDF_TOK_INT:
		/*
		 * Either a plain integer, or the start of "a b R". One token
		 * of lookahead is needed, and in the plain-integer case that
		 * lookahead is already the trailer keyword, hence the jump.
		 */
		first = atoi(buf);
		err = pdf_lex(&token, file, buf, cap, &slen);
		if (err)
			return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
		if (token == PDF_TOK_STREAM || token == PDF_TOK_ENDOBJ)
		{
			parsed = fz_new_int(first);
			goto skip;
		}
		if (token != PDF_TOK_INT)
			return fz_throw("expected 'R' keyword (%d %d R)", num, gen);

		second = atoi(buf);
		err = pdf_lex(&token, file, buf, cap, &slen);
		if (err)
			return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
		if (token != PDF_TOK_R)
			return fz_throw("expected 'R' keyword (%d %d R)", num, gen);

		parsed = fz_new_indirect(first, second, xref);
		break;

	case PDF_TOK_ENDOBJ:
		/* Empty object body. */
		parsed = fz_new_null();
		goto skip;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	/* Token following the value: expected to be 'stream' or 'endobj'. */
	err = pdf_lex(&token, file, buf, cap, &slen);
	if (err)
	{
		fz_drop_obj(parsed);
		return fz_rethrow(err, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (token == PDF_TOK_STREAM)
	{
		int ch;

		/* Consume the byte after the keyword, skipping any spaces. */
		do
			ch = fz_read_byte(file);
		while (ch == ' ');

		/* A carriage return should be followed by a line feed. */
		if (ch == '\r')
		{
			if (fz_peek_byte(file) == '\n')
				fz_read_byte(file);
			else
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stream_start = fz_tell(file);
	}
	else if (token != PDF_TOK_ENDOBJ)
	{
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stream_start;
	*op = parsed;
	return fz_okay;
}
/*
 * Parse the body of a dictionary (the opening token has already been
 * consumed by the caller) from 'file', lexing into 'buf' of size 'cap'.
 *
 * Key/value pairs are read until the closing dictionary token, or until
 * the keyword "ID" is met (inline images: BI .. ID .. EI in content
 * streams). On success the new dictionary is stored in *op and fz_okay is
 * returned; on any error the partially built dictionary is dropped.
 */
fz_error
pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error err = fz_okay;
	fz_obj *dict = NULL;
	fz_obj *key = NULL;
	fz_obj *val = NULL;
	int token;
	int slen;
	int first, second;

	dict = fz_new_dict(8);

	for (;;)
	{
		err = pdf_lex(&token, file, buf, cap, &slen);
		if (err)
		{
			fz_drop_obj(dict);
			return fz_rethrow(err, "cannot parse dict");
		}

	skip:
		/* End of dictionary, or "ID" ending an inline image dictionary. */
		if (token == PDF_TOK_CLOSE_DICT ||
			(token == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
		{
			*op = dict;
			return fz_okay;
		}

		if (token != PDF_TOK_NAME)
		{
			fz_drop_obj(dict);
			return fz_throw("invalid key in dict");
		}

		key = fz_new_name(buf);

		err = pdf_lex(&token, file, buf, cap, &slen);
		if (err)
			goto fail;

		switch (token)
		{
		case PDF_TOK_OPEN_ARRAY:
			err = pdf_parse_array(&val, xref, file, buf, cap);
			if (err)
				goto fail;
			break;

		case PDF_TOK_OPEN_DICT:
			err = pdf_parse_dict(&val, xref, file, buf, cap);
			if (err)
				goto fail;
			break;

		case PDF_TOK_NAME:
			val = fz_new_name(buf);
			break;
		case PDF_TOK_REAL:
			val = fz_new_real(fz_atof(buf));
			break;
		case PDF_TOK_STRING:
			val = fz_new_string(buf, slen);
			break;
		case PDF_TOK_TRUE:
			val = fz_new_bool(1);
			break;
		case PDF_TOK_FALSE:
			val = fz_new_bool(0);
			break;
		case PDF_TOK_NULL:
			val = fz_new_null();
			break;

		case PDF_TOK_INT:
			/* Converted through a 64-bit parse, then narrowed to int. */
			first = (int) strtoll(buf, 0, 10);

			err = pdf_lex(&token, file, buf, cap, &slen);
			if (err)
				goto fail;

			/*
			 * Plain integer value: the lookahead token already belongs
			 * to the next entry (or ends the dictionary), so store the
			 * pair and re-enter the loop without lexing again.
			 */
			if (token == PDF_TOK_CLOSE_DICT || token == PDF_TOK_NAME ||
				(token == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
			{
				val = fz_new_int(first);
				fz_dict_put(dict, key, val);
				fz_drop_obj(val);
				fz_drop_obj(key);
				goto skip;
			}

			/* Otherwise it has to be an indirect reference "a b R". */
			if (token == PDF_TOK_INT)
			{
				second = atoi(buf);
				err = pdf_lex(&token, file, buf, cap, &slen);
				if (err)
					goto fail;
				if (token == PDF_TOK_R)
				{
					val = fz_new_indirect(first, second, xref);
					break;
				}
			}
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			fz_drop_obj(key);
			fz_drop_obj(dict);
			return fz_throw("unknown token in dict");
		}

		fz_dict_put(dict, key, val);
		fz_drop_obj(val);
		fz_drop_obj(key);
	}

fail:
	/* Lexer/sub-parser failure while a key is pending. */
	fz_drop_obj(key);
	fz_drop_obj(dict);
	return fz_rethrow(err, "cannot parse dict");
}
/* Push 'item' onto 'ary', then call fz_drop_obj on 'item'. */
static void
pdf_array_push_drop_imp(fz_obj *ary, fz_obj *item)
{
	fz_array_push(ary, item);
	fz_drop_obj(item);
}

/*
 * Parse the body of an array (the opening token has already been consumed
 * by the caller) from 'file', lexing into 'buf' of size 'cap'.
 *
 * Up to two integers are held back ('first', 'second', counted by
 * 'pending') because "a b R" forms an indirect reference: they are only
 * pushed as plain integers once a following token shows that no 'R' is
 * coming, or when a third integer pushes the oldest one out.
 *
 * On success the array is stored in *op and fz_okay is returned; on error
 * the partially built array is dropped.
 */
fz_error
pdf_parse_array(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error err = fz_okay;
	fz_obj *ary = NULL;
	fz_obj *item = NULL;
	int first = 0, second = 0, pending = 0;
	int token;
	int slen;

	ary = fz_new_array(4);

	for (;;)
	{
		err = pdf_lex(&token, file, buf, cap, &slen);
		if (err)
		{
			fz_drop_obj(ary);
			return fz_rethrow(err, "cannot parse array");
		}

		if (token != PDF_TOK_INT && token != PDF_TOK_R)
		{
			/* Held-back integers were plain values after all. */
			if (pending > 0)
				pdf_array_push_drop_imp(ary, fz_new_int(first));
			if (pending > 1)
				pdf_array_push_drop_imp(ary, fz_new_int(second));
			pending = 0;
		}
		else if (token == PDF_TOK_INT && pending == 2)
		{
			/* Third integer in a row: emit the oldest, shift the window. */
			pdf_array_push_drop_imp(ary, fz_new_int(first));
			first = second;
			pending--;
		}

		switch (token)
		{
		case PDF_TOK_CLOSE_ARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TOK_INT:
			if (pending == 0)
				first = atoi(buf);
			else if (pending == 1)
				second = atoi(buf);
			pending++;
			continue;

		case PDF_TOK_R:
			if (pending != 2)
			{
				fz_drop_obj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			item = fz_new_indirect(first, second, xref);
			pending = 0;
			break;

		case PDF_TOK_OPEN_ARRAY:
			err = pdf_parse_array(&item, xref, file, buf, cap);
			if (err)
			{
				fz_drop_obj(ary);
				return fz_rethrow(err, "cannot parse array");
			}
			break;

		case PDF_TOK_OPEN_DICT:
			err = pdf_parse_dict(&item, xref, file, buf, cap);
			if (err)
			{
				fz_drop_obj(ary);
				return fz_rethrow(err, "cannot parse array");
			}
			break;

		case PDF_TOK_NAME:
			item = fz_new_name(buf);
			break;
		case PDF_TOK_REAL:
			item = fz_new_real(fz_atof(buf));
			break;
		case PDF_TOK_STRING:
			item = fz_new_string(buf, slen);
			break;
		case PDF_TOK_TRUE:
			item = fz_new_bool(1);
			break;
		case PDF_TOK_FALSE:
			item = fz_new_bool(0);
			break;
		case PDF_TOK_NULL:
			item = fz_new_null();
			break;

		default:
			fz_drop_obj(ary);
			return fz_throw("cannot parse token in array");
		}

		/* Every element-producing case ends up here. */
		pdf_array_push_drop_imp(ary, item);
	}
}