/*
 * Used while reading the individual xref sections from a file.
 *
 * Returns a pointer to entry 'num' in the last element of
 * doc->xref_sections (the section currently being populated).
 *
 * If the document has no xref sections yet, a single zeroed one is
 * allocated first. Sections are then visited from the last towards the
 * first; each one whose table is too short to hold 'num' is resized to
 * num + 1 entries. The walk stops at the first section that is already
 * long enough.
 *
 * NOTE(review): 'num' is not range-checked here; a negative value would
 * index before the start of the table. Callers appear to be responsible
 * for validating it -- confirm.
 */
pdf_xref_entry *pdf_get_populating_xref_entry(pdf_document *doc, int num)
{
	int idx;
	int last;

	/* Lazily create the first section so there is something to populate. */
	if (doc->num_xref_sections == 0)
	{
		doc->xref_sections = fz_calloc(doc->ctx, 1, sizeof(pdf_xref));
		doc->num_xref_sections = 1;
	}

	/* Grow sections, newest first, until one is found that already fits. */
	idx = doc->num_xref_sections;
	while (idx-- > 0)
	{
		pdf_xref *section = &doc->xref_sections[idx];

		if (num < section->len)
			break; /* This section (and, it is assumed, the earlier ones) is big enough */

		pdf_resize_xref(doc->ctx, section, num + 1);
	}

	/* Always hand back the entry from the last section, not the one the walk stopped on. */
	last = doc->num_xref_sections - 1;
	return &doc->xref_sections[last].table[num];
}
/*
 * Load the cross reference table of a document.
 *
 * Steps, in order: read the version marker, locate startxref, read the
 * trailer, size the table from the trailer's Size entry, read every xref
 * section starting at xref->startxref, then sanity-check the result.
 *
 * buf/bufsize: scratch buffer handed through to the trailer and section
 * readers.
 *
 * Returns fz_okay on success. On failure returns an error produced by
 * fz_throw/fz_rethrow; this happens when any of the read steps fails, when
 * the trailer has no Size entry or a negative one, when the resulting
 * table is empty, when object 0 is not free, or when an entry points
 * outside the file ('n') or at a non-existent object stream ('o').
 */
static fz_error
pdf_load_xref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *size;
	int declared;
	int i;

	error = pdf_load_version(xref);
	if (error)
		return fz_rethrow(error, "cannot read version marker");

	error = pdf_read_start_xref(xref);
	if (error)
		return fz_rethrow(error, "cannot read startxref");

	error = pdf_read_trailer(xref, buf, bufsize);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	size = fz_dict_gets(xref->trailer, "Size");
	if (!size)
		return fz_throw("trailer missing Size entry");

	/* The Size value comes straight from the file; never resize to a negative length */
	declared = fz_to_int(size);
	if (declared < 0)
		return fz_throw("trailer Size entry is negative: %d", declared);

	pdf_resize_xref(xref, declared);

	error = pdf_read_xref_sections(xref, xref->startxref, buf, bufsize);
	if (error)
		return fz_rethrow(error, "cannot read xref");

	/* broken pdfs where the xref ends up with no entries at all;
	 * table[0] must not be read in that case */
	if (xref->len < 1)
		return fz_throw("found xref was empty");

	/* broken pdfs where first object is not free */
	if (xref->table[0].type != 'f')
		return fz_throw("first object in xref is not free");

	for (i = 0; i < xref->len; i++)
	{
		/* broken pdfs where object offsets are out of range */
		if (xref->table[i].type == 'n')
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->file_size)
				return fz_throw("object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i);

		/* for 'o' entries, ofs is used as an object number: it must be a
		 * valid index whose own entry is of type 'n' */
		if (xref->table[i].type == 'o')
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->len || xref->table[xref->table[i].ofs].type != 'n')
				return fz_throw("invalid reference to an objstm that does not exist: %d (%d 0 R)", xref->table[i].ofs, i);
	}

	return fz_okay;
}
/*
 * Read a cross reference stream ("new style" xref) at the current file
 * position.
 *
 * The stream object is parsed with pdf_parse_ind_obj; its dictionary must
 * carry Size and W entries. The table is grown when Size exceeds the
 * current length, and the stream's own object number must fall inside the
 * table. The three W integers are forwarded to pdf_read_new_xref_section
 * (presumably the per-field byte widths -- confirm against that helper).
 *
 * Without an Index entry a single section (0, Size) is read. With one, the
 * array is consumed two integers at a time and each pair is forwarded as
 * the section's range arguments.
 *
 * On success the parsed dictionary is stored in *trailerp (the caller
 * receives the reference) and fz_okay is returned. On every failure path
 * the dictionary is dropped, the stream (if opened) is closed, and an
 * error is returned.
 */
static fz_error
pdf_read_new_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *dict;
	fz_obj *index;
	fz_obj *entry;
	int num, gen, stm_ofs;
	int size;
	int w0, w1, w2;
	int t;

	error = pdf_parse_ind_obj(&dict, xref, xref->file, buf, cap, &num, &gen, &stm_ofs);
	if (error)
		return fz_rethrow(error, "cannot parse compressed xref stream object");

	/* Size: mandatory; may enlarge the table but never shrinks it */
	entry = fz_dict_gets(dict, "Size");
	if (!entry)
	{
		fz_drop_obj(dict);
		return fz_throw("xref stream missing Size entry (%d %d R)", num, gen);
	}

	size = fz_to_int(entry);
	if (xref->len < size)
		pdf_resize_xref(xref, size);

	/* The stream object itself has to be addressable in the table */
	if (num < 0 || num >= xref->len)
	{
		fz_drop_obj(dict);
		return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1);
	}

	/* W: mandatory */
	entry = fz_dict_gets(dict, "W");
	if (!entry)
	{
		fz_drop_obj(dict);
		return fz_throw("xref stream missing W entry (%d %d R)", num, gen);
	}

	w0 = fz_to_int(fz_array_get(entry, 0));
	w1 = fz_to_int(fz_array_get(entry, 1));
	w2 = fz_to_int(fz_array_get(entry, 2));

	/* Index: optional */
	index = fz_dict_gets(dict, "Index");

	error = pdf_open_stream_at(&stm, xref, num, gen, dict, stm_ofs);
	if (error)
	{
		fz_drop_obj(dict);
		return fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen);
	}

	if (index)
	{
		/* One section per pair of integers in the Index array */
		t = 0;
		while (t < fz_array_len(index))
		{
			int first = fz_to_int(fz_array_get(index, t + 0));
			int count = fz_to_int(fz_array_get(index, t + 1));

			error = pdf_read_new_xref_section(xref, stm, first, count, w0, w1, w2);
			if (error)
			{
				fz_close(stm);
				fz_drop_obj(dict);
				return fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen);
			}

			t += 2;
		}
	}
	else
	{
		/* No Index: a single section covering 0..Size */
		error = pdf_read_new_xref_section(xref, stm, 0, size, w0, w1, w2);
		if (error)
		{
			fz_close(stm);
			fz_drop_obj(dict);
			return fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen);
		}
	}

	fz_close(stm);

	*trailerp = dict;

	return fz_okay;
}
/*
 * Read a classic ("old style") xref table followed by its trailer
 * dictionary from the current file position.
 *
 * Layout handled: an "xref" line, then zero or more subsections. Each
 * subsection begins with a header line holding two integers (first object
 * number and entry count) and is followed by that many 20-byte entries.
 * Entries already filled in (non-zero type) are left untouched. After the
 * subsections the "trailer" keyword and a dictionary are parsed; the
 * dictionary is returned through *trailerp.
 *
 * buf/cap: scratch buffer used for line reads, entry reads and lexing.
 *
 * Returns fz_okay on success, otherwise an error from fz_throw/fz_rethrow.
 *
 * Compared with a naive parse, the subsection header is validated before
 * use because it comes straight from the file:
 *  - a missing field is read as 0 instead of passing NULL to atoi();
 *  - a negative first object number or count is rejected, as either would
 *    lead to table writes at negative indices;
 *  - a range whose end does not fit in an int is rejected.
 */
static fz_error
pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int ofs, len;
	int last;
	unsigned int end;
	char *s;
	char *field;
	int n;
	int tok;
	int i;
	int c;

	fz_read_line(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* A subsection header starts with a digit; anything else ends the table */
		c = fz_peek_byte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_read_line(xref->file, buf, cap);
		s = buf;

		/* fz_strsep can leave s NULL once the line is exhausted (the check
		 * on s further down relies on that), so guard each field */
		field = fz_strsep(&s, " ");
		ofs = field ? atoi(field) : 0;
		field = fz_strsep(&s, " ");
		len = field ? atoi(field) : 0;

		/* broken pdfs with a negative start or count */
		if (ofs < 0)
			return fz_throw("out of range object num in xref: %d", ofs);
		if (len < 0)
			return fz_throw("xref range marker must be positive: %d", len);

		/* Both values are non-negative, so the unsigned sum cannot wrap;
		 * reject it if it exceeds the largest int (UINT_MAX / 2) */
		end = (unsigned int)ofs + (unsigned int)len;
		if (end > ((unsigned int)-1) / 2)
			return fz_throw("xref section too large: %d + %d", ofs, len);
		last = (int)end;

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			/* step back over the unread remainder of the line; whence 1 is
			 * presumably "relative to current position" -- confirm */
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (last > xref->len)
		{
			fz_warn("broken xref section, proceeding anyway.");
			pdf_resize_xref(xref, last);
		}

		for (i = ofs; i < last; i++)
		{
			/* Each entry occupies exactly 20 bytes */
			n = fz_read(xref->file, (unsigned char *) buf, 20);
			if (n < 0)
				return fz_rethrow(n, "cannot read xref table");

			/* Only fill entries that have not been set already */
			if (!xref->table[i].type)
			{
				s = buf;

				/* broken pdfs where line start with white space */
				while (*s != '\0' && iswhite(*s))
					s++;

				/* Fixed columns: offset at 0, generation at 11, type at 17 */
				xref->table[i].ofs = atoi(s);
				xref->table[i].gen = atoi(s + 11);
				xref->table[i].type = s[17];
				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
					return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen);
			}
		}
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_TRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_OPEN_DICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}