/*
 * Find the last "startxref" keyword in the final kilobyte of the file and
 * store the number that follows it in xref->startxref.
 * Also records the position reported after seeking to the end of the file
 * in xref->filesize.
 * Returns fz_okay on success, or an error if the read fails or the keyword
 * is not present in the scanned window.
 */
static fz_error
pdf_readstartxref(pdf_xref *xref)
{
	/* one spare byte so the data can be NUL-terminated before atoi */
	unsigned char buf[1024 + 1];
	int t, n;
	int i;

	fz_seek(xref->file, 0, 2);

	xref->filesize = fz_tell(xref->file);

	t = MAX(0, xref->filesize - ((int)sizeof buf - 1));
	fz_seek(xref->file, t, 0);

	n = fz_read(xref->file, buf, sizeof buf - 1);
	if (n < 0)
		return fz_rethrow(n, "cannot read from file");

	/* search backwards so the last occurrence wins */
	for (i = n - 9; i >= 0; i--)
	{
		if (memcmp(buf + i, "startxref", 9) == 0)
		{
			/* n >= 9 here, and n <= sizeof buf - 1, so this is in bounds */
			buf[n] = '\0';

			i += 9;
			/* test the bound first so buf[i] is never read past the data */
			while (i < n && iswhite(buf[i]))
				i ++;
			xref->startxref = atoi((char*)(buf + i));
			pdf_logxref("startxref %d\n", xref->startxref);
			return fz_okay;
		}
	}

	return fz_throw("cannot find startxref");
}
/*
 * Search backwards from the end of the file for the zip "end of central
 * directory" signature ("PK\5\6") and pass its offset to xps_read_zip_dir.
 * At most 0xFFFF + sizeof buf bytes (or the whole file, if smaller) are
 * examined, in windows of sizeof buf bytes that overlap by 4 bytes so a
 * signature straddling two windows is still seen.
 * Returns the result of xps_read_zip_dir, or an error if the signature
 * is not found or a read fails.
 */
static int
xps_find_and_read_zip_dir(xps_context *ctx)
{
	unsigned char buf[512];
	int file_size, back, maxback;
	int i, n;

	fz_seek(ctx->file, 0, SEEK_END);
	file_size = fz_tell(ctx->file);

	maxback = MIN(file_size, 0xFFFF + sizeof buf);
	back = MIN(maxback, sizeof buf);

	/*
	 * Always scan at least one window: when the file is no larger than
	 * the buffer, back == maxback from the start.
	 */
	for (;;)
	{
		fz_seek(ctx->file, file_size - back, 0);

		n = fz_read(ctx->file, buf, sizeof buf);
		if (n < 0)
			return fz_error_make(ctx->ctx, "cannot read end of central directory");

		/* i == 0 is a valid position for the signature too */
		for (i = n - 4; i >= 0; i--)
			if (!memcmp(buf + i, "PK\5\6", 4))
				return xps_read_zip_dir(ctx, file_size - back + i);

		if (back >= maxback)
			break;

		/* step back, clamping so the final partial window is not skipped */
		back += sizeof buf - 4;
		if (back > maxback)
			back = maxback;
	}

	return fz_error_make(ctx->ctx, "cannot find end of central directory");
}
/*
 * Find the last "startxref" keyword in the final kilobyte of the file and
 * store the number that follows it in xref->startxref.
 * Returns fz_okay on success, or an error if seeking or reading fails or
 * the keyword is not present in the scanned window.
 */
static fz_error
readstartxref(pdf_xref *xref)
{
	fz_error error;
	/* one spare byte so the data can be NUL-terminated before atoi */
	unsigned char buf[1024 + 1];
	int t, n;
	int i;

	error = fz_seek(xref->file, 0, 2);
	if (error)
		return fz_rethrow(error, "cannot seek to end of file");

	t = MAX(0, fz_tell(xref->file) - ((int)sizeof buf - 1));

	error = fz_seek(xref->file, t, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to offset %d", t);

	error = fz_read(&n, xref->file, buf, sizeof buf - 1);
	if (error)
		return fz_rethrow(error, "cannot read from file");

	/* search backwards so the last occurrence wins */
	for (i = n - 9; i >= 0; i--)
	{
		if (memcmp(buf + i, "startxref", 9) == 0)
		{
			/* n >= 9 here, and at most sizeof buf - 1 bytes were requested */
			buf[n] = '\0';

			i += 9;
			/* test the bound first so buf[i] is never read past the data */
			while (i < n && iswhite(buf[i]))
				i ++;
			xref->startxref = atoi((char*)(buf + i));
			return fz_okay;
		}
	}

	return fz_throw("cannot find startxref");
}
/*
 * Read the trailer dictionary that follows an old-style ("xref" keyword)
 * cross reference table, without loading the table itself: each
 * subsection header is parsed only for its entry count, and the entries
 * are skipped by seeking 20 bytes per entry.
 * The parsed dictionary is stored in xref->trailer.
 * 'buf'/'cap' is scratch space for line reading and lexing.
 */
static fz_error
pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int len;
	char *s;
	int n;
	int t;
	pdf_token_e tok;
	int c;

	pdf_logxref("load old xref format trailer\n");

	fz_readline(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	/* each subsection starts with a line beginning with a digit */
	while (1)
	{
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_readline(xref->file, buf, cap);
		s = buf;
		fz_strsep(&s, " "); /* ignore ofs */
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(fz_strsep(&s, " "));

		/*
		 * A negative count would move the file position backwards below,
		 * so the same subsection header could be read again and again.
		 */
		if (len < 0)
			return fz_throw("negative entry count in xref range marker");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);

		t = fz_tell(xref->file);
		if (t < 0)
			return fz_throw("cannot tell in file");

		/* skip over the entries of this subsection */
		fz_seek(xref->file, t + 20 * len, 0);
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TTRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TODICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}
void pdfapp_open_progressive(pdfapp_t *app, char *filename, int reload, int bps) { fz_context *ctx = app->ctx; char *password = ""; fz_try(ctx) { fz_register_document_handlers(ctx); #ifdef HAVE_CURL if (!strncmp(filename, "http://", 7)) { app->stream = fz_stream_from_curl(ctx, filename, pdfapp_more_data, app); while (1) { fz_try(ctx) { fz_seek(ctx, app->stream, 0, SEEK_SET); app->doc = fz_open_document_with_stream(ctx, filename, app->stream); } fz_catch(ctx) { if (fz_caught(ctx) == FZ_ERROR_TRYLATER) { pdfapp_warn(app, "not enough data to open yet"); continue; } fz_rethrow(ctx); } break; } } else #endif if (bps == 0)
/*
 * Read the cross reference section located at file offset 'ofs'.
 * After skipping white space, the first byte selects the reader:
 * 'x' dispatches to pdf_readoldxref, a digit to pdf_readnewxref.
 * 'trailerp', 'buf' and 'cap' are passed through to the chosen reader.
 */
static fz_error
pdf_readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
{
	fz_error error;
	int isold, isnew;
	int c;

	fz_seek(xref->file, ofs, 0);

	/* skip leading white space */
	while (iswhite(fz_peekbyte(xref->file)))
		fz_readbyte(xref->file);

	c = fz_peekbyte(xref->file);
	isold = (c == 'x');
	isnew = (c >= '0' && c <= '9');

	if (!isold && !isnew)
		return fz_throw("cannot recognize xref format");

	if (isold)
		error = pdf_readoldxref(trailerp, xref, buf, cap);
	else
		error = pdf_readnewxref(trailerp, xref, buf, cap);

	if (error)
		return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);

	return fz_okay;
}
/*
 * Read the trailer belonging to the cross reference section at
 * xref->startxref. After skipping white space, the first byte selects
 * the reader: 'x' dispatches to pdf_readoldtrailer, a digit to
 * pdf_readnewtrailer.
 */
static fz_error
pdf_readtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int isold, isnew;
	int c;

	fz_seek(xref->file, xref->startxref, 0);

	/* skip leading white space */
	while (iswhite(fz_peekbyte(xref->file)))
		fz_readbyte(xref->file);

	c = fz_peekbyte(xref->file);
	isold = (c == 'x');
	isnew = (c >= '0' && c <= '9');

	if (!isold && !isnew)
		return fz_throw("cannot recognize xref format: '%c'", c);

	if (isold)
		error = pdf_readoldtrailer(xref, buf, cap);
	else
		error = pdf_readnewtrailer(xref, buf, cap);

	if (error)
		return fz_rethrow(error, "cannot read trailer");

	return fz_okay;
}
/*
 * Refill callback of the null filter: copy up to 'max' bytes (bounded by
 * the bytes still remaining and by the size of the private buffer) from
 * the chained stream into the filter's buffer.
 * Returns the first byte of the refilled buffer, or EOF when nothing
 * remains or the chained stream has no data available.
 */
static int
next_null(fz_stream *stm, int max)
{
	struct null_filter *nf = stm->state;
	int count;

	if (nf->remain == 0)
		return EOF;

	/* reposition the chained stream to where this filter left off */
	fz_seek(nf->chain, nf->offset, 0);

	count = fz_available(nf->chain, max);
	if (count > nf->remain)
		count = nf->remain;
	if (count > sizeof(nf->buffer))
		count = sizeof(nf->buffer);

	memcpy(nf->buffer, nf->chain->rp, count);
	stm->rp = nf->buffer;
	stm->wp = stm->rp + count;

	if (count == 0)
		return EOF;

	/* account for the consumed bytes on both streams */
	nf->chain->rp += count;
	nf->remain -= count;
	nf->offset += count;
	stm->pos += count;

	return *stm->rp++;
}
/*
 * SumatraPDF: allow cloning a stream.
 * The clone is produced by the stream's 'reopen' callback and is then
 * positioned at the offset currently reported for 'stm'.
 * Throws if the stream has no 'reopen' callback.
 */
fz_stream *
fz_clone_stream(fz_context *ctx, fz_stream *stm)
{
	fz_stream *copy;

	if (!stm->reopen)
	{
		fz_throw(ctx, FZ_ERROR_GENERIC, "can't clone stream without reopening");
	}

	copy = stm->reopen(ctx, stm);
	fz_seek(copy, fz_tell(stm), 0);

	return copy;
}
/*
 * Open a filtered stream for the object (num, gen) described by 'dict',
 * whose data starts at file offset 'stm_ofs'.
 * The filter chain is built on top of xref->file, which is then seeked to
 * 'stm_ofs'. The new stream is returned through 'stmp'.
 * A zero 'stm_ofs' means the object has no stream data and is an error.
 */
fz_error
pdf_open_stream_at(fz_stream **stmp, pdf_xref *xref, int num, int gen, fz_obj *dict, int stm_ofs)
{
	if (!stm_ofs)
		return fz_throw("object is not a stream");

	*stmp = pdf_open_filter(xref->file, xref, dict, num, gen);
	fz_seek(xref->file, stm_ofs, 0);

	return fz_okay;
}
/*
 * Open a filtered stream for the object (num, gen) described by 'dict',
 * whose data starts at file offset 'stm_ofs'.
 * The filter chain is built on top of xref->file, which is then seeked to
 * 'stm_ofs'. Throws if 'stm_ofs' is zero.
 */
fz_stream *
pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict, int stm_ofs)
{
	fz_stream *filtered;

	if (!stm_ofs)
	{
		fz_throw(xref->ctx, "object is not a stream");
	}

	filtered = pdf_open_filter(xref->file, xref, dict, num, gen, NULL);
	fz_seek(xref->file, stm_ofs, 0);

	return filtered;
}
/*
 * Make sure the object (num, gen) is loaded into its xref table entry.
 *
 * - Entries that already hold an object are left untouched.
 * - Free entries ('f') get a null object.
 * - In-use entries ('n') are parsed from the file at the entry's offset;
 *   the object is decrypted if the document has a crypt handler.
 * - Compressed entries ('o') are loaded via their object stream, whose
 *   number is kept in the entry's 'ofs' field.
 *
 * Returns fz_okay on success or an error describing what went wrong.
 */
fz_error
pdf_cacheobject(pdf_xref *xref, int num, int gen)
{
	fz_error error;
	pdf_xrefentry *x;
	int rnum, rgen;

	if (num < 0 || num >= xref->len)
		return fz_throw("object out of range (%d %d R); xref size %d", num, gen, xref->len);

	x = &xref->table[num];

	if (x->obj)
		return fz_okay;

	if (x->type == 'f')
	{
		x->obj = fz_newnull();
		return fz_okay;
	}
	else if (x->type == 'n')
	{
		fz_seek(xref->file, x->ofs, 0);

		error = pdf_parseindobj(&x->obj, xref, xref->file, xref->scratch, sizeof xref->scratch,
			&rnum, &rgen, &x->stmofs);
		if (error)
			return fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);

		if (rnum != num)
		{
			/*
			 * Do not leave the wrong object in the cache: a later call
			 * would find x->obj set and report success for it.
			 */
			fz_dropobj(x->obj);
			x->obj = nil;
			return fz_throw("found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
		}

		if (xref->crypt)
			pdf_cryptobj(xref->crypt, x->obj, num, gen);
	}
	else if (x->type == 'o')
	{
		if (!x->obj)
		{
			error = pdf_loadobjstm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch);
			if (error)
				return fz_rethrow(error, "cannot load object stream containing object (%d %d R)", num, gen);
			if (!x->obj)
				return fz_throw("object (%d %d R) was not found in its object stream", num, gen);
		}
	}
	else
	{
		return fz_throw("assert: corrupt xref struct");
	}

	return fz_okay;
}
/*
 * Read callback of the null filter: read at most 'len' bytes, and never
 * more than the filter still has remaining, from the chained stream at
 * the filter's own position. Returns the value returned by fz_read.
 */
static int
read_null(fz_stream *stm, unsigned char *buf, int len)
{
	struct null_filter *nf = stm->state;
	int want = fz_mini(len, nf->remain);
	int got;

	/* reposition the chained stream to where this filter left off */
	fz_seek(nf->chain, nf->pos, 0);
	got = fz_read(nf->chain, buf, want);

	nf->remain -= got;
	nf->pos += got;

	return got;
}
/*
 * Read the "%PDF-M.N" marker from the first line of the file and store
 * the version as M * 10 + N in xref->version (e.g. 14 for "%PDF-1.4").
 * A marker without a minor part is treated as minor version 0.
 * Returns an error if the first line does not start with "%PDF-".
 */
static fz_error
pdf_load_version(pdf_xref *xref)
{
	char buf[20];
	char *dot;

	fz_seek(xref->file, 0, 0);
	fz_read_line(xref->file, buf, sizeof buf);

	/* strncmp stops at the terminator, so a short line is never over-read */
	if (strncmp(buf, "%PDF-", 5) != 0)
		return fz_throw("cannot recognize version marker");

	/*
	 * Locate the minor version by its separator instead of assuming it
	 * sits at buf + 7, which lies past the terminator for short lines.
	 */
	dot = strchr(buf + 5, '.');
	xref->version = atoi(buf + 5) * 10 + (dot ? atoi(dot + 1) : 0);

	return fz_okay;
}
/*
 * Open a stream for reading the raw (compressed but decrypted) data
 * of object (oid, gen). The stream is returned through 'stmp'.
 * Using xref->file while this is open is a bad idea.
 */
fz_error *
pdf_openrawstream(fz_stream **stmp, pdf_xref *xref, int oid, int gen)
{
	fz_filter *filter;
	fz_error *error;
	pdf_xrefentry *x;

	if (oid < 0 || oid >= xref->len)
		return fz_throw("object id out of range (%d)", oid);

	x = &xref->table[oid];

	error = pdf_cacheobject(xref, oid, gen);
	if (error)
		return fz_rethrow(error, "cannot load stream object (%d)", oid);

	/* stream data held in a buffer takes precedence over the file */
	if (x->stmbuf)
	{
		error = fz_openrbuffer(stmp, x->stmbuf);
		if (error)
			return fz_rethrow(error, "cannot open stream from buffer");
		return fz_okay;
	}

	if (!x->stmofs)
		return fz_throw("object is not a stream");

	error = buildrawfilter(&filter, xref, x->obj, oid, gen);
	if (error)
		return fz_rethrow(error, "cannot create raw filter");

	error = fz_seek(xref->file, x->stmofs, 0);
	if (error)
	{
		fz_dropfilter(filter);
		return fz_rethrow(error, "cannot seek to stream");
	}

	/* our filter reference is released whether or not the open succeeds */
	error = fz_openrfilter(stmp, filter, xref->file);
	fz_dropfilter(filter);
	if (error)
		return fz_rethrow(error, "cannot open filter stream");

	return fz_okay;
}
/*
 * Read the string described by a TrueType name record and decode it
 * into 'buf' (capacity 'size') according to the record's platform and
 * encoding ids. 'offset' is the base to which the record's string offset
 * is added. Empty strings and strings that would not fit the temporary
 * buffer are skipped, returning fz_okay without touching 'buf'.
 */
static fz_error
pdf_read_ttf_string(fz_stream *file, int offset, TT_NAME_RECORD *ttRecord, char *buf, int size)
{
	char szTemp[MAX_FACENAME * 2];
	fz_error err;
	int stringLength = BEtoHs(ttRecord->uStringLength);

	// ignore empty strings
	if (stringLength == 0)
		return fz_okay;
	// ignore overlong strings
	if (stringLength >= sizeof(szTemp))
		return fz_okay;

	fz_seek(file, offset + BEtoHs(ttRecord->uStringOffset), 0);

	err = safe_read(file, szTemp, stringLength);
	if (err)
		return err;

	return decodeplatformstring(file->ctx,
		BEtoHs(ttRecord->uPlatformID), BEtoHs(ttRecord->uEncodingID),
		szTemp, stringLength, buf, size);
}
/*
 * Read the "%PDF-M.N" marker from the first line of the file and store
 * the version multiplied by ten in xref->version (e.g. 14 for "1.4").
 * Returns an error if the first line does not start with "%PDF-".
 */
static fz_error
pdf_loadversion(pdf_xref *xref)
{
	char buf[20];

	fz_seek(xref->file, 0, 0);
	fz_readline(xref->file, buf, sizeof buf);
	if (memcmp(buf, "%PDF-", 5) != 0)
		return fz_throw("cannot recognize version marker");

	/*
	 * Round to nearest instead of truncating: most decimal fractions are
	 * not exactly representable in binary floating point, so the product
	 * can land just below the intended integer.
	 */
	xref->version = (int) (atof(buf + 5) * 10.0 + 0.5);

	pdf_logxref("version %d.%d\n", xref->version / 10, xref->version % 10);

	return fz_okay;
}
/*
 * Open a filtered stream for the stream data of object (num, gen),
 * passing 'params' on to pdf_open_filter.
 * The filter chain is built on top of xref->file, which is then seeked
 * to the object's stream offset.
 * Throws if the object number is out of range or the object has no
 * stream offset.
 */
fz_stream *
pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params)
{
	pdf_xref_entry *entry;
	fz_stream *filtered;

	if (num < 0 || num >= xref->len)
	{
		fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen);
	}

	entry = &xref->table[num];

	pdf_cache_object(xref, num, gen);
	/* RJW: "cannot load stream object (%d %d R)", num, gen */

	if (entry->stm_ofs == 0)
	{
		fz_throw(xref->ctx, "object is not a stream");
	}

	filtered = pdf_open_filter(xref->file, xref, entry->obj, num, gen, params);
	fz_seek(xref->file, entry->stm_ofs, 0);

	return filtered;
}
/*
 * Read the "%PDF-M.N" marker from the first line of the file and store
 * the version multiplied by ten, rounded to the nearest integer, in
 * xref->version (e.g. 14 for "1.4").
 * Returns an error if seeking or reading fails, or if the first line
 * does not start with "%PDF-".
 */
static fz_error
loadversion(pdf_xref *xref)
{
	char buf[20];
	fz_error error;

	error = fz_seek(xref->file, 0, 0);
	if (error)
	{
		return fz_rethrow(error, "cannot seek to beginning of file");
	}

	error = fz_readline(xref->file, buf, sizeof buf);
	if (error)
	{
		return fz_rethrow(error, "cannot read version marker");
	}

	if (memcmp(buf, "%PDF-", 5) != 0)
	{
		return fz_throw("cannot recognize version marker");
	}

	/* the added 0.5 turns the integer conversion into round-to-nearest */
	xref->version = (int) (atof(buf + 5) * 10.0 + 0.5);

	pdf_logxref("version %d.%d\n", xref->version / 10, xref->version % 10);

	return fz_okay;
}
/*
 * Open a stream for reading uncompressed data of object (num, gen).
 * The opened stream is returned through 'stmp'.
 * Using xref->file while a stream is open is a bad idea.
 */
fz_error
pdf_open_stream(fz_stream **stmp, pdf_xref *xref, int num, int gen)
{
	pdf_xref_entry *entry;
	fz_error error;

	if (num < 0 || num >= xref->len)
		return fz_throw("object id out of range (%d %d R)", num, gen);

	entry = &xref->table[num];

	error = pdf_cache_object(xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load stream object (%d %d R)", num, gen);

	if (!entry->stm_ofs)
		return fz_throw("object is not a stream");

	/* build the filter chain first, then position the underlying file */
	*stmp = pdf_open_filter(xref->file, xref, entry->obj, num, gen);
	fz_seek(xref->file, entry->stm_ofs, 0);

	return fz_okay;
}
/*
 * Read the contents of zip entry 'ent' into 'outbuf'.
 *
 * The local file header at ent->offset is validated and skipped, then
 * the data is either copied verbatim (method 0, ent->usize bytes) or
 * inflated with zlib (method 8, ent->csize compressed bytes producing
 * up to ent->usize bytes). The caller must provide at least ent->usize
 * bytes in 'outbuf'.
 *
 * Returns fz_okay on success or an error for a bad signature, an
 * unsupported compression method or a zlib failure.
 */
static int
xps_read_zip_entry(xps_context *ctx, xps_entry *ent, unsigned char *outbuf)
{
	z_stream stream;
	unsigned char *inbuf;
	int sig;
	int method;
	int namelength, extralength;
	int code;

	fz_seek(ctx->file, ent->offset, 0);

	sig = getlong(ctx->file);
	if (sig != ZIP_LOCAL_FILE_SIG)
		return fz_error_make(ctx->ctx, "wrong zip local file signature (0x%x)", sig);

	(void) getshort(ctx->file); /* version */
	(void) getshort(ctx->file); /* general */
	method = getshort(ctx->file);
	(void) getshort(ctx->file); /* file time */
	(void) getshort(ctx->file); /* file date */
	(void) getlong(ctx->file); /* crc-32 */
	(void) getlong(ctx->file); /* csize */
	(void) getlong(ctx->file); /* usize */
	namelength = getshort(ctx->file);
	extralength = getshort(ctx->file);

	/* skip the file name and the extra field */
	fz_seek(ctx->file, namelength + extralength, 1);

	if (method == 0)
	{
		fz_read(ctx->file, outbuf, ent->usize);
	}
	else if (method == 8)
	{
		inbuf = fz_malloc(ctx->ctx, ent->csize);

		fz_read(ctx->file, inbuf, ent->csize);

		memset(&stream, 0, sizeof(z_stream));
		stream.zalloc = (alloc_func) xps_zip_alloc_items;
		stream.zfree = (free_func) xps_zip_free;
		stream.opaque = ctx;
		stream.next_in = inbuf;
		stream.avail_in = ent->csize;
		stream.next_out = outbuf;
		stream.avail_out = ent->usize;

		/* negative window bits: raw deflate data without a zlib header */
		code = inflateInit2(&stream, -15);
		if (code != Z_OK)
		{
			/* inbuf must be released on every exit path */
			fz_free(ctx->ctx, inbuf);
			return fz_error_make(ctx->ctx, "zlib inflateInit2 error: %s", stream.msg);
		}

		code = inflate(&stream, Z_FINISH);
		if (code != Z_STREAM_END)
		{
			inflateEnd(&stream);
			fz_free(ctx->ctx, inbuf);
			return fz_error_make(ctx->ctx, "zlib inflate error: %s", stream.msg);
		}

		code = inflateEnd(&stream);
		if (code != Z_OK)
		{
			fz_free(ctx->ctx, inbuf);
			return fz_error_make(ctx->ctx, "zlib inflateEnd error: %s", stream.msg);
		}

		fz_free(ctx->ctx, inbuf);
	}
	else
	{
		return fz_error_make(ctx->ctx, "unknown compression method (%d)", method);
	}

	return fz_okay;
}
/*
 * Scan over one object body, starting at the current position of 'file',
 * while collecting what is needed to rebuild an xref entry for it.
 *
 * file     - stream positioned just after the "obj" keyword (the caller
 *            in view calls this right after lexing PDF_TOBJ)
 * buf, cap - scratch buffer used for lexing and scanning
 * stmofsp  - receives the offset of the stream data, or 0 if none is found
 * stmlenp  - receives the stream length found by scanning for
 *            "endstream"; stays -1 when no scan was needed
 * encrypt  - updated with the Encrypt entry of an XRef stream dictionary
 * id       - updated with the ID entry of an XRef stream dictionary
 *
 * Returns fz_okay, or an error if lexing, parsing or reading fails.
 */
static fz_error
fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
	fz_error error;
	int tok;
	int stmlen;
	int len;
	int n;

	*stmofsp = 0;
	*stmlenp = -1;

	stmlen = 0;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse object");

	/* the object is a dictionary: look at the entries we care about */
	if (tok == PDF_TODICT)
	{
		fz_obj *dict, *obj;

		/* Send nil xref so we don't try to resolve references */
		error = pdf_parsedict(&dict, nil, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");

		/* XRef dictionaries may carry Encrypt and ID; newer values replace older ones */
		obj = fz_dictgets(dict, "Type");
		if (fz_isname(obj) && !strcmp(fz_toname(obj), "XRef"))
		{
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (*encrypt)
					fz_dropobj(*encrypt);
				*encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (*id)
					fz_dropobj(*id);
				*id = fz_keepobj(obj);
			}
		}

		/* remember the declared stream length; only a direct integer is used */
		obj = fz_dictgets(dict, "Length");
		if (fz_isint(obj))
			stmlen = fz_toint(obj);

		fz_dropobj(dict);
	}

	/* skip tokens until the object ends, a stream starts, or lexing gives up */
	while ( tok != PDF_TSTREAM &&
		tok != PDF_TENDOBJ &&
		tok != PDF_TERROR &&
		tok != PDF_TEOF )
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj or stream token");
	}

	if (tok == PDF_TSTREAM)
	{
		/* consume the byte after the keyword, plus the '\n' of a "\r\n" pair */
		int c = fz_readbyte(file);
		if (c == '\r') {
			c = fz_peekbyte(file);
			if (c == '\n')
				fz_readbyte(file);
		}

		*stmofsp = fz_tell(file);
		if (*stmofsp < 0)
			return fz_throw("cannot seek in file");

		/*
		 * First trust the declared length: if "endstream" follows the data
		 * at that distance, no correction is needed and *stmlenp stays -1.
		 */
		if (stmlen > 0)
		{
			fz_seek(file, *stmofsp + stmlen, 0);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				fz_catch(error, "cannot find endstream token, falling back to scanning");
			if (tok == PDF_TENDSTREAM)
				goto atobjend;
			/* declared length was wrong: rewind to the start of the data */
			fz_seek(file, *stmofsp, 0);
		}

		/* prime a 9 byte window at the start of buf */
		n = fz_read(file, (unsigned char *) buf, 9);
		if (n < 0)
			return fz_rethrow(n, "cannot read from file");

		/* slide the window one byte at a time until it holds "endstream" or EOF is hit */
		while (memcmp(buf, "endstream", 9) != 0)
		{
			c = fz_readbyte(file);
			if (c == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = c;
		}

		/* the file position is now just past the 9 byte window */
		*stmlenp = fz_tell(file) - *stmofsp - 9;

atobjend:
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj token");
		if (tok != PDF_TENDOBJ)
			fz_warn("object missing 'endobj' token");
	}

	return fz_okay;
}
/*
 * Read an old-style ("xref" keyword) cross reference table and the
 * trailer dictionary that follows it.
 *
 * Each subsection header gives the first object number and the entry
 * count; every entry is a 20 byte record holding offset, generation and
 * type. Entries whose type is already set in xref->table are not
 * overwritten. The table is grown when a subsection reaches past its
 * current length. The trailer dictionary is returned through 'trailerp'.
 *
 * 'buf'/'cap' is scratch space for line reading and lexing.
 */
static fz_error
pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int ofs, len;
	char *s;
	int n;
	int tok;
	int i;
	int c;

	fz_read_line(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	/* each subsection starts with a line beginning with a digit */
	while (1)
	{
		c = fz_peek_byte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_read_line(xref->file, buf, cap);
		s = buf;
		ofs = atoi(fz_strsep(&s, " "));
		/* without a separator there is no second field to parse */
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(fz_strsep(&s, " "));

		/* negative values would index xref->table out of bounds below */
		if (ofs < 0 || len < 0)
			return fz_throw("invalid range marker in xref");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (ofs + len > xref->len)
		{
			fz_warn("broken xref section, proceeding anyway.");
			pdf_resize_xref(xref, ofs + len);
		}

		for (i = ofs; i < ofs + len; i++)
		{
			n = fz_read(xref->file, (unsigned char *) buf, 20);
			if (n < 0)
				return fz_rethrow(n, "cannot read xref table");
			if (!xref->table[i].type)
			{
				s = buf;

				/* broken pdfs where line start with white space */
				while (*s != '\0' && iswhite(*s))
					s++;

				xref->table[i].ofs = atoi(s);
				xref->table[i].gen = atoi(s + 11);
				xref->table[i].type = s[17];
				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
					return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen);
			}
		}
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_TRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TOK_OPEN_DICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}
/*
 * Load all objects stored in the object stream (num, gen) into the
 * xref table.
 *
 * The stream starts with N pairs of integers (object number, offset)
 * and the objects themselves start at byte First; both values come from
 * the stream dictionary. A parsed object is stored only when its xref
 * entry is of type 'o' and refers back to this object stream; any other
 * object is dropped.
 *
 * 'buf'/'cap' is scratch space for lexing and parsing.
 */
static fz_error
pdf_loadobjstm(pdf_xref *xref, int num, int gen, char *buf, int cap)
{
	fz_error error;
	fz_stream *stm;
	fz_obj *objstm;
	int *numbuf;
	int *ofsbuf;

	fz_obj *obj;
	int first;
	int count;
	int i, n;
	pdf_token_e tok;

	pdf_logxref("loadobjstm (%d %d R)\n", num, gen);

	error = pdf_loadobject(&objstm, xref, num, gen);
	if (error)
		return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen);

	count = fz_toint(fz_dictgets(objstm, "N"));
	first = fz_toint(fz_dictgets(objstm, "First"));

	/*
	 * Both values come from the file. A negative count would turn into a
	 * huge allocation size and a negative First into a bogus seek target.
	 */
	if (count < 0 || first < 0)
	{
		fz_dropobj(objstm);
		return fz_throw("corrupt object stream (%d %d R)", num, gen);
	}

	pdf_logxref("\tcount %d\n", count);

	numbuf = fz_malloc(count * sizeof(int));
	ofsbuf = fz_malloc(count * sizeof(int));

	error = pdf_openstream(&stm, xref, num, gen);
	if (error)
	{
		error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen);
		goto cleanupbuf;
	}

	/* read the (object number, offset) index at the start of the stream */
	for (i = 0; i < count; i++)
	{
		error = pdf_lex(&tok, stm, buf, cap, &n);
		if (error || tok != PDF_TINT)
		{
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
			goto cleanupstm;
		}
		numbuf[i] = atoi(buf);

		error = pdf_lex(&tok, stm, buf, cap, &n);
		if (error || tok != PDF_TINT)
		{
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
			goto cleanupstm;
		}
		ofsbuf[i] = atoi(buf);
	}

	fz_seek(stm, first, 0);

	for (i = 0; i < count; i++)
	{
		fz_seek(stm, first + ofsbuf[i], 0);

		error = pdf_parsestmobj(&obj, xref, stm, buf, cap);
		if (error)
		{
			error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen);
			goto cleanupstm;
		}

		if (numbuf[i] < 1 || numbuf[i] >= xref->len)
		{
			fz_dropobj(obj);
			error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1);
			goto cleanupstm;
		}

		/* keep the object only if the xref says it lives in this stream */
		if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num)
		{
			if (xref->table[numbuf[i]].obj)
				fz_dropobj(xref->table[numbuf[i]].obj);
			xref->table[numbuf[i]].obj = obj;
		}
		else
		{
			fz_dropobj(obj);
		}
	}

	fz_close(stm);
	fz_free(ofsbuf);
	fz_free(numbuf);
	fz_dropobj(objstm);
	return fz_okay;

cleanupstm:
	fz_close(stm);
cleanupbuf:
	fz_free(ofsbuf);
	fz_free(numbuf);
	fz_dropobj(objstm);
	return error; /* already rethrown */
}
/*
 * Rebuild the cross reference table of a damaged file by scanning it
 * from start to end.
 *
 * Every "<num> <gen> obj" sequence found is recorded together with its
 * file offset and stream information; Encrypt, ID, Root and Info entries
 * are collected from any dictionary met at the top level and from XRef
 * stream dictionaries. Afterwards the xref table is resized and filled,
 * stream lengths that had to be found by scanning are written back into
 * the stream dictionaries, the free list is rebuilt and a new trailer is
 * created in xref->trailer.
 *
 * 'buf'/'bufsize' is scratch space for reading and lexing.
 */
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/*
	 * look for '%PDF' version marker within first kilobyte of file;
	 * never read more than the scratch buffer can hold
	 */
	n = fz_read(xref->file, (unsigned char *)buf, bufsize < 1024 ? bufsize : 1024);
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	for (i = 0; i < n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		/* remember the last two integers and where they started */
		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			if (num < 0)
			{
				/* a negative number would index xref->table out of bounds */
				fz_warn("ignoring object with invalid object number (%d %d R)", num, gen);
			}
			else
			{
				pdf_logxref("found object: (%d %d R)\n", num, gen);

				if (listlen + 1 == listcap)
				{
					listcap = (listcap * 3) / 2;
					list = fz_realloc(list, listcap, sizeof(struct entry));
				}

				list[listlen].num = num;
				list[listlen].gen = gen;
				list[listlen].ofs = numofs;
				list[listlen].stmofs = stmofs;
				list[listlen].stmlen = stmlen;
				listlen ++;

				if (num > maxnum)
					maxnum = num;
			}
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		/* step over the offending byte so the lexer makes progress */
		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}
	}

	/* object 0 is always the head of the free list */
	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	/* chain the free entries: each one's ofs names the next free entry */
	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer, Root will be added later */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}
/*
 * Parse the TrueType font found at 'offset' in 'file' and register the
 * name stored in its name record with id 6 in fontlistMS, mapped to
 * 'path' and face 'index'.
 *
 * Only fonts with table version 1.0 are accepted. Fonts without a
 * 'name' table are silently ignored.
 *
 * NOTE(review): SAFE_FZ_READ is presumably a macro that uses 'err',
 * 'byteread' and the 'cleanup' label on failure - confirm against its
 * definition before removing any of them.
 */
static fz_error *
parseTTF(fz_stream *file, int offset, int index, char *path)
{
	fz_error *err = nil;
	int byteread;

	TT_OFFSET_TABLE ttOffsetTable;
	TT_TABLE_DIRECTORY tblDir;
	TT_NAME_TABLE_HEADER ttNTHeader;
	TT_NAME_RECORD ttRecord;

	char szTemp[4096];
	int found;
	int i;

	fz_seek(file,offset,0);
	SAFE_FZ_READ(file, &ttOffsetTable, sizeof(TT_OFFSET_TABLE));

	ttOffsetTable.uNumOfTables = SWAPWORD(ttOffsetTable.uNumOfTables);
	ttOffsetTable.uMajorVersion = SWAPWORD(ttOffsetTable.uMajorVersion);
	ttOffsetTable.uMinorVersion = SWAPWORD(ttOffsetTable.uMinorVersion);

	//check is this is a true type font and the version is 1.0
	if(ttOffsetTable.uMajorVersion != 1 || ttOffsetTable.uMinorVersion != 0)
		return fz_throw("fonterror : invalid font version");

	/* walk the table directory looking for the 'name' table */
	found = 0;

	for(i = 0; i< ttOffsetTable.uNumOfTables; i++)
	{
		SAFE_FZ_READ(file,&tblDir,sizeof(TT_TABLE_DIRECTORY));

		memcpy(szTemp, tblDir.szTag, 4);
		szTemp[4] = 0;

		if (stricmp(szTemp, "name") == 0)
		{
			found = 1;
			tblDir.uLength = SWAPLONG(tblDir.uLength);
			tblDir.uOffset = SWAPLONG(tblDir.uOffset);
			break;
		}
		else if (szTemp[0] == 0)
		{
			break;
		}
	}

	if (found)
	{
		fz_seek(file,tblDir.uOffset,0);

		SAFE_FZ_READ(file,&ttNTHeader,sizeof(TT_NAME_TABLE_HEADER));

		ttNTHeader.uNRCount = SWAPWORD(ttNTHeader.uNRCount);
		ttNTHeader.uStorageOffset = SWAPWORD(ttNTHeader.uStorageOffset);

		/* the name records follow the table header directly */
		offset = tblDir.uOffset + sizeof(TT_NAME_TABLE_HEADER);

		for(i = 0; i < ttNTHeader.uNRCount && err == nil; ++i)
		{
			fz_seek(file, offset + sizeof(TT_NAME_RECORD)*i, 0);
			SAFE_FZ_READ(file,&ttRecord,sizeof(TT_NAME_RECORD));

			ttRecord.uNameID = SWAPWORD(ttRecord.uNameID);
			ttRecord.uLanguageID = SWAPWORD(ttRecord.uLanguageID);

			// Full Name
			if(ttRecord.uNameID == 6)
			{
				ttRecord.uPlatformID = SWAPWORD(ttRecord.uPlatformID);
				ttRecord.uEncodingID = SWAPWORD(ttRecord.uEncodingID);
				ttRecord.uStringLength = SWAPWORD(ttRecord.uStringLength);
				ttRecord.uStringOffset = SWAPWORD(ttRecord.uStringOffset);

				/*
				 * The length comes from the font file; skip names that
				 * would not fit into szTemp instead of overrunning it.
				 */
				if (ttRecord.uStringLength >= sizeof(szTemp))
					continue;

				fz_seek(file, tblDir.uOffset + ttRecord.uStringOffset + ttNTHeader.uStorageOffset, 0);
				SAFE_FZ_READ(file, szTemp, ttRecord.uStringLength);

				/* decode in place according to the record's platform */
				switch(ttRecord.uPlatformID)
				{
				case PLATFORM_UNICODE:
					err = decodeunicodeplatform(szTemp, ttRecord.uStringLength,
						szTemp, sizeof(szTemp), ttRecord.uEncodingID);
					break;
				case PLATFORM_MACINTOSH:
					err = decodemacintoshplatform(szTemp, ttRecord.uStringLength,
						szTemp, sizeof(szTemp), ttRecord.uEncodingID);
					break;
				case PLATFORM_ISO:
					err = fz_throw("fonterror : unsupported platform");
					break;
				case PLATFORM_MICROSOFT:
					err = decodemicrosoftplatform(szTemp, ttRecord.uStringLength,
						szTemp, sizeof(szTemp), ttRecord.uEncodingID);
					break;
				}

				if(err == nil)
					err = insertmapping(&fontlistMS, szTemp, path, index);
			}
		}
	}

cleanup:
	return err;
}
/*
 * Parse the TrueType/OpenType font found at 'offset' in 'file' and add
 * mappings from its names to 'path' (face 'index') in the font list of
 * 'xref'.
 *
 * Two names may be registered: the PostScript name stored in the font,
 * and a PostScript-like name derived from the font family and subfamily
 * when lookupcompare reports it as different from the stored one.
 * Strings whose language id is non-zero and not US English are ignored.
 */
static fz_error
parseTTF(fz_stream *file, int offset, int index, char *path, pdf_xref *xref)
{
	fz_error err = fz_okay;

	TT_OFFSET_TABLE ttOffsetTable;
	TT_TABLE_DIRECTORY tblDir;
	TT_NAME_TABLE_HEADER ttNTHeader;
	TT_NAME_RECORD ttRecord;

	char szPSName[MAX_FACENAME] = { 0 };
	char szTTName[MAX_FACENAME] = { 0 };
	char szStyle[MAX_FACENAME] = { 0 };
	int i, count, tblOffset;

	fz_seek(file,offset,0);
	err = safe_read(file, (char *)&ttOffsetTable, sizeof(TT_OFFSET_TABLE));
	if (err)
		return err;

	// check if this is a TrueType font of version 1.0 or an OpenType font
	if (BEtoHl(ttOffsetTable.uVersion) != TTC_VERSION1 && ttOffsetTable.uVersion != TTAG_OTTO)
		return fz_error_make(file->ctx, "fonterror : invalid font version");

	// determine the name table's offset by iterating through the offset table
	count = BEtoHs(ttOffsetTable.uNumOfTables);
	for (i = 0; i < count; i++)
	{
		err = safe_read(file, (char *)&tblDir, sizeof(TT_TABLE_DIRECTORY));
		if (err)
			return err;
		if (!tblDir.uTag || BEtoHl(tblDir.uTag) == TTAG_name)
			break;
	}
	// either the directory was exhausted or an empty tag ended the search
	if (count == i || !tblDir.uTag)
		return fz_error_make(file->ctx, "fonterror : nameless font");
	tblOffset = BEtoHl(tblDir.uOffset);

	// read the 'name' table for record count and offsets
	fz_seek(file, tblOffset, 0);
	err = safe_read(file, (char *)&ttNTHeader, sizeof(TT_NAME_TABLE_HEADER));
	if (err)
		return err;
	offset = tblOffset + sizeof(TT_NAME_TABLE_HEADER);
	tblOffset += BEtoHs(ttNTHeader.uStorageOffset);

	// read through the strings for PostScript name and font family
	count = BEtoHs(ttNTHeader.uNRCount);
	for (i = 0; i < count; i++)
	{
		short nameId;

		fz_seek(file, offset + i * sizeof(TT_NAME_RECORD), 0);
		err = safe_read(file, (char *)&ttRecord, sizeof(TT_NAME_RECORD));
		if (err)
			return err;

		// ignore non-English strings
		if (ttRecord.uLanguageID && BEtoHs(ttRecord.uLanguageID) != TT_MS_LANGID_ENGLISH_UNITED_STATES)
			continue;

		// ignore names other than font (sub)family and PostScript name
		nameId = BEtoHs(ttRecord.uNameID);
		if (TT_NAME_ID_FONT_FAMILY == nameId)
		{
			err = pdf_read_ttf_string(file, tblOffset, &ttRecord, szTTName, MAX_FACENAME);
		}
		else if (TT_NAME_ID_FONT_SUBFAMILY == nameId)
		{
			err = pdf_read_ttf_string(file, tblOffset, &ttRecord, szStyle, MAX_FACENAME);
		}
		else if (TT_NAME_ID_PS_NAME == nameId)
		{
			err = pdf_read_ttf_string(file, tblOffset, &ttRecord, szPSName, MAX_FACENAME);
		}

		// a name that fails to decode is reported but does not abort the scan
		if (err)
			fz_error_handle(file->ctx, err, "ignoring face name decoding fonterror");
	}

	// TODO: is there a better way to distinguish Arial Caps from Arial proper?
	// cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1290
	if (!strcmp(szPSName, "ArialMT") && (strstr(path, "caps") || strstr(path, "Caps")))
		return fz_error_make(file->ctx, "ignore %s, as it can't be distinguished from Arial,Regular", path);

	if (szPSName[0])
	{
		err = insertmapping(xref->ctx, xref->win_fontlist, szPSName, path, index);
		if (err)
			return err;
	}

	// without a family name there is nothing left to derive
	if (!szTTName[0])
		return fz_okay;

	// derive a PostScript-like name and add it, if it's different from the font's
	// included PostScript name; cf. http://code.google.com/p/sumatrapdf/issues/detail?id=376

	// append the font's subfamily, unless it's a Regular font
	if (szStyle[0] && _stricmp(szStyle, "Regular") != 0)
	{
		fz_strlcat(szTTName, "-", MAX_FACENAME);
		fz_strlcat(szTTName, szStyle, MAX_FACENAME);
	}
	removespaces(szTTName);

	// compare the two names before adding this one
	if (lookupcompare(szTTName, szPSName))
	{
		err = insertmapping(xref->ctx, xref->win_fontlist, szTTName, path, index);
		if (err)
			return err;
	}

	return fz_okay;
}
/*
 * Read a classic ("old format") cross-reference table at the current
 * position of xref->file, followed by its trailer dictionary.
 *
 * trailerp: receives the dictionary parsed by pdf_parsedict
 * xref:     its table is grown as needed (cap/len are updated); entries whose
 *           type is already set are left untouched
 * buf, cap: scratch buffer used for line reading and lexing
 *
 * Returns fz_okay, or an error if the "xref" marker, a subsection header or
 * the trailer is malformed or cannot be read.
 */
static fz_error
pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error error;
	int ofs, len;
	char *s;
	int n;
	pdf_token_e tok;
	int i;
	int c;

	pdf_logxref("load old xref format\n");

	fz_readline(xref->file, buf, cap);
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	/* each subsection starts with a "<first object> <count>" line */
	while (1)
	{
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		fz_readline(xref->file, buf, cap);
		s = buf;
		ofs = atoi(fz_strsep(&s, " "));
		/* no separator on the line: there is no count to read (s is NULL now) */
		if (!s)
			return fz_throw("invalid range marker in xref");
		len = atoi(fz_strsep(&s, " "));

		/* the values come from the file and are used as table indices below */
		if (ofs < 0 || len < 0)
			return fz_throw("invalid range marker in xref");
		/* ofs + len must fit in an int; summed unsigned to avoid signed overflow */
		if ((unsigned int)ofs + (unsigned int)len > (~0u >> 1))
			return fz_throw("invalid range marker in xref");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (ofs + len > xref->cap)
		{
			fz_warn("broken xref section, proceeding anyway.");
			xref->cap = ofs + len;
			xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry));
		}

		/* clear the entries between the old and the new end of the table */
		if ((ofs + len) > xref->len)
		{
			for (i = xref->len; i < (ofs + len); i++)
			{
				xref->table[i].ofs = 0;
				xref->table[i].gen = 0;
				xref->table[i].stmofs = 0;
				xref->table[i].obj = nil;
				xref->table[i].type = 0;
			}
			xref->len = ofs + len;
		}

		/* one fixed-width 20 byte record per object */
		for (i = ofs; i < ofs + len; i++)
		{
			n = fz_read(xref->file, (unsigned char *) buf, 20);
			if (n < 0)
				return fz_rethrow(n, "cannot read xref table");

			/* entries that already have a type are kept as they are */
			if (!xref->table[i].type)
			{
				s = buf;

				/* broken pdfs where line start with white space */
				while (*s != '\0' && iswhite(*s))
					s++;

				xref->table[i].ofs = atoi(s);
				xref->table[i].gen = atoi(s + 11);
				xref->table[i].type = s[17];
			}
		}
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TTRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TODICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parsedict(trailerp, xref, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}
/*
 * Read the ZIP central directory whose "end of central directory" record
 * starts at start_offset in ctx->file.
 *
 * On success ctx->zip_count and ctx->zip_table are filled in (name, offset,
 * compressed and uncompressed size per entry), the table is sorted with
 * xps_compare_entries and fz_okay is returned. A wrong signature, an
 * unreadable entry or a ZIP64 value that turns negative when stored yields an
 * error from fz_error_make.
 */
static int
xps_read_zip_dir(xps_context *ctx, int start_offset)
{
	int sig;
	int offset, count;
	int namesize, metasize, commentsize;
	int i, n;

	fz_seek(ctx->file, start_offset, 0);

	sig = getlong(ctx->file);
	if (sig != ZIP_END_OF_CENTRAL_DIRECTORY_SIG)
		return fz_error_make(ctx->ctx, "wrong zip end of central directory signature (0x%x)", sig);

	(void) getshort(ctx->file); /* this disk */
	(void) getshort(ctx->file); /* start disk */
	(void) getshort(ctx->file); /* entries in this disk */
	count = getshort(ctx->file); /* entries in central directory disk */
	(void) getlong(ctx->file); /* size of central directory */
	offset = getlong(ctx->file); /* offset to central directory */

	/* SumatraPDF: support ZIP64 extension */
	if (count == 0xFFFF)
	{
		/* the ZIP64 locator is expected 20 bytes before the end record */
		fz_seek(ctx->file, start_offset - 20, 0);

		sig = getlong(ctx->file);
		if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG)
			return fz_error_make(ctx->ctx, "wrong zip64 end of central directory locator signature (0x%x)", sig);

		(void) getlong(ctx->file); /* start disk */
		offset = getlong64(ctx->file); /* offset to end of central directory record */
		if (offset < 0)
			return fz_error_make(ctx->ctx, "zip64 files larger than 2 GB aren't supported");

		fz_seek(ctx->file, offset, 0);

		sig = getlong(ctx->file);
		if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIG)
			return fz_error_make(ctx->ctx, "wrong zip64 end of central directory signature (0x%x)", sig);

		(void) getlong64(ctx->file); /* size of record */
		(void) getshort(ctx->file); /* version made by */
		(void) getshort(ctx->file); /* version to extract */
		(void) getlong(ctx->file); /* disk number */
		(void) getlong(ctx->file); /* disk number start */
		count = getlong64(ctx->file); /* entries in central directory disk */
		(void) getlong64(ctx->file); /* entries in central directory */
		(void) getlong64(ctx->file); /* size of central directory */
		offset = getlong64(ctx->file); /* offset to central directory */

		if (count < 0 || offset < 0)
			return fz_error_make(ctx->ctx, "zip64 files larger than 2 GB aren't supported");
	}

	ctx->zip_count = count;
	ctx->zip_table = fz_calloc(ctx->ctx, count, sizeof(xps_entry));
	memset(ctx->zip_table, 0, sizeof(xps_entry) * count);

	fz_seek(ctx->file, offset, 0);

	for (i = 0; i < count; i++)
	{
		sig = getlong(ctx->file);
		if (sig != ZIP_CENTRAL_DIRECTORY_SIG)
			return fz_error_make(ctx->ctx, "wrong zip central directory signature (0x%x)", sig);

		(void) getshort(ctx->file); /* version made by */
		(void) getshort(ctx->file); /* version to extract */
		(void) getshort(ctx->file); /* general */
		(void) getshort(ctx->file); /* method */
		(void) getshort(ctx->file); /* last mod file time */
		(void) getshort(ctx->file); /* last mod file date */
		(void) getlong(ctx->file); /* crc-32 */
		ctx->zip_table[i].csize = getlong(ctx->file);
		ctx->zip_table[i].usize = getlong(ctx->file);
		namesize = getshort(ctx->file);
		metasize = getshort(ctx->file);
		commentsize = getshort(ctx->file);
		(void) getshort(ctx->file); /* disk number start */
		(void) getshort(ctx->file); /* int file atts */
		(void) getlong(ctx->file); /* ext file atts */
		ctx->zip_table[i].offset = getlong(ctx->file);

		/*
		 * NOTE(review): presumably getshort can yield a negative value when
		 * the stream ends early - confirm; a negative namesize must not
		 * reach the allocation and the terminator write below.
		 */
		if (namesize < 0 || metasize < 0 || commentsize < 0)
			return fz_error_make(ctx->ctx, "cannot read zip central directory entry");

		ctx->zip_table[i].name = fz_malloc(ctx->ctx, namesize + 1);
		n = fz_read(ctx->file, (unsigned char*)ctx->zip_table[i].name, namesize);
		if (n < 0)
		{
			ctx->zip_table[i].name[0] = 0;
			return fz_error_make(ctx->ctx, "cannot read zip entry name");
		}
		/* terminate after the bytes actually read, so a short read leaves no garbage */
		ctx->zip_table[i].name[n] = 0;

		/* SumatraPDF: support ZIP64 extension */
		while (metasize > 0)
		{
			int type = getshort(ctx->file);
			int size = getshort(ctx->file);
			int used = 0; /* bytes of this extra field consumed so far */

			if (type == ZIP64_EXTRA_FIELD_SIG)
			{
				/*
				 * The ZIP64 extra field only carries the values whose
				 * 32-bit counterpart is saturated (0xFFFFFFFF), in the
				 * order usize, csize, offset.
				 */
				if ((unsigned int)ctx->zip_table[i].usize == 0xFFFFFFFFu && size - used >= 8)
				{
					ctx->zip_table[i].usize = getlong64(ctx->file);
					used += 8;
				}
				if ((unsigned int)ctx->zip_table[i].csize == 0xFFFFFFFFu && size - used >= 8)
				{
					ctx->zip_table[i].csize = getlong64(ctx->file);
					used += 8;
				}
				if ((unsigned int)ctx->zip_table[i].offset == 0xFFFFFFFFu && size - used >= 8)
				{
					ctx->zip_table[i].offset = getlong64(ctx->file);
					used += 8;
				}
			}

			/* skip whatever is left of this extra field */
			fz_seek(ctx->file, size - used, 1);
			metasize -= 4 + size;
		}

		if (ctx->zip_table[i].usize < 0 || ctx->zip_table[i].csize < 0 || ctx->zip_table[i].offset < 0)
			return fz_error_make(ctx->ctx, "zip64 files larger than 2 GB aren't supported");

		fz_seek(ctx->file, commentsize, 1);
	}

	qsort(ctx->zip_table, count, sizeof(xps_entry), xps_compare_entries);

	return fz_okay;
}
static unsigned char * cbz_read_zip_entry(cbz_document *doc, int offset, int *sizep) { fz_context *ctx = doc->ctx; fz_stream *file = doc->file; int sig, method, namelength, extralength; unsigned long csize, usize; unsigned char *cdata; int code; fz_seek(file, offset, 0); sig = getlong(doc->file); if (sig != ZIP_LOCAL_FILE_SIG) fz_throw(ctx, "wrong zip local file signature (0x%x)", sig); (void) getshort(doc->file); /* version */ (void) getshort(doc->file); /* general */ method = getshort(doc->file); (void) getshort(doc->file); /* file time */ (void) getshort(doc->file); /* file date */ (void) getlong(doc->file); /* crc-32 */ csize = getlong(doc->file); /* csize */ usize = getlong(doc->file); /* usize */ namelength = getshort(doc->file); extralength = getshort(doc->file); fz_seek(file, namelength + extralength, 1); cdata = fz_malloc(ctx, csize); fz_try(ctx) { fz_read(file, cdata, csize); } fz_catch(ctx) { fz_free(ctx, cdata); fz_rethrow(ctx); } if (method == 0) { *sizep = usize; return cdata; } if (method == 8) { unsigned char *udata = fz_malloc(ctx, usize); z_stream stream; memset(&stream, 0, sizeof stream); stream.zalloc = cbz_zip_alloc_items; stream.zfree = cbz_zip_free; stream.opaque = ctx; stream.next_in = cdata; stream.avail_in = csize; stream.next_out = udata; stream.avail_out = usize; fz_try(ctx) { code = inflateInit2(&stream, -15); if (code != Z_OK) fz_throw(ctx, "zlib inflateInit2 error: %s", stream.msg); code = inflate(&stream, Z_FINISH); if (code != Z_STREAM_END) { inflateEnd(&stream); fz_throw(ctx, "zlib inflate error: %s", stream.msg); } code = inflateEnd(&stream); if (code != Z_OK) fz_throw(ctx, "zlib inflateEnd error: %s", stream.msg); } fz_always(ctx) { fz_free(ctx, cdata); } fz_catch(ctx) { fz_free(ctx, udata); fz_rethrow(ctx); } *sizep = usize; return udata; }