/*
 * Parse one object from an object stream.
 *
 * A single token is read with pdf_lex and the matching object is stored
 * in *op. Arrays and dictionaries are delegated to pdf_parsearray and
 * pdf_parsedict; names, reals, strings, booleans, null and integers are
 * built directly from the token text left in buf (len gives the string
 * length). An integer is always turned into an integer object here; no
 * lookahead for an indirect reference is performed.
 *
 * Returns fz_okay on success. A lexer failure, a failure of a nested
 * parser, or an unexpected token is reported through fz_rethrow/fz_throw.
 */
fz_error
pdf_parsestmobj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	pdf_token_e token;
	fz_error err;
	int slen;

	err = pdf_lex(&token, file, buf, cap, &slen);
	if (err)
		return fz_rethrow(err, "cannot parse token in object stream");

	/* Composite objects share the same error handling. */
	if (token == PDF_TOARRAY || token == PDF_TODICT)
	{
		if (token == PDF_TOARRAY)
			err = pdf_parsearray(op, xref, file, buf, cap);
		else
			err = pdf_parsedict(op, xref, file, buf, cap);
		if (err)
			return fz_rethrow(err, "cannot parse object stream");
		return fz_okay;
	}

	/* Everything else is a scalar built straight from the token. */
	switch (token)
	{
	case PDF_TNAME:
		*op = fz_newname(buf);
		break;
	case PDF_TREAL:
		*op = fz_newreal(atof(buf));
		break;
	case PDF_TSTRING:
		*op = fz_newstring(buf, slen);
		break;
	case PDF_TTRUE:
		*op = fz_newbool(1);
		break;
	case PDF_TFALSE:
		*op = fz_newbool(0);
		break;
	case PDF_TNULL:
		*op = fz_newnull();
		break;
	case PDF_TINT:
		*op = fz_newint(atoi(buf));
		break;
	default:
		return fz_throw("unknown token in object stream");
	}

	return fz_okay;
}
/*
 * Prepend an ASCIIHexDecode stage to the filter chain described by dict.
 *
 * - Filter is a single name: it is replaced by a two-element array
 *   [ASCIIHexDecode, <old filter>]. If DecodeParms is a dictionary it is
 *   likewise replaced by [null, <old parms>].
 * - Filter is already an array: ASCIIHexDecode is inserted into it with
 *   fz_arrayinsert, and a null is inserted into DecodeParms when that is
 *   an array too.
 * - Otherwise: Filter becomes the bare name ASCIIHexDecode.
 *
 * DecodeParms is only written back when the dictionary had such an entry
 * (or a wrapper array was built for it).
 */
static void
addhexfilter(fz_obj *dict)
{
	fz_obj *hexname = fz_newname("ASCIIHexDecode");
	fz_obj *nullparm = fz_newnull();
	fz_obj *filterwrap = nil;
	fz_obj *parmswrap = nil;
	fz_obj *filter = fz_dictgets(dict, "Filter");
	fz_obj *parms = fz_dictgets(dict, "DecodeParms");

	if (fz_isname(filter))
	{
		/* Promote the single filter name to an array headed by the hex filter. */
		filterwrap = fz_newarray(2);
		fz_arraypush(filterwrap, hexname);
		fz_arraypush(filterwrap, filter);
		filter = filterwrap;

		if (fz_isdict(parms))
		{
			/* Keep the parameter list aligned with the filter list. */
			parmswrap = fz_newarray(2);
			fz_arraypush(parmswrap, nullparm);
			fz_arraypush(parmswrap, parms);
			parms = parmswrap;
		}
	}
	else if (fz_isarray(filter))
	{
		fz_arrayinsert(filter, hexname);
		if (fz_isarray(parms))
			fz_arrayinsert(parms, nullparm);
	}
	else
	{
		filter = hexname;
	}

	fz_dictputs(dict, "Filter", filter);
	if (parms)
		fz_dictputs(dict, "DecodeParms", parms);

	/* Release the references created in this function. */
	fz_dropobj(hexname);
	fz_dropobj(nullparm);
	if (filterwrap)
		fz_dropobj(filterwrap);
	if (parmswrap)
		fz_dropobj(parmswrap);
}
/*
 * Parse a name starting at **sp and create a name object in *obj.
 *
 * The first character (the leading '/') is skipped, then characters are
 * copied while isregular() accepts them. At most 63 characters are
 * copied; copying stops there even if more regular characters follow,
 * and *sp is left pointing at the first character that was not copied.
 *
 * Returns fz_okay, or the rethrown error from fz_newname.
 */
static fz_error
parsename(fz_obj **obj, char **sp)
{
	char name[64];
	char *limit = name + sizeof name - 1;	/* leave room for the terminator */
	char *src = *sp + 1;			/* skip '/' */
	char *dst;
	fz_error error;

	for (dst = name; dst < limit && isregular(*src); dst++, src++)
		*dst = *src;
	*dst = 0;

	*sp = src;

	error = fz_newname(obj, name);
	if (error)
		return fz_rethrow(error, "cannot create name");

	return fz_okay;
}
/*
 * Parse one object from an object stream (out-parameter constructor API).
 *
 * Reads one token with pdf_lex and returns the result of the constructor
 * or nested parser that matches it; the new object is stored in *op by
 * that callee. Any token not listed below yields a "corrupt object
 * stream" error from fz_throw.
 */
fz_error *
pdf_parsestmobj(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	int slen;
	int token = pdf_lex(file, buf, cap, &slen);

	/* composite objects */
	if (token == PDF_TOARRAY)
		return pdf_parsearray(op, file, buf, cap);
	if (token == PDF_TODICT)
		return pdf_parsedict(op, file, buf, cap);

	/* scalars built from the token text in buf */
	if (token == PDF_TNAME)
		return fz_newname(op, buf);
	if (token == PDF_TREAL)
		return fz_newreal(op, atof(buf));
	if (token == PDF_TSTRING)
		return fz_newstring(op, buf, slen);
	if (token == PDF_TTRUE)
		return fz_newbool(op, 1);
	if (token == PDF_TFALSE)
		return fz_newbool(op, 0);
	if (token == PDF_TNULL)
		return fz_newnull(op);
	if (token == PDF_TINT)
		return fz_newint(op, atoi(buf));

	return fz_throw("syntaxerror: corrupt object stream");
}
/*
 * Parse an indirect object definition: "<num> <gen> obj <object> ...".
 *
 * op      receives the parsed object.
 * onum    if non-null, receives the object number.
 * ogen    if non-null, receives the generation number.
 * ostmofs if non-null, receives the value of fz_tell(file) taken right
 *         after the "stream" keyword and its line ending, or 0 when the
 *         object is not followed by "stream".
 *
 * An integer object needs lookahead: "a b R" is an indirect reference,
 * while "a" directly followed by "stream" or "endobj" is a plain integer.
 * An "endobj" found where the object is expected yields a null object.
 * Anything other than "stream"/"endobj" after the object only produces a
 * warning; the object is still returned.
 *
 * Returns fz_okay on success, otherwise an error from fz_throw/fz_rethrow.
 */
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0, stmofs;
	pdf_token_e tok;
	int len;
	int a, b;
	int lookahead = 0;	/* set when tok already holds the token after the object */

	/* object number */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	/* generation number */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	/* "obj" keyword */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	/* first token of the object itself */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TNAME:
		obj = fz_newname(buf);
		break;
	case PDF_TREAL:
		obj = fz_newreal(atof(buf));
		break;
	case PDF_TSTRING:
		obj = fz_newstring(buf, len);
		break;
	case PDF_TTRUE:
		obj = fz_newbool(1);
		break;
	case PDF_TFALSE:
		obj = fz_newbool(0);
		break;
	case PDF_TNULL:
		obj = fz_newnull();
		break;

	case PDF_TINT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			/* plain integer; the token just read is the trailer */
			obj = fz_newint(a);
			lookahead = 1;
			break;
		}

		/* otherwise only "a b R" is acceptable */
		if (tok != PDF_TINT)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
		b = atoi(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok != PDF_TR)
			return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

		obj = fz_newindirect(a, b, xref);
		break;

	case PDF_TENDOBJ:
		/* empty object body */
		obj = fz_newnull();
		lookahead = 1;
		break;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	if (!lookahead)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(obj);
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		}
	}

	stmofs = 0;
	if (tok == PDF_TSTREAM)
	{
		int c;

		/* skip spaces after the keyword, then the end-of-line marker */
		do
			c = fz_readbyte(file);
		while (c == ' ');

		if (c == '\r')
		{
			if (fz_peekbyte(file) == '\n')
				fz_readbyte(file);
			else
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
		}
		stmofs = fz_tell(file);
	}
	else if (tok != PDF_TENDOBJ)
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
	}

	if (onum)
		*onum = num;
	if (ogen)
		*ogen = gen;
	if (ostmofs)
		*ostmofs = stmofs;
	*op = obj;

	return fz_okay;
}
/*
 * Parse the body of a dictionary (the opening "<<" has already been
 * consumed by the caller) and store the new dictionary in *op.
 *
 * Keys must be names. Values may be any object; an integer value needs
 * lookahead because "a b R" is an indirect reference, whereas an integer
 * directly followed by the end of the dictionary, by the next key, or by
 * the "ID" keyword is a plain integer.
 *
 * Parsing ends successfully at the closing ">>" token, or at the keyword
 * "ID" (used by BI .. ID .. EI inline images in content streams).
 *
 * On every error path the partially built dictionary and any pending key
 * are dropped before returning; *op is only written on success.
 *
 * Returns fz_okay on success, otherwise an error from fz_throw/fz_rethrow.
 */
fz_error
pdf_parsedict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	pdf_token_e tok;
	int len;
	int a, b;

	dict = fz_newdict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

skip:
		/* Entered directly (via goto) when the integer lookahead below
		 * has already read the token that follows the value. */
		if (tok == PDF_TCDICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TNAME)
		{
			fz_dropobj(dict);
			return fz_throw("invalid key in dict");
		}

		key = fz_newname(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			/* the key created above is pending at this point; the
			 * original code releases only the dict here and that
			 * behaviour is kept unchanged */
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOARRAY:
			error = pdf_parsearray(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TNAME:
			val = fz_newname(buf);
			break;
		case PDF_TREAL:
			val = fz_newreal(atof(buf));
			break;
		case PDF_TSTRING:
			val = fz_newstring(buf, len);
			break;
		case PDF_TTRUE:
			val = fz_newbool(1);
			break;
		case PDF_TFALSE:
			val = fz_newbool(0);
			break;
		case PDF_TNULL:
			val = fz_newnull();
			break;

		case PDF_TINT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				/* plain integer value; tok already holds the next token */
				val = fz_newint(a);
				fz_dictput(dict, key, val);
				fz_dropobj(val);
				fz_dropobj(key);
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_dropobj(key);
					fz_dropobj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TR)
				{
					val = fz_newindirect(a, b, xref);
					break;
				}
			}
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			/* Release the pending key and the partial dictionary, as
			 * every other failure branch of this switch does;
			 * returning without this leaked both objects. */
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_throw("unknown token in dict");
		}

		fz_dictput(dict, key, val);
		fz_dropobj(val);
		fz_dropobj(key);
	}
}
/*
 * Parse the body of an array (the opening "[" has already been consumed
 * by the caller) and store the new array in *op.
 *
 * Integers cannot be appended as soon as they are seen, because
 * "a b R" is an indirect reference. Up to two integers are therefore
 * held back in a and b (n counts how many are pending):
 *   - a non-integer, non-R token flushes the pending integers as plain
 *     integer objects;
 *   - a third integer in a row flushes the oldest one and shifts;
 *   - an R token with exactly two pending integers becomes a reference.
 *
 * On error the partially built array is dropped; *op is only written on
 * success, when the closing "]" token is read.
 *
 * Returns fz_okay on success, otherwise an error from fz_throw/fz_rethrow.
 */
fz_error
pdf_parsearray(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = nil;
	fz_obj *item = nil;
	int a = 0, b = 0, n = 0;
	pdf_token_e tok;
	int len;

	ary = fz_newarray(4);

	for (;;)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		if (tok != PDF_TINT && tok != PDF_TR)
		{
			/* the pending integers were ordinary numbers after all */
			if (n > 0)
			{
				item = fz_newint(a);
				fz_arraypush(ary, item);
				fz_dropobj(item);
			}
			if (n > 1)
			{
				item = fz_newint(b);
				fz_arraypush(ary, item);
				fz_dropobj(item);
			}
			n = 0;
		}
		else if (tok == PDF_TINT && n == 2)
		{
			/* third integer in a row: emit the oldest, shift the other */
			item = fz_newint(a);
			fz_arraypush(ary, item);
			fz_dropobj(item);
			a = b;
			n--;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TINT:
			/* hold the integer back; nothing is appended yet */
			if (n == 0)
				a = atoi(buf);
			else if (n == 1)
				b = atoi(buf);
			n++;
			continue;

		case PDF_TR:
			if (n != 2)
			{
				fz_dropobj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			item = fz_newindirect(a, b, xref);
			n = 0;
			break;

		case PDF_TOARRAY:
			error = pdf_parsearray(&item, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&item, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			break;

		case PDF_TNAME:
			item = fz_newname(buf);
			break;
		case PDF_TREAL:
			item = fz_newreal(atof(buf));
			break;
		case PDF_TSTRING:
			item = fz_newstring(buf, len);
			break;
		case PDF_TTRUE:
			item = fz_newbool(1);
			break;
		case PDF_TFALSE:
			item = fz_newbool(0);
			break;
		case PDF_TNULL:
			item = fz_newnull();
			break;

		default:
			fz_dropobj(ary);
			return fz_throw("cannot parse token in array");
		}

		/* every case that reaches this point produced exactly one element */
		fz_arraypush(ary, item);
		fz_dropobj(item);
	}
}
/*
 * Parse an indirect object definition "<oid> <gid> obj <object> ..."
 * (out-parameter constructor API).
 *
 * op      receives the parsed object on success.
 * ooid    if non-null, receives the object number.
 * ogid    if non-null, receives the generation number.
 * ostmofs if non-null, receives fz_tell(file) taken after the "stream"
 *         keyword and its line ending, or 0 when "endobj" follows.
 *
 * An integer needs lookahead: "a b R" is an indirect reference, while an
 * integer directly followed by "stream" or "endobj" is a plain integer.
 * The object must be followed by "stream" or "endobj"; anything else is
 * an error.
 *
 * Returns nil on success. On failure any object held locally is dropped
 * and either the constructor/parser error or a "corrupt indirect object"
 * error is returned; *op and the other outputs are not written.
 */
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap,
	int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0, stmofs;
	int tok, len;
	int a, b;
	int lookahead = 0;	/* set when tok already holds the token after the object */

	/* header: object number, generation number, "obj" */
	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto fail;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto fail;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto fail;

	/* the object itself */
	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, file, buf, cap);
		break;
	case PDF_TODICT:
		error = pdf_parsedict(&obj, file, buf, cap);
		break;
	case PDF_TNAME:
		error = fz_newname(&obj, buf);
		break;
	case PDF_TREAL:
		error = fz_newreal(&obj, atof(buf));
		break;
	case PDF_TSTRING:
		error = fz_newstring(&obj, buf, len);
		break;
	case PDF_TTRUE:
		error = fz_newbool(&obj, 1);
		break;
	case PDF_TFALSE:
		error = fz_newbool(&obj, 0);
		break;
	case PDF_TNULL:
		error = fz_newnull(&obj);
		break;

	case PDF_TINT:
		a = atoi(buf);
		tok = pdf_lex(file, buf, cap, &len);

		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			/* plain integer; the token just read is the trailer */
			error = fz_newint(&obj, a);
			lookahead = 1;
			break;
		}

		/* otherwise only "a b R" is acceptable */
		if (tok != PDF_TINT)
			goto fail;
		b = atoi(buf);

		tok = pdf_lex(file, buf, cap, &len);
		if (tok != PDF_TR)
			goto fail;

		error = fz_newindirect(&obj, a, b);
		break;

	default:
		goto fail;
	}

	if (error)
		goto fail;

	if (!lookahead)
		tok = pdf_lex(file, buf, cap, &len);

	/* trailer: "stream" (record where the data starts) or "endobj" */
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			if (fz_peekbyte(file) == '\n')
				c = fz_readbyte(file);
			else
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
	{
		stmofs = 0;
	}
	else
	{
		goto fail;
	}

	if (ooid)
		*ooid = oid;
	if (ogid)
		*ogid = gid;
	if (ostmofs)
		*ostmofs = stmofs;
	*op = obj;
	return nil;

fail:
	if (obj)
		fz_dropobj(obj);
	if (error)
		return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}
/*
 * Parse the body of a dictionary (the opening "<<" has already been
 * consumed by the caller) into a new dictionary stored in *op
 * (out-parameter constructor API).
 *
 * Keys must be names. An integer value needs lookahead: "a b R" is an
 * indirect reference, whereas an integer directly followed by ">>", by
 * the next key, or by the "ID" keyword is a plain integer.
 *
 * Parsing ends successfully at ">>" or at the keyword "ID" (used by
 * BI .. ID .. EI inline images in content streams).
 *
 * Returns nil on success. On failure the partial dictionary and any
 * pending key/value are dropped, *op is reset to nil so that the caller
 * is never left holding a pointer to the dropped dictionary, and either
 * the underlying error or a "corrupt dictionary" error is returned.
 */
fz_error *
pdf_parsedict(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	int tok, len;
	int a, b;

	error = fz_newdict(op, 8);
	if (error)
		return error;
	dict = *op;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

skip:
		/* Entered directly (via goto) when the integer lookahead below
		 * has already read the token that follows the value. */
		if (tok == PDF_TCDICT)
			return nil;

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
			return nil;

		if (tok != PDF_TNAME)
			goto cleanup;

		error = fz_newname(&key, buf);
		if (error)
			goto cleanup;

		tok = pdf_lex(file, buf, cap, &len);

		switch (tok)
		{
		case PDF_TOARRAY:
			error = pdf_parsearray(&val, file, buf, cap);
			/* A failed nested parser has already dropped the object
			 * it stored through its out-parameter; forget the
			 * pointer so cleanup does not drop it a second time. */
			if (error)
				val = nil;
			break;
		case PDF_TODICT:
			error = pdf_parsedict(&val, file, buf, cap);
			if (error)
				val = nil;
			break;
		case PDF_TNAME:
			error = fz_newname(&val, buf);
			break;
		case PDF_TREAL:
			error = fz_newreal(&val, atof(buf));
			break;
		case PDF_TSTRING:
			error = fz_newstring(&val, buf, len);
			break;
		case PDF_TTRUE:
			error = fz_newbool(&val, 1);
			break;
		case PDF_TFALSE:
			error = fz_newbool(&val, 0);
			break;
		case PDF_TNULL:
			error = fz_newnull(&val);
			break;

		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				/* plain integer value; tok already holds the next token */
				error = fz_newint(&val, a);
				if (error)
					goto cleanup;
				error = fz_dictput(dict, key, val);
				if (error)
					goto cleanup;
				fz_dropobj(val);
				fz_dropobj(key);
				key = val = nil;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&val, a, b);
					break;
				}
			}
			goto cleanup;

		default:
			goto cleanup;
		}

		if (error)
			goto cleanup;

		error = fz_dictput(dict, key, val);
		if (error)
			goto cleanup;

		fz_dropobj(val);
		fz_dropobj(key);
		key = val = nil;
	}

cleanup:
	if (key)
		fz_dropobj(key);
	if (val)
		fz_dropobj(val);
	if (dict)
		fz_dropobj(dict);
	/* *op was pointed at dict on entry; do not leave it dangling, since
	 * callers' cleanup code drops any non-nil object pointer. */
	*op = nil;
	if (error)
		return error;
	return fz_throw("syntaxerror: corrupt dictionary");
}
/*
 * Parse the body of an array (the opening "[" has already been consumed
 * by the caller) into a new array stored in *op (out-parameter
 * constructor API).
 *
 * Integers cannot be appended as soon as they are seen, because
 * "a b R" is an indirect reference. Up to two integers are held back in
 * a and b (n counts how many are pending):
 *   - a non-integer, non-R token flushes the pending integers as plain
 *     integer objects;
 *   - a third integer in a row flushes the oldest one and shifts;
 *   - an R token with exactly two pending integers becomes a reference.
 *
 * Returns nil on success (at the closing "]"). On failure the partial
 * array and any pending element are dropped, *op is reset to nil so that
 * the caller is never left holding a pointer to the dropped array, and
 * either the underlying error or a "corrupt array" error is returned.
 */
fz_error *
pdf_parsearray(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *ary = nil;
	fz_obj *obj = nil;
	int a = 0, b = 0, n = 0;
	int tok, len;

	error = fz_newarray(op, 4);
	if (error)
		return error;
	ary = *op;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

		if (tok != PDF_TINT && tok != PDF_TR)
		{
			/* the pending integers were ordinary numbers after all */
			if (n > 0)
			{
				error = fz_newint(&obj, a);
				if (error)
					goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error)
					goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			if (n > 1)
			{
				error = fz_newint(&obj, b);
				if (error)
					goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error)
					goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			n = 0;
		}

		if (tok == PDF_TINT && n == 2)
		{
			/* third integer in a row: emit the oldest, shift the other */
			error = fz_newint(&obj, a);
			if (error)
				goto cleanup;
			error = fz_arraypush(ary, obj);
			if (error)
				goto cleanup;
			fz_dropobj(obj);
			obj = nil;
			a = b;
			n--;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			return nil;

		case PDF_TINT:
			/* hold the integer back; obj stays nil so nothing is pushed */
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n++;
			break;

		case PDF_TR:
			if (n != 2)
				goto cleanup;
			error = fz_newindirect(&obj, a, b);
			if (error)
				goto cleanup;
			n = 0;
			break;

		case PDF_TOARRAY:
			error = pdf_parsearray(&obj, file, buf, cap);
			/* A failed nested parser has already dropped the object
			 * it stored through its out-parameter; forget the
			 * pointer so cleanup does not drop it a second time. */
			if (error)
				obj = nil;
			break;
		case PDF_TODICT:
			error = pdf_parsedict(&obj, file, buf, cap);
			if (error)
				obj = nil;
			break;

		case PDF_TNAME:
			error = fz_newname(&obj, buf);
			break;
		case PDF_TREAL:
			error = fz_newreal(&obj, atof(buf));
			break;
		case PDF_TSTRING:
			error = fz_newstring(&obj, buf, len);
			break;
		case PDF_TTRUE:
			error = fz_newbool(&obj, 1);
			break;
		case PDF_TFALSE:
			error = fz_newbool(&obj, 0);
			break;
		case PDF_TNULL:
			error = fz_newnull(&obj);
			break;

		default:
			goto cleanup;
		}

		if (error)
			goto cleanup;

		if (obj)
		{
			error = fz_arraypush(ary, obj);
			if (error)
				goto cleanup;
			fz_dropobj(obj);
		}

		obj = nil;
	}

cleanup:
	if (obj)
		fz_dropobj(obj);
	if (ary)
		fz_dropobj(ary);
	/* *op was pointed at ary on entry; do not leave it dangling, since
	 * callers' cleanup code drops any non-nil object pointer. */
	*op = nil;
	if (error)
		return error;
	return fz_throw("syntaxerror: corrupt array");
}