static void showobject(int num, int gen) { fz_error error; fz_obj *obj; if (!xref) die(fz_throw("no file specified")); error = pdf_loadobject(&obj, xref, num, gen); if (error) die(error); if (pdf_isstream(xref, num, gen)) { printf("%d %d obj\n", num, gen); fz_debugobj(obj); printf("stream\n"); showstream(num, gen); printf("endstream\n"); printf("endobj\n\n"); } else { printf("%d %d obj\n", num, gen); fz_debugobj(obj); printf("endobj\n\n"); } fz_dropobj(obj); }
static fz_error pdf_repairobjstm(pdf_xref *xref, int num, int gen) { fz_error error; fz_obj *obj; fz_stream *stm; int tok; int i, n, count; char buf[256]; error = pdf_loadobject(&obj, xref, num, gen); if (error) return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen); count = fz_toint(fz_dictgets(obj, "N")); fz_dropobj(obj); error = pdf_openstream(&stm, xref, num, gen); if (error) return fz_rethrow(error, "cannot open object stream object (%d %d R)", num, gen); for (i = 0; i < count; i++) { error = pdf_lex(&tok, stm, buf, sizeof buf, &n); if (error || tok != PDF_TINT) { fz_close(stm); return fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); } n = atoi(buf); if (n >= xref->len) pdf_resizexref(xref, n + 1); xref->table[n].ofs = num; xref->table[n].gen = i; xref->table[n].stmofs = 0; xref->table[n].obj = nil; xref->table[n].type = 'o'; error = pdf_lex(&tok, stm, buf, sizeof buf, &n); if (error || tok != PDF_TINT) { fz_close(stm); return fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); } } fz_close(stm); return fz_okay; }
void preloadobjstms(void) { fz_error *error; fz_obj *obj; int i; for (i = 0; i < src->len; i++) { if (src->table[i].type == 'o') { error = pdf_loadobject(&obj, src, i, 0); if (error) die(error); fz_dropobj(obj); } } }
static void preloadobjstms(void) { fz_error error; fz_obj *obj; int num; for (num = 0; num < xref->len; num++) { if (xref->table[num].type == 'o') { error = pdf_loadobject(&obj, xref, num, 0); if (error) die(error); fz_dropobj(obj); } } }
static void writeobject(int num, int gen) { fz_error error; fz_obj *obj; fz_obj *type; error = pdf_loadobject(&obj, xref, num, gen); if (error) die(error); /* skip ObjStm and XRef objects */ if (fz_isdict(obj)) { type = fz_dictgets(obj, "Type"); if (fz_isname(type) && !strcmp(fz_toname(type), "ObjStm")) { uselist[num] = 0; fz_dropobj(obj); return; } if (fz_isname(type) && !strcmp(fz_toname(type), "XRef")) { uselist[num] = 0; fz_dropobj(obj); return; } } if (!pdf_isstream(xref, num, gen)) { fprintf(out, "%d %d obj\n", num, gen); fz_fprintobj(out, obj, !doexpand); fprintf(out, "endobj\n\n"); } else { if (doexpand && !pdf_isjpximage(obj)) expandstream(obj, num, gen); else copystream(obj, num, gen); } fz_dropobj(obj); }
static void showobject(int num, int gen) { fz_error error; fz_obj *obj; if (!xref) die(fz_throw("no file specified")); error = pdf_loadobject(&obj, xref, num, gen); if (error) die(error); if (isimage(obj)) saveimage(obj, num, gen); else if (isfontdesc(obj)) savefont(obj, num, gen); fz_dropobj(obj); }
fz_error pdf_repairobjstms(pdf_xref *xref) { fz_obj *dict; int i; for (i = 0; i < xref->len; i++) { if (xref->table[i].stmofs) { pdf_loadobject(&dict, xref, i, 0); if (!strcmp(fz_toname(fz_dictgets(dict, "Type")), "ObjStm")) pdf_repairobjstm(xref, i, 0); fz_dropobj(dict); } } return fz_okay; }
void cleanexpand(void) { fz_error *error; fz_obj *stmobj; fz_buffer *buf; fz_obj *stmlen; int i, gen; for (i = 0; i < src->len; i++) { if (src->table[i].type == 'n') { gen = src->table[i].gen; if (pdf_isstream(src, i, gen)) { error = pdf_loadobject(&stmobj, src, i, gen); if (error) die(error); error = pdf_loadstream(&buf, src, i, gen); if (error) die(error); fz_dictdels(stmobj, "Filter"); fz_dictdels(stmobj, "DecodeParms"); error = fz_newint(&stmlen, buf->wp - buf->rp); if (error) die(error); error = fz_dictputs(stmobj, "Length", stmlen); if (error) die(error); fz_dropobj(stmlen); pdf_updateobject(src, i, gen, stmobj); pdf_updatestream(src, i, gen, buf); fz_dropobj(stmobj); } } } }
void showobject(int num, int gen) { fz_error *error; fz_obj *obj; if (!src) die(fz_throw("no file specified")); error = pdf_loadobject(&obj, src, num, gen); if (error) die(error); /* print the stream raw if we're binary */ if (pdf_isstream(src, num, gen) && showbinary) { showstream(num, gen); } else { printf("%d %d obj\n", num, gen); fz_debugobj(obj); printf("\n"); if (pdf_isstream(src, num, gen)) { printf("stream\n"); showstream(num, gen); printf("endstream\n"); } printf("endobj\n\n"); } fz_dropobj(obj); }
int copyPdfFile( soPdfFile* inFile, soPdfFile* outFile ) { fz_error *error; int pageTreeNum, pageTreeGen; assert(inFile != NULL); assert(outFile != NULL); // // Process every page in the source file // { printf("\nProcessing input page : "); for (int pageNo = 0; pageNo < pdf_getpagecount(inFile->pageTree); pageNo++) { displayPageNumber(pageNo + 1, !pageNo); // Get the page object from the source fz_obj *pageRef = inFile->pageTree->pref[pageNo]; fz_obj *pageObj = pdf_getpageobject(inFile->pageTree, pageNo); // // Process the page. Each page can be split into up-to 3 pages // fz_rect bbRect[3]; error = processPage(inFile, pageNo, bbRect, 3); if (error) return soPdfError(error); for (int ctr = 0; ctr < 3; ctr++) { // Check if this was a blank page if (fz_isemptyrect(bbRect[ctr])) break; // // copy the source page dictionary entry. The way this is done is basically // by making a copy of the page dict object in the source file, and adding // the copy in the source file. Then the copied page dict object is // referenced and added to the destination file. // // This convoluted procedure is done because the copy is done by pdf_transplant // function that accepts a source and destination. Whatever is referenced by // destination object is deep copied // // allocate an object id and generation id in source file // // There is a bug in mupdf where the object allocation returns // 0 oid and 0 gid when the input pdf file has iref stream // so to work around the issue, we wrap the pdf_allocojbect // in a for loop 10 times to get the number // int sNum, sGen, tries; for (tries = 0; tries < 10; tries++) { error = pdf_allocobject(inFile->xref, &sNum, &sGen); if (error) return soPdfError(error); // If sNum is non zero then the allocation was successful if (sNum != 0) break; pdf_updateobject(inFile->xref, sNum, sGen, pageObj); } // If we didn't succeed even after 10 tries then this file // is not going to work. if (tries >= 10) return soPdfError(fz_throw("cannot allocate object because of mupdf bug")); // make a deep copy of the original page dict fz_obj *pageObj2; error = fz_deepcopydict(&pageObj2, pageObj); if (error) return soPdfError(error); // update the source file with the duplicate page object pdf_updateobject(inFile->xref, sNum, sGen, pageObj2); fz_dropobj(pageObj2); // create an indirect reference to the page object fz_obj *pageRef2; error = fz_newindirect(&pageRef2, sNum, sGen); if (error) return soPdfError(error); // delete the parent dictionary entry // Do we need to delete any other dictionary entry // like annot, tabs, metadata, etc fz_dictdels(pageObj2, "Parent"); // Set the media box setPageMediaBox(inFile->xref, pageObj2, bbRect[ctr]); // Set the rotation based on input switch(p_mode) { // no rotation if fit height case FitHeight: case Fit2xHeight: break; // rotate -90 deg if fit width case Fit2xWidth: case FitWidth: setPageRotate(pageObj2, p_reverseLandscape ? 90 : -90); break; case SmartFitHeight: case SmartFitWidth: default: return soPdfError(fz_throw("Mode(%d) not yet implemented.", p_mode)); break; } // push the indirect reference to the destination list for copy by pdf_transplant error = fz_arraypush(outFile->editobjs, pageRef2); if (error) return soPdfError(error); } } } // flush the objects into destination from source { fz_obj *results; int outPages; printf("\nCopying output page : "); error = pdf_transplant(outFile->xref, inFile->xref, &results, outFile->editobjs); if (error) return soPdfError(error); outPages = fz_arraylen(results); for (int ctr = 0; ctr < outPages; ctr++) { displayPageNumber(ctr + 1, !ctr); error = fz_arraypush(outFile->pagelist, fz_arrayget(results, p_reverseLandscape ? outPages - 1 - ctr : ctr)); if (error) return soPdfError(error); } fz_dropobj(results); } // flush page tree // Create page tree and add back-links { fz_obj *pageTreeObj; fz_obj *pageTreeRef; // allocate a new object in out file for pageTree object error = pdf_allocobject(outFile->xref, &pageTreeNum, &pageTreeGen); if (error) return soPdfError(error); // Create a page tree object error = fz_packobj(&pageTreeObj, "<</Type/Pages/Count %i/Kids %o>>", fz_arraylen(outFile->pagelist), outFile->pagelist); if (error) return soPdfError(error); // Update the xref entry with the pageTree object pdf_updateobject(outFile->xref, pageTreeNum, pageTreeGen, pageTreeObj); fz_dropobj(pageTreeObj); // Create a reference to the pageTree object error = fz_newindirect(&pageTreeRef, pageTreeNum, pageTreeGen); if (error) return soPdfError(error); // // For every page in the output file, update the parent entry // for (int ctr = 0; ctr < fz_arraylen(outFile->pagelist); ctr++) { fz_obj *pageObj; int num = fz_tonum(fz_arrayget(outFile->pagelist, ctr)); int gen = fz_togen(fz_arrayget(outFile->pagelist, ctr)); // Get the page object from xreft error = pdf_loadobject(&pageObj, outFile->xref, num, gen); if (error) return soPdfError(error); // Update the parent entry in the page dictionary error = fz_dictputs(pageObj, "Parent", pageTreeRef); if (error) return soPdfError(error); // Update the entry with the updated page object pdf_updateobject(outFile->xref, num, gen, pageObj); fz_dropobj(pageObj); } } // Create catalog and root entries { fz_obj *catObj, *infoObj; int rootNum, rootGen; int infoNum, infoGen; // // Copy the info catalog to the destination // alloc an object id and gen id in destination file error = pdf_allocobject(outFile->xref, &infoNum, &infoGen); if (error) return soPdfError(error); // make a deep copy of the original page dict error = fz_deepcopydict(&infoObj, inFile->xref->info); if (error) return soPdfError(error); // update the dest file with object pdf_updateobject(outFile->xref, infoNum, infoGen, infoObj); outFile->xref->info = infoObj; fz_dropobj(infoObj); // // root/catalog object creation error = pdf_allocobject(outFile->xref, &rootNum, &rootGen); if (error) return soPdfError(error); error = fz_packobj(&catObj, "<</Type/Catalog /Pages %r>>", pageTreeNum, pageTreeGen); if (error) return soPdfError(error); pdf_updateobject(outFile->xref, rootNum, rootGen, catObj); fz_dropobj(catObj); // Create trailer error = fz_packobj(&outFile->xref->trailer, "<</Root %r /Info %r>>", rootNum, rootGen, infoNum, infoGen); if (error) return soPdfError(error); } // Update the info in the target file and save the xref printf("\nSaving.\n"); error = setPageInfo(inFile, outFile); if (error) return soPdfError(error); error = pdf_savexref(outFile->xref, outFile->fileName, NULL); if (error) return soPdfError(error); if (g_errorCount != 0) { printf("\nFollowing issues encounted were ignored.\n\n"); for (int ctr = g_errorCount - 1; ctr >= 0; ctr--) soPdfError(g_errorList[ctr]); } printf("\nSaved.\n"); return 0; }
static fz_error pdf_loadobjstm(pdf_xref *xref, int num, int gen, char *buf, int cap) { fz_error error; fz_stream *stm; fz_obj *objstm; int *numbuf; int *ofsbuf; fz_obj *obj; int first; int count; int i, n; pdf_token_e tok; pdf_logxref("loadobjstm (%d %d R)\n", num, gen); error = pdf_loadobject(&objstm, xref, num, gen); if (error) return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen); count = fz_toint(fz_dictgets(objstm, "N")); first = fz_toint(fz_dictgets(objstm, "First")); pdf_logxref("\tcount %d\n", count); numbuf = fz_malloc(count * sizeof(int)); ofsbuf = fz_malloc(count * sizeof(int)); error = pdf_openstream(&stm, xref, num, gen); if (error) { error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen); goto cleanupbuf; } for (i = 0; i < count; i++) { error = pdf_lex(&tok, stm, buf, cap, &n); if (error || tok != PDF_TINT) { error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); goto cleanupstm; } numbuf[i] = atoi(buf); error = pdf_lex(&tok, stm, buf, cap, &n); if (error || tok != PDF_TINT) { error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); goto cleanupstm; } ofsbuf[i] = atoi(buf); } fz_seek(stm, first, 0); for (i = 0; i < count; i++) { fz_seek(stm, first + ofsbuf[i], 0); error = pdf_parsestmobj(&obj, xref, stm, buf, cap); if (error) { error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen); goto cleanupstm; } if (numbuf[i] < 1 || numbuf[i] >= xref->len) { fz_dropobj(obj); error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1); goto cleanupstm; } if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num) { if (xref->table[numbuf[i]].obj) fz_dropobj(xref->table[numbuf[i]].obj); xref->table[numbuf[i]].obj = obj; } else { fz_dropobj(obj); } } fz_close(stm); fz_free(ofsbuf); fz_free(numbuf); fz_dropobj(objstm); return fz_okay; cleanupstm: fz_close(stm); cleanupbuf: fz_free(ofsbuf); fz_free(numbuf); fz_dropobj(objstm); return error; /* already rethrown */ }
void editflushcatalog(void) { fz_error *error; int rootnum, rootgen; int listnum, listgen; fz_obj *listref; fz_obj *obj; int i; /* Create page tree and add back-links */ error = pdf_allocobject(editxref, &listnum, &listgen); if (error) die(error); error = fz_packobj(&obj, "<</Type/Pages/Count %i/Kids %o>>", fz_arraylen(editpagelist), editpagelist); if (error) die(error); pdf_updateobject(editxref, listnum, listgen, obj); fz_dropobj(obj); error = fz_newindirect(&listref, listnum, listgen); if (error) die(error); for (i = 0; i < fz_arraylen(editpagelist); i++) { int num = fz_tonum(fz_arrayget(editpagelist, i)); int gen = fz_togen(fz_arrayget(editpagelist, i)); error = pdf_loadobject(&obj, editxref, num, gen); if (error) die(error); error = fz_dictputs(obj, "Parent", listref); if (error) die(error); pdf_updateobject(editxref, num, gen, obj); fz_dropobj(obj); } /* Create catalog */ error = pdf_allocobject(editxref, &rootnum, &rootgen); if (error) die(error); error = fz_packobj(&obj, "<</Type/Catalog/Pages %r>>", listnum, listgen); if (error) die(error); pdf_updateobject(editxref, rootnum, rootgen, obj); fz_dropobj(obj); /* Create trailer */ error = fz_packobj(&editxref->trailer, "<</Root %r>>", rootnum, rootgen); if (error) die(error); }
fz_error pdf_repairxref(pdf_xref *xref, char *buf, int bufsize) { fz_error error; fz_obj *dict, *obj; fz_obj *length; fz_obj *encrypt = nil; fz_obj *id = nil; fz_obj *root = nil; fz_obj *info = nil; struct entry *list = nil; int listlen; int listcap; int maxnum = 0; int num = 0; int gen = 0; int tmpofs, numofs = 0, genofs = 0; int stmlen, stmofs = 0; int tok; int next; int i, n; pdf_logxref("repairxref %p\n", xref); fz_seek(xref->file, 0, 0); listlen = 0; listcap = 1024; list = fz_calloc(listcap, sizeof(struct entry)); /* look for '%PDF' version marker within first kilobyte of file */ n = fz_read(xref->file, (unsigned char *)buf, MAX(bufsize, 1024)); if (n < 0) { error = fz_rethrow(n, "cannot read from file"); goto cleanup; } fz_seek(xref->file, 0, 0); for (i = 0; i < n - 4; i++) { if (memcmp(buf + i, "%PDF", 4) == 0) { fz_seek(xref->file, i, 0); break; } } while (1) { tmpofs = fz_tell(xref->file); if (tmpofs < 0) { error = fz_throw("cannot tell in file"); goto cleanup; } error = pdf_lex(&tok, xref->file, buf, bufsize, &n); if (error) { fz_catch(error, "ignoring the rest of the file"); break; } if (tok == PDF_TINT) { numofs = genofs; num = gen; genofs = tmpofs; gen = atoi(buf); } if (tok == PDF_TOBJ) { error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id); if (error) { error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen); goto cleanup; } pdf_logxref("found object: (%d %d R)\n", num, gen); if (listlen + 1 == listcap) { listcap = (listcap * 3) / 2; list = fz_realloc(list, listcap, sizeof(struct entry)); } list[listlen].num = num; list[listlen].gen = gen; list[listlen].ofs = numofs; list[listlen].stmofs = stmofs; list[listlen].stmlen = stmlen; listlen ++; if (num > maxnum) maxnum = num; } /* trailer dictionary */ if (tok == PDF_TODICT) { error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize); if (error) { error = fz_rethrow(error, "cannot parse object"); goto cleanup; } obj = fz_dictgets(dict, "Encrypt"); if (obj) { if (encrypt) fz_dropobj(encrypt); encrypt = fz_keepobj(obj); } obj = fz_dictgets(dict, "ID"); if (obj) { if (id) fz_dropobj(id); id = fz_keepobj(obj); } obj = fz_dictgets(dict, "Root"); if (obj) { if (root) fz_dropobj(root); root = fz_keepobj(obj); } obj = fz_dictgets(dict, "Info"); if (obj) { if (info) fz_dropobj(info); info = fz_keepobj(obj); } fz_dropobj(dict); } if (tok == PDF_TERROR) fz_readbyte(xref->file); if (tok == PDF_TEOF) break; } /* make xref reasonable */ pdf_resizexref(xref, maxnum + 1); for (i = 0; i < listlen; i++) { xref->table[list[i].num].type = 'n'; xref->table[list[i].num].ofs = list[i].ofs; xref->table[list[i].num].gen = list[i].gen; xref->table[list[i].num].stmofs = list[i].stmofs; /* corrected stream length */ if (list[i].stmlen >= 0) { pdf_logxref("correct stream length %d %d = %d\n", list[i].num, list[i].gen, list[i].stmlen); error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen); if (error) { error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen); goto cleanup; } length = fz_newint(list[i].stmlen); fz_dictputs(dict, "Length", length); fz_dropobj(length); fz_dropobj(dict); } } xref->table[0].type = 'f'; xref->table[0].ofs = 0; xref->table[0].gen = 65535; xref->table[0].stmofs = 0; xref->table[0].obj = nil; next = 0; for (i = xref->len - 1; i >= 0; i--) { if (xref->table[i].type == 'f') { xref->table[i].ofs = next; if (xref->table[i].gen < 65535) xref->table[i].gen ++; next = i; } } /* create a repaired trailer, Root will be added later */ xref->trailer = fz_newdict(5); obj = fz_newint(maxnum + 1); fz_dictputs(xref->trailer, "Size", obj); fz_dropobj(obj); if (root) { fz_dictputs(xref->trailer, "Root", root); fz_dropobj(root); } if (info) { fz_dictputs(xref->trailer, "Info", info); fz_dropobj(info); } if (encrypt) { if (fz_isindirect(encrypt)) { /* create new reference with non-nil xref pointer */ obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref); fz_dropobj(encrypt); encrypt = obj; } fz_dictputs(xref->trailer, "Encrypt", encrypt); fz_dropobj(encrypt); } if (id) { if (fz_isindirect(id)) { /* create new reference with non-nil xref pointer */ obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref); fz_dropobj(id); id = obj; } fz_dictputs(xref->trailer, "ID", id); fz_dropobj(id); } fz_free(list); return fz_okay; cleanup: if (encrypt) fz_dropobj(encrypt); if (id) fz_dropobj(id); if (root) fz_dropobj(root); if (info) fz_dropobj(info); fz_free(list); return error; /* already rethrown */ }
fz_error pdf_loadobjstm(pdf_xref *xref, int oid, int gen, char *buf, int cap) { fz_error error; fz_stream *stm; fz_obj *objstm; int *oidbuf; int *ofsbuf; fz_obj *obj; int first; int count; int i, n; pdf_token_e tok; pdf_logxref("loadobjstm (%d %d R)\n", oid, gen); error = pdf_loadobject(&objstm, xref, oid, gen); if (error) return fz_rethrow(error, "cannot load object stream object"); count = fz_toint(fz_dictgets(objstm, "N")); first = fz_toint(fz_dictgets(objstm, "First")); pdf_logxref(" count %d\n", count); oidbuf = fz_malloc(count * sizeof(int)); if (!oidbuf) { error = fz_rethrow(-1, "out of memory: object id buffer"); goto cleanupobj; } ofsbuf = fz_malloc(count * sizeof(int)); if (!ofsbuf) { error = fz_rethrow(-1, "out of memory: offset buffer"); goto cleanupoid; } error = pdf_openstream(&stm, xref, oid, gen); if (error) { error = fz_rethrow(error, "cannot open object stream"); goto cleanupofs; } for (i = 0; i < count; i++) { error = pdf_lex(&tok, stm, buf, cap, &n); if (error || tok != PDF_TINT) { error = fz_rethrow(error, "corrupt object stream"); goto cleanupstm; } oidbuf[i] = atoi(buf); error = pdf_lex(&tok, stm, buf, cap, &n); if (error || tok != PDF_TINT) { error = fz_rethrow(error, "corrupt object stream"); goto cleanupstm; } ofsbuf[i] = atoi(buf); } error = fz_seek(stm, first, 0); if (error) { error = fz_rethrow(error, "cannot seek in object stream"); goto cleanupstm; } for (i = 0; i < count; i++) { /* FIXME: seek to first + ofsbuf[i] */ error = pdf_parsestmobj(&obj, xref, stm, buf, cap); if (error) { error = fz_rethrow(error, "cannot parse object %d in stream", i); goto cleanupstm; } if (oidbuf[i] < 1 || oidbuf[i] >= xref->len) { fz_dropobj(obj); error = fz_throw("object id (%d 0 R) out of range (0..%d)", oidbuf[i], xref->len - 1); goto cleanupstm; } if (xref->table[oidbuf[i]].obj) fz_dropobj(xref->table[oidbuf[i]].obj); xref->table[oidbuf[i]].obj = obj; } fz_dropstream(stm); fz_free(ofsbuf); fz_free(oidbuf); fz_dropobj(objstm); return fz_okay; cleanupstm: fz_dropstream(stm); cleanupofs: fz_free(ofsbuf); cleanupoid: fz_free(oidbuf); cleanupobj: fz_dropobj(objstm); return error; /* already rethrown */ }