static void addhexfilter(fz_obj *dict) { fz_obj *f, *dp, *newf, *newdp; fz_obj *ahx, *nullobj; ahx = fz_newname("ASCIIHexDecode"); nullobj = fz_newnull(); newf = newdp = nil; f = fz_dictgets(dict, "Filter"); dp = fz_dictgets(dict, "DecodeParms"); if (fz_isname(f)) { newf = fz_newarray(2); fz_arraypush(newf, ahx); fz_arraypush(newf, f); f = newf; if (fz_isdict(dp)) { newdp = fz_newarray(2); fz_arraypush(newdp, nullobj); fz_arraypush(newdp, dp); dp = newdp; } } else if (fz_isarray(f)) { fz_arrayinsert(f, ahx); if (fz_isarray(dp)) fz_arrayinsert(dp, nullobj); } else f = ahx; fz_dictputs(dict, "Filter", f); if (dp) fz_dictputs(dict, "DecodeParms", dp); fz_dropobj(ahx); fz_dropobj(nullobj); if (newf) fz_dropobj(newf); if (newdp) fz_dropobj(newdp); }
void editcopy(int pagenum) { fz_error *error; fz_obj *obj; fz_obj *ref; fz_obj *num; printf("copy %s page %d\n", srcname, pagenum); ref = srcpages->pref[pagenum - 1]; obj = pdf_getpageobject(srcpages, pagenum - 1); fz_dictdels(obj, "Parent"); /* fz_dictdels(obj, "B"); fz_dictdels(obj, "PieceInfo"); fz_dictdels(obj, "Metadata"); fz_dictdels(obj, "Annots"); fz_dictdels(obj, "Tabs"); */ pdf_updateobject(src, fz_tonum(ref), fz_togen(ref), obj); error = fz_arraypush(editobjects, ref); if (error) die(error); error = fz_newint(&num, editmode); if (error) die(error); error = fz_arraypush(editmodelist, num); if (error) die(error); fz_dropobj(num); }
static fz_error parsearray(fz_obj **obj, pdf_xref *xref, char **sp, struct vap *v) { fz_error error; fz_obj *a; fz_obj *o; char *s = *sp; error = fz_newarray(&a, 8); if (error) return fz_rethrow(error, "cannot create array"); s ++; /* skip '[' */ while (*s) { skipwhite(&s); if (*s == ']') { s ++; break; } error = parseobj(&o, xref, &s, v); if (error) { fz_dropobj(a); return fz_rethrow(error, "cannot parse item"); } error = fz_arraypush(a, o); if (error) { fz_dropobj(o); fz_dropobj(a); return fz_rethrow(error, "cannot add item to array"); } fz_dropobj(o); } *obj = a; *sp = s; return fz_okay; }
void editflushobjects(void) { fz_error *error; fz_obj *results; int i; error = pdf_transplant(editxref, src, &results, editobjects); if (error) die(error); for (i = 0; i < fz_arraylen(results); i++) { error = fz_arraypush(editpagelist, fz_arrayget(results, i)); if (error) die(error); } fz_dropobj(results); }
static void retainpages(int argc, char **argv) { fz_error error; fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent; /* Load the old page tree */ error = pdf_loadpagetree(xref); if (error) die(fz_rethrow(error, "cannot load page tree")); /* Keep only pages/type entry to avoid references to unretained pages */ oldroot = fz_dictgets(xref->trailer, "Root"); pages = fz_dictgets(oldroot, "Pages"); root = fz_newdict(2); fz_dictputs(root, "Type", fz_dictgets(oldroot, "Type")); fz_dictputs(root, "Pages", fz_dictgets(oldroot, "Pages")); pdf_updateobject(xref, fz_tonum(oldroot), fz_togen(oldroot), root); fz_dropobj(root); /* Create a new kids array with only the pages we want to keep */ parent = fz_newindirect(fz_tonum(pages), fz_togen(pages), xref); kids = fz_newarray(1); /* Retain pages specified */ while (argc - fz_optind) { int page, spage, epage; char *spec, *dash; char *pagelist = argv[fz_optind]; spec = fz_strsep(&pagelist, ","); while (spec) { dash = strchr(spec, '-'); if (dash == spec) spage = epage = pdf_getpagecount(xref); else spage = epage = atoi(spec); if (dash) { if (strlen(dash) > 1) epage = atoi(dash + 1); else epage = pdf_getpagecount(xref); } if (spage > epage) page = spage, spage = epage, epage = page; if (spage < 1) spage = 1; if (epage > pdf_getpagecount(xref)) epage = pdf_getpagecount(xref); for (page = spage; page <= epage; page++) { fz_obj *pageobj = pdf_getpageobject(xref, page); fz_obj *pageref = pdf_getpageref(xref, page); fz_dictputs(pageobj, "Parent", parent); /* Store page object in new kids array */ fz_arraypush(kids, pageref); } spec = fz_strsep(&pagelist, ","); } fz_optind++; } fz_dropobj(parent); /* Update page count and kids array */ countobj = fz_newint(fz_arraylen(kids)); fz_dictputs(pages, "Count", countobj); fz_dropobj(countobj); fz_dictputs(pages, "Kids", kids); fz_dropobj(kids); }
int copyPdfFile( soPdfFile* inFile, soPdfFile* outFile ) { fz_error *error; int pageTreeNum, pageTreeGen; assert(inFile != NULL); assert(outFile != NULL); // // Process every page in the source file // { printf("\nProcessing input page : "); for (int pageNo = 0; pageNo < pdf_getpagecount(inFile->pageTree); pageNo++) { displayPageNumber(pageNo + 1, !pageNo); // Get the page object from the source fz_obj *pageRef = inFile->pageTree->pref[pageNo]; fz_obj *pageObj = pdf_getpageobject(inFile->pageTree, pageNo); // // Process the page. Each page can be split into up-to 3 pages // fz_rect bbRect[3]; error = processPage(inFile, pageNo, bbRect, 3); if (error) return soPdfError(error); for (int ctr = 0; ctr < 3; ctr++) { // Check if this was a blank page if (fz_isemptyrect(bbRect[ctr])) break; // // copy the source page dictionary entry. The way this is done is basically // by making a copy of the page dict object in the source file, and adding // the copy in the source file. Then the copied page dict object is // referenced and added to the destination file. // // This convoluted procedure is done because the copy is done by pdf_transplant // function that accepts a source and destination. Whatever is referenced by // destination object is deep copied // // allocate an object id and generation id in source file // // There is a bug in mupdf where the object allocation returns // 0 oid and 0 gid when the input pdf file has iref stream // so to work around the issue, we wrap the pdf_allocojbect // in a for loop 10 times to get the number // int sNum, sGen, tries; for (tries = 0; tries < 10; tries++) { error = pdf_allocobject(inFile->xref, &sNum, &sGen); if (error) return soPdfError(error); // If sNum is non zero then the allocation was successful if (sNum != 0) break; pdf_updateobject(inFile->xref, sNum, sGen, pageObj); } // If we didn't succeed even after 10 tries then this file // is not going to work. if (tries >= 10) return soPdfError(fz_throw("cannot allocate object because of mupdf bug")); // make a deep copy of the original page dict fz_obj *pageObj2; error = fz_deepcopydict(&pageObj2, pageObj); if (error) return soPdfError(error); // update the source file with the duplicate page object pdf_updateobject(inFile->xref, sNum, sGen, pageObj2); fz_dropobj(pageObj2); // create an indirect reference to the page object fz_obj *pageRef2; error = fz_newindirect(&pageRef2, sNum, sGen); if (error) return soPdfError(error); // delete the parent dictionary entry // Do we need to delete any other dictionary entry // like annot, tabs, metadata, etc fz_dictdels(pageObj2, "Parent"); // Set the media box setPageMediaBox(inFile->xref, pageObj2, bbRect[ctr]); // Set the rotation based on input switch(p_mode) { // no rotation if fit height case FitHeight: case Fit2xHeight: break; // rotate -90 deg if fit width case Fit2xWidth: case FitWidth: setPageRotate(pageObj2, p_reverseLandscape ? 90 : -90); break; case SmartFitHeight: case SmartFitWidth: default: return soPdfError(fz_throw("Mode(%d) not yet implemented.", p_mode)); break; } // push the indirect reference to the destination list for copy by pdf_transplant error = fz_arraypush(outFile->editobjs, pageRef2); if (error) return soPdfError(error); } } } // flush the objects into destination from source { fz_obj *results; int outPages; printf("\nCopying output page : "); error = pdf_transplant(outFile->xref, inFile->xref, &results, outFile->editobjs); if (error) return soPdfError(error); outPages = fz_arraylen(results); for (int ctr = 0; ctr < outPages; ctr++) { displayPageNumber(ctr + 1, !ctr); error = fz_arraypush(outFile->pagelist, fz_arrayget(results, p_reverseLandscape ? outPages - 1 - ctr : ctr)); if (error) return soPdfError(error); } fz_dropobj(results); } // flush page tree // Create page tree and add back-links { fz_obj *pageTreeObj; fz_obj *pageTreeRef; // allocate a new object in out file for pageTree object error = pdf_allocobject(outFile->xref, &pageTreeNum, &pageTreeGen); if (error) return soPdfError(error); // Create a page tree object error = fz_packobj(&pageTreeObj, "<</Type/Pages/Count %i/Kids %o>>", fz_arraylen(outFile->pagelist), outFile->pagelist); if (error) return soPdfError(error); // Update the xref entry with the pageTree object pdf_updateobject(outFile->xref, pageTreeNum, pageTreeGen, pageTreeObj); fz_dropobj(pageTreeObj); // Create a reference to the pageTree object error = fz_newindirect(&pageTreeRef, pageTreeNum, pageTreeGen); if (error) return soPdfError(error); // // For every page in the output file, update the parent entry // for (int ctr = 0; ctr < fz_arraylen(outFile->pagelist); ctr++) { fz_obj *pageObj; int num = fz_tonum(fz_arrayget(outFile->pagelist, ctr)); int gen = fz_togen(fz_arrayget(outFile->pagelist, ctr)); // Get the page object from xreft error = pdf_loadobject(&pageObj, outFile->xref, num, gen); if (error) return soPdfError(error); // Update the parent entry in the page dictionary error = fz_dictputs(pageObj, "Parent", pageTreeRef); if (error) return soPdfError(error); // Update the entry with the updated page object pdf_updateobject(outFile->xref, num, gen, pageObj); fz_dropobj(pageObj); } } // Create catalog and root entries { fz_obj *catObj, *infoObj; int rootNum, rootGen; int infoNum, infoGen; // // Copy the info catalog to the destination // alloc an object id and gen id in destination file error = pdf_allocobject(outFile->xref, &infoNum, &infoGen); if (error) return soPdfError(error); // make a deep copy of the original page dict error = fz_deepcopydict(&infoObj, inFile->xref->info); if (error) return soPdfError(error); // update the dest file with object pdf_updateobject(outFile->xref, infoNum, infoGen, infoObj); outFile->xref->info = infoObj; fz_dropobj(infoObj); // // root/catalog object creation error = pdf_allocobject(outFile->xref, &rootNum, &rootGen); if (error) return soPdfError(error); error = fz_packobj(&catObj, "<</Type/Catalog /Pages %r>>", pageTreeNum, pageTreeGen); if (error) return soPdfError(error); pdf_updateobject(outFile->xref, rootNum, rootGen, catObj); fz_dropobj(catObj); // Create trailer error = fz_packobj(&outFile->xref->trailer, "<</Root %r /Info %r>>", rootNum, rootGen, infoNum, infoGen); if (error) return soPdfError(error); } // Update the info in the target file and save the xref printf("\nSaving.\n"); error = setPageInfo(inFile, outFile); if (error) return soPdfError(error); error = pdf_savexref(outFile->xref, outFile->fileName, NULL); if (error) return soPdfError(error); if (g_errorCount != 0) { printf("\nFollowing issues encounted were ignored.\n\n"); for (int ctr = g_errorCount - 1; ctr >= 0; ctr--) soPdfError(g_errorList[ctr]); } printf("\nSaved.\n"); return 0; }
fz_error pdf_parsearray(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap) { fz_error error = fz_okay; fz_obj *ary = nil; fz_obj *obj = nil; int a = 0, b = 0, n = 0; pdf_token_e tok; int len; ary = fz_newarray(4); while (1) { error = pdf_lex(&tok, file, buf, cap, &len); if (error) { fz_dropobj(ary); return fz_rethrow(error, "cannot parse array"); } if (tok != PDF_TINT && tok != PDF_TR) { if (n > 0) { obj = fz_newint(a); fz_arraypush(ary, obj); fz_dropobj(obj); } if (n > 1) { obj = fz_newint(b); fz_arraypush(ary, obj); fz_dropobj(obj); } n = 0; } if (tok == PDF_TINT && n == 2) { obj = fz_newint(a); fz_arraypush(ary, obj); fz_dropobj(obj); a = b; n --; } switch (tok) { case PDF_TCARRAY: *op = ary; return fz_okay; case PDF_TINT: if (n == 0) a = atoi(buf); if (n == 1) b = atoi(buf); n ++; break; case PDF_TR: if (n != 2) { fz_dropobj(ary); return fz_throw("cannot parse indirect reference in array"); } obj = fz_newindirect(a, b, xref); fz_arraypush(ary, obj); fz_dropobj(obj); n = 0; break; case PDF_TOARRAY: error = pdf_parsearray(&obj, xref, file, buf, cap); if (error) { fz_dropobj(ary); return fz_rethrow(error, "cannot parse array"); } fz_arraypush(ary, obj); fz_dropobj(obj); break; case PDF_TODICT: error = pdf_parsedict(&obj, xref, file, buf, cap); if (error) { fz_dropobj(ary); return fz_rethrow(error, "cannot parse array"); } fz_arraypush(ary, obj); fz_dropobj(obj); break; case PDF_TNAME: obj = fz_newname(buf); fz_arraypush(ary, obj); fz_dropobj(obj); break; case PDF_TREAL: obj = fz_newreal(atof(buf)); fz_arraypush(ary, obj); fz_dropobj(obj); break; case PDF_TSTRING: obj = fz_newstring(buf, len); fz_arraypush(ary, obj); fz_dropobj(obj); break; case PDF_TTRUE: obj = fz_newbool(1); fz_arraypush(ary, obj); fz_dropobj(obj); break; case PDF_TFALSE: obj = fz_newbool(0); fz_arraypush(ary, obj); fz_dropobj(obj); break; case PDF_TNULL: obj = fz_newnull(); fz_arraypush(ary, obj); fz_dropobj(obj); break; default: fz_dropobj(ary); return fz_throw("cannot parse token in array"); } } }
fz_error * pdf_parsearray(fz_obj **op, fz_stream *file, char *buf, int cap) { fz_error *error = nil; fz_obj *ary = nil; fz_obj *obj = nil; int a = 0, b = 0, n = 0; int tok, len; error = fz_newarray(op, 4); if (error) return error; ary = *op; while (1) { tok = pdf_lex(file, buf, cap, &len); if (tok != PDF_TINT && tok != PDF_TR) { if (n > 0) { error = fz_newint(&obj, a); if (error) goto cleanup; error = fz_arraypush(ary, obj); if (error) goto cleanup; fz_dropobj(obj); obj = nil; } if (n > 1) { error = fz_newint(&obj, b); if (error) goto cleanup; error = fz_arraypush(ary, obj); if (error) goto cleanup; fz_dropobj(obj); obj = nil; } n = 0; } if (tok == PDF_TINT && n == 2) { error = fz_newint(&obj, a); if (error) goto cleanup; error = fz_arraypush(ary, obj); if (error) goto cleanup; fz_dropobj(obj); obj = nil; a = b; n --; } switch (tok) { case PDF_TCARRAY: return nil; case PDF_TINT: if (n == 0) a = atoi(buf); if (n == 1) b = atoi(buf); n ++; break; case PDF_TR: if (n != 2) goto cleanup; error = fz_newindirect(&obj, a, b); if (error) goto cleanup; n = 0; break; case PDF_TOARRAY: error = pdf_parsearray(&obj, file, buf, cap); break; case PDF_TODICT: error = pdf_parsedict(&obj, file, buf, cap); break; case PDF_TNAME: error = fz_newname(&obj, buf); break; case PDF_TREAL: error = fz_newreal(&obj, atof(buf)); break; case PDF_TSTRING: error = fz_newstring(&obj, buf, len); break; case PDF_TTRUE: error = fz_newbool(&obj, 1); break; case PDF_TFALSE: error = fz_newbool(&obj, 0); break; case PDF_TNULL: error = fz_newnull(&obj); break; default: goto cleanup; } if (error) goto cleanup; if (obj) { error = fz_arraypush(ary, obj); if (error) goto cleanup; fz_dropobj(obj); } obj = nil; } cleanup: if (obj) fz_dropobj(obj); if (ary) fz_dropobj(ary); if (error) return error; return fz_throw("syntaxerror: corrupt array"); }