/*
 * Write object (num, gen) to `out` as a stream object whose Filter and
 * DecodeParms entries have been removed.
 *
 * The stream data is obtained through pdf_loadstream(). When `doascii` is
 * set and isbinarystream() reports the data as binary, the data is replaced
 * by the result of hexbuf() and addhexfilter() is applied to the dictionary.
 * The Length entry is rewritten to match the buffer that is finally written.
 *
 * Any error from pdf_loadstream() is passed to die().
 */
static void expandstream(fz_obj *obj, int num, int gen)
{
    fz_error status;
    fz_buffer *data, *hex;
    fz_obj *lenobj;

    status = pdf_loadstream(&data, xref, num, gen);
    if (status)
        die(status);

    /* the filter description no longer matches the data we are about to emit */
    fz_dictdels(obj, "Filter");
    fz_dictdels(obj, "DecodeParms");

    if (doascii && isbinarystream(data))
    {
        /* swap the loaded buffer for its hex-encoded counterpart */
        hex = hexbuf(data->data, data->len);
        fz_dropbuffer(data);
        data = hex;
        addhexfilter(obj);
    }

    /* Length must describe the bytes actually written below */
    lenobj = fz_newint(data->len);
    fz_dictputs(obj, "Length", lenobj);
    fz_dropobj(lenobj);

    /* object header, dictionary, raw stream bytes, trailer */
    fprintf(out, "%d %d obj\n", num, gen);
    fz_fprintobj(out, obj, !doexpand);
    fprintf(out, "stream\n");
    fwrite(data->data, 1, data->len, out);
    fprintf(out, "endstream\nendobj\n\n");

    fz_dropbuffer(data);
}
//
// Overwrite the four entries of a page's MediaBox array with the rounded
// coordinates of `mediaBox`, and remove the page's CropBox entry.
//
//   pdfXRef  - xref used to resolve the MediaBox entry
//   pageObj  - page dictionary to modify
//   mediaBox - new bounds; rounded with fz_roundrect() before being stored
//
// Returns NULL on success, or an error when the MediaBox entry is missing,
// cannot be resolved, is not an array, or an integer object cannot be
// allocated.
//
fz_error* setPageMediaBox(pdf_xref* pdfXRef, fz_obj* pageObj, fz_rect mediaBox)
{
    fz_error *error;
    fz_obj *boxArray;
    fz_obj *coordObj;
    fz_irect rounded;
    int coords[4];
    int idx;

    // The media box is being reduced, so an existing CropBox is of no use
    fz_dictdels(pageObj, "CropBox");

    // Locate the MediaBox array
    boxArray = fz_dictgets(pageObj, "MediaBox");
    if (boxArray == NULL)
        return fz_throw("no MediaBox entry");

    // NOTE(review): the reference obtained through pdf_resolve is not
    // released in this function -- confirm who owns it.
    error = pdf_resolve(&boxArray, pdfXRef);
    if (error)
        return fz_rethrow(error, "cannot resolve page bounds");

    if (!fz_isarray(boxArray))
        return fz_throw("cannot find page bounds");

    // Array order is x0, y0, x1, y1
    rounded = fz_roundrect(mediaBox);
    coords[0] = rounded.x0;
    coords[1] = rounded.y0;
    coords[2] = rounded.x1;
    coords[3] = rounded.y1;

    for (idx = 0; idx < 4; idx++)
    {
        error = fz_newint(&coordObj, coords[idx]);
        if (error)
            return fz_rethrow(error, "cannot allocate int");

        fz_arrayput(boxArray, idx, coordObj);
        fz_dropobj(coordObj);
    }

    return NULL;
}
void cleanexpand(void) { fz_error *error; fz_obj *stmobj; fz_buffer *buf; fz_obj *stmlen; int i, gen; for (i = 0; i < src->len; i++) { if (src->table[i].type == 'n') { gen = src->table[i].gen; if (pdf_isstream(src, i, gen)) { error = pdf_loadobject(&stmobj, src, i, gen); if (error) die(error); error = pdf_loadstream(&buf, src, i, gen); if (error) die(error); fz_dictdels(stmobj, "Filter"); fz_dictdels(stmobj, "DecodeParms"); error = fz_newint(&stmlen, buf->wp - buf->rp); if (error) die(error); error = fz_dictputs(stmobj, "Length", stmlen); if (error) die(error); fz_dropobj(stmlen); pdf_updateobject(src, i, gen, stmobj); pdf_updatestream(src, i, gen, buf); fz_dropobj(stmobj); } } } }
void editcopy(int pagenum) { fz_error *error; fz_obj *obj; fz_obj *ref; fz_obj *num; printf("copy %s page %d\n", srcname, pagenum); ref = srcpages->pref[pagenum - 1]; obj = pdf_getpageobject(srcpages, pagenum - 1); fz_dictdels(obj, "Parent"); /* fz_dictdels(obj, "B"); fz_dictdels(obj, "PieceInfo"); fz_dictdels(obj, "Metadata"); fz_dictdels(obj, "Annots"); fz_dictdels(obj, "Tabs"); */ pdf_updateobject(src, fz_tonum(ref), fz_togen(ref), obj); error = fz_arraypush(editobjects, ref); if (error) die(error); error = fz_newint(&num, editmode); if (error) die(error); error = fz_arraypush(editmodelist, num); if (error) die(error); fz_dropobj(num); }
//
// Copy the pages of inFile into outFile, splitting each source page into
// up to three output pages as reported by processPage(), then build the
// page tree, catalog, info dictionary and trailer of the output file and
// save it to outFile->fileName.
//
//   inFile  - opened source document (must not be NULL)
//   outFile - destination document (must not be NULL)
//
// Returns 0 on success; on failure returns the value of soPdfError()
// for the error encountered.
//
// References created in this function (the duplicated page dictionary, the
// indirect page reference and the page tree reference) are released once
// the containers they were stored in hold them.
//
int copyPdfFile(soPdfFile* inFile, soPdfFile* outFile)
{
    fz_error *error;
    int pageTreeNum, pageTreeGen;

    assert(inFile != NULL);
    assert(outFile != NULL);

    //
    // Process every page in the source file
    //
    {
        printf("\nProcessing input page : ");
        for (int pageNo = 0; pageNo < pdf_getpagecount(inFile->pageTree); pageNo++)
        {
            displayPageNumber(pageNo + 1, !pageNo);

            // Get the page object from the source
            fz_obj *pageObj = pdf_getpageobject(inFile->pageTree, pageNo);

            //
            // Process the page. Each page can be split into up-to 3 pages
            //
            fz_rect bbRect[3];
            error = processPage(inFile, pageNo, bbRect, 3);
            if (error)
                return soPdfError(error);

            for (int ctr = 0; ctr < 3; ctr++)
            {
                // Check if this was a blank page
                if (fz_isemptyrect(bbRect[ctr]))
                    break;

                //
                // Copy the source page dictionary entry. This is done by
                // making a copy of the page dict object and adding that
                // copy to the source file. The copy is then referenced and
                // added to the destination file.
                //
                // This convoluted procedure is needed because the copy is
                // performed by pdf_transplant, which accepts a source and a
                // destination; whatever the destination object references
                // is deep copied.
                //

                // Allocate an object id and generation id in the source file.
                //
                // There is a bug in mupdf where the object allocation
                // returns 0 oid and 0 gid when the input pdf file has an
                // xref stream. To work around it, pdf_allocobject is
                // retried up to 10 times.
                //
                int sNum, sGen, tries;
                for (tries = 0; tries < 10; tries++)
                {
                    error = pdf_allocobject(inFile->xref, &sNum, &sGen);
                    if (error)
                        return soPdfError(error);

                    // If sNum is non zero then the allocation was successful
                    if (sNum != 0)
                        break;

                    pdf_updateobject(inFile->xref, sNum, sGen, pageObj);
                }

                // If we didn't succeed even after 10 tries then this file
                // is not going to work.
                if (tries >= 10)
                    return soPdfError(fz_throw("cannot allocate object because of mupdf bug"));

                // make a deep copy of the original page dict
                fz_obj *pageObj2;
                error = fz_deepcopydict(&pageObj2, pageObj);
                if (error)
                    return soPdfError(error);

                // Update the source file with the duplicate page object.
                // Our own reference is kept until the dictionary has been
                // fully edited below.
                pdf_updateobject(inFile->xref, sNum, sGen, pageObj2);

                // create an indirect reference to the page object
                fz_obj *pageRef2;
                error = fz_newindirect(&pageRef2, sNum, sGen);
                if (error)
                {
                    fz_dropobj(pageObj2);
                    return soPdfError(error);
                }

                // Delete the parent dictionary entry.
                // Do we need to delete any other dictionary entry
                // like annot, tabs, metadata, etc
                fz_dictdels(pageObj2, "Parent");

                // Set the media box
                // NOTE(review): the error returned by setPageMediaBox is
                // ignored here -- confirm whether that is deliberate.
                setPageMediaBox(inFile->xref, pageObj2, bbRect[ctr]);

                // Set the rotation based on input
                switch (p_mode)
                {
                // no rotation if fit height
                case FitHeight:
                case Fit2xHeight:
                    break;

                // rotate -90 deg if fit width
                case Fit2xWidth:
                case FitWidth:
                    setPageRotate(pageObj2, p_reverseLandscape ? 90 : -90);
                    break;

                case SmartFitHeight:
                case SmartFitWidth:
                default:
                    fz_dropobj(pageRef2);
                    fz_dropobj(pageObj2);
                    return soPdfError(fz_throw("Mode(%d) not yet implemented.", p_mode));
                }

                // push the indirect reference to the destination list for
                // copy by pdf_transplant
                error = fz_arraypush(outFile->editobjs, pageRef2);
                if (error)
                {
                    fz_dropobj(pageRef2);
                    fz_dropobj(pageObj2);
                    return soPdfError(error);
                }

                // The array and the xref now hold these objects; release
                // the references created in this iteration.
                fz_dropobj(pageRef2);
                fz_dropobj(pageObj2);
            }
        }
    }

    // flush the objects into destination from source
    {
        fz_obj *results;
        int outPages;

        printf("\nCopying output page : ");
        error = pdf_transplant(outFile->xref, inFile->xref, &results, outFile->editobjs);
        if (error)
            return soPdfError(error);

        outPages = fz_arraylen(results);
        for (int ctr = 0; ctr < outPages; ctr++)
        {
            displayPageNumber(ctr + 1, !ctr);
            error = fz_arraypush(outFile->pagelist,
                fz_arrayget(results, p_reverseLandscape ? outPages - 1 - ctr : ctr));
            if (error)
                return soPdfError(error);
        }

        fz_dropobj(results);
    }

    // flush page tree

    // Create page tree and add back-links
    {
        fz_obj *pageTreeObj;
        fz_obj *pageTreeRef;

        // allocate a new object in out file for pageTree object
        error = pdf_allocobject(outFile->xref, &pageTreeNum, &pageTreeGen);
        if (error)
            return soPdfError(error);

        // Create a page tree object
        error = fz_packobj(&pageTreeObj, "<</Type/Pages/Count %i/Kids %o>>",
            fz_arraylen(outFile->pagelist), outFile->pagelist);
        if (error)
            return soPdfError(error);

        // Update the xref entry with the pageTree object
        pdf_updateobject(outFile->xref, pageTreeNum, pageTreeGen, pageTreeObj);
        fz_dropobj(pageTreeObj);

        // Create a reference to the pageTree object
        error = fz_newindirect(&pageTreeRef, pageTreeNum, pageTreeGen);
        if (error)
            return soPdfError(error);

        //
        // For every page in the output file, update the parent entry
        //
        for (int ctr = 0; ctr < fz_arraylen(outFile->pagelist); ctr++)
        {
            fz_obj *pageObj;

            int num = fz_tonum(fz_arrayget(outFile->pagelist, ctr));
            int gen = fz_togen(fz_arrayget(outFile->pagelist, ctr));

            // Get the page object from xref
            error = pdf_loadobject(&pageObj, outFile->xref, num, gen);
            if (error)
            {
                fz_dropobj(pageTreeRef);
                return soPdfError(error);
            }

            // Update the parent entry in the page dictionary
            error = fz_dictputs(pageObj, "Parent", pageTreeRef);
            if (error)
            {
                fz_dropobj(pageObj);
                fz_dropobj(pageTreeRef);
                return soPdfError(error);
            }

            // Update the entry with the updated page object
            pdf_updateobject(outFile->xref, num, gen, pageObj);
            fz_dropobj(pageObj);
        }

        // Every page dictionary now holds the parent reference
        fz_dropobj(pageTreeRef);
    }

    // Create catalog and root entries
    {
        fz_obj *catObj, *infoObj;
        int rootNum, rootGen;
        int infoNum, infoGen;

        //
        // Copy the info dictionary to the destination
        // alloc an object id and gen id in destination file
        error = pdf_allocobject(outFile->xref, &infoNum, &infoGen);
        if (error)
            return soPdfError(error);

        // make a deep copy of the source info dictionary
        error = fz_deepcopydict(&infoObj, inFile->xref->info);
        if (error)
            return soPdfError(error);

        // update the dest file with object
        pdf_updateobject(outFile->xref, infoNum, infoGen, infoObj);

        // NOTE(review): xref->info is assigned without taking an extra
        // reference and our reference is dropped right after -- confirm
        // this matches how xref->info is released elsewhere.
        outFile->xref->info = infoObj;
        fz_dropobj(infoObj);

        //
        // root/catalog object creation
        error = pdf_allocobject(outFile->xref, &rootNum, &rootGen);
        if (error)
            return soPdfError(error);

        error = fz_packobj(&catObj, "<</Type/Catalog /Pages %r>>", pageTreeNum, pageTreeGen);
        if (error)
            return soPdfError(error);

        pdf_updateobject(outFile->xref, rootNum, rootGen, catObj);
        fz_dropobj(catObj);

        // Create trailer
        error = fz_packobj(&outFile->xref->trailer, "<</Root %r /Info %r>>",
            rootNum, rootGen, infoNum, infoGen);
        if (error)
            return soPdfError(error);
    }

    // Update the info in the target file and save the xref
    printf("\nSaving.\n");
    error = setPageInfo(inFile, outFile);
    if (error)
        return soPdfError(error);

    error = pdf_savexref(outFile->xref, outFile->fileName, NULL);
    if (error)
        return soPdfError(error);

    // Report the errors that were recorded but ignored, newest first
    if (g_errorCount != 0)
    {
        printf("\nFollowing issues encounted were ignored.\n\n");
        for (int ctr = g_errorCount - 1; ctr >= 0; ctr--)
            soPdfError(g_errorList[ctr]);
    }

    printf("\nSaved.\n");

    return 0;
}
/*
 * Walk one node of the page tree and record every page found beneath it.
 *
 * A node whose Kids entry is an array and whose Count entry is an integer
 * is treated as an intermediate node: its Resources, MediaBox, CropBox and
 * Rotate entries (when present) override the values carried in `info`, and
 * each kid is visited recursively. A temporary ".seen" entry marks the node
 * while its kids are being visited so that a cycle stops the recursion.
 *
 * Any other node is treated as a page: entries carried in `info` that the
 * page lacks are added to it, and the node and its resolved dictionary are
 * appended to xref->pagerefs and xref->pageobjs. If the arrays are already
 * full a warning is issued and both are grown by one slot.
 */
void pdf_loadpagetreenode(pdf_xref *xref, fz_obj *node, struct info info)
{
    fz_obj *kids, *count;
    fz_obj *entry, *marker, *page;
    int k, nkids;

    /* prevent infinite recursion */
    if (fz_dictgets(node, ".seen"))
        return;

    kids = fz_dictgets(node, "Kids");
    count = fz_dictgets(node, "Count");

    if (!fz_isarray(kids) || !fz_isint(count))
    {
        /* page: fill in the attributes it does not define itself */
        page = fz_resolveindirect(node);

        if (info.resources && !fz_dictgets(page, "Resources"))
            fz_dictputs(page, "Resources", info.resources);
        if (info.mediabox && !fz_dictgets(page, "MediaBox"))
            fz_dictputs(page, "MediaBox", info.mediabox);
        if (info.cropbox && !fz_dictgets(page, "CropBox"))
            fz_dictputs(page, "CropBox", info.cropbox);
        if (info.rotate && !fz_dictgets(page, "Rotate"))
            fz_dictputs(page, "Rotate", info.rotate);

        /* make room when the arrays are already full */
        if (xref->pagelen == xref->pagecap)
        {
            fz_warn("found more pages than expected");
            xref->pagecap ++;
            xref->pagerefs = fz_realloc(xref->pagerefs, sizeof(fz_obj*) * xref->pagecap);
            xref->pageobjs = fz_realloc(xref->pageobjs, sizeof(fz_obj*) * xref->pagecap);
        }

        xref->pagerefs[xref->pagelen] = fz_keepobj(node);
        xref->pageobjs[xref->pagelen] = fz_keepobj(page);
        xref->pagelen ++;
        return;
    }

    /* intermediate node: values set here override those from above */
    entry = fz_dictgets(node, "Resources");
    if (entry)
        info.resources = entry;

    entry = fz_dictgets(node, "MediaBox");
    if (entry)
        info.mediabox = entry;

    entry = fz_dictgets(node, "CropBox");
    if (entry)
        info.cropbox = entry;

    entry = fz_dictgets(node, "Rotate");
    if (entry)
        info.rotate = entry;

    /* mark the node for the duration of the descent */
    marker = fz_newnull();
    fz_dictputs(node, ".seen", marker);
    fz_dropobj(marker);

    nkids = fz_arraylen(kids);
    for (k = 0; k < nkids; k++)
        pdf_loadpagetreenode(xref, fz_arrayget(kids, k), info);

    fz_dictdels(node, ".seen");
}