예제 #1
0
파일: pdfclean.c 프로젝트: paradigm/paraPDF
static void renumberobjs(void)
{
	pdf_xrefentry *oldxref;
	int newlen;
	int num;

	/* Apply renumber map to indirect references in all objects in xref */
	renumberobj(xref->trailer);
	for (num = 0; num < xref->len; num++)
	{
		fz_obj *obj = xref->table[num].obj;

		if (fz_isindirect(obj))
		{
			obj = fz_newindirect(renumbermap[fz_tonum(obj)], 0, xref);
			pdf_updateobject(xref, num, 0, obj);
			fz_dropobj(obj);
		}
		else
		{
			renumberobj(obj);
		}
	}

	/* Create new table for the reordered, compacted xref */
	oldxref = xref->table;
	xref->table = fz_calloc(xref->len, sizeof(pdf_xrefentry));
	xref->table[0] = oldxref[0];

	/* Move used objects into the new compacted xref */
	newlen = 0;
	for (num = 1; num < xref->len; num++)
	{
		if (uselist[num])
		{
			if (newlen < renumbermap[num])
				newlen = renumbermap[num];
			xref->table[renumbermap[num]] = oldxref[num];
		}
		else
		{
			if (oldxref[num].obj)
				fz_dropobj(oldxref[num].obj);
		}
	}

	fz_free(oldxref);

	/* Update the used objects count in compacted xref */
	xref->len = newlen + 1;

	/* Update list of used objects to fit with compacted xref */
	for (num = 1; num < xref->len; num++)
		uselist[num] = 1;
}
예제 #2
0
void
cleanexpand(void)
{
	fz_error *error;
	fz_obj *stmobj;
	fz_buffer *buf;
	fz_obj *stmlen;
	int i, gen;

	for (i = 0; i < src->len; i++)
	{
		if (src->table[i].type == 'n')
		{
			gen = src->table[i].gen;

			if (pdf_isstream(src, i, gen))
			{
				error = pdf_loadobject(&stmobj, src, i, gen);
				if (error) die(error);

				error = pdf_loadstream(&buf, src, i, gen);
				if (error) die(error);

				fz_dictdels(stmobj, "Filter");
				fz_dictdels(stmobj, "DecodeParms");

				error = fz_newint(&stmlen, buf->wp - buf->rp);
				if (error) die(error);
				error = fz_dictputs(stmobj, "Length", stmlen);
				if (error) die(error);
				fz_dropobj(stmlen);

				pdf_updateobject(src, i, gen, stmobj);
				pdf_updatestream(src, i, gen, buf);

				fz_dropobj(stmobj);
			}
		}
	}
}
예제 #3
0
void
editcopy(int pagenum)
{
	fz_error *error;
	fz_obj *obj;
	fz_obj *ref;
	fz_obj *num;

	printf("copy %s page %d\n", srcname, pagenum);

	ref = srcpages->pref[pagenum - 1];
	obj = pdf_getpageobject(srcpages, pagenum - 1);

	fz_dictdels(obj, "Parent");
	/*
	fz_dictdels(obj, "B");
	fz_dictdels(obj, "PieceInfo");
	fz_dictdels(obj, "Metadata");
	fz_dictdels(obj, "Annots");
	fz_dictdels(obj, "Tabs");
	*/

	pdf_updateobject(src, fz_tonum(ref), fz_togen(ref), obj);

	error = fz_arraypush(editobjects, ref);
	if (error)
		die(error);

	error = fz_newint(&num, editmode);
	if (error)
		die(error);

	error = fz_arraypush(editmodelist, num);
	if (error)
		die(error);

	fz_dropobj(num);
}
예제 #4
0
파일: pdfclean.c 프로젝트: paradigm/paraPDF
/*
 * Reduce the document to the pages named on the command line.
 *
 * Each remaining argument (from argv[fz_optind] on) is a comma-separated
 * list of page specs: "N", "N-M", "N-" (N to the last page) or "-M"
 * (the last page and M, taken in ascending order). Ranges are clamped to
 * 1 .. page count. The root object is replaced by a dictionary holding
 * only the old "Type" and "Pages" entries, and the page tree node gets a
 * new "Kids" array and "Count" covering just the selected pages.
 */
static void retainpages(int argc, char **argv)
{
	fz_error error;
	fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent;

	error = pdf_loadpagetree(xref);
	if (error)
		die(fz_rethrow(error, "cannot load page tree"));

	/* Build a minimal root so nothing else can reference dropped pages */
	oldroot = fz_dictgets(xref->trailer, "Root");
	pages = fz_dictgets(oldroot, "Pages");

	root = fz_newdict(2);
	fz_dictputs(root, "Type", fz_dictgets(oldroot, "Type"));
	fz_dictputs(root, "Pages", fz_dictgets(oldroot, "Pages"));

	pdf_updateobject(xref, fz_tonum(oldroot), fz_togen(oldroot), root);

	fz_dropobj(root);

	/* Every kept page is re-parented to the existing page tree node */
	parent = fz_newindirect(fz_tonum(pages), fz_togen(pages), xref);
	kids = fz_newarray(1);

	for (; fz_optind != argc; fz_optind++)
	{
		char *pagelist = argv[fz_optind];
		char *spec;

		for (spec = fz_strsep(&pagelist, ","); spec; spec = fz_strsep(&pagelist, ","))
		{
			int first, last, page;
			char *dash = strchr(spec, '-');

			/* A leading dash means the range starts at the last page */
			first = (dash == spec) ? pdf_getpagecount(xref) : atoi(spec);
			last = first;

			/* A trailing dash means the range runs to the last page */
			if (dash)
				last = (dash[1] != '\0') ? atoi(dash + 1) : pdf_getpagecount(xref);

			/* Always walk the range in ascending order */
			if (first > last)
			{
				int swap = first;
				first = last;
				last = swap;
			}

			if (first < 1)
				first = 1;
			if (last > pdf_getpagecount(xref))
				last = pdf_getpagecount(xref);

			for (page = first; page <= last; page++)
			{
				fz_obj *pageobj = pdf_getpageobject(xref, page);
				fz_obj *pageref = pdf_getpageref(xref, page);

				fz_dictputs(pageobj, "Parent", parent);

				/* Record the page in the replacement kids array */
				fz_arraypush(kids, pageref);
			}
		}
	}

	fz_dropobj(parent);

	/* Install the new page count and kids array on the page tree node */
	countobj = fz_newint(fz_arraylen(kids));
	fz_dictputs(pages, "Count", countobj);
	fz_dropobj(countobj);
	fz_dictputs(pages, "Kids", kids);
	fz_dropobj(kids);
}
예제 #5
0
파일: processPdf.cpp 프로젝트: iroot/sopdf
//
// Copy the pages of inFile into outFile and save outFile to disk.
//
// Every source page is run through processPage, which fills up to three
// bounding rectangles. For each non-empty rectangle a deep copy of the page
// dictionary is registered in the source xref, its "Parent" key is removed,
// its media box and rotation are set, and a reference to it is queued in
// outFile->editobjs. The queued objects are then transplanted into the
// destination xref, a page tree, info dictionary, catalog and trailer are
// created there, and the result is written with pdf_savexref.
//
// Returns 0 on success; on failure returns whatever soPdfError() returns
// for the error that was encountered.
//
int
copyPdfFile(
    soPdfFile* inFile,
    soPdfFile* outFile
    )
{
    fz_error    *error;
    int         pageTreeNum, pageTreeGen;

    assert(inFile != NULL);
    assert(outFile != NULL);

    //
    // Process every page in the source file
    //
    {
        printf("\nProcessing input page : ");
        for (int pageNo = 0; pageNo < pdf_getpagecount(inFile->pageTree); pageNo++)
        {
            displayPageNumber(pageNo + 1, !pageNo);

            // Get the page object from the source
            fz_obj  *pageObj = pdf_getpageobject(inFile->pageTree, pageNo);

            //
            // Process the page. Each page can be split into up-to 3 pages
            //
            fz_rect    bbRect[3];
            error = processPage(inFile, pageNo, bbRect, 3);
            if (error)
                return soPdfError(error);


            for (int ctr = 0; ctr < 3; ctr++)
            {
                // Check if this was a blank page
                if (fz_isemptyrect(bbRect[ctr]))
                    break;

                //
                // copy the source page dictionary entry. The way this is done is basically
                // by making a copy of the page dict object in the source file, and adding
                // the copy in the source file. Then the copied page dict object is
                // referenced and added to the destination file.
                //
                // This convoluted procedure is done because the copy is done by pdf_transplant
                // function that accepts a source and destination. Whatever is referenced by
                // destination object is deep copied
                //


                // allocate an object id and generation id in source file
                //
                // There is a bug in mupdf where the object allocation returns
                // 0 oid and 0 gid when the input pdf file has iref stream
                // so to work around the issue, we wrap the pdf_allocojbect
                // in a for loop 10 times to get the number
                //
                int sNum, sGen, tries;

                for (tries = 0; tries < 10; tries++)
                {
                    error = pdf_allocobject(inFile->xref, &sNum, &sGen);
                    if (error)
                        return soPdfError(error);

                    // If sNum is non zero then the allocation was successful
                    if (sNum != 0)
                        break;
                    pdf_updateobject(inFile->xref, sNum, sGen, pageObj);
                }

                // If we didn't succeed even after 10 tries then this file
                // is not going to work.
                if (tries >= 10)
                    return soPdfError(fz_throw("cannot allocate object because of mupdf bug"));

                // make a deep copy of the original page dict
                fz_obj  *pageObj2;
                error = fz_deepcopydict(&pageObj2, pageObj);
                if (error)
                    return soPdfError(error);

                // update the source file with the duplicate page object
                //
                // The local reference to pageObj2 is kept until all edits
                // below are done; it used to be dropped here while the
                // object was still being modified further down.
                pdf_updateobject(inFile->xref, sNum, sGen, pageObj2);

                // create an indirect reference to the page object
                fz_obj  *pageRef2;
                error = fz_newindirect(&pageRef2, sNum, sGen);
                if (error)
                {
                    fz_dropobj(pageObj2);
                    return soPdfError(error);
                }

                // delete the parent dictionary entry
                // Do we need to delete any other dictionary entry
                // like annot, tabs, metadata, etc
                fz_dictdels(pageObj2, "Parent");

                // Set the media box
                setPageMediaBox(inFile->xref, pageObj2, bbRect[ctr]);

                // Set the rotation based on input
                switch(p_mode)
                {
                    // no rotation if fit height
                case FitHeight:
                case Fit2xHeight:
                    break;

                    // rotate -90 deg if fit width
                case Fit2xWidth:
                case FitWidth:
                    setPageRotate(pageObj2, p_reverseLandscape ? 90 : -90);
                    break;

                case SmartFitHeight:
                case SmartFitWidth:
                default:
                    fz_dropobj(pageRef2);
                    fz_dropobj(pageObj2);
                    return soPdfError(fz_throw("Mode(%d) not yet implemented.", p_mode));
                }

                // All edits to the copy are done; release the local reference
                fz_dropobj(pageObj2);

                // push the indirect reference to the destination list for copy by pdf_transplant
                error = fz_arraypush(outFile->editobjs, pageRef2);

                // The local reference is no longer needed whether or not the
                // push succeeded (it was previously never released)
                fz_dropobj(pageRef2);
                if (error)
                    return soPdfError(error);
            }
        }
    }

    // flush the objects into destination from source
    {
        fz_obj      *results;
        int         outPages;

        printf("\nCopying output page : ");
        error = pdf_transplant(outFile->xref, inFile->xref, &results, outFile->editobjs);
        if (error)
            return soPdfError(error);

        outPages = fz_arraylen(results);
        for (int ctr = 0; ctr < outPages; ctr++)
        {
            displayPageNumber(ctr + 1, !ctr);

            // With reverse landscape the pages are appended back to front
            error = fz_arraypush(outFile->pagelist, fz_arrayget(results,
                p_reverseLandscape ? outPages - 1 - ctr : ctr));
            if (error)
            {
                fz_dropobj(results);
                return soPdfError(error);
            }
        }

        fz_dropobj(results);
    }

    // flush page tree

    // Create page tree and add back-links
    {
        fz_obj  *pageTreeObj;
        fz_obj  *pageTreeRef;

        // allocate a new object in out file for pageTree object
        error = pdf_allocobject(outFile->xref, &pageTreeNum, &pageTreeGen);
        if (error)
            return soPdfError(error);

        // Create a page tree object
        error = fz_packobj(&pageTreeObj, "<</Type/Pages/Count %i/Kids %o>>",
            fz_arraylen(outFile->pagelist), outFile->pagelist);
        if (error)
            return soPdfError(error);

        // Update the xref entry with the pageTree object
        pdf_updateobject(outFile->xref, pageTreeNum, pageTreeGen, pageTreeObj);

        fz_dropobj(pageTreeObj);

        // Create a reference to the pageTree object
        error = fz_newindirect(&pageTreeRef, pageTreeNum, pageTreeGen);
        if (error)
            return soPdfError(error);

        //
        // For every page in the output file, update the parent entry
        //
        for (int ctr = 0; ctr < fz_arraylen(outFile->pagelist); ctr++)
        {
            fz_obj  *pageObj;

            int num = fz_tonum(fz_arrayget(outFile->pagelist, ctr));
            int gen = fz_togen(fz_arrayget(outFile->pagelist, ctr));

            // Get the page object from xreft
            error = pdf_loadobject(&pageObj, outFile->xref, num, gen);
            if (error)
            {
                fz_dropobj(pageTreeRef);
                return soPdfError(error);
            }

            // Update the parent entry in the page dictionary
            error = fz_dictputs(pageObj, "Parent", pageTreeRef);
            if (error)
            {
                fz_dropobj(pageObj);
                fz_dropobj(pageTreeRef);
                return soPdfError(error);
            }

            // Update the entry with the updated page object
            pdf_updateobject(outFile->xref, num, gen, pageObj);

            fz_dropobj(pageObj);
        }

        // Every page now holds its own reference to the parent; release
        // the local one (it was previously leaked)
        fz_dropobj(pageTreeRef);
    }

    // Create catalog and root entries
    {
        fz_obj  *catObj, *infoObj;
        int     rootNum, rootGen;
        int     infoNum, infoGen;

        //
        // Copy the info catalog to the destination

        // alloc an object id and gen id in destination file
        error = pdf_allocobject(outFile->xref, &infoNum, &infoGen);
        if (error)
            return soPdfError(error);

        // make a deep copy of the source info dictionary
        error = fz_deepcopydict(&infoObj, inFile->xref->info);
        if (error)
            return soPdfError(error);

        // update the dest file with object
        pdf_updateobject(outFile->xref, infoNum, infoGen, infoObj);

        // NOTE(review): xref->info is assigned without taking a reference of
        // its own and the local reference is dropped right after, so the
        // pointer relies on the reference held by the xref table -- confirm
        // how the xref releases its info field.
        outFile->xref->info = infoObj;

        fz_dropobj(infoObj);

        //
        // root/catalog object creation
        error = pdf_allocobject(outFile->xref, &rootNum, &rootGen);
        if (error)
            return soPdfError(error);

        error = fz_packobj(&catObj, "<</Type/Catalog /Pages %r>>", pageTreeNum, pageTreeGen);
        if (error)
            return soPdfError(error);

        pdf_updateobject(outFile->xref, rootNum, rootGen, catObj);

        fz_dropobj(catObj);

        // Create trailer
        error = fz_packobj(&outFile->xref->trailer, "<</Root %r /Info %r>>",
            rootNum, rootGen, infoNum, infoGen);
        if (error)
            return soPdfError(error);

    }

    // Update the info in the target file and save the xref
    printf("\nSaving.\n");
    error = setPageInfo(inFile, outFile);
    if (error)
        return soPdfError(error);

    error = pdf_savexref(outFile->xref, outFile->fileName, NULL);
    if (error)
        return soPdfError(error);

    // Report the errors that were collected but did not abort the copy,
    // most recent first
    if (g_errorCount != 0)
    {
        printf("\nFollowing issues encounted were ignored.\n\n");
        for (int ctr = g_errorCount - 1; ctr >= 0; ctr--)
            soPdfError(g_errorList[ctr]);
    }
    printf("\nSaved.\n");

    return 0;
}
예제 #6
0
/*
 * Finish the edited document held in editxref: create the page tree node
 * listing editpagelist, point every listed page's "Parent" at it, create
 * the catalog that refers to the page tree, and build a trailer whose
 * "Root" refers to the catalog.
 * Any error from an allocating, packing, loading or dictionary call is
 * passed to die().
 */
void
editflushcatalog(void)
{
	fz_error *error;
	int rootnum, rootgen;
	int listnum, listgen;
	fz_obj *listref;
	fz_obj *obj;
	int i;

	/* Create page tree and add back-links */

	error = pdf_allocobject(editxref, &listnum, &listgen);
	if (error)
		die(error);

	error = fz_packobj(&obj, "<</Type/Pages/Count %i/Kids %o>>",
			fz_arraylen(editpagelist),
			editpagelist);
	if (error)
		die(error);

	pdf_updateobject(editxref, listnum, listgen, obj);

	fz_dropobj(obj);

	error = fz_newindirect(&listref, listnum, listgen);
	if (error)
		die(error);

	for (i = 0; i < fz_arraylen(editpagelist); i++)
	{
		int num = fz_tonum(fz_arrayget(editpagelist, i));
		int gen = fz_togen(fz_arrayget(editpagelist, i));

		error = pdf_loadobject(&obj, editxref, num, gen);
		if (error)
			die(error);

		error = fz_dictputs(obj, "Parent", listref);
		if (error)
			die(error);

		pdf_updateobject(editxref, num, gen, obj);

		fz_dropobj(obj);
	}

	/*
	 * The reference was only needed for the back-links above; release the
	 * local reference so it is not leaked.
	 */
	fz_dropobj(listref);

	/* Create catalog */

	error = pdf_allocobject(editxref, &rootnum, &rootgen);
	if (error)
		die(error);

	error = fz_packobj(&obj, "<</Type/Catalog/Pages %r>>", listnum, listgen);
	if (error)
		die(error);

	pdf_updateobject(editxref, rootnum, rootgen, obj);

	fz_dropobj(obj);

	/* Create trailer */

	error = fz_packobj(&editxref->trailer, "<</Root %r>>", rootnum, rootgen);
	if (error)
		die(error);
}
예제 #7
0
/*
 * Write the document described by xref to a new file at path.
 *
 * The output consists of a header, every entry of type 'n', 'o' or 'a'
 * written through writeobject, a single cross-reference table covering
 * all xref->len entries, a trailer with /Size, /Root, optional /Info and,
 * when encrypt is given, /Encrypt and /ID, followed by the startxref
 * offset. xref->startxref is updated on success.
 *
 * Returns nil on success, fz_outofmem if the offset buffer cannot be
 * allocated, or the error reported by pdf_allocobject, pdf_updateobject,
 * fz_openwfile or writeobject.
 */
fz_error *
pdf_savexref(pdf_xref *xref, char *path, pdf_crypt *encrypt)
{
	fz_error *error;
	fz_stream *out;
	fz_obj *ref;
	int *offsets;
	int xrefpos;
	int eoid, egen;
	int i;

	pdf_logxref("savexref '%s' %p\n", path, xref);

	/* need to add encryption object for acrobat < 6 */
	if (encrypt)
	{
		pdf_logxref("make encryption dict\n");

		error = pdf_allocobject(xref, &eoid, &egen);
		if (error)
			return error;

		/*
		 * NOTE(review): this call is repeated with the same arguments
		 * after the trailer is written, but not on the error paths below;
		 * confirm whether encrypt->encrypt needs the second call there too.
		 */
		pdf_cryptobj(encrypt, encrypt->encrypt, eoid, egen);

		error = pdf_updateobject(xref, eoid, egen, encrypt->encrypt);
		if (error)
			return error;
	}

	/* One file offset per xref entry, filled in while objects are written */
	offsets = fz_malloc(sizeof(int) * xref->len);
	if (!offsets)
		return fz_outofmem;

	error = fz_openwfile(&out, path);
	if (error)
	{
		fz_free(offsets);
		return error;
	}

	/* Header line followed by a comment line of four high-bit bytes */
	fz_print(out, "%%PDF-%1.1f\n", xref->version);
	fz_print(out, "%%\342\343\317\323\n\n");

	for (i = 0; i < xref->len; i++)
	{
		pdf_xrefentry *entry = &xref->table[i];
		int live = entry->type == 'n' || entry->type == 'o' || entry->type == 'a';

		if (!live)
		{
			/* Entries that are not written keep their stored offset */
			offsets[i] = entry->ofs;
			continue;
		}

		offsets[i] = fz_tell(out);
		error = writeobject(out, xref, encrypt, i, entry->type == 'o' ? 0 : entry->gen);
		if (error)
			goto cleanup;
	}

	xrefpos = fz_tell(out);
	fz_print(out, "xref\n");
	fz_print(out, "0 %d\n", xref->len);

	for (i = 0; i < xref->len; i++)
	{
		int gen = xref->table[i].gen;
		int type = xref->table[i].type;

		/* Map the in-memory entry types onto the 'n' / 'f' markers written out */
		switch (type)
		{
		case 'o':
			gen = 0;
			type = 'n';
			break;
		case 'a':
			type = 'n';
			break;
		case 'd':
			type = 'f';
			break;
		default:
			break;
		}

		fz_print(out, "%010d %05d %c \n", offsets[i], gen, type);
	}

	fz_print(out, "\n");

	fz_print(out, "trailer\n<<\n  /Size %d", xref->len);

	ref = fz_dictgets(xref->trailer, "Root");
	fz_print(out, "\n  /Root %d %d R", fz_tonum(ref), fz_togen(ref));

	ref = fz_dictgets(xref->trailer, "Info");
	if (ref)
		fz_print(out, "\n  /Info %d %d R", fz_tonum(ref), fz_togen(ref));

	if (encrypt)
	{
		fz_print(out, "\n  /Encrypt %d %d R", eoid, egen);

		/* The ID array holds the same identifier twice */
		fz_print(out, "\n  /ID [");
		fz_printobj(out, encrypt->id, 1);
		fz_printobj(out, encrypt->id, 1);
		fz_print(out, "]");

		pdf_cryptobj(encrypt, encrypt->encrypt, eoid, egen);
	}
	fz_print(out, "\n>>\n\n");

	fz_print(out, "startxref\n");
	fz_print(out, "%d\n", xrefpos);
	fz_print(out, "%%%%EOF\n");

	xref->startxref = xrefpos;
	error = nil;

	/* Shared epilogue for both the success path and a failed writeobject */
cleanup:
	fz_free(offsets);
	fz_dropstream(out);
	return error;
}