Example #1
0
static void renumberobjs(void)
{
	pdf_xrefentry *oldxref;
	int newlen;
	int num;

	/* Apply renumber map to indirect references in all objects in xref */
	renumberobj(xref->trailer);
	for (num = 0; num < xref->len; num++)
	{
		fz_obj *obj = xref->table[num].obj;

		if (fz_isindirect(obj))
		{
			obj = fz_newindirect(renumbermap[fz_tonum(obj)], 0, xref);
			pdf_updateobject(xref, num, 0, obj);
			fz_dropobj(obj);
		}
		else
		{
			renumberobj(obj);
		}
	}

	/* Create new table for the reordered, compacted xref */
	oldxref = xref->table;
	xref->table = fz_calloc(xref->len, sizeof(pdf_xrefentry));
	xref->table[0] = oldxref[0];

	/* Move used objects into the new compacted xref */
	newlen = 0;
	for (num = 1; num < xref->len; num++)
	{
		if (uselist[num])
		{
			if (newlen < renumbermap[num])
				newlen = renumbermap[num];
			xref->table[renumbermap[num]] = oldxref[num];
		}
		else
		{
			if (oldxref[num].obj)
				fz_dropobj(oldxref[num].obj);
		}
	}

	fz_free(oldxref);

	/* Update the used objects count in compacted xref */
	xref->len = newlen + 1;

	/* Update list of used objects to fit with compacted xref */
	for (num = 1; num < xref->len; num++)
		uselist[num] = 1;
}
Example #2
0
/*
 * Recursively walk a dictionary or array and replace each indirect reference
 * it contains with a new generation-0 reference whose object number is looked
 * up in renumbermap. Objects that are neither dictionaries nor arrays are
 * left alone.
 */
static void renumberobj(fz_obj *obj)
{
	int idx;

	if (fz_isdict(obj))
	{
		idx = 0;
		while (idx < fz_dictlen(obj))
		{
			fz_obj *name = fz_dictgetkey(obj, idx);
			fz_obj *item = fz_dictgetval(obj, idx);

			if (!fz_isindirect(item))
			{
				/* Nested container (or scalar): descend */
				renumberobj(item);
			}
			else
			{
				fz_obj *ref = fz_newindirect(renumbermap[fz_tonum(item)], 0, xref);
				fz_dictput(obj, name, ref);
				fz_dropobj(ref);
			}

			idx++;
		}
		return;
	}

	if (!fz_isarray(obj))
		return;

	idx = 0;
	while (idx < fz_arraylen(obj))
	{
		fz_obj *item = fz_arrayget(obj, idx);

		if (!fz_isindirect(item))
		{
			renumberobj(item);
		}
		else
		{
			fz_obj *ref = fz_newindirect(renumbermap[fz_tonum(item)], 0, xref);
			fz_arrayput(obj, idx, ref);
			fz_dropobj(ref);
		}

		idx++;
	}
}
static fz_error parseobj(fz_obj **obj, pdf_xref *xref, char **sp, struct vap *v)
{
	fz_error error;
	char buf[32];
	int num, gen, len;
	char *tmp;
	char *s = *sp;

	if (*s == '\0')
		return fz_throw("end of data");

	skipwhite(&s);

	error = fz_okay;

	if (v != nil && *s == '%')
	{
		s ++;

		switch (*s)
		{
		case 'o': *obj = fz_keepobj(va_arg(v->ap, fz_obj*)); break;
		case 'b': error = fz_newbool(obj, va_arg(v->ap, int)); break;
		case 'i': error = fz_newint(obj, va_arg(v->ap, int)); break;
		case 'f': error = fz_newreal(obj, (float)va_arg(v->ap, double)); break;
		case 'n': error = fz_newname(obj, va_arg(v->ap, char*)); break;
		case 'r':
			  num = va_arg(v->ap, int);
			  gen = va_arg(v->ap, int);
			  error = fz_newindirect(obj, num, gen, xref);
			  break;
		case 's':
			  tmp = va_arg(v->ap, char*);
			  error = fz_newstring(obj, tmp, strlen(tmp));
			  break;
		case '#':
			  tmp = va_arg(v->ap, char*);
			  len = va_arg(v->ap, int);
			  error = fz_newstring(obj, tmp, len);
			  break;
		default:
			  error = fz_throw("unknown format specifier in packobj: '%c'", *s);
			  break;
		}

		if (error)
			error = fz_rethrow(error, "cannot create object for %% format");

		s ++;
	}
Example #4
0
/*
 * Reduce the document to the pages named on the command line.
 *
 * Each remaining argv entry (from fz_optind onwards) is a comma separated
 * list of page specs: "N", "N-M", "N-" (through the last page) or "-M"
 * (starting from the last page). Ranges given backwards are swapped and the
 * result is clamped to 1..page count.
 *
 * The catalog is replaced by one holding only /Type and /Pages, and the
 * page tree root receives a fresh /Kids array and /Count.
 */
static void retainpages(int argc, char **argv)
{
	fz_error error;
	fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent;

	/* The page lookup helpers used below need the page tree loaded */
	error = pdf_loadpagetree(xref);
	if (error)
		die(fz_rethrow(error, "cannot load page tree"));

	oldroot = fz_dictgets(xref->trailer, "Root");
	pages = fz_dictgets(oldroot, "Pages");

	/* Minimal replacement catalog: nothing that could reference dropped pages */
	root = fz_newdict(2);
	fz_dictputs(root, "Type", fz_dictgets(oldroot, "Type"));
	fz_dictputs(root, "Pages", fz_dictgets(oldroot, "Pages"));
	pdf_updateobject(xref, fz_tonum(oldroot), fz_togen(oldroot), root);
	fz_dropobj(root);

	/* Every kept page is re-parented directly onto the page tree root */
	parent = fz_newindirect(fz_tonum(pages), fz_togen(pages), xref);
	kids = fz_newarray(1);

	for (; fz_optind != argc; fz_optind++)
	{
		char *rest = argv[fz_optind];
		char *range;

		for (range = fz_strsep(&rest, ","); range; range = fz_strsep(&rest, ","))
		{
			char *hyphen = strchr(range, '-');
			int first, last, pg;

			/* A leading '-' means "start at the last page" */
			if (hyphen == range)
				first = pdf_getpagecount(xref);
			else
				first = atoi(range);
			last = first;

			/* A trailing '-' means "through the last page" */
			if (hyphen)
			{
				if (strlen(hyphen) > 1)
					last = atoi(hyphen + 1);
				else
					last = pdf_getpagecount(xref);
			}

			/* Normalise reversed ranges */
			if (first > last)
			{
				int swap = first;
				first = last;
				last = swap;
			}

			/* Clamp to the pages that actually exist */
			if (first < 1)
				first = 1;
			if (last > pdf_getpagecount(xref))
				last = pdf_getpagecount(xref);

			pg = first;
			while (pg <= last)
			{
				fz_obj *pageobj = pdf_getpageobject(xref, pg);
				fz_obj *pageref = pdf_getpageref(xref, pg);

				fz_dictputs(pageobj, "Parent", parent);
				fz_arraypush(kids, pageref);

				pg++;
			}
		}
	}

	fz_dropobj(parent);

	/* Install the new page count and kids list on the page tree root */
	countobj = fz_newint(fz_arraylen(kids));
	fz_dictputs(pages, "Count", countobj);
	fz_dropobj(countobj);

	fz_dictputs(pages, "Kids", kids);
	fz_dropobj(kids);
}
Example #5
0
//
// Copy the pages of inFile into outFile.
//
// Every source page is run through processPage, which fills up to three
// bounding rectangles; each non-empty rectangle becomes one output page
// (a duplicate of the source page dictionary with its own media box and
// rotation). The duplicates are then transplanted into the output xref,
// a new page tree, catalog, info dictionary and trailer are built, and the
// output file is saved.
//
// Returns 0 on success, otherwise the value returned by soPdfError for the
// first error encountered.
//
// Ownership notes: the local reference to an object is released only after
// its last use in this function; objects handed to pdf_updateobject,
// fz_arraypush or fz_dictputs are expected to be retained by the callee
// (this is how the rest of this file uses those calls).
//
int
copyPdfFile(
    soPdfFile* inFile,
    soPdfFile* outFile
    )
{
    fz_error    *error;
    int         pageTreeNum, pageTreeGen;

    assert(inFile != NULL);
    assert(outFile != NULL);

    //
    // Process every page in the source file
    //
    {
        printf("\nProcessing input page : ");
        for (int pageNo = 0; pageNo < pdf_getpagecount(inFile->pageTree); pageNo++)
        {
            displayPageNumber(pageNo + 1, !pageNo);

            // Get the page object from the source
            fz_obj  *pageObj = pdf_getpageobject(inFile->pageTree, pageNo);

            //
            // Process the page. Each page can be split into up-to 3 pages
            //
            fz_rect    bbRect[3];
            error = processPage(inFile, pageNo, bbRect, 3);
            if (error)
                return soPdfError(error);


            for (int ctr = 0; ctr < 3; ctr++)
            {
                // An empty rectangle marks the end of the pieces for this page
                if (fz_isemptyrect(bbRect[ctr]))
                    break;

                //
                // Copy the source page dictionary entry. A duplicate of the
                // page dict is stored in the SOURCE file under a fresh object
                // number, and an indirect reference to that duplicate is
                // queued for the destination file.
                //
                // This detour exists because the copy is done by
                // pdf_transplant, which takes a source and a destination and
                // deep copies whatever the queued references point at.
                //

                // Allocate an object id and generation id in the source file.
                //
                // There is a bug in mupdf where the object allocation returns
                // 0 oid and 0 gid when the input pdf file has an xref stream,
                // so the allocation is retried up to 10 times.
                //
                int sNum, sGen, tries;

                for (tries = 0; tries < 10; tries++)
                {
                    error = pdf_allocobject(inFile->xref, &sNum, &sGen);
                    if (error)
                        return soPdfError(error);

                    // If sNum is non zero then the allocation was successful
                    if (sNum != 0)
                        break;
                    pdf_updateobject(inFile->xref, sNum, sGen, pageObj);
                }

                // If we didn't succeed even after 10 tries then this file
                // is not going to work.
                if (tries >= 10)
                    return soPdfError(fz_throw("cannot allocate object because of mupdf bug"));

                // make a deep copy of the original page dict
                fz_obj  *pageObj2;
                error = fz_deepcopydict(&pageObj2, pageObj);
                if (error)
                    return soPdfError(error);

                // Register the duplicate in the source file. Our own
                // reference is kept until the duplicate has been fully
                // edited below.
                pdf_updateobject(inFile->xref, sNum, sGen, pageObj2);

                // create an indirect reference to the page object
                fz_obj  *pageRef2;
                error = fz_newindirect(&pageRef2, sNum, sGen);
                if (error)
                {
                    fz_dropobj(pageObj2);
                    return soPdfError(error);
                }

                // delete the parent dictionary entry
                // Do we need to delete any other dictionary entry
                // like annot, tabs, metadata, etc
                fz_dictdels(pageObj2, "Parent");

                // Set the media box
                setPageMediaBox(inFile->xref, pageObj2, bbRect[ctr]);

                // Set the rotation based on input
                switch(p_mode)
                {
                    // no rotation if fit height
                case FitHeight:
                case Fit2xHeight:
                    break;

                    // rotate -90 deg if fit width
                case Fit2xWidth:
                case FitWidth:
                    setPageRotate(pageObj2, p_reverseLandscape ? 90 : -90);
                    break;

                case SmartFitHeight:
                case SmartFitWidth:
                default:
                    fz_dropobj(pageRef2);
                    fz_dropobj(pageObj2);
                    return soPdfError(fz_throw("Mode(%d) not yet implemented.", p_mode));
                }

                // Done editing the duplicate: release our reference
                fz_dropobj(pageObj2);

                // push the indirect reference to the destination list for copy by pdf_transplant
                error = fz_arraypush(outFile->editobjs, pageRef2);

                // The list holds its own reference on success; ours is no
                // longer needed either way.
                fz_dropobj(pageRef2);
                if (error)
                    return soPdfError(error);
            }
        }
    }

    // flush the objects into destination from source
    {
        fz_obj      *results;
        int         outPages;

        printf("\nCopying output page : ");
        error = pdf_transplant(outFile->xref, inFile->xref, &results, outFile->editobjs);
        if (error)
            return soPdfError(error);

        // Append the transplanted page references, in reverse order when
        // p_reverseLandscape is set
        outPages = fz_arraylen(results);
        for (int ctr = 0; ctr < outPages; ctr++)
        {
            displayPageNumber(ctr + 1, !ctr);
            error = fz_arraypush(outFile->pagelist, fz_arrayget(results,
                p_reverseLandscape ? outPages - 1 - ctr : ctr));
            if (error)
            {
                fz_dropobj(results);
                return soPdfError(error);
            }
        }

        fz_dropobj(results);
    }

    // flush page tree

    // Create page tree and add back-links
    {
        fz_obj  *pageTreeObj;
        fz_obj  *pageTreeRef;

        // allocate a new object in out file for pageTree object
        error = pdf_allocobject(outFile->xref, &pageTreeNum, &pageTreeGen);
        if (error)
            return soPdfError(error);

        // Create a page tree object
        error = fz_packobj(&pageTreeObj, "<</Type/Pages/Count %i/Kids %o>>",
            fz_arraylen(outFile->pagelist), outFile->pagelist);
        if (error)
            return soPdfError(error);

        // Update the xref entry with the pageTree object
        pdf_updateobject(outFile->xref, pageTreeNum, pageTreeGen, pageTreeObj);

        fz_dropobj(pageTreeObj);

        // Create a reference to the pageTree object
        error = fz_newindirect(&pageTreeRef, pageTreeNum, pageTreeGen);
        if (error)
            return soPdfError(error);

        //
        // For every page in the output file, update the parent entry
        //
        for (int ctr = 0; ctr < fz_arraylen(outFile->pagelist); ctr++)
        {
            fz_obj  *pageObj;

            int num = fz_tonum(fz_arrayget(outFile->pagelist, ctr));
            int gen = fz_togen(fz_arrayget(outFile->pagelist, ctr));

            // Get the page object from xref
            error = pdf_loadobject(&pageObj, outFile->xref, num, gen);
            if (error)
            {
                fz_dropobj(pageTreeRef);
                return soPdfError(error);
            }

            // Update the parent entry in the page dictionary
            error = fz_dictputs(pageObj, "Parent", pageTreeRef);
            if (error)
            {
                fz_dropobj(pageObj);
                fz_dropobj(pageTreeRef);
                return soPdfError(error);
            }

            // Update the entry with the updated page object
            pdf_updateobject(outFile->xref, num, gen, pageObj);

            fz_dropobj(pageObj);
        }

        // Every page now carries its own reference to the page tree
        fz_dropobj(pageTreeRef);
    }

    // Create catalog and root entries
    {
        fz_obj  *catObj, *infoObj;
        int     rootNum, rootGen;
        int     infoNum, infoGen;

        //
        // Copy the info dictionary to the destination

        // alloc an object id and gen id in destination file
        error = pdf_allocobject(outFile->xref, &infoNum, &infoGen);
        if (error)
            return soPdfError(error);

        // make a deep copy of the source info dict
        error = fz_deepcopydict(&infoObj, inFile->xref->info);
        if (error)
            return soPdfError(error);

        // update the dest file with object
        pdf_updateobject(outFile->xref, infoNum, infoGen, infoObj);

        // NOTE(review): xref->info is assigned without taking an extra
        // reference; it stays valid only while the xref entry keeps the
        // object alive. Confirm against how the xref releases 'info'.
        outFile->xref->info = infoObj;

        fz_dropobj(infoObj);

        //
        // root/catalog object creation
        error = pdf_allocobject(outFile->xref, &rootNum, &rootGen);
        if (error)
            return soPdfError(error);

        error = fz_packobj(&catObj, "<</Type/Catalog /Pages %r>>", pageTreeNum, pageTreeGen);
        if (error)
            return soPdfError(error);

        pdf_updateobject(outFile->xref, rootNum, rootGen, catObj);

        fz_dropobj(catObj);

        // Create trailer
        error = fz_packobj(&outFile->xref->trailer, "<</Root %r /Info %r>>",
            rootNum, rootGen, infoNum, infoGen);
        if (error)
            return soPdfError(error);

    }

    // Update the info in the target file and save the xref
    printf("\nSaving.\n");
    error = setPageInfo(inFile, outFile);
    if (error)
        return soPdfError(error);

    error = pdf_savexref(outFile->xref, outFile->fileName, NULL);
    if (error)
        return soPdfError(error);

    // Report the errors that were collected in g_errorList along the way,
    // most recent first
    if (g_errorCount != 0)
    {
        printf("\nFollowing issues encounted were ignored.\n\n");
        for (int ctr = g_errorCount - 1; ctr >= 0; ctr--)
            soPdfError(g_errorList[ctr]);
    }
    printf("\nSaved.\n");

    return 0;
}
/*
 * Parse one indirect object definition of the form
 *     <num> <gen> obj <value> ( endobj | stream )
 * from 'file', using 'buf' (capacity 'cap') as the lexer scratch buffer.
 *
 * On success returns fz_okay and stores:
 *   *op       the parsed value object
 *   *onum     the object number   (only if onum is non-null)
 *   *ogen     the generation      (only if ogen is non-null)
 *   *ostmofs  the file position right after the 'stream' keyword and its
 *             line ending, or 0 when no stream follows (only if non-null)
 *
 * On failure returns an error and leaves the output parameters untouched.
 * The error messages print num/gen as parsed so far (0 until they are read).
 */
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0, stmofs;
	pdf_token_e tok;
	int len;
	int a, b;

	/* Header: object number */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	/* Header: generation number */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	/* Header: 'obj' keyword */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	/* First token of the value decides how the rest is parsed */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TNAME: obj = fz_newname(buf); break;
	case PDF_TREAL: obj = fz_newreal(atof(buf)); break;
	case PDF_TSTRING: obj = fz_newstring(buf, len); break;
	case PDF_TTRUE: obj = fz_newbool(1); break;
	case PDF_TFALSE: obj = fz_newbool(0); break;
	case PDF_TNULL: obj = fz_newnull(); break;

	case PDF_TINT:
		/*
		 * An integer may be a plain value or the start of "a b R".
		 * Look ahead one token to tell the two apart.
		 */
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			/* Plain integer; the terminator token is already in 'tok' */
			obj = fz_newint(a);
			goto skip;
		}
		if (tok == PDF_TINT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TR)
			{
				obj = fz_newindirect(a, b, xref);
				break;
			}
		}
		/* Integer followed by anything else is malformed */
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	case PDF_TENDOBJ:
		/* "num gen obj endobj" with no value is treated as null */
		obj = fz_newnull();
		goto skip;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	/* Read the terminator (expected: 'stream' or 'endobj') */
	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_dropobj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TSTREAM)
	{
		/*
		 * Consume spaces and the line ending after 'stream' so that
		 * stmofs lands on the byte following it. A lone CR without LF
		 * only produces a warning.
		 */
		int c = fz_readbyte(file);
		while (c == ' ')
			c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
	{
		stmofs = 0;
	}
	else
	{
		/* Tolerated: a missing terminator is reported but not fatal */
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
		stmofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return fz_okay;
}
/*
 * Parse the body of a dictionary (the opening "<<" has already been
 * consumed) from 'file', using 'buf' (capacity 'cap') as the lexer scratch
 * buffer.
 *
 * Parsing stops at the closing ">>" or at an "ID" keyword (inline image
 * dictionaries in content streams). On success the new dictionary is stored
 * in *op and fz_okay is returned. On failure an error is returned, *op is
 * not written, and every object created along the way (the dictionary and
 * any pending key) is released.
 */
fz_error
pdf_parsedict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	pdf_token_e tok;
	int len;
	int a, b;

	dict = fz_newdict(8);

	while (1)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		/* Re-entry point when the integer lookahead already read the next token */
skip:
		if (tok == PDF_TCDICT)
		{
			*op = dict;
			return fz_okay;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return fz_okay;
		}

		if (tok != PDF_TNAME)
		{
			fz_dropobj(dict);
			return fz_throw("invalid key in dict");
		}

		key = fz_newname(buf);

		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			/* The key was already created: release it with the dict */
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_rethrow(error, "cannot parse dict");
		}

		switch (tok)
		{
		case PDF_TOARRAY:
			error = pdf_parsearray(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&val, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			break;

		case PDF_TNAME: val = fz_newname(buf); break;
		case PDF_TREAL: val = fz_newreal(atof(buf)); break;
		case PDF_TSTRING: val = fz_newstring(buf, len); break;
		case PDF_TTRUE: val = fz_newbool(1); break;
		case PDF_TFALSE: val = fz_newbool(0); break;
		case PDF_TNULL: val = fz_newnull(); break;

		case PDF_TINT:
			/* 64-bit to allow for numbers > INT_MAX and overflow */
			a = (int) strtoll(buf, 0, 10);

			/*
			 * An integer may be a plain value or the start of "a b R".
			 * Look ahead one token to decide.
			 */
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				fz_dropobj(key);
				fz_dropobj(dict);
				return fz_rethrow(error, "cannot parse dict");
			}
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				/*
				 * Plain integer. The lookahead token belongs to the
				 * next entry (or ends the dict), so store this pair
				 * and re-dispatch on 'tok' without lexing again.
				 */
				val = fz_newint(a);
				fz_dictput(dict, key, val);
				fz_dropobj(val);
				fz_dropobj(key);
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				error = pdf_lex(&tok, file, buf, cap, &len);
				if (error)
				{
					fz_dropobj(key);
					fz_dropobj(dict);
					return fz_rethrow(error, "cannot parse dict");
				}
				if (tok == PDF_TR)
				{
					val = fz_newindirect(a, b, xref);
					break;
				}
			}
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_throw("invalid indirect reference in dict");

		default:
			/* Unsupported value token: release what was built so far */
			fz_dropobj(key);
			fz_dropobj(dict);
			return fz_throw("unknown token in dict");
		}

		/* The local references are released once the pair is stored */
		fz_dictput(dict, key, val);
		fz_dropobj(val);
		fz_dropobj(key);
	}
}
/*
 * Parse the body of an array (the opening "[" has already been consumed)
 * from 'file', using 'buf' (capacity 'cap') as the lexer scratch buffer.
 *
 * Integers are held back (at most two at a time) because "a b R" can only
 * be recognised once the 'R' token arrives; held integers are flushed into
 * the array as soon as it is clear they are not part of a reference.
 *
 * On success stores the array in *op and returns fz_okay. On failure the
 * partially built array is released and an error is returned.
 */
fz_error
pdf_parsearray(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
{
	fz_error error = fz_okay;
	fz_obj *ary = nil;
	fz_obj *item = nil;
	int first = 0, second = 0;
	int held = 0;	/* number of integers currently held back (0..2) */
	pdf_token_e tok;
	int len;

	ary = fz_newarray(4);

	for (;;)
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			fz_dropobj(ary);
			return fz_rethrow(error, "cannot parse array");
		}

		/* Anything other than an integer or 'R' ends a possible reference */
		if (!(tok == PDF_TINT || tok == PDF_TR))
		{
			if (held > 0)
			{
				item = fz_newint(first);
				fz_arraypush(ary, item);
				fz_dropobj(item);
			}
			if (held > 1)
			{
				item = fz_newint(second);
				fz_arraypush(ary, item);
				fz_dropobj(item);
			}
			held = 0;
		}

		/* A third integer in a row: the oldest one is a plain value */
		if (tok == PDF_TINT && held == 2)
		{
			item = fz_newint(first);
			fz_arraypush(ary, item);
			fz_dropobj(item);
			first = second;
			held--;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			*op = ary;
			return fz_okay;

		case PDF_TINT:
			/* Hold the integer back; nothing is pushed yet */
			if (held == 0)
				first = atoi(buf);
			else if (held == 1)
				second = atoi(buf);
			held++;
			continue;

		case PDF_TR:
			if (held != 2)
			{
				fz_dropobj(ary);
				return fz_throw("cannot parse indirect reference in array");
			}
			item = fz_newindirect(first, second, xref);
			held = 0;
			break;

		case PDF_TOARRAY:
			error = pdf_parsearray(&item, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			break;

		case PDF_TODICT:
			error = pdf_parsedict(&item, xref, file, buf, cap);
			if (error)
			{
				fz_dropobj(ary);
				return fz_rethrow(error, "cannot parse array");
			}
			break;

		case PDF_TNAME:
			item = fz_newname(buf);
			break;

		case PDF_TREAL:
			item = fz_newreal(atof(buf));
			break;

		case PDF_TSTRING:
			item = fz_newstring(buf, len);
			break;

		case PDF_TTRUE:
			item = fz_newbool(1);
			break;

		case PDF_TFALSE:
			item = fz_newbool(0);
			break;

		case PDF_TNULL:
			item = fz_newnull();
			break;

		default:
			fz_dropobj(ary);
			return fz_throw("cannot parse token in array");
		}

		/* Every case that falls out of the switch produced one element */
		fz_arraypush(ary, item);
		fz_dropobj(item);
	}
}
/*
 * Load the image stored as object (num, gen) and write it to
 * "img-NNNN.pnm" in the current directory as a binary PPM (P6).
 *
 * 1-bit images with zero colour components are expanded to four-component
 * pixels by replicating the single sample; images whose colourspace name is
 * not "DeviceRGB" are converted to pdf_devicergb first. Any failure
 * terminates through die(). The 'obj' argument is not used here.
 */
static void saveimage(fz_obj *obj, int num, int gen)
{
    char filename[1024];
    pdf_image *image = nil;
    fz_pixmap *pixmap;
    fz_obj *imgref;
    fz_error error;
    FILE *out;
    int width, height;
    int ncomp, bits;
    int row, col;

    error = fz_newindirect(&imgref, num, gen, xref);
    if (error)
        die(error);

    /* A store is created for the duration of this call and dropped at the end */
    error = pdf_newstore(&xref->store);
    if (error)
        die(error);

    error = pdf_loadimage(&image, xref, imgref);
    if (error)
        die(error);

    ncomp = image->super.n;
    width = image->super.w;
    height = image->super.h;
    bits = image->bpc;

    /* One extra component per pixel on top of the image's own */
    error = fz_newpixmap(&pixmap, 0, 0, width, height, ncomp + 1);
    if (error)
        die(error);

    error = image->super.loadtile(&image->super, pixmap);
    if (error)
        die(error);

    if (bits == 1 && ncomp == 0)
    {
        /* Single-sample pixels: replicate the sample into components 1..3 */
        fz_pixmap *expanded;

        error = fz_newpixmap(&expanded, pixmap->x, pixmap->y, pixmap->w, pixmap->h, pdf_devicergb->n + 1);
        if (error)
            die(error);

        for (row = 0; row < pixmap->h; row++)
        {
            for (col = 0; col < pixmap->w; col++)
            {
                int idx = row * pixmap->w + col;
                int base = idx * expanded->n;

                expanded->samples[base + 0] = 255;
                expanded->samples[base + 1] = pixmap->samples[idx];
                expanded->samples[base + 2] = pixmap->samples[idx];
                expanded->samples[base + 3] = pixmap->samples[idx];
            }
        }

        fz_droppixmap(pixmap);
        pixmap = expanded;
    }

    if (image->super.cs && strcmp(image->super.cs->name, "DeviceRGB"))
    {
        /* Any other named colourspace is converted to device RGB */
        fz_pixmap *converted;

        error = fz_newpixmap(&converted, pixmap->x, pixmap->y, pixmap->w, pixmap->h, pdf_devicergb->n + 1);
        if (error)
            die(error);

        fz_convertpixmap(image->super.cs, pixmap, pdf_devicergb, converted);
        fz_droppixmap(pixmap);
        pixmap = converted;
    }

    sprintf(filename, "img-%04d.pnm", num);

    out = fopen(filename, "wb");
    if (out == NULL)
        die(fz_throw("Error creating image file"));

    /* PPM header: magic, dimensions, maximum sample value */
    fprintf(out, "P6\n%d %d\n%d\n", width, height, 255);

    /* Component 0 of each pixel is skipped; components 1..3 are written */
    for (row = 0; row < pixmap->h; row++)
    {
        for (col = 0; col < pixmap->w; col++)
        {
            fz_sample *px = &pixmap->samples[(row * pixmap->w + col) * (pdf_devicergb->n + 1)];
            unsigned char red = px[1];
            unsigned char green = px[2];
            unsigned char blue = px[3];

            fprintf(out, "%c%c%c", red, green, blue);
        }
    }

    if (fclose(out) < 0)
        die(fz_throw("Error closing image file"));

    /* Release everything acquired above */
    fz_droppixmap(pixmap);

    pdf_dropstore(xref->store);
    xref->store = nil;

    fz_dropimage(&image->super);

    fz_dropobj(imgref);
}
Example #10
0
/*
 * Finish the document being edited: build a /Pages object whose /Kids is
 * editpagelist, point every page's /Parent at it, create a /Catalog that
 * refers to it, and install a trailer that refers to the catalog.
 *
 * Works on the file-level editxref and editpagelist. Any failure terminates
 * through die().
 */
void
editflushcatalog(void)
{
	fz_error *error;
	int rootnum, rootgen;
	int listnum, listgen;
	fz_obj *listref;
	fz_obj *obj;
	int i;

	/* Create page tree and add back-links */

	error = pdf_allocobject(editxref, &listnum, &listgen);
	if (error)
		die(error);

	error = fz_packobj(&obj, "<</Type/Pages/Count %i/Kids %o>>",
			fz_arraylen(editpagelist),
			editpagelist);
	if (error)
		die(error);

	pdf_updateobject(editxref, listnum, listgen, obj);

	fz_dropobj(obj);

	/* Reference to the page tree, shared as /Parent by every page */
	error = fz_newindirect(&listref, listnum, listgen);
	if (error)
		die(error);

	for (i = 0; i < fz_arraylen(editpagelist); i++)
	{
		int num = fz_tonum(fz_arrayget(editpagelist, i));
		int gen = fz_togen(fz_arrayget(editpagelist, i));

		error = pdf_loadobject(&obj, editxref, num, gen);
		if (error)
			die(error);

		error = fz_dictputs(obj, "Parent", listref);
		if (error)
			die(error);

		pdf_updateobject(editxref, num, gen, obj);

		fz_dropobj(obj);
	}

	/*
	 * Release the local reference now that the pages have been updated;
	 * it was previously never dropped.
	 */
	fz_dropobj(listref);

	/* Create catalog */

	error = pdf_allocobject(editxref, &rootnum, &rootgen);
	if (error)
		die(error);

	error = fz_packobj(&obj, "<</Type/Catalog/Pages %r>>", listnum, listgen);
	if (error)
		die(error);

	pdf_updateobject(editxref, rootnum, rootgen, obj);

	fz_dropobj(obj);

	/* Create trailer */

	error = fz_packobj(&editxref->trailer, "<</Root %r>>", rootnum, rootgen);
	if (error)
		die(error);
}
Example #11
0
/*
 * Parse one indirect object definition of the form
 *     <oid> <gid> obj <value> ( endobj | stream )
 * from 'file', using 'buf' (capacity 'cap') as the lexer scratch buffer.
 *
 * On success returns nil and stores:
 *   *op       the parsed value object
 *   *ooid     the object number   (only if ooid is non-null)
 *   *ogid     the generation      (only if ogid is non-null)
 *   *ostmofs  the file position right after the 'stream' keyword and its
 *             line ending, or 0 when the object ends with 'endobj'
 *             (only if ostmofs is non-null)
 *
 * On failure any object created so far is dropped and an error is returned:
 * either the error from a sub-parser/constructor, or a generic
 * "corrupt indirect object" syntax error.
 */
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap,
		int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0, stmofs;
	int tok, len;
	int a, b;

	/* Header: "<oid> <gid> obj" */
	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto cleanup;

	/* First token of the value decides how the rest is parsed */
	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		case PDF_TINT:
			/*
			 * An integer may be a plain value or the start of "a b R".
			 * Look ahead one token to tell the two apart.
			 */
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
			{
				/* Plain integer; the terminator is already in 'tok' */
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&obj, a, b);
					break;
				}
			}
			/* Integer followed by anything else is malformed */
			goto cleanup;
		default:
			goto cleanup;
	}
	if (error) goto cleanup;

	/* Read the terminator: 'stream' or 'endobj' */
	tok = pdf_lex(file, buf, cap, &len);

skip:
	if (tok == PDF_TSTREAM)
	{
		/*
		 * Consume the line ending after 'stream' so that stmofs lands on
		 * the byte following it. A CR not followed by LF only warns.
		 */
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
			else
				c = fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
		stmofs = 0;
	else
		goto cleanup;

	if (ooid) *ooid = oid;
	if (ogid) *ogid = gid;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return nil;

cleanup:
	/* Prefer the specific error if one was recorded */
	if (obj) fz_dropobj(obj);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}
Example #12
0
/*
 * Parse the body of a dictionary (the opening "<<" has already been
 * consumed) from 'file', using 'buf' (capacity 'cap') as the lexer scratch
 * buffer.
 *
 * Parsing stops at the closing ">>" or at an "ID" keyword (inline image
 * dictionaries in content streams).
 *
 * On success returns nil and stores the new dictionary in *op.
 * On failure returns an error, releases everything built so far and leaves
 * *op set to nil, so a caller that drops a non-nil *op in its own cleanup
 * path does not release the dictionary a second time.
 */
fz_error *
pdf_parsedict(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *dict = nil;
	fz_obj *key = nil;
	fz_obj *val = nil;
	int tok, len;
	int a, b;

	/* The result is published only once parsing has succeeded */
	*op = nil;

	error = fz_newdict(&dict, 8);
	if (error) return error;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

		/* Re-entry point when the integer lookahead already read the next token */
skip:
		if (tok == PDF_TCDICT)
		{
			*op = dict;
			return nil;
		}

		/* for BI .. ID .. EI in content streams */
		if (tok == PDF_TKEYWORD && !strcmp(buf, "ID"))
		{
			*op = dict;
			return nil;
		}

		if (tok != PDF_TNAME)
			goto cleanup;

		error = fz_newname(&key, buf);
		if (error) goto cleanup;

		tok = pdf_lex(file, buf, cap, &len);

		switch (tok)
		{
		case PDF_TOARRAY:	error = pdf_parsearray(&val, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&val, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&val, buf); break;
		case PDF_TREAL:		error = fz_newreal(&val, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&val, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&val, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&val, 0); break;
		case PDF_TNULL:		error = fz_newnull(&val); break;
		case PDF_TINT:
			/*
			 * An integer may be a plain value or the start of "a b R".
			 * Look ahead one token to decide.
			 */
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TCDICT || tok == PDF_TNAME ||
				(tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
			{
				/*
				 * Plain integer. The lookahead token belongs to the
				 * next entry (or ends the dict), so store this pair
				 * and re-dispatch on 'tok' without lexing again.
				 */
				error = fz_newint(&val, a);
				if (error) goto cleanup;
				error = fz_dictput(dict, key, val);
				if (error) goto cleanup;
				fz_dropobj(val);
				fz_dropobj(key);
				key = val = nil;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&val, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
		}

		if (error) goto cleanup;

		error = fz_dictput(dict, key, val);
		if (error) goto cleanup;

		/* The local references are released once the pair is stored */
		fz_dropobj(val);
		fz_dropobj(key);
		key = val = nil;
	}

cleanup:
	if (key) fz_dropobj(key);
	if (val) fz_dropobj(val);
	if (dict) fz_dropobj(dict);
	*op = nil;
	if (error) return error;
	return fz_throw("syntaxerror: corrupt dictionary");
}
Example #13
0
/*
 * Parse the body of an array (the opening "[" has already been consumed)
 * from 'file', using 'buf' (capacity 'cap') as the lexer scratch buffer.
 *
 * Integers are held back (at most two at a time) because "a b R" can only
 * be recognised once the 'R' token arrives; held integers are flushed into
 * the array as soon as it is clear they are not part of a reference.
 *
 * On success returns nil and stores the new array in *op.
 * On failure returns an error, releases everything built so far and leaves
 * *op set to nil, so a caller that drops a non-nil *op in its own cleanup
 * path does not release the array a second time.
 */
fz_error *
pdf_parsearray(fz_obj **op, fz_stream *file, char *buf, int cap)
{
	fz_error *error = nil;
	fz_obj *ary = nil;
	fz_obj *obj = nil;
	int a = 0, b = 0, n = 0;	/* n = number of integers held back (0..2) */
	int tok, len;

	/* The result is published only once parsing has succeeded */
	*op = nil;

	error = fz_newarray(&ary, 4);
	if (error) return error;

	while (1)
	{
		tok = pdf_lex(file, buf, cap, &len);

		/* Anything other than an integer or 'R' ends a possible reference */
		if (tok != PDF_TINT && tok != PDF_TR)
		{
			if (n > 0)
			{
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error) goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			if (n > 1)
			{
				error = fz_newint(&obj, b);
				if (error) goto cleanup;
				error = fz_arraypush(ary, obj);
				if (error) goto cleanup;
				fz_dropobj(obj);
				obj = nil;
			}
			n = 0;
		}

		/* A third integer in a row: the oldest one is a plain value */
		if (tok == PDF_TINT && n == 2)
		{
			error = fz_newint(&obj, a);
			if (error) goto cleanup;
			error = fz_arraypush(ary, obj);
			if (error) goto cleanup;
			fz_dropobj(obj);
			obj = nil;
			a = b;
			n --;
		}

		switch (tok)
		{
		case PDF_TCARRAY:
			*op = ary;
			return nil;
		case PDF_TINT:
			/* Hold the integer back; obj stays nil so nothing is pushed */
			if (n == 0)
				a = atoi(buf);
			if (n == 1)
				b = atoi(buf);
			n ++;
			break;
		case PDF_TR:
			if (n != 2)
				goto cleanup;
			error = fz_newindirect(&obj, a, b);
			if (error) goto cleanup;
			n = 0;
			break;
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		default:		goto cleanup;
		}
		if (error) goto cleanup;

		/* Append whatever element this token produced, if any */
		if (obj)
		{
			error = fz_arraypush(ary, obj);
			if (error) goto cleanup;
			fz_dropobj(obj);
		}

		obj = nil;
	}

cleanup:
	if (obj) fz_dropobj(obj);
	if (ary) fz_dropobj(ary);
	*op = nil;
	if (error) return error;
	return fz_throw("syntaxerror: corrupt array");
}
Example #14
0
/*
 * Rebuild the cross reference table of 'xref' by scanning the whole file.
 *
 * 'buf' (of 'bufsize' bytes) is used as scratch space for reading the file
 * header and for lexing.
 *
 * Steps:
 *  1. Look for a "%PDF" marker in the first kilobyte and start lexing there
 *     (from offset 0 if no marker is found).
 *  2. Lex the file token by token, remembering the last two integers and
 *     their offsets. On every 'obj' keyword, let fz_repairobj() consume the
 *     object and record its number, generation, offset and stream info.
 *     Every dictionary met at this level is searched for the Encrypt, ID,
 *     Root and Info keys; the most recent value of each is kept.
 *  3. Resize the xref table to hold the highest object number seen, fill in
 *     the recorded entries, and rewrite the Length of streams for which
 *     fz_repairobj() reported a non-negative length.
 *  4. Mark entry 0 as free, chain the free entries together, and build a
 *     fresh trailer dictionary from what was collected.
 *
 * Returns fz_okay on success, or a rethrown error on failure.
 */
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/* look for '%PDF' version marker within first kilobyte of file; */
	/* never ask for more bytes than the scratch buffer can hold */
	n = fz_read(xref->file, (unsigned char *)buf, bufsize < 1024 ? bufsize : 1024);
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	/* i + 4 <= n so that a marker ending exactly at the last byte read is found */
	for (i = 0; i + 4 <= n; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		/* offset of the token about to be read */
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		/* keep a sliding window of the last two integers: when 'obj'
		 * shows up they are the object number and the generation */
		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			if (num < 0)
			{
				/* num is later used as an index into xref->table; a
				 * negative value would write before the table, so the
				 * object is consumed above but not recorded */
				pdf_logxref("ignoring object with invalid number: (%d %d R)\n", num, gen);
			}
			else
			{
				pdf_logxref("found object: (%d %d R)\n", num, gen);

				if (listlen + 1 == listcap)
				{
					listcap = (listcap * 3) / 2;
					list = fz_realloc(list, listcap, sizeof(struct entry));
				}

				list[listlen].num = num;
				list[listlen].gen = gen;
				list[listlen].ofs = numofs;
				list[listlen].stmofs = stmofs;
				list[listlen].stmlen = stmlen;
				listlen ++;

				if (num > maxnum)
					maxnum = num;
			}
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			/* for each key of interest the latest occurrence replaces
			 * the one remembered so far */
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		/* step over one byte after a lexer error token before lexing again */
		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}

	}

	/* object 0 is always the head of the free list */
	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	/* walk backwards so that each free entry's ofs field ends up holding
	 * the index of the next free entry after it (0 for the last one) */
	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer; Root, Info, Encrypt and ID are only
	 * added when a value for them was found during the scan */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}