Ejemplo n.º 1
0
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	stm->avail = 0; /* Reset bit reading */
	if (stm->seek)
	{
		if (whence == 1)
		{
			offset = fz_tell(stm) + offset;
			whence = 0;
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
	}
	else if (whence != 2)
	{
		if (whence == 0)
			offset -= fz_tell(stm);
		if (offset < 0)
			fz_warn(stm->ctx, "cannot seek backwards");
		/* dog slow, but rare enough */
		while (offset-- > 0)
		{
			if (fz_read_byte(stm) == EOF)
			{
				fz_warn(stm->ctx, "seek failed");
				break;
			}
		}
	}
	else
		fz_warn(stm->ctx, "cannot seek");
}
static fz_error
pdf_readstartxref(pdf_xref *xref)
{
    unsigned char buf[1024];
    int t, n;
    int i;

    fz_seek(xref->file, 0, 2);

    xref->filesize = fz_tell(xref->file);

    t = MAX(0, xref->filesize - (int)sizeof buf);
    fz_seek(xref->file, t, 0);

    n = fz_read(xref->file, buf, sizeof buf);
    if (n < 0)
        return fz_rethrow(n, "cannot read from file");

    for (i = n - 9; i >= 0; i--)
    {
        if (memcmp(buf + i, "startxref", 9) == 0)
        {
            i += 9;
            while (iswhite(buf[i]) && i < n)
                i ++;
            xref->startxref = atoi((char*)(buf + i));
            pdf_logxref("startxref %d\n", xref->startxref);
            return fz_okay;
        }
    }

    return fz_throw("cannot find startxref");
}
Ejemplo n.º 3
0
static int
xps_find_and_read_zip_dir(xps_context *ctx)
{
	unsigned char buf[512];
	int file_size, back, maxback;
	int i, n;

	fz_seek(ctx->file, 0, SEEK_END);
	file_size = fz_tell(ctx->file);

	maxback = MIN(file_size, 0xFFFF + sizeof buf);
	back = MIN(maxback, sizeof buf);

	while (back < maxback)
	{
		fz_seek(ctx->file, file_size - back, 0);

		n = fz_read(ctx->file, buf, sizeof buf);
		if (n < 0)
			return fz_error_make(ctx->ctx, "cannot read end of central directory");

		for (i = n - 4; i > 0; i--)
			if (!memcmp(buf + i, "PK\5\6", 4))
				return xps_read_zip_dir(ctx, file_size - back + i);

		back += sizeof buf - 4;
	}

	return fz_error_make(ctx->ctx, "cannot find end of central directory");
}
Ejemplo n.º 4
0
static fz_error
readstartxref(pdf_xref *xref)
{
	fz_error error;
	unsigned char buf[1024];
	int t, n;
	int i;

	error = fz_seek(xref->file, 0, 2);
	if (error)
		return fz_rethrow(error, "cannot seek to end of file");

	t = MAX(0, fz_tell(xref->file) - ((int)sizeof buf));
	error = fz_seek(xref->file, t, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to offset %d", t);

	error = fz_read(&n, xref->file, buf, sizeof buf);
	if (error)
		return fz_rethrow(error, "cannot read from file");

	for (i = n - 9; i >= 0; i--)
	{
		if (memcmp(buf + i, "startxref", 9) == 0)
		{
			i += 9;
			while (iswhite(buf[i]) && i < n)
				i ++;
			xref->startxref = atoi((char*)(buf + i));
			return fz_okay;
		}
	}

	return fz_throw("cannot find startxref");
}
static fz_error
pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
    fz_error error;
    int len;
    char *s;
    int n;
    int t;
    pdf_token_e tok;
    int c;

    pdf_logxref("load old xref format trailer\n");

    fz_readline(xref->file, buf, cap);
    if (strncmp(buf, "xref", 4) != 0)
        return fz_throw("cannot find xref marker");

    while (1)
    {
        c = fz_peekbyte(xref->file);
        if (!(c >= '0' && c <= '9'))
            break;

        fz_readline(xref->file, buf, cap);
        s = buf;
        fz_strsep(&s, " "); /* ignore ofs */
        if (!s)
            return fz_throw("invalid range marker in xref");
        len = atoi(fz_strsep(&s, " "));

        /* broken pdfs where the section is not on a separate line */
        if (s && *s != '\0')
            fz_seek(xref->file, -(2 + (int)strlen(s)), 1);

        t = fz_tell(xref->file);
        if (t < 0)
            return fz_throw("cannot tell in file");

        fz_seek(xref->file, t + 20 * len, 0);
    }

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TTRAILER)
        return fz_throw("expected trailer marker");

    error = pdf_lex(&tok, xref->file, buf, cap, &n);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    if (tok != PDF_TODICT)
        return fz_throw("expected trailer dictionary");

    error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap);
    if (error)
        return fz_rethrow(error, "cannot parse trailer");
    return fz_okay;
}
Ejemplo n.º 6
0
/* SumatraPDF: allow to clone a stream */
fz_stream *
fz_clone_stream(fz_context *ctx, fz_stream *stm)
{
	fz_stream *clone;
	if (!stm->reopen)
		fz_throw(ctx, FZ_ERROR_GENERIC, "can't clone stream without reopening");
	clone = stm->reopen(ctx, stm);
	fz_seek(clone, fz_tell(stm), 0);
	return clone;
}
Ejemplo n.º 7
0
void
fz_seek(fz_stream *stm, int offset, int whence)
{
	if (stm->seek)
	{
		if (whence == 1)
		{
			offset = fz_tell(stm) + offset;
			whence = 0;
		}
		if (whence == 0)
		{
			int dist = stm->pos - offset;
			if (dist >= 0 && dist <= stm->wp - stm->bp)
			{
				stm->rp = stm->wp - dist;
				stm->eof = 0;
				return;
			}
		}
		stm->seek(stm, offset, whence);
		stm->eof = 0;
	}
	else if (whence != 2)
	{
		if (whence == 0)
			offset -= fz_tell(stm);
		if (offset < 0)
			printf("cannot seek backwards\n");
		/* dog slow, but rare enough */
		while (offset-- > 0)
			fz_read_byte(stm);
	}
	else
		printf("cannot seek\n");
}
Ejemplo n.º 8
0
/*
 * Construct a filter to decode a stream, without
 * constraining to stream length, and without decryption.
 */
fz_stream *
pdf_open_inline_stream(pdf_document *xref, pdf_obj *stmobj, int length, fz_stream *chain, pdf_image_params *imparams)
{
	pdf_obj *filters;
	pdf_obj *params;

	filters = pdf_dict_getsa(stmobj, "Filter", "F");
	params = pdf_dict_getsa(stmobj, "DecodeParms", "DP");

	/* don't close chain when we close this filter */
	fz_keep_stream(chain);

	if (pdf_is_name(filters))
		return build_filter(chain, xref, filters, params, 0, 0, imparams);
	if (pdf_array_len(filters) > 0)
		return build_filter_chain(chain, xref, filters, params, 0, 0, imparams);

	return fz_open_null(chain, length, fz_tell(chain));
}
Ejemplo n.º 9
0
/*
 * Construct a filter to decode a stream, without
 * constraining to stream length, and without decryption.
 */
fz_stream *
pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, fz_compression_params *imparams)
{
	pdf_obj *filters;
	pdf_obj *params;

	filters = pdf_dict_geta(ctx, stmobj, PDF_NAME_Filter, PDF_NAME_F);
	params = pdf_dict_geta(ctx, stmobj, PDF_NAME_DecodeParms, PDF_NAME_DP);

	/* don't close chain when we close this filter */
	fz_keep_stream(ctx, chain);

	if (pdf_is_name(ctx, filters))
		return build_filter(ctx, chain, doc, filters, params, 0, 0, imparams);
	if (pdf_array_len(ctx, filters) > 0)
		return build_filter_chain(ctx, chain, doc, filters, params, 0, 0, imparams);

	if (imparams)
		imparams->type = FZ_IMAGE_RAW;
	return fz_open_null(ctx, chain, length, fz_tell(ctx, chain));
}
fz_error
pdf_parseindobj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = nil;
	int num = 0, gen = 0, stmofs;
	pdf_token_e tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TINT)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOBJ)
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOARRAY:
		error = pdf_parsearray(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TODICT:
		error = pdf_parsedict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TNAME: obj = fz_newname(buf); break;
	case PDF_TREAL: obj = fz_newreal(atof(buf)); break;
	case PDF_TSTRING: obj = fz_newstring(buf, len); break;
	case PDF_TTRUE: obj = fz_newbool(1); break;
	case PDF_TFALSE: obj = fz_newbool(0); break;
	case PDF_TNULL: obj = fz_newnull(); break;

	case PDF_TINT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
		{
			obj = fz_newint(a);
			goto skip;
		}
		if (tok == PDF_TINT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TR)
			{
				obj = fz_newindirect(a, b, xref);
				break;
			}
		}
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);

	case PDF_TENDOBJ:
		obj = fz_newnull();
		goto skip;

	default:
		return fz_throw("cannot parse indirect object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_dropobj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		while (c == ' ')
			c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
	{
		stmofs = 0;
	}
	else
	{
		fz_warn("expected endobj or stream keyword (%d %d R)", num, gen);
		stmofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return fz_okay;
}
Ejemplo n.º 11
0
fz_error
fz_seek(fz_stream *stm, int offset, int whence)
{
	fz_error error;
	fz_buffer *buf = stm->buffer;
	int t, c;

	if (stm->dead)
		return fz_throw("assert: seek in dead stream");

	if (whence == 1)
	{
		int cur = fz_tell(stm);
		if (cur < 0)
			return fz_throw("cannot tell current position");
		offset = cur + offset;
		whence = 0;
	}

	buf->eof = 0;

	switch (stm->kind)
	{
	case FZ_SFILE:
		t = lseek(stm->file, offset, whence);
		if (t < 0)
		{
			stm->dead = 1;
			return fz_throw("syserr: lseek: %s", strerror(errno));
		}

		buf->rp = buf->bp;
		buf->wp = buf->bp;

		return fz_okay;

	case FZ_SFILTER:
		if (whence == 0)
		{
			if (offset < fz_tell(stm))
			{
				stm->dead = 1;
				return fz_throw("assert: seek backwards in filter");
			}
			while (fz_tell(stm) < offset)
			{
				c = fz_readbyte(stm);
				if (c == EOF)
				{
					error = fz_readerror(stm);
					if (error)
						return fz_rethrow(error, "cannot seek forward in filter");
					break;
				}
			}
			return fz_okay;
		}

		stm->dead = 1;
		return fz_throw("assert: relative seek in filter");

	case FZ_SBUFFER:
		if (whence == 0)
			buf->rp = CLAMP(buf->bp + offset, buf->bp, buf->ep);
		else
			buf->rp = CLAMP(buf->ep + offset, buf->bp, buf->ep);
		return fz_okay;

	default:
		return fz_throw("unknown stream type");
	}
}
Ejemplo n.º 12
0
pdf_obj *
pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
	fz_stream *file, pdf_lexbuf *buf,
	int *onum, int *ogen, fz_off_t *ostmofs, int *try_repair)
{
	pdf_obj *obj = NULL;
	int num = 0, gen = 0;
	fz_off_t stm_ofs;
	pdf_token tok;
	fz_off_t a, b;

	fz_var(obj);

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected object number");
	}
	num = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_INT)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected generation number (%d ? obj)", num);
	}
	gen = buf->i;

	tok = pdf_lex(ctx, file, buf);
	if (tok != PDF_TOK_OBJ)
	{
		if (try_repair)
			*try_repair = 1;
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'obj' keyword (%d %d ?)", num, gen);
	}

	tok = pdf_lex(ctx, file, buf);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		obj = pdf_parse_array(ctx, doc, file, buf);
		break;

	case PDF_TOK_OPEN_DICT:
		obj = pdf_parse_dict(ctx, doc, file, buf);
		break;

	case PDF_TOK_NAME: obj = pdf_new_name(ctx, doc, buf->scratch); break;
	case PDF_TOK_REAL: obj = pdf_new_real(ctx, doc, buf->f); break;
	case PDF_TOK_STRING: obj = pdf_new_string(ctx, doc, buf->scratch, buf->len); break;
	case PDF_TOK_TRUE: obj = pdf_new_bool(ctx, doc, 1); break;
	case PDF_TOK_FALSE: obj = pdf_new_bool(ctx, doc, 0); break;
	case PDF_TOK_NULL: obj = pdf_new_null(ctx, doc); break;

	case PDF_TOK_INT:
		a = buf->i;
		tok = pdf_lex(ctx, file, buf);

		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			obj = pdf_new_int_offset(ctx, doc, a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = buf->i;
			tok = pdf_lex(ctx, file, buf);
			if (tok == PDF_TOK_R)
			{
				obj = pdf_new_indirect(ctx, doc, a, b);
				break;
			}
		}
		fz_throw(ctx, FZ_ERROR_GENERIC, "expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		obj = pdf_new_null(ctx, doc);
		goto skip;

	default:
		fz_throw(ctx, FZ_ERROR_GENERIC, "syntax error in object (%d %d R)", num, gen);
	}

	fz_try(ctx)
	{
		tok = pdf_lex(ctx, file, buf);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, obj);
		fz_rethrow(ctx);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c = fz_read_byte(ctx, file);
		while (c == ' ')
			c = fz_read_byte(ctx, file);
		if (c == '\r')
		{
			c = fz_peek_byte(ctx, file);
			if (c != '\n')
				fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(ctx, file);
		}
		stm_ofs = fz_tell(ctx, file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	return obj;
}
Ejemplo n.º 13
0
fz_error *
pdf_parseindobj(fz_obj **op, fz_stream *file, char *buf, int cap,
		int *ooid, int *ogid, int *ostmofs)
{
	fz_error *error = nil;
	fz_obj *obj = nil;
	int oid = 0, gid = 0, stmofs;
	int tok, len;
	int a, b;

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	oid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TINT)
		goto cleanup;
	gid = atoi(buf);

	tok = pdf_lex(file, buf, cap, &len);
	if (tok != PDF_TOBJ)
		goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);
	switch (tok)
	{
		case PDF_TOARRAY:	error = pdf_parsearray(&obj, file, buf, cap); break;
		case PDF_TODICT:	error = pdf_parsedict(&obj, file, buf, cap); break;
		case PDF_TNAME:		error = fz_newname(&obj, buf); break;
		case PDF_TREAL:		error = fz_newreal(&obj, atof(buf)); break;
		case PDF_TSTRING:	error = fz_newstring(&obj, buf, len); break;
		case PDF_TTRUE:		error = fz_newbool(&obj, 1); break;
		case PDF_TFALSE:	error = fz_newbool(&obj, 0); break;
		case PDF_TNULL:		error = fz_newnull(&obj); break;
		case PDF_TINT:
			a = atoi(buf);
			tok = pdf_lex(file, buf, cap, &len);
			if (tok == PDF_TSTREAM || tok == PDF_TENDOBJ)
			{
				error = fz_newint(&obj, a);
				if (error) goto cleanup;
				goto skip;
			}
			if (tok == PDF_TINT)
			{
				b = atoi(buf);
				tok = pdf_lex(file, buf, cap, &len);
				if (tok == PDF_TR)
				{
					error = fz_newindirect(&obj, a, b);
					break;
				}
			}
			goto cleanup;
		default:
			goto cleanup;
	}
	if (error) goto cleanup;

	tok = pdf_lex(file, buf, cap, &len);

skip:
	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c != '\n')
				fz_warn("syntaxerror: DOS format line ending after stream keyword (%d %d)\n", oid, gid);
			else
				c = fz_readbyte(file);
		}
		stmofs = fz_tell(file);
	}
	else if (tok == PDF_TENDOBJ)
		stmofs = 0;
	else
		goto cleanup;

	if (ooid) *ooid = oid;
	if (ogid) *ogid = gid;
	if (ostmofs) *ostmofs = stmofs;
	*op = obj;
	return nil;

cleanup:
	if (obj) fz_dropobj(obj);
	if (error) return error;
	return fz_throw("syntaxerror: corrupt indirect object (%d %d)", oid, gid);
}
Ejemplo n.º 14
0
fz_error *
pdf_savexref(pdf_xref *xref, char *path, pdf_crypt *encrypt)
{
	fz_error *error;
	fz_stream *out;
	int oid;
	int startxref;
	int *ofsbuf;
	fz_obj *obj;
	int eoid, egen;

	pdf_logxref("savexref '%s' %p\n", path, xref);

	/* need to add encryption object for acrobat < 6 */
	if (encrypt)
	{
		pdf_logxref("make encryption dict\n");

		error = pdf_allocobject(xref, &eoid, &egen);
		if (error)
			return error;

		pdf_cryptobj(encrypt, encrypt->encrypt, eoid, egen);

		error = pdf_updateobject(xref, eoid, egen, encrypt->encrypt);
		if (error)
			return error;
	}

	ofsbuf = fz_malloc(sizeof(int) * xref->len);
	if (!ofsbuf)
		return fz_outofmem;

	error = fz_openwfile(&out, path);
	if (error)
	{
		fz_free(ofsbuf);
		return error;
	}

	fz_print(out, "%%PDF-%1.1f\n", xref->version);
	fz_print(out, "%%\342\343\317\323\n\n");

	for (oid = 0; oid < xref->len; oid++)
	{
		pdf_xrefentry *x = xref->table + oid;
		if (x->type == 'n' || x->type == 'o' || x->type == 'a')
		{
			ofsbuf[oid] = fz_tell(out);
			error = writeobject(out, xref, encrypt, oid, x->type == 'o' ? 0 : x->gen);
			if (error)
				goto cleanup;
		}
		else
		{
			ofsbuf[oid] = x->ofs;
		}
	}

	startxref = fz_tell(out);
	fz_print(out, "xref\n");
	fz_print(out, "0 %d\n", xref->len);

	for (oid = 0; oid < xref->len; oid++)
	{
		int gen = xref->table[oid].gen;
		int type = xref->table[oid].type;
		if (type == 'o')
			gen = 0;
		if (type == 'a' || type == 'o')
			type = 'n';
		if (type == 'd')
			type = 'f';
		fz_print(out, "%010d %05d %c \n", ofsbuf[oid], gen, type);
	}

	fz_print(out, "\n");

	fz_print(out, "trailer\n<<\n  /Size %d", xref->len);
	obj = fz_dictgets(xref->trailer, "Root");
	fz_print(out, "\n  /Root %d %d R", fz_tonum(obj), fz_togen(obj));
	obj = fz_dictgets(xref->trailer, "Info");
	if (obj)
		fz_print(out, "\n  /Info %d %d R", fz_tonum(obj), fz_togen(obj));
	if (encrypt)
	{
		fz_print(out, "\n  /Encrypt %d %d R", eoid, egen);
		fz_print(out, "\n  /ID [");
		fz_printobj(out, encrypt->id, 1);
		fz_printobj(out, encrypt->id, 1);
		fz_print(out, "]");

		pdf_cryptobj(encrypt, encrypt->encrypt, eoid, egen);
	}
	fz_print(out, "\n>>\n\n");

	fz_print(out, "startxref\n");
	fz_print(out, "%d\n", startxref);
	fz_print(out, "%%%%EOF\n");

	xref->startxref = startxref;

	if(ofsbuf) fz_free(ofsbuf);
	fz_dropstream(out);
	return nil;

cleanup:
	if(ofsbuf) fz_free(ofsbuf);
	fz_dropstream(out);
	return error;
}
Ejemplo n.º 15
0
fz_error *
pdf_updatexref(pdf_xref *xref, char *path)
{
	fz_error *error;
	fz_stream *out;
	int oid;
	int i, n;
	int startxref;
	fz_obj *obj;

	pdf_logxref("updatexref '%s' %p\n", path, xref);

	error = fz_openafile(&out, path);
	if (error)
		return error;

	fz_print(out, "\n");

	for (oid = 0; oid < xref->len; oid++)
	{
		if (xref->table[oid].type == 'a')
		{
			xref->table[oid].ofs = fz_tell(out);
			error = writeobject(out, xref, xref->crypt, oid, xref->table[oid].gen);
			if (error)
				goto cleanup;
		}
	}

	/* always write out entry 0 in appended xref sections */
	xref->table[0].type = 'd';

	startxref = fz_tell(out);
	fz_print(out, "xref\n");

	oid = 0;
	while (oid < xref->len)
	{
		n = countmodified(xref, oid);

		pdf_logxref("  section %d +%d\n", oid, n);

		fz_print(out, "%d %d\n", oid, n);

		for (i = 0; i < n; i++)
		{
			if (xref->table[oid + i].type == 'd')
				xref->table[oid + i].type = 'f';
			if (xref->table[oid + i].type == 'a')
				xref->table[oid + i].type = 'n';

			fz_print(out, "%010d %05d %c \n",
				xref->table[oid + i].ofs,
				xref->table[oid + i].gen,
				xref->table[oid + i].type);
		}

		oid += n;
		while (oid < xref->len &&
				xref->table[oid].type != 'a' &&
				xref->table[oid].type != 'd')
			oid ++;
	}

	fz_print(out, "\n");

	fz_print(out, "trailer\n<<\n  /Size %d\n  /Prev %d", xref->len, xref->startxref);

	obj = fz_dictgets(xref->trailer, "Root");
	fz_print(out,"\n  /Root %d %d R", fz_tonum(obj), fz_togen(obj));

	obj = fz_dictgets(xref->trailer, "Info");
	if (obj)
		fz_print(out,"\n  /Info %d %d R", fz_tonum(obj), fz_togen(obj));

	obj = fz_dictgets(xref->trailer, "Encrypt");
	if (obj) {
		fz_print(out,"\n  /Encrypt ");
		fz_printobj(out, obj, TIGHT);
	}

	obj = fz_dictgets(xref->trailer, "ID");
	if (obj) {
		fz_print(out,"\n  /ID ");
		fz_printobj(out, obj, TIGHT);
	}

	fz_print(out, "\n>>\n\n");

	fz_print(out, "startxref\n");
	fz_print(out, "%d\n", startxref);
	fz_print(out, "%%%%EOF\n");

	xref->startxref = startxref;

	fz_dropstream(out);
	return nil;

cleanup:
	fz_dropstream(out);
	return error;
}
Ejemplo n.º 16
0
fz_error
pdf_repairxref(pdf_xref *xref, char *buf, int bufsize)
{
	fz_error error;
	fz_obj *dict, *obj;
	fz_obj *length;

	fz_obj *encrypt = nil;
	fz_obj *id = nil;
	fz_obj *root = nil;
	fz_obj *info = nil;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxnum = 0;

	int num = 0;
	int gen = 0;
	int tmpofs, numofs = 0, genofs = 0;
	int stmlen, stmofs = 0;
	int tok;
	int next;
	int i, n;

	pdf_logxref("repairxref %p\n", xref);

	fz_seek(xref->file, 0, 0);

	listlen = 0;
	listcap = 1024;
	list = fz_calloc(listcap, sizeof(struct entry));

	/* look for '%PDF' version marker within first kilobyte of file */
	n = fz_read(xref->file, (unsigned char *)buf, MAX(bufsize, 1024));
	if (n < 0)
	{
		error = fz_rethrow(n, "cannot read from file");
		goto cleanup;
	}

	fz_seek(xref->file, 0, 0);
	for (i = 0; i < n - 4; i++)
	{
		if (memcmp(buf + i, "%PDF", 4) == 0)
		{
			fz_seek(xref->file, i, 0);
			break;
		}
	}

	while (1)
	{
		tmpofs = fz_tell(xref->file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, xref->file, buf, bufsize, &n);
		if (error)
		{
			fz_catch(error, "ignoring the rest of the file");
			break;
		}

		if (tok == PDF_TINT)
		{
			numofs = genofs;
			num = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			error = fz_repairobj(xref->file, buf, bufsize, &stmofs, &stmlen, &encrypt, &id);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
				goto cleanup;
			}

			pdf_logxref("found object: (%d %d R)\n", num, gen);

			if (listlen + 1 == listcap)
			{
				listcap = (listcap * 3) / 2;
				list = fz_realloc(list, listcap, sizeof(struct entry));
			}

			list[listlen].num = num;
			list[listlen].gen = gen;
			list[listlen].ofs = numofs;
			list[listlen].stmofs = stmofs;
			list[listlen].stmlen = stmlen;
			listlen ++;

			if (num > maxnum)
				maxnum = num;
		}

		/* trailer dictionary */
		if (tok == PDF_TODICT)
		{
			error = pdf_parsedict(&dict, xref, xref->file, buf, bufsize);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (encrypt)
					fz_dropobj(encrypt);
				encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (id)
					fz_dropobj(id);
				id = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Root");
			if (obj)
			{
				if (root)
					fz_dropobj(root);
				root = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "Info");
			if (obj)
			{
				if (info)
					fz_dropobj(info);
				info = fz_keepobj(obj);
			}

			fz_dropobj(dict);
		}

		if (tok == PDF_TERROR)
			fz_readbyte(xref->file);

		if (tok == PDF_TEOF)
			break;
	}

	/* make xref reasonable */

	pdf_resizexref(xref, maxnum + 1);

	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].num].type = 'n';
		xref->table[list[i].num].ofs = list[i].ofs;
		xref->table[list[i].num].gen = list[i].gen;

		xref->table[list[i].num].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			pdf_logxref("correct stream length %d %d = %d\n",
				list[i].num, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].num, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object (%d %d R)", list[i].num, list[i].gen);
				goto cleanup;
			}

			length = fz_newint(list[i].stmlen);
			fz_dictputs(dict, "Length", length);
			fz_dropobj(length);

			fz_dropobj(dict);
		}

	}

	xref->table[0].type = 'f';
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	/* create a repaired trailer, Root will be added later */

	xref->trailer = fz_newdict(5);

	obj = fz_newint(maxnum + 1);
	fz_dictputs(xref->trailer, "Size", obj);
	fz_dropobj(obj);

	if (root)
	{
		fz_dictputs(xref->trailer, "Root", root);
		fz_dropobj(root);
	}
	if (info)
	{
		fz_dictputs(xref->trailer, "Info", info);
		fz_dropobj(info);
	}

	if (encrypt)
	{
		if (fz_isindirect(encrypt))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(encrypt), fz_togen(encrypt), xref);
			fz_dropobj(encrypt);
			encrypt = obj;
		}
		fz_dictputs(xref->trailer, "Encrypt", encrypt);
		fz_dropobj(encrypt);
	}

	if (id)
	{
		if (fz_isindirect(id))
		{
			/* create new reference with non-nil xref pointer */
			obj = fz_newindirect(fz_tonum(id), fz_togen(id), xref);
			fz_dropobj(id);
			id = obj;
		}
		fz_dictputs(xref->trailer, "ID", id);
		fz_dropobj(id);
	}

	fz_free(list);
	return fz_okay;

cleanup:
	if (encrypt) fz_dropobj(encrypt);
	if (id) fz_dropobj(id);
	if (root) fz_dropobj(root);
	if (info) fz_dropobj(info);
	fz_free(list);
	return error; /* already rethrown */
}
Ejemplo n.º 17
0
static fz_error
fz_repairobj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
	fz_error error;
	int tok;
	int stmlen;
	int len;
	int n;

	*stmofsp = 0;
	*stmlenp = -1;

	stmlen = 0;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse object");
	if (tok == PDF_TODICT)
	{
		fz_obj *dict, *obj;

		/* Send nil xref so we don't try to resolve references */
		error = pdf_parsedict(&dict, nil, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");

		obj = fz_dictgets(dict, "Type");
		if (fz_isname(obj) && !strcmp(fz_toname(obj), "XRef"))
		{
			obj = fz_dictgets(dict, "Encrypt");
			if (obj)
			{
				if (*encrypt)
					fz_dropobj(*encrypt);
				*encrypt = fz_keepobj(obj);
			}

			obj = fz_dictgets(dict, "ID");
			if (obj)
			{
				if (*id)
					fz_dropobj(*id);
				*id = fz_keepobj(obj);
			}
		}

		obj = fz_dictgets(dict, "Length");
		if (fz_isint(obj))
			stmlen = fz_toint(obj);

		fz_dropobj(dict);
	}

	while ( tok != PDF_TSTREAM &&
		tok != PDF_TENDOBJ &&
		tok != PDF_TERROR &&
		tok != PDF_TEOF )
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj or stream token");
	}

	if (tok == PDF_TSTREAM)
	{
		int c = fz_readbyte(file);
		if (c == '\r') {
			c = fz_peekbyte(file);
			if (c == '\n')
				fz_readbyte(file);
		}

		*stmofsp = fz_tell(file);
		if (*stmofsp < 0)
			return fz_throw("cannot seek in file");

		if (stmlen > 0)
		{
			fz_seek(file, *stmofsp + stmlen, 0);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				fz_catch(error, "cannot find endstream token, falling back to scanning");
			if (tok == PDF_TENDSTREAM)
				goto atobjend;
			fz_seek(file, *stmofsp, 0);
		}

		n = fz_read(file, (unsigned char *) buf, 9);
		if (n < 0)
			return fz_rethrow(n, "cannot read from file");

		while (memcmp(buf, "endstream", 9) != 0)
		{
			c = fz_readbyte(file);
			if (c == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = c;
		}

		*stmlenp = fz_tell(file) - *stmofsp - 9;

atobjend:
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot scan for endobj token");
		if (tok != PDF_TENDOBJ)
			fz_warn("object missing 'endobj' token");
	}

	return fz_okay;
}
Ejemplo n.º 18
0
pdf_obj *
pdf_parse_ind_obj(pdf_document *xref,
                  fz_stream *file, pdf_lexbuf *buf,
                  int *onum, int *ogen, int *ostmofs)
{
    pdf_obj *obj = NULL;
    int num = 0, gen = 0, stm_ofs;
    int tok;
    int a, b;
    fz_context *ctx = file->ctx;

    fz_var(obj);

    tok = pdf_lex(file, buf);
    /* RJW: cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected object number (%d %d R)", num, gen);
    num = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_INT)
        fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
    gen = buf->i;

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
    if (tok != PDF_TOK_OBJ)
        fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);

    tok = pdf_lex(file, buf);
    /* RJW: "cannot parse indirect object (%d %d R)", num, gen */

    switch (tok)
    {
    case PDF_TOK_OPEN_ARRAY:
        obj = pdf_parse_array(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_OPEN_DICT:
        obj = pdf_parse_dict(xref, file, buf);
        /* RJW: "cannot parse indirect object (%d %d R)", num, gen */
        break;

    case PDF_TOK_NAME:
        obj = fz_new_name(ctx, buf->scratch);
        break;
    case PDF_TOK_REAL:
        obj = pdf_new_real(ctx, buf->f);
        break;
    case PDF_TOK_STRING:
        obj = pdf_new_string(ctx, buf->scratch, buf->len);
        break;
    case PDF_TOK_TRUE:
        obj = pdf_new_bool(ctx, 1);
        break;
    case PDF_TOK_FALSE:
        obj = pdf_new_bool(ctx, 0);
        break;
    case PDF_TOK_NULL:
        obj = pdf_new_null(ctx);
        break;

    case PDF_TOK_INT:
        a = buf->i;
        tok = pdf_lex(file, buf);
        /* "cannot parse indirect object (%d %d R)", num, gen */
        if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
        {
            obj = pdf_new_int(ctx, a);
            goto skip;
        }
        if (tok == PDF_TOK_INT)
        {
            b = buf->i;
            tok = pdf_lex(file, buf);
            /* RJW: "cannot parse indirect object (%d %d R)", num, gen); */
            if (tok == PDF_TOK_R)
            {
                obj = pdf_new_indirect(ctx, a, b, xref);
                break;
            }
        }
        fz_throw(ctx, "expected 'R' keyword (%d %d R)", num, gen);

    case PDF_TOK_ENDOBJ:
        obj = pdf_new_null(ctx);
        goto skip;

    default:
        fz_throw(ctx, "syntax error in object (%d %d R)", num, gen);
    }

    fz_try(ctx)
    {
        tok = pdf_lex(file, buf);
    }
    fz_catch(ctx)
    {
        pdf_drop_obj(obj);
        fz_throw(ctx, "cannot parse indirect object (%d %d R)", num, gen);
    }

skip:
    if (tok == PDF_TOK_STREAM)
    {
        int c = fz_read_byte(file);
        while (c == ' ')
            c = fz_read_byte(file);
        if (c == '\r')
        {
            c = fz_peek_byte(file);
            if (c != '\n')
                fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
            else
                fz_read_byte(file);
        }
        stm_ofs = fz_tell(file);
    }
    else if (tok == PDF_TOK_ENDOBJ)
    {
        stm_ofs = 0;
    }
    else
    {
        fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
        stm_ofs = 0;
    }

    if (onum) *onum = num;
    if (ogen) *ogen = gen;
    if (ostmofs) *ostmofs = stm_ofs;
    return obj;
}
Ejemplo n.º 19
0
fz_error
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
	fz_stream *file, char *buf, int cap,
	int *onum, int *ogen, int *ostmofs)
{
	fz_error error = fz_okay;
	fz_obj *obj = NULL;
	int num = 0, gen = 0, stm_ofs;
	int tok;
	int len;
	int a, b;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected object number (%d %d R)", num, gen);
	num = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_INT)
		return fz_throw("expected generation number (%d %d R)", num, gen);
	gen = atoi(buf);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	if (tok != PDF_TOK_OBJ)
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);

	switch (tok)
	{
	case PDF_TOK_OPEN_ARRAY:
		error = pdf_parse_array(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_OPEN_DICT:
		error = pdf_parse_dict(&obj, xref, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		break;

	case PDF_TOK_NAME: obj = fz_new_name(buf); break;
	case PDF_TOK_REAL: obj = fz_new_real(fz_atof(buf)); break;
	case PDF_TOK_STRING: obj = fz_new_string(buf, len); break;
	case PDF_TOK_TRUE: obj = fz_new_bool(1); break;
	case PDF_TOK_FALSE: obj = fz_new_bool(0); break;
	case PDF_TOK_NULL: obj = fz_new_null(); break;

	case PDF_TOK_INT:
		a = atoi(buf);
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
		{
			obj = fz_new_int(a);
			goto skip;
		}
		if (tok == PDF_TOK_INT)
		{
			b = atoi(buf);
			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
			if (tok == PDF_TOK_R)
			{
				obj = fz_new_indirect(a, b, xref);
				break;
			}
		}
		return fz_throw("expected 'R' keyword (%d %d R)", num, gen);

	case PDF_TOK_ENDOBJ:
		obj = fz_new_null();
		goto skip;

	default:
		return fz_throw("syntax error in object (%d %d R)", num, gen);
	}

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
	{
		fz_drop_obj(obj);
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
	}

skip:
	if (tok == PDF_TOK_STREAM)
	{
		int c = fz_read_byte(file);
		while (c == ' ')
			c = fz_read_byte(file);
		if (c == '\r')
		{
			c = fz_peek_byte(file);
			if (c != '\n')
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
			else
				fz_read_byte(file);
		}
		stm_ofs = fz_tell(file);
	}
	else if (tok == PDF_TOK_ENDOBJ)
	{
		stm_ofs = 0;
	}
	else
	{
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
		stm_ofs = 0;
	}

	if (onum) *onum = num;
	if (ogen) *ogen = gen;
	if (ostmofs) *ostmofs = stm_ofs;
	*op = obj;
	return fz_okay;
}