Пример #1
0
/*
 * Consume the array form of a bfrange destination: [ <str> <str> ... ].
 *
 * Tokens are read from 'file' until the closing ']' arrives. Every string
 * token is cut into 2-byte units, each unit is converted with
 * pdf_code_from_string(), and the resulting sequence is registered for the
 * current source code through pdf_map_one_to_many(). The source code starts
 * at 'lo' and advances by one for each array element, including elements
 * shorter than two bytes (which register nothing).
 *
 * A token that is neither a string nor ']' is reported through fz_throw()
 * with "expected string or ]".
 *
 * 'hi' is accepted but never consulted, so the number of array elements is
 * not checked against the declared range.
 */
static void
pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
	pdf_lexbuf lexbuf;
	int units[256];
	int token;
	int count;
	int k;

	lexbuf.size = PDF_LEXBUF_SMALL;

	for (;; lo++)
	{
		token = pdf_lex_cmap(file, &lexbuf);
		/* RJW: "syntaxerror in cmap" */

		if (token == PDF_TOK_CLOSE_ARRAY)
			break;

		/* Note: does not handle [ /Name /Name ... ] */
		if (token != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or ]");

		/* Number of complete 2-byte units in the string just lexed. */
		count = lexbuf.len / 2;
		if (count == 0)
			continue;

		for (k = 0; k < count; k++)
			units[k] = pdf_code_from_string(lexbuf.scratch + 2 * k, 2);

		pdf_map_one_to_many(ctx, cmap, lo, units, count);
	}
}
Пример #2
0
/*
 * Consume the array form of a bfrange destination: [ <str> <str> ... ].
 *
 * Tokens are read from 'file' into a local 256-byte buffer until the closing
 * ']' arrives. Every string token is cut into 2-byte units, each unit is
 * converted with pdf_code_from_string(), and the resulting sequence is
 * registered for the current source code through pdf_map_one_to_many(). The
 * source code starts at 'lo' and advances by one for each array element,
 * including elements shorter than two bytes (which register nothing).
 *
 * A token that is neither a string nor ']' is reported through fz_throw()
 * on file->ctx with "expected string or ]".
 *
 * 'hi' is accepted but never consulted, so the number of array elements is
 * not checked against the declared range.
 */
static void
pdf_parse_bf_range_array(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
	char text[256];
	int units[256];
	int token;
	int nbytes;
	int count;
	int k;

	for (;; lo++)
	{
		token = pdf_lex_cmap(file, text, sizeof text, &nbytes);
		/* RJW: "syntaxerror in cmap" */

		if (token == PDF_TOK_CLOSE_ARRAY)
			break;

		/* Note: does not handle [ /Name /Name ... ] */
		if (token != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or ]");

		/* Number of complete 2-byte units in the string just lexed. */
		count = nbytes / 2;
		if (count == 0)
			continue;

		for (k = 0; k < count; k++)
			units[k] = pdf_code_from_string(&text[2 * k], 2);

		pdf_map_one_to_many(file->ctx, cmap, lo, units, count);
	}
}
Пример #3
0
/*
 * Parse the body of a bfchar section: repeated <src> <dst> string pairs,
 * ended by the endbfchar token.
 *
 * For each pair the source string is converted to a single code with
 * pdf_code_from_string(). The destination string is cut into 2-byte units,
 * each converted the same way, and the sequence is registered for the source
 * code via pdf_map_one_to_many(). A destination shorter than two bytes
 * registers nothing.
 *
 * Unexpected tokens are reported through fz_throw(): "expected string or
 * endbfchar" where a source is expected, "expected string" where a
 * destination is expected.
 */
static void
pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
	pdf_lexbuf lexbuf;
	int units[256];
	int token;
	int code;
	int count;
	int k;

	lexbuf.size = PDF_LEXBUF_SMALL;

	for (;;)
	{
		/* Source code, or the end-of-section keyword. */
		token = pdf_lex_cmap(file, &lexbuf);
		/* RJW: "syntaxerror in cmap" */

		if (token == TOK_END_BF_CHAR)
			break;

		if (token != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or endbfchar");

		code = pdf_code_from_string(lexbuf.scratch, lexbuf.len);

		/* Destination string; the lex buffer is reused for it. */
		token = pdf_lex_cmap(file, &lexbuf);
		/* RJW: "syntaxerror in cmap" */
		/* Note: does not handle /dstName */
		if (token != PDF_TOK_STRING)
			fz_throw(ctx, "expected string");

		count = lexbuf.len / 2;
		if (count == 0)
			continue;

		for (k = 0; k < count; k++)
			units[k] = pdf_code_from_string(lexbuf.scratch + 2 * k, 2);

		/* k now holds the number of units converted above. */
		pdf_map_one_to_many(ctx, cmap, code, units, k);
	}
}
Пример #4
0
/*
 * Parse the body of a bfchar section: repeated <src> <dst> string pairs,
 * ended by the endbfchar token.
 *
 * For each pair the source string is converted to a single code with
 * pdf_code_from_string(). The destination string is cut into 2-byte units,
 * each converted the same way, and the sequence is registered for the source
 * code via pdf_map_one_to_many(). A destination shorter than two bytes
 * registers nothing.
 *
 * Unexpected tokens are reported through fz_throw() on file->ctx: "expected
 * string or endbfchar" where a source is expected, "expected string" where a
 * destination is expected.
 */
static void
pdf_parse_bf_char(pdf_cmap *cmap, fz_stream *file)
{
	char text[256];
	int units[256];
	int token;
	int nbytes;
	int code;
	int count;
	int k;

	for (;;)
	{
		/* Source code, or the end-of-section keyword. */
		token = pdf_lex_cmap(file, text, sizeof text, &nbytes);
		/* RJW: "syntaxerror in cmap" */

		if (token == TOK_END_BF_CHAR)
			break;

		if (token != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or endbfchar");

		code = pdf_code_from_string(text, nbytes);

		/* Destination string; the same buffer is reused for it. */
		token = pdf_lex_cmap(file, text, sizeof text, &nbytes);
		/* RJW: "syntaxerror in cmap" */
		/* Note: does not handle /dstName */
		if (token != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string");

		count = nbytes / 2;
		if (count == 0)
			continue;

		for (k = 0; k < count; k++)
			units[k] = pdf_code_from_string(&text[2 * k], 2);

		/* k now holds the number of units converted above. */
		pdf_map_one_to_many(file->ctx, cmap, code, units, k);
	}
}
Пример #5
0
/*
 * Fill in the Unicode lookup data of 'font'.
 *
 * 1. If 'cmapstm' refers to a stream, the embedded cmap is loaded and a new
 *    font->to_unicode cmap is built from it: every character code (0..255
 *    when 'strings' is given, 0..65535 otherwise) whose lookup in
 *    font->encoding yields a non-negative cid has the embedded cmap's
 *    mapping for that code re-registered under the cid. The result is
 *    sorted, the embedded cmap is dropped, and font->size grows by the size
 *    of the new cmap.
 * 2. Otherwise, if 'collection' is given, the matching "<collection>-UCS2"
 *    system cmap is loaded for the four known Adobe collections and the
 *    function returns immediately; an unknown collection leaves
 *    font->to_unicode untouched. 'strings' is not processed on this path.
 * 3. When 'strings' is given (and path 2 was not taken), a 256-entry
 *    font->cid_to_ucs table is allocated and filled with
 *    pdf_lookup_agl(strings[i]), or '?' where strings[i] is NULL.
 */
void
pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font,
	char **strings, char *collection, pdf_obj *cmapstm)
{
	fz_context *ctx = doc->ctx;
	pdf_cmap *embedded;
	int unicode[8];
	int code;

	if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm)))
	{
		/* With a glyph-name table only single-byte codes are visited. */
		int code_limit = strings ? 256 : 65536;

		embedded = pdf_load_embedded_cmap(doc, cmapstm);

		font->to_unicode = pdf_new_cmap(ctx);

		for (code = 0; code < code_limit; code++)
		{
			int cid = pdf_lookup_cmap(font->encoding, code);
			int n;

			if (cid < 0)
				continue;

			n = pdf_lookup_cmap_full(embedded, code, unicode);
			if (n == 1)
				pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, unicode[0]);
			else if (n > 1)
				pdf_map_one_to_many(ctx, font->to_unicode, cid, unicode, n);
		}

		pdf_sort_cmap(ctx, font->to_unicode);

		pdf_drop_cmap(ctx, embedded);
		font->size += pdf_cmap_size(ctx, font->to_unicode);
	}
	else if (collection)
	{
		/* Name of the system cmap to load; stays NULL for unknown collections. */
		char *ucs2_name = NULL;

		if (strcmp(collection, "Adobe-CNS1") == 0)
			ucs2_name = "Adobe-CNS1-UCS2";
		else if (strcmp(collection, "Adobe-GB1") == 0)
			ucs2_name = "Adobe-GB1-UCS2";
		else if (strcmp(collection, "Adobe-Japan1") == 0)
			ucs2_name = "Adobe-Japan1-UCS2";
		else if (strcmp(collection, "Adobe-Korea1") == 0)
			ucs2_name = "Adobe-Korea1-UCS2";

		if (ucs2_name)
			font->to_unicode = pdf_load_system_cmap(ctx, ucs2_name);

		return;
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short));
		font->size += 256 * sizeof(unsigned short);

		for (code = 0; code < 256; code++)
			font->cid_to_ucs[code] = strings[code] ? pdf_lookup_agl(strings[code]) : '?';
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}
}
Пример #6
0
/*
 * Fill in the Unicode lookup data of 'font'.
 *
 * 1. If 'cmapstm' refers to a stream, the embedded cmap is loaded and a new
 *    font->to_unicode cmap is built from it: every character code (0..255
 *    when 'strings' is given, 0..65535 otherwise) whose lookup in
 *    font->encoding yields a non-negative cid has the embedded cmap's
 *    mapping for that code re-registered under the cid. The result is sorted
 *    and the embedded cmap is dropped.
 * 2. Otherwise, if 'collection' is given, the matching "<collection>-UCS2"
 *    system cmap is loaded for the four known Adobe collections; an unknown
 *    collection leaves font->to_unicode untouched.
 * 3. In every non-failing case, when 'strings' is given, a 256-entry
 *    font->cid_to_ucs table is allocated and filled with
 *    pdf_lookup_agl(strings[i]), or '?' where strings[i] is NULL.
 *
 * Returns fz_okay on success, or the rethrown error when loading the
 * embedded or system cmap fails.
 */
fz_error
pdf_load_to_unicode(pdf_font_desc *font, pdf_xref *xref,
	char **strings, char *collection, fz_obj *cmapstm)
{
	fz_error error = fz_okay;
	pdf_cmap *embedded;
	int unicode[8];
	int code;

	if (pdf_is_stream(xref, fz_to_num(cmapstm), fz_to_gen(cmapstm)))
	{
		/* With a glyph-name table only single-byte codes are visited. */
		int code_limit = strings ? 256 : 65536;

		error = pdf_load_embedded_cmap(&embedded, xref, cmapstm);
		if (error)
			return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_to_num(cmapstm), fz_to_gen(cmapstm));

		font->to_unicode = pdf_new_cmap();

		for (code = 0; code < code_limit; code++)
		{
			int cid = pdf_lookup_cmap(font->encoding, code);
			int n;

			if (cid < 0)
				continue;

			n = pdf_lookup_cmap_full(embedded, code, unicode);
			if (n == 1)
				pdf_map_range_to_range(font->to_unicode, cid, cid, unicode[0]);
			else if (n > 1)
				pdf_map_one_to_many(font->to_unicode, cid, unicode, n);
		}

		pdf_sort_cmap(font->to_unicode);

		pdf_drop_cmap(embedded);
	}
	else if (collection)
	{
		/* Name of the system cmap to load; stays NULL for unknown collections. */
		char *ucs2_name = NULL;

		if (strcmp(collection, "Adobe-CNS1") == 0)
			ucs2_name = "Adobe-CNS1-UCS2";
		else if (strcmp(collection, "Adobe-GB1") == 0)
			ucs2_name = "Adobe-GB1-UCS2";
		else if (strcmp(collection, "Adobe-Japan1") == 0)
			ucs2_name = "Adobe-Japan1-UCS2";
		else if (strcmp(collection, "Adobe-Korea1") == 0)
			ucs2_name = "Adobe-Korea1-UCS2";

		if (ucs2_name)
			error = pdf_load_system_cmap(&font->to_unicode, ucs2_name);

		if (error)
			return fz_rethrow(error, "cannot load ToUnicode system cmap %s-UCS2", collection);
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_calloc(256, sizeof(unsigned short));

		for (code = 0; code < 256; code++)
			font->cid_to_ucs[code] = strings[code] ? pdf_lookup_agl(strings[code]) : '?';
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}

	return fz_okay;
}
Пример #7
0
/*
 * Parse the body of a bfrange section, ended by the endbfrange token.
 *
 * Each entry consists of two strings giving the inclusive source code range
 * [lo, hi], followed by either:
 *   - a destination string. A 2-byte destination is registered as a plain
 *     range via pdf_map_range_to_range(). Any other length is cut into
 *     2-byte units and registered code by code via pdf_map_one_to_many():
 *     'lo' receives the destination exactly as written and each following
 *     code receives it with the last unit incremented once more.
 *   - an array of destination strings, handed to pdf_parse_bf_range_array().
 *
 * Unexpected tokens are reported through fz_throw().
 */
static void
pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
	pdf_lexbuf buf;
	int tok;
	int lo, hi, dst;

	buf.size = PDF_LEXBUF_SMALL;
	while (1)
	{
		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */

		if (tok == TOK_END_BF_RANGE)
			return;

		else if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or endbfrange");

		lo = pdf_code_from_string(buf.scratch, buf.len);

		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */
		if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string");

		hi = pdf_code_from_string(buf.scratch, buf.len);

		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_STRING)
		{
			if (buf.len == 2)
			{
				/* Single 2-byte destination: a straight range mapping. */
				dst = pdf_code_from_string(buf.scratch, buf.len);
				pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
			}
			else
			{
				int dststr[256];
				int i;

				if (buf.len / 2)
				{
					for (i = 0; i < buf.len / 2; i++)
						dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);

					while (lo <= hi)
					{
						/* Register the current destination first and only
						 * then step its last unit, so that 'lo' maps to the
						 * destination string as written rather than to its
						 * successor. */
						pdf_map_one_to_many(ctx, cmap, lo, dststr, i);
						dststr[i-1] ++;
						lo ++;
					}
				}
			}
		}

		else if (tok == PDF_TOK_OPEN_ARRAY)
		{
			pdf_parse_bf_range_array(ctx, cmap, file, lo, hi);
			/* RJW: "cannot map bfrange" */
		}

		else
		{
			fz_throw(ctx, "expected string or array or endbfrange");
		}
	}
}
Пример #8
0
/*
 * Parse the body of a bfrange section, ended by the endbfrange token.
 *
 * Each entry consists of two strings giving the inclusive source code range
 * [lo, hi], followed by either:
 *   - a destination string. A 2-byte destination is registered as a plain
 *     range via pdf_map_range_to_range(). Any other length is cut into
 *     2-byte units and registered code by code via pdf_map_one_to_many():
 *     'lo' receives the destination exactly as written and each following
 *     code receives it with the last unit incremented once more.
 *   - an array of destination strings, handed to pdf_parse_bf_range_array().
 *
 * Unexpected tokens are reported through fz_throw() on file->ctx.
 */
static void
pdf_parse_bf_range(pdf_cmap *cmap, fz_stream *file)
{
	char buf[256];
	int tok;
	int len;
	int lo, hi, dst;

	while (1)
	{
		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */

		if (tok == TOK_END_BF_RANGE)
			return;

		else if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or endbfrange");

		lo = pdf_code_from_string(buf, len);

		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */
		if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string");

		hi = pdf_code_from_string(buf, len);

		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_STRING)
		{
			if (len == 2)
			{
				/* Single 2-byte destination: a straight range mapping. */
				dst = pdf_code_from_string(buf, len);
				pdf_map_range_to_range(file->ctx, cmap, lo, hi, dst);
			}
			else
			{
				int dststr[256];
				int i;

				if (len / 2)
				{
					for (i = 0; i < len / 2; i++)
						dststr[i] = pdf_code_from_string(buf + i * 2, 2);

					while (lo <= hi)
					{
						/* Register the current destination first and only
						 * then step its last unit, so that 'lo' maps to the
						 * destination string as written rather than to its
						 * successor. */
						pdf_map_one_to_many(file->ctx, cmap, lo, dststr, i);
						dststr[i-1] ++;
						lo ++;
					}
				}
			}
		}

		else if (tok == PDF_TOK_OPEN_ARRAY)
		{
			pdf_parse_bf_range_array(cmap, file, lo, hi);
			/* RJW: "cannot map bfrange" */
		}

		else
		{
			fz_throw(file->ctx, "expected string or array or endbfrange");
		}
	}
}