Ejemplo n.º 1
0
/*
 * Read cidchar entries from 'file' into 'cmap' until the
 * TOK_END_CID_CHAR token is seen.
 *
 * Each entry is a string token (the source code) followed by an
 * integer token (the destination). Every entry is stored as a
 * one-element range via pdf_map_range_to_range().
 *
 * Throws through 'ctx' when a token of an unexpected kind is read.
 * NOTE(review): the original annotations suggest pdf_lex_cmap() can
 * itself report "syntaxerror in cmap" -- confirm against the lexer.
 */
static void
pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
	pdf_lexbuf lexbuf;
	int token;
	int code, target;

	lexbuf.size = PDF_LEXBUF_SMALL;
	for (;;)
	{
		/* First token of an entry: the source code, or the terminator. */
		token = pdf_lex_cmap(file, &lexbuf);
		if (token == TOK_END_CID_CHAR)
			break;
		if (token != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or endcidchar");
		code = pdf_code_from_string(lexbuf.scratch, lexbuf.len);

		/* Second token of an entry: the integer destination. */
		token = pdf_lex_cmap(file, &lexbuf);
		if (token != PDF_TOK_INT)
			fz_throw(ctx, "expected integer");
		target = lexbuf.i;

		/* A single code is recorded as the range [code, code]. */
		pdf_map_range_to_range(ctx, cmap, code, code, target);
	}
}
Ejemplo n.º 2
0
/*
 * Read cidchar entries from 'file' into 'cmap' until the
 * TOK_END_CID_CHAR token is seen.
 *
 * Each entry is a string token (the source code) followed by an
 * integer token whose text is converted with atoi(). Every entry is
 * stored as a one-element range via pdf_map_range_to_range().
 *
 * Throws through file->ctx when a token of an unexpected kind is read.
 */
static void
pdf_parse_cid_char(pdf_cmap *cmap, fz_stream *file)
{
	char text[256];
	int text_len;
	int token;
	int code, target;

	for (;;)
	{
		/* First token of an entry: the source code, or the terminator. */
		token = pdf_lex_cmap(file, text, sizeof text, &text_len);
		if (token == TOK_END_CID_CHAR)
			break;
		if (token != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or endcidchar");
		code = pdf_code_from_string(text, text_len);

		/* Second token of an entry: the integer destination. */
		token = pdf_lex_cmap(file, text, sizeof text, &text_len);
		if (token != PDF_TOK_INT)
			fz_throw(file->ctx, "expected integer");
		target = atoi(text);

		/* A single code is recorded as the range [code, code]. */
		pdf_map_range_to_range(file->ctx, cmap, code, code, target);
	}
}
Ejemplo n.º 3
0
/*
 * Build a new CMap named "Identity-H" (wmode == 0) or "Identity-V"
 * (wmode != 0).
 *
 * The CMap gets a single codespace 0x0000..0xffff that is 'bytes'
 * wide, and the range 0x0000..0xffff mapped starting at 0. It is
 * sorted and its writing mode is set to 'wmode' before being returned.
 */
pdf_cmap *
pdf_new_identity_cmap(int wmode, int bytes)
{
	pdf_cmap *identity = pdf_new_cmap();
	const char direction = wmode ? 'V' : 'H';

	sprintf(identity->cmap_name, "Identity-%c", direction);

	pdf_add_codespace(identity, 0x0000, 0xffff, bytes);
	pdf_map_range_to_range(identity, 0x0000, 0xffff, 0);
	pdf_sort_cmap(identity);

	pdf_set_wmode(identity, wmode);

	return identity;
}
Ejemplo n.º 4
0
/*
 * Build a new CMap named "Identity-H" (wmode == 0) or "Identity-V"
 * (wmode != 0).
 *
 * The CMap gets a single codespace 0x0000..0xffff that is 'bytes'
 * wide, and the range 0x0000..0xffff mapped starting at 0. It is
 * sorted and its writing mode is set to 'wmode' before being returned.
 *
 * If any step after allocation throws, the partially built CMap is
 * dropped and the exception is rethrown.
 */
pdf_cmap *
pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
{
	pdf_cmap *identity = pdf_new_cmap(ctx);
	const char direction = wmode ? 'V' : 'H';

	fz_try(ctx)
	{
		sprintf(identity->cmap_name, "Identity-%c", direction);

		pdf_add_codespace(ctx, identity, 0x0000, 0xffff, bytes);
		pdf_map_range_to_range(ctx, identity, 0x0000, 0xffff, 0);
		pdf_sort_cmap(ctx, identity);

		pdf_set_cmap_wmode(ctx, identity, wmode);
	}
	fz_catch(ctx)
	{
		/* Do not leak the new CMap when construction fails. */
		pdf_drop_cmap(ctx, identity);
		fz_rethrow(ctx);
	}

	return identity;
}
Ejemplo n.º 5
0
/*
 * Create an Identity-* CMap (for both 1 and 2-byte encodings).
 *
 * The CMap is named "Identity-H" (wmode == 0) or "Identity-V"
 * (wmode != 0). Its codespace and mapped range both run from 0 to the
 * largest code that fits in 'bytes' bytes, mapped starting at 0.
 *
 * If any step after allocation throws, the partially built CMap is
 * dropped and the exception is rethrown.
 */
pdf_cmap *
pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
{
	pdf_cmap *cmap = pdf_new_cmap(ctx);
	fz_try(ctx)
	{
		unsigned int high;

		/*
		 * Largest code representable in 'bytes' bytes. The shift is
		 * done on an unsigned operand and clamped, because shifting by
		 * a negative count or by the full width of the type is
		 * undefined behaviour. Results for bytes 0..3 are unchanged.
		 */
		if (bytes <= 0)
			high = 0;
		else if (bytes >= (int)sizeof(high))
			high = ~0u;
		else
			high = (1u << (bytes * 8)) - 1;

		sprintf(cmap->cmap_name, "Identity-%c", wmode ? 'V' : 'H');
		pdf_add_codespace(ctx, cmap, 0, high, bytes);
		pdf_map_range_to_range(ctx, cmap, 0, high, 0);
		pdf_sort_cmap(ctx, cmap);
		pdf_set_cmap_wmode(ctx, cmap, wmode);
	}
	fz_catch(ctx)
	{
		/* Do not leak the new CMap when construction fails. */
		pdf_drop_cmap(ctx, cmap);
		fz_rethrow(ctx);
	}
	return cmap;
}
Ejemplo n.º 6
0
/*
 * Populate the to-Unicode information of 'font'.
 *
 * - If 'cmapstm' is a stream, the embedded CMap is loaded and a new
 *   font->to_unicode CMap is built by looking every code up in
 *   font->encoding (to get the cid) and in the embedded CMap (to get
 *   the Unicode value or values). 256 codes are scanned when 'strings'
 *   is given, 65536 otherwise.
 * - Otherwise, if 'collection' names one of the four known Adobe
 *   collections, the matching "-UCS2" system CMap is loaded and the
 *   function returns immediately.
 * - Finally, if 'strings' is given, a 256-entry font->cid_to_ucs table
 *   is filled from pdf_lookup_agl(), using '?' for missing names.
 *
 * font->size is increased by the size of whatever was allocated in the
 * first and last cases.
 */
void
pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font,
	char **strings, char *collection, pdf_obj *cmapstm)
{
	pdf_cmap *cmap;
	int cid;
	int ucsbuf[8];
	int ucslen;
	int i;
	fz_context *ctx = doc->ctx;

	if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm)))
	{
		cmap = pdf_load_embedded_cmap(doc, cmapstm);

		/*
		 * 'cmap' is a temporary owned by this function. The calls
		 * below take a context and may throw, so release it on the
		 * error path as well as on the normal one.
		 */
		fz_try(ctx)
		{
			font->to_unicode = pdf_new_cmap(ctx);

			for (i = 0; i < (strings ? 256 : 65536); i++)
			{
				cid = pdf_lookup_cmap(font->encoding, i);
				if (cid >= 0)
				{
					ucslen = pdf_lookup_cmap_full(cmap, i, ucsbuf);
					if (ucslen == 1)
						pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, ucsbuf[0]);
					if (ucslen > 1)
						pdf_map_one_to_many(ctx, font->to_unicode, cid, ucsbuf, ucslen);
				}
			}

			pdf_sort_cmap(ctx, font->to_unicode);
		}
		fz_catch(ctx)
		{
			pdf_drop_cmap(ctx, cmap);
			fz_rethrow(ctx);
		}

		pdf_drop_cmap(ctx, cmap);
		font->size += pdf_cmap_size(ctx, font->to_unicode);
	}

	else if (collection)
	{
		if (!strcmp(collection, "Adobe-CNS1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
		else if (!strcmp(collection, "Adobe-GB1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
		else if (!strcmp(collection, "Adobe-Japan1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
		else if (!strcmp(collection, "Adobe-Korea1"))
			font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");

		/* The glyph-name table below is not built for this case. */
		return;
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short));
		font->size += 256 * sizeof(unsigned short);

		/* Every slot is written, so the uninitialised array is safe. */
		for (i = 0; i < 256; i++)
		{
			if (strings[i])
				font->cid_to_ucs[i] = pdf_lookup_agl(strings[i]);
			else
				font->cid_to_ucs[i] = '?';
		}
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}
}
Ejemplo n.º 7
0
/*
 * Populate the to-Unicode information of 'font'.
 *
 * - If 'cmapstm' is a stream, the embedded CMap is loaded and a new
 *   font->to_unicode CMap is built by looking every code up in
 *   font->encoding (to get the cid) and in the embedded CMap (to get
 *   the Unicode value or values). 256 codes are scanned when 'strings'
 *   is given, 65536 otherwise.
 * - Otherwise, if 'collection' names one of the four known Adobe
 *   collections, the matching "-UCS2" system CMap is loaded.
 * - Independently, if 'strings' is given, a 256-entry
 *   font->cid_to_ucs table is filled from pdf_lookup_agl(), using '?'
 *   for missing names.
 *
 * Returns fz_okay on success, or a rethrown error when loading the
 * embedded or system CMap fails.
 */
fz_error
pdf_load_to_unicode(pdf_font_desc *font, pdf_xref *xref,
	char **strings, char *collection, fz_obj *cmapstm)
{
	fz_error error = fz_okay;
	pdf_cmap *embedded;
	int ucs[8];
	int ucs_count;
	int code, cid;
	int limit;

	if (pdf_is_stream(xref, fz_to_num(cmapstm), fz_to_gen(cmapstm)))
	{
		error = pdf_load_embedded_cmap(&embedded, xref, cmapstm);
		if (error)
			return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_to_num(cmapstm), fz_to_gen(cmapstm));

		font->to_unicode = pdf_new_cmap();

		/* Simple fonts (with glyph names) only have 256 codes. */
		limit = strings ? 256 : 65536;
		for (code = 0; code < limit; code++)
		{
			cid = pdf_lookup_cmap(font->encoding, code);
			if (cid < 0)
				continue;

			ucs_count = pdf_lookup_cmap_full(embedded, code, ucs);
			if (ucs_count == 1)
				pdf_map_range_to_range(font->to_unicode, cid, cid, ucs[0]);
			else if (ucs_count > 1)
				pdf_map_one_to_many(font->to_unicode, cid, ucs, ucs_count);
		}

		pdf_sort_cmap(font->to_unicode);

		/* The embedded CMap was only needed to build to_unicode. */
		pdf_drop_cmap(embedded);
	}
	else if (collection)
	{
		if (strcmp(collection, "Adobe-CNS1") == 0)
			error = pdf_load_system_cmap(&font->to_unicode, "Adobe-CNS1-UCS2");
		else if (strcmp(collection, "Adobe-GB1") == 0)
			error = pdf_load_system_cmap(&font->to_unicode, "Adobe-GB1-UCS2");
		else if (strcmp(collection, "Adobe-Japan1") == 0)
			error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Japan1-UCS2");
		else if (strcmp(collection, "Adobe-Korea1") == 0)
			error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Korea1-UCS2");

		if (error)
			return fz_rethrow(error, "cannot load ToUnicode system cmap %s-UCS2", collection);
	}

	if (strings)
	{
		/* TODO one-to-many mappings */

		font->cid_to_ucs_len = 256;
		font->cid_to_ucs = fz_calloc(256, sizeof(unsigned short));

		for (code = 0; code < 256; code++)
			font->cid_to_ucs[code] = strings[code] ? pdf_lookup_agl(strings[code]) : '?';
	}

	if (!font->to_unicode && !font->cid_to_ucs)
	{
		/* TODO: synthesize a ToUnicode if it's a freetype font with
		 * cmap and/or post tables or if it has glyph names. */
	}

	return fz_okay;
}
Ejemplo n.º 8
0
/*
 * Read bfrange entries from 'file' into 'cmap' until the
 * TOK_END_BF_RANGE token is seen.
 *
 * Each entry is two string tokens (the low and high source codes)
 * followed by the destination, which is either:
 *  - a 2-byte string: the whole range is mapped with
 *    pdf_map_range_to_range(), starting at that value;
 *  - a longer string: it is split into 2-byte units and each code in
 *    the range is mapped one-to-many, with the last unit incremented
 *    once per code;
 *  - an array: handled by pdf_parse_bf_range_array().
 *
 * Throws through 'ctx' when a token of an unexpected kind is read.
 */
static void
pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
	pdf_lexbuf buf;
	int tok;
	int lo, hi, dst;

	buf.size = PDF_LEXBUF_SMALL;
	while (1)
	{
		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */

		if (tok == TOK_END_BF_RANGE)
			return;

		else if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string or endbfrange");

		lo = pdf_code_from_string(buf.scratch, buf.len);

		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */
		if (tok != PDF_TOK_STRING)
			fz_throw(ctx, "expected string");

		hi = pdf_code_from_string(buf.scratch, buf.len);

		tok = pdf_lex_cmap(file, &buf);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_STRING)
		{
			if (buf.len == 2)
			{
				dst = pdf_code_from_string(buf.scratch, buf.len);
				pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
			}
			else
			{
				int dststr[256];
				int i;

				/* Strings shorter than one 2-byte unit are ignored. */
				if (buf.len / 2)
				{
					for (i = 0; i < buf.len / 2; i++)
						dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);

					while (lo <= hi)
					{
						/*
						 * Map first, then advance: 'lo' itself must get
						 * the destination exactly as written, as in the
						 * 2-byte branch above. Incrementing before the
						 * first mapping shifted every entry by one.
						 */
						pdf_map_one_to_many(ctx, cmap, lo, dststr, i);
						dststr[i-1] ++;
						lo ++;
					}
				}
			}
		}

		else if (tok == PDF_TOK_OPEN_ARRAY)
		{
			pdf_parse_bf_range_array(ctx, cmap, file, lo, hi);
			/* RJW: "cannot map bfrange" */
		}

		else
		{
			fz_throw(ctx, "expected string or array or endbfrange");
		}
	}
}
Ejemplo n.º 9
0
/*
 * Read bfrange entries from 'file' into 'cmap' until the
 * TOK_END_BF_RANGE token is seen.
 *
 * Each entry is two string tokens (the low and high source codes)
 * followed by the destination, which is either:
 *  - a 2-byte string: the whole range is mapped with
 *    pdf_map_range_to_range(), starting at that value;
 *  - a longer string: it is split into 2-byte units and each code in
 *    the range is mapped one-to-many, with the last unit incremented
 *    once per code;
 *  - an array: handled by pdf_parse_bf_range_array().
 *
 * Throws through file->ctx when a token of an unexpected kind is read.
 */
static void
pdf_parse_bf_range(pdf_cmap *cmap, fz_stream *file)
{
	char buf[256];
	int tok;
	int len;
	int lo, hi, dst;

	while (1)
	{
		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */

		if (tok == TOK_END_BF_RANGE)
			return;

		else if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string or endbfrange");

		lo = pdf_code_from_string(buf, len);

		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */
		if (tok != PDF_TOK_STRING)
			fz_throw(file->ctx, "expected string");

		hi = pdf_code_from_string(buf, len);

		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
		/* RJW: "syntaxerror in cmap" */

		if (tok == PDF_TOK_STRING)
		{
			if (len == 2)
			{
				dst = pdf_code_from_string(buf, len);
				pdf_map_range_to_range(file->ctx, cmap, lo, hi, dst);
			}
			else
			{
				int dststr[256];
				int i;

				/* Strings shorter than one 2-byte unit are ignored. */
				if (len / 2)
				{
					for (i = 0; i < len / 2; i++)
						dststr[i] = pdf_code_from_string(buf + i * 2, 2);

					while (lo <= hi)
					{
						/*
						 * Map first, then advance: 'lo' itself must get
						 * the destination exactly as written, as in the
						 * 2-byte branch above. Incrementing before the
						 * first mapping shifted every entry by one.
						 */
						pdf_map_one_to_many(file->ctx, cmap, lo, dststr, i);
						dststr[i-1] ++;
						lo ++;
					}
				}
			}
		}

		else if (tok == PDF_TOK_OPEN_ARRAY)
		{
			pdf_parse_bf_range_array(cmap, file, lo, hi);
			/* RJW: "cannot map bfrange" */
		}

		else
		{
			fz_throw(file->ctx, "expected string or array or endbfrange");
		}
	}
}