static pdf_font_desc *
load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
{
	pdf_obj *widths;
	pdf_obj *descriptor;
	pdf_font_desc *fontdesc = NULL;
	FT_Face face;
	int kind;
	char collection[256];
	char *basefont;
	int i, k, fterr;
	pdf_obj *obj;
	int dw;
	fz_context *ctx = xref->ctx;

	fz_var(fontdesc);

	fz_try(ctx)
	{
		/* Get font name and CID collection */

		basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));

		{
			pdf_obj *cidinfo;
			char tmpstr[64];
			int tmplen;

			cidinfo = pdf_dict_gets(dict, "CIDSystemInfo");
			if (!cidinfo)
				fz_throw(ctx, "cid font is missing info");

			obj = pdf_dict_gets(cidinfo, "Registry");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcpy(collection, tmpstr, sizeof collection);

			fz_strlcat(collection, "-", sizeof collection);

			obj = pdf_dict_gets(cidinfo, "Ordering");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcat(collection, tmpstr, sizeof collection);
		}

		/* Load font file */

		fontdesc = pdf_new_font_desc(ctx);

		descriptor = pdf_dict_gets(dict, "FontDescriptor");
		if (!descriptor)
			fz_throw(ctx, "syntaxerror: missing font descriptor");
		pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont);

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		if (pdf_is_name(encoding))
		{
			if (!strcmp(pdf_to_name(encoding), "Identity-H"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2);
			else if (!strcmp(pdf_to_name(encoding), "Identity-V"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2);
			else
				fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding));
		}
		else if (pdf_is_indirect(encoding))
		{
			fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding);
		}
		else
		{
			fz_throw(ctx, "syntaxerror: font missing encoding");
		}
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);

		pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));

		if (kind == TRUETYPE)
		{
			pdf_obj *cidtogidmap;

			cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap");
			if (pdf_is_indirect(cidtogidmap))
			{
				fz_buffer *buf;

				buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap));

				fontdesc->cid_to_gid_len = (buf->len) / 2;
				fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short));
				fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
				for (i = 0; i < fontdesc->cid_to_gid_len; i++)
					fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];

				fz_drop_buffer(ctx, buf);
			}

			/* if truetype font is external, cidtogidmap should not be identity */
			/* so we map from cid to unicode and then map that through the (3 1) */
			/* unicode cmap to get a glyph id */
			else if (fontdesc->font->ft_substitute)
			{
				fterr = FT_Select_Charmap(face, ft_encoding_unicode);
				if (fterr)
				{
					fz_throw(ctx, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
				}

				if (!strcmp(collection, "Adobe-CNS1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
				else if (!strcmp(collection, "Adobe-GB1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan2"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
				else if (!strcmp(collection, "Adobe-Korea1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
			}
		}

		pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode);

		/* Horizontal */

		dw = 1000;
		obj = pdf_dict_gets(dict, "DW");
		if (obj)
			dw = pdf_to_int(obj);
		pdf_set_default_hmtx(ctx, fontdesc, dw);

		widths = pdf_dict_gets(dict, "W");
		if (widths)
		{
			int c0, c1, w, n, m;

			n = pdf_array_len(widths);
			for (i = 0; i < n; )
			{
				c0 = pdf_to_int(pdf_array_get(widths, i));
				obj = pdf_array_get(widths, i + 1);
				if (pdf_is_array(obj))
				{
					m = pdf_array_len(obj);
					for (k = 0; k < m; k++)
					{
						w = pdf_to_int(pdf_array_get(obj, k));
						pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
					}
					i += 2;
				}
				else
				{
					c1 = pdf_to_int(obj);
					w = pdf_to_int(pdf_array_get(widths, i + 2));
					pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
					i += 3;
				}
			}
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Vertical */

		if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
		{
			int dw2y = 880;
			int dw2w = -1000;

			obj = pdf_dict_gets(dict, "DW2");
			if (obj)
			{
				dw2y = pdf_to_int(pdf_array_get(obj, 0));
				dw2w = pdf_to_int(pdf_array_get(obj, 1));
			}

			pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);

			widths = pdf_dict_gets(dict, "W2");
			if (widths)
			{
				int c0, c1, w, x, y, n;

				n = pdf_array_len(widths);
				for (i = 0; i < n; )
				{
					c0 = pdf_to_int(pdf_array_get(widths, i));
					obj = pdf_array_get(widths, i + 1);
					if (pdf_is_array(obj))
					{
						int m = pdf_array_len(obj);
						for (k = 0; k * 3 < m; k ++)
						{
							w = pdf_to_int(pdf_array_get(obj, k * 3 + 0));
							x = pdf_to_int(pdf_array_get(obj, k * 3 + 1));
							y = pdf_to_int(pdf_array_get(obj, k * 3 + 2));
							pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
						}
						i += 2;
					}
					else
					{
						c1 = pdf_to_int(obj);
						w = pdf_to_int(pdf_array_get(widths, i + 2));
						x = pdf_to_int(pdf_array_get(widths, i + 3));
						y = pdf_to_int(pdf_array_get(widths, i + 4));
						pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
						i += 5;
					}
				}
			}

			pdf_end_vmtx(ctx, fontdesc);
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_font(ctx, fontdesc);
		fz_throw(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}

	return fontdesc;
}
static pdf_font_desc *
pdf_load_simple_font(pdf_document *xref, pdf_obj *dict)
{
	pdf_obj *descriptor;
	pdf_obj *encoding;
	pdf_obj *widths;
	unsigned short *etable = NULL;
	pdf_font_desc *fontdesc = NULL;
	char *subtype;
	FT_Face face;
	FT_CharMap cmap;
	int symbolic;
	int kind;

	char *basefont;
	char *estrings[256];
	char ebuffer[256][32];
	int i, k, n;
	int fterr;
	fz_context *ctx = xref->ctx;

	fz_var(fontdesc);
	fz_var(etable);

	basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));

	/* Load font file */
	fz_try(ctx)
	{
		fontdesc = pdf_new_font_desc(ctx);

		descriptor = pdf_dict_gets(dict, "FontDescriptor");
		if (descriptor)
			pdf_load_font_descriptor(fontdesc, xref, descriptor, NULL, basefont);
		else
			pdf_load_builtin_font(ctx, fontdesc, basefont);

		/* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
		if (descriptor && pdf_is_string(pdf_dict_gets(descriptor, "FontName")) &&
			!pdf_dict_gets(dict, "ToUnicode") &&
			!strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") &&
			pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4)
		{
			char *cp936fonts[] = {
				"\xCB\xCE\xCC\xE5", "SimSun,Regular",
				"\xBA\xDA\xCC\xE5", "SimHei,Regular",
				"\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
				"\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
				"\xC1\xA5\xCA\xE9", "SimLi,Regular",
				NULL
			};
			for (i = 0; cp936fonts[i]; i += 2)
				if (!strcmp(basefont, cp936fonts[i]))
					break;
			if (cp936fonts[i])
			{
				fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
				pdf_drop_font(ctx, fontdesc);
				fontdesc = pdf_new_font_desc(ctx);
				pdf_load_font_descriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1]);
				fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
				fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");

				face = fontdesc->font->ft_face;
				kind = ft_kind(face);
				goto skip_encoding;
			}
		}

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		symbolic = fontdesc->flags & 4;

		if (face->num_charmaps > 0)
			cmap = face->charmaps[0];
		else
			cmap = NULL;

		for (i = 0; i < face->num_charmaps; i++)
		{
			FT_CharMap test = face->charmaps[i];

			if (kind == TYPE1)
			{
				if (test->platform_id == 7)
					cmap = test;
			}

			if (kind == TRUETYPE)
			{
				if (test->platform_id == 1 && test->encoding_id == 0)
					cmap = test;
				if (test->platform_id == 3 && test->encoding_id == 1)
					cmap = test;
				if (symbolic && test->platform_id == 3 && test->encoding_id == 0)
					cmap = test;
			}
		}

		if (cmap)
		{
			fterr = FT_Set_Charmap(face, cmap);
			if (fterr)
				fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
		}
		else
			fz_warn(ctx, "freetype could not find any cmaps");

		etable = fz_malloc_array(ctx, 256, sizeof(unsigned short));
		fontdesc->size += 256 * sizeof(unsigned short);
		for (i = 0; i < 256; i++)
		{
			estrings[i] = NULL;
			etable[i] = 0;
		}

		encoding = pdf_dict_gets(dict, "Encoding");
		if (encoding)
		{
			if (pdf_is_name(encoding))
				pdf_load_encoding(estrings, pdf_to_name(encoding));

			if (pdf_is_dict(encoding))
			{
				pdf_obj *base, *diff, *item;

				base = pdf_dict_gets(encoding, "BaseEncoding");
				if (pdf_is_name(base))
					pdf_load_encoding(estrings, pdf_to_name(base));
				else if (!fontdesc->is_embedded && !symbolic)
					pdf_load_encoding(estrings, "StandardEncoding");

				diff = pdf_dict_gets(encoding, "Differences");
				if (pdf_is_array(diff))
				{
					n = pdf_array_len(diff);
					k = 0;
					for (i = 0; i < n; i++)
					{
						item = pdf_array_get(diff, i);
						if (pdf_is_int(item))
							k = pdf_to_int(item);
						if (pdf_is_name(item) && k >= 0 && k < nelem(estrings))
							estrings[k++] = pdf_to_name(item);
					}
				}
			}
		}

		/* start with the builtin encoding */
		for (i = 0; i < 256; i++)
			etable[i] = ft_char_index(face, i);

		fz_lock(ctx, FZ_LOCK_FREETYPE);

		/* built-in and substitute fonts may be a different type than what the document expects */
		subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype"));
		if (!strcmp(subtype, "Type1"))
			kind = TYPE1;
		else if (!strcmp(subtype, "MMType1"))
			kind = TYPE1;
		else if (!strcmp(subtype, "TrueType"))
			kind = TRUETYPE;
		else if (!strcmp(subtype, "CIDFontType0"))
			kind = TYPE1;
		else if (!strcmp(subtype, "CIDFontType2"))
			kind = TRUETYPE;

		/* encode by glyph name where we can */
		if (kind == TYPE1)
		{
			for (i = 0; i < 256; i++)
			{
				if (estrings[i])
				{
					etable[i] = FT_Get_Name_Index(face, estrings[i]);
					if (etable[i] == 0)
					{
						int aglcode = pdf_lookup_agl(estrings[i]);
						const char **dupnames = pdf_lookup_agl_duplicates(aglcode);
						while (*dupnames)
						{
							etable[i] = FT_Get_Name_Index(face, (char*)*dupnames);
							if (etable[i])
								break;
							dupnames++;
						}
					}
				}
			}
		}

		/* encode by glyph name where we can */
		if (kind == TRUETYPE)
		{
			/* Unicode cmap */
			if (!symbolic && face->charmap && face->charmap->platform_id == 3)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						int aglcode = pdf_lookup_agl(estrings[i]);
						if (!aglcode)
							etable[i] = FT_Get_Name_Index(face, estrings[i]);
						else
							etable[i] = ft_char_index(face, aglcode);
					}
				}
			}

			/* MacRoman cmap */
			else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						k = lookup_mre_code(estrings[i]);
						if (k <= 0)
							etable[i] = FT_Get_Name_Index(face, estrings[i]);
						else
							etable[i] = ft_char_index(face, k);
					}
				}
			}

			/* Symbolic cmap */
			else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						etable[i] = FT_Get_Name_Index(face, estrings[i]);
						if (etable[i] == 0)
							etable[i] = ft_char_index(face, i);
					}
				}
			}
		}

		/* try to reverse the glyph names from the builtin encoding */
		for (i = 0; i < 256; i++)
		{
			if (etable[i] && !estrings[i])
			{
				if (FT_HAS_GLYPH_NAMES(face))
				{
					fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
					if (fterr)
						fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
					if (ebuffer[i][0])
						estrings[i] = ebuffer[i];
				}
				else
				{
					estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */
				}
			}
		}

		/* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
		if (kind == TYPE1 && symbolic)
		{
			for (i = 0; i < 256; i++)
				if (etable[i] && estrings[i] && !pdf_lookup_agl(estrings[i]))
					estrings[i] = (char*) pdf_standard[i];
		}

		fz_unlock(ctx, FZ_LOCK_FREETYPE);

		fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
		fontdesc->cid_to_gid_len = 256;
		fontdesc->cid_to_gid = etable;

		fz_try(ctx)
		{
			pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode"));
		}
		fz_catch(ctx)
		{
			fz_warn(ctx, "cannot load ToUnicode CMap");
		}

	skip_encoding:

		/* Widths */

		pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);

		widths = pdf_dict_gets(dict, "Widths");
		if (widths)
		{
			int first, last;

			first = pdf_to_int(pdf_dict_gets(dict, "FirstChar"));
			last = pdf_to_int(pdf_dict_gets(dict, "LastChar"));

			if (first < 0 || last > 255 || first > last)
				first = last = 0;

			for (i = 0; i < last - first + 1; i++)
			{
				int wid = pdf_to_int(pdf_array_get(widths, i));
				pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
			}
		}
		else
		{
			fz_lock(ctx, FZ_LOCK_FREETYPE);
			fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72);
			if (fterr)
				fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr));
			for (i = 0; i < 256; i++)
			{
				pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
			}
			fz_unlock(ctx, FZ_LOCK_FREETYPE);
		}

		pdf_end_hmtx(ctx, fontdesc);
	}
	fz_catch(ctx)
	{
		if (fontdesc && etable != fontdesc->cid_to_gid)
			fz_free(ctx, etable);
		pdf_drop_font(ctx, fontdesc);
		fz_throw(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}
	return fontdesc;
}
Example #3
0
pdf_font_desc *
pdf_load_type3_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
{
	char buf[256];
	char *estrings[256];
	pdf_font_desc *fontdesc = NULL;
	pdf_obj *encoding;
	pdf_obj *widths;
	pdf_obj *charprocs;
	pdf_obj *obj;
	int first, last;
	int i, k, n;
	fz_rect bbox;
	fz_matrix matrix;
	fz_font *font;

	fz_var(fontdesc);

	/* Make a new type3 font entry in the document */
	if (doc->num_type3_fonts == doc->max_type3_fonts)
	{
		int new_max = doc->max_type3_fonts * 2;

		if (new_max == 0)
			new_max = 4;
		doc->type3_fonts = fz_resize_array(ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts));
		doc->max_type3_fonts = new_max;
	}

	fz_try(ctx)
	{
		obj = pdf_dict_get(ctx, dict, PDF_NAME_Name);
		if (pdf_is_name(ctx, obj))
			fz_strlcpy(buf, pdf_to_name(ctx, obj), sizeof buf);
		else
			fz_strlcpy(buf, "Unnamed-T3", sizeof buf);

		fontdesc = pdf_new_font_desc(ctx);

		obj = pdf_dict_get(ctx, dict, PDF_NAME_FontMatrix);
		pdf_to_matrix(ctx, obj, &matrix);

		obj = pdf_dict_get(ctx, dict, PDF_NAME_FontBBox);
		fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix);

		font = fz_new_type3_font(ctx, buf, &matrix);
		fontdesc->font = font;
		fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float));

		fz_set_font_bbox(ctx, font, bbox.x0, bbox.y0, bbox.x1, bbox.y1);

		/* Encoding */

		for (i = 0; i < 256; i++)
			estrings[i] = NULL;

		encoding = pdf_dict_get(ctx, dict, PDF_NAME_Encoding);
		if (!encoding)
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding");
		}

		if (pdf_is_name(ctx, encoding))
			pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));

		if (pdf_is_dict(ctx, encoding))
		{
			pdf_obj *base, *diff, *item;

			base = pdf_dict_get(ctx, encoding, PDF_NAME_BaseEncoding);
			if (pdf_is_name(ctx, base))
				pdf_load_encoding(estrings, pdf_to_name(ctx, base));

			diff = pdf_dict_get(ctx, encoding, PDF_NAME_Differences);
			if (pdf_is_array(ctx, diff))
			{
				n = pdf_array_len(ctx, diff);
				k = 0;
				for (i = 0; i < n; i++)
				{
					item = pdf_array_get(ctx, diff, i);
					if (pdf_is_int(ctx, item))
						k = pdf_to_int(ctx, item);
					if (pdf_is_name(ctx, item) && k >= 0 && k < nelem(estrings))
						estrings[k++] = pdf_to_name(ctx, item);
				}
			}
		}

		fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);

		pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME_ToUnicode));

		/* Widths */

		pdf_set_default_hmtx(ctx, fontdesc, 0);

		first = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_FirstChar));
		last = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_LastChar));

		if (first < 0 || last > 255 || first > last)
			first = last = 0;

		widths = pdf_dict_get(ctx, dict, PDF_NAME_Widths);
		if (!widths)
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths");
		}

		for (i = first; i <= last; i++)
		{
			float w = pdf_to_real(ctx, pdf_array_get(ctx, widths, i - first));
			w = font->t3matrix.a * w * 1000;
			font->t3widths[i] = w * 0.001f;
			pdf_add_hmtx(ctx, fontdesc, i, i, w);
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Resources -- inherit page resources if the font doesn't have its own */

		font->t3freeres = pdf_t3_free_resources;
		font->t3resources = pdf_dict_get(ctx, dict, PDF_NAME_Resources);
		if (!font->t3resources)
			font->t3resources = rdb;
		if (font->t3resources)
			pdf_keep_obj(ctx, font->t3resources);
		if (!font->t3resources)
			fz_warn(ctx, "no resource dictionary for type 3 font!");

		font->t3doc = doc;
		font->t3run = pdf_run_glyph_func;

		/* CharProcs */

		charprocs = pdf_dict_get(ctx, dict, PDF_NAME_CharProcs);
		if (!charprocs)
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs");
		}

		for (i = 0; i < 256; i++)
		{
			if (estrings[i])
			{
				obj = pdf_dict_gets(ctx, charprocs, estrings[i]);
				if (pdf_is_stream(ctx, obj))
				{
					font->t3procs[i] = pdf_load_stream(ctx, obj);
					fz_trim_buffer(ctx, font->t3procs[i]);
					fontdesc->size += fz_buffer_storage(ctx, font->t3procs[i], NULL);
					fontdesc->size += 0; // TODO: display list size calculation
				}
			}
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_font(ctx, fontdesc);
		fz_rethrow(ctx);
	}

	doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, font);

	return fontdesc;
}
Example #4
0
pdf_font_desc *
pdf_load_type3_font(pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
{
	char buf[256];
	char *estrings[256];
	pdf_font_desc *fontdesc = NULL;
	pdf_obj *encoding;
	pdf_obj *widths;
	pdf_obj *charprocs;
	pdf_obj *obj;
	int first, last;
	int i, k, n;
	fz_rect bbox;
	fz_matrix matrix;
	fz_context *ctx = doc->ctx;

	fz_var(fontdesc);

	/* Make a new type3 font entry in the document */
	if (doc->num_type3_fonts == doc->max_type3_fonts)
	{
		int new_max = doc->max_type3_fonts * 2;

		if (new_max == 0)
			new_max = 4;
		doc->type3_fonts = fz_resize_array(doc->ctx, doc->type3_fonts, new_max, sizeof(*doc->type3_fonts));
		doc->max_type3_fonts = new_max;
	}

	fz_try(ctx)
	{
		obj = pdf_dict_gets(dict, "Name");
		if (pdf_is_name(obj))
			fz_strlcpy(buf, pdf_to_name(obj), sizeof buf);
		else
			sprintf(buf, "Unnamed-T3");

		fontdesc = pdf_new_font_desc(ctx);

		obj = pdf_dict_gets(dict, "FontMatrix");
		pdf_to_matrix(ctx, obj, &matrix);

		obj = pdf_dict_gets(dict, "FontBBox");
		fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix);

		fontdesc->font = fz_new_type3_font(ctx, buf, &matrix);
		fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float));

		fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1);

		/* SumatraPDF: expose Type3 FontDescriptor flags */
		fontdesc->flags = pdf_to_int(pdf_dict_gets(pdf_dict_gets(dict, "FontDescriptor"), "Flags"));

		/* Encoding */

		for (i = 0; i < 256; i++)
			estrings[i] = NULL;

		encoding = pdf_dict_gets(dict, "Encoding");
		if (!encoding)
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Encoding");
		}

		if (pdf_is_name(encoding))
			pdf_load_encoding(estrings, pdf_to_name(encoding));

		if (pdf_is_dict(encoding))
		{
			pdf_obj *base, *diff, *item;

			base = pdf_dict_gets(encoding, "BaseEncoding");
			if (pdf_is_name(base))
				pdf_load_encoding(estrings, pdf_to_name(base));

			diff = pdf_dict_gets(encoding, "Differences");
			if (pdf_is_array(diff))
			{
				n = pdf_array_len(diff);
				k = 0;
				for (i = 0; i < n; i++)
				{
					item = pdf_array_get(diff, i);
					if (pdf_is_int(item))
						k = pdf_to_int(item);
					if (pdf_is_name(item) && k >= 0 && k < nelem(estrings))
						estrings[k++] = pdf_to_name(item);
				}
			}
		}

		fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);

		pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode"));

		/* SumatraPDF: trying to match Adobe Reader's behavior */
		if (!(fontdesc->flags & PDF_FD_SYMBOLIC) && fontdesc->cid_to_ucs_len >= 128)
			for (i = 32; i < 128; i++)
				if (fontdesc->cid_to_ucs[i] == '?' || fontdesc->cid_to_ucs[i] == '\0')
					fontdesc->cid_to_ucs[i] = i;

		/* Widths */

		pdf_set_default_hmtx(ctx, fontdesc, 0);

		first = pdf_to_int(pdf_dict_gets(dict, "FirstChar"));
		last = pdf_to_int(pdf_dict_gets(dict, "LastChar"));

		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1966 */
		if (first >= 256 && last - first < 256)
		{
			fz_warn(ctx, "ignoring out-of-bound values for FirstChar/LastChar: %d/%d", first, last);
			last -= first;
			first = 0;
		}

		if (first < 0 || last > 255 || first > last)
			first = last = 0;

		widths = pdf_dict_gets(dict, "Widths");
		if (!widths)
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing Widths");
		}

		for (i = first; i <= last; i++)
		{
			float w = pdf_to_real(pdf_array_get(widths, i - first));
			w = fontdesc->font->t3matrix.a * w * 1000;
			fontdesc->font->t3widths[i] = w * 0.001f;
			pdf_add_hmtx(ctx, fontdesc, i, i, w);
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Resources -- inherit page resources if the font doesn't have its own */

		fontdesc->font->t3freeres = pdf_t3_free_resources;
		fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources");
		if (!fontdesc->font->t3resources)
			fontdesc->font->t3resources = rdb;
		if (fontdesc->font->t3resources)
			pdf_keep_obj(fontdesc->font->t3resources);
		if (!fontdesc->font->t3resources)
			fz_warn(ctx, "no resource dictionary for type 3 font!");

		fontdesc->font->t3doc = doc;
		fontdesc->font->t3run = pdf_run_glyph_func;

		/* CharProcs */

		charprocs = pdf_dict_gets(dict, "CharProcs");
		if (!charprocs)
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: Type3 font missing CharProcs");
		}

		for (i = 0; i < 256; i++)
		{
			if (estrings[i])
			{
				/* SumatraPDF: don't reject fonts with few broken glyphs */
				fz_try(ctx)
				{

				obj = pdf_dict_gets(charprocs, estrings[i]);
				if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)))
				{
					fontdesc->font->t3procs[i] = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj));
					fontdesc->size += fontdesc->font->t3procs[i]->cap;
					fontdesc->size += 0; // TODO: display list size calculation
				}

				}
				fz_catch(ctx)
				{
					fz_warn(ctx, "failed to get data for type 3 glyph '%s'", estrings[i]);
				}
			}
		}
	}
	fz_catch(ctx)
	{
		if (fontdesc)
			pdf_drop_font(ctx, fontdesc);
		fz_rethrow_message(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}

	doc->type3_fonts[doc->num_type3_fonts++] = fz_keep_font(ctx, fontdesc->font);

	return fontdesc;
}
static pdf_font_desc *
load_cid_font(pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
{
	pdf_obj *widths;
	pdf_obj *descriptor;
	pdf_font_desc *fontdesc = NULL;
	FT_Face face;
	int kind;
	char collection[256];
	char *basefont;
	int i, k, fterr;
	pdf_obj *obj;
	int dw;
	fz_context *ctx = doc->ctx;

	fz_var(fontdesc);

	fz_try(ctx)
	{
		/* Get font name and CID collection */

		basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));

		{
			pdf_obj *cidinfo;
			char tmpstr[64];
			int tmplen;

			cidinfo = pdf_dict_gets(dict, "CIDSystemInfo");
			if (!cidinfo)
				fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info");

			obj = pdf_dict_gets(cidinfo, "Registry");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcpy(collection, tmpstr, sizeof collection);

			fz_strlcat(collection, "-", sizeof collection);

			obj = pdf_dict_gets(cidinfo, "Ordering");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcat(collection, tmpstr, sizeof collection);
		}

		/* Load font file */

		fontdesc = pdf_new_font_desc(ctx);

		descriptor = pdf_dict_gets(dict, "FontDescriptor");
		if (!descriptor)
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor");
		pdf_load_font_descriptor(fontdesc, doc, descriptor, collection, basefont, 1, 1);

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		if (pdf_is_name(encoding))
		{
			if (!strcmp(pdf_to_name(encoding), "Identity-H"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2);
			else if (!strcmp(pdf_to_name(encoding), "Identity-V"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2);
			else
				fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding));
		}
		else if (pdf_is_indirect(encoding))
		{
			fontdesc->encoding = pdf_load_embedded_cmap(doc, encoding);
		}
		else
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding");
		}
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);

		pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));

		if (kind == TRUETYPE ||
			/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1565 */
			!strcmp(pdf_to_name(pdf_dict_gets(dict, "Subtype")), "CIDFontType2") ||
			/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1997 */
			pdf_is_indirect(pdf_dict_gets(dict, "CIDToGIDMap")))
		{
			pdf_obj *cidtogidmap;

			cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap");
			if (pdf_is_indirect(cidtogidmap))
			{
				fz_buffer *buf;

				buf = pdf_load_stream(doc, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap));

				fontdesc->cid_to_gid_len = (buf->len) / 2;
				fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short));
				fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
				for (i = 0; i < fontdesc->cid_to_gid_len; i++)
					fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];

				fz_drop_buffer(ctx, buf);
			}

			/* if truetype font is external, cidtogidmap should not be identity */
			/* so we map from cid to unicode and then map that through the (3 1) */
			/* unicode cmap to get a glyph id */
			else if (fontdesc->font->ft_substitute)
			{
				fterr = FT_Select_Charmap(face, ft_encoding_unicode);
				if (fterr)
				{
					fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
				}

				if (!strcmp(collection, "Adobe-CNS1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
				else if (!strcmp(collection, "Adobe-GB1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan2"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
				else if (!strcmp(collection, "Adobe-Korea1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
				/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2318 */
				else if (!strcmp(collection, "Adobe-Identity") && fontdesc->font->ft_file)
					fontdesc->font->ft_substitute = 0;
			}
		}

		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1961 */
		fz_try(ctx)
		{

		pdf_load_to_unicode(doc, fontdesc, NULL, collection, to_unicode);

		}
		fz_catch(ctx)
		{
			fz_warn(ctx, "cannot load ToUnicode CMap");
		}

		/* If we have an identity encoding, we're supposed to use the glyph ids directly.
		 * If we only have a substitute font, that won't work.
		 * Make a last ditch attempt by using
		 * the ToUnicode table if it exists to map via the substitute font's cmap. */
		if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute)
		{
			fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
			if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap)
				fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
		}

		/* Horizontal */

		dw = 1000;
		obj = pdf_dict_gets(dict, "DW");
		if (obj)
			dw = pdf_to_int(obj);
		pdf_set_default_hmtx(ctx, fontdesc, dw);

		widths = pdf_dict_gets(dict, "W");
		if (widths)
		{
			int c0, c1, w, n, m;

			n = pdf_array_len(widths);
			for (i = 0; i < n; )
			{
				c0 = pdf_to_int(pdf_array_get(widths, i));
				obj = pdf_array_get(widths, i + 1);
				if (pdf_is_array(obj))
				{
					m = pdf_array_len(obj);
					for (k = 0; k < m; k++)
					{
						w = pdf_to_int(pdf_array_get(obj, k));
						pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
					}
					i += 2;
				}
				else
				{
					c1 = pdf_to_int(obj);
					w = pdf_to_int(pdf_array_get(widths, i + 2));
					pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
					i += 3;
				}
			}
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Vertical */

		if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
		{
			int dw2y = 880;
			int dw2w = -1000;

			obj = pdf_dict_gets(dict, "DW2");
			if (obj)
			{
				dw2y = pdf_to_int(pdf_array_get(obj, 0));
				dw2w = pdf_to_int(pdf_array_get(obj, 1));
			}

			pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);

			widths = pdf_dict_gets(dict, "W2");
			if (widths)
			{
				int c0, c1, w, x, y, n;

				n = pdf_array_len(widths);
				for (i = 0; i < n; )
				{
					c0 = pdf_to_int(pdf_array_get(widths, i));
					obj = pdf_array_get(widths, i + 1);
					if (pdf_is_array(obj))
					{
						int m = pdf_array_len(obj);
						for (k = 0; k * 3 < m; k ++)
						{
							w = pdf_to_int(pdf_array_get(obj, k * 3 + 0));
							x = pdf_to_int(pdf_array_get(obj, k * 3 + 1));
							y = pdf_to_int(pdf_array_get(obj, k * 3 + 2));
							pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
						}
						i += 2;
					}
					else
					{
						c1 = pdf_to_int(obj);
						w = pdf_to_int(pdf_array_get(widths, i + 2));
						x = pdf_to_int(pdf_array_get(widths, i + 3));
						y = pdf_to_int(pdf_array_get(widths, i + 4));
						pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
						i += 5;
					}
				}
			}

			pdf_end_vmtx(ctx, fontdesc);
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_font(ctx, fontdesc);
		fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}

	return fontdesc;
}
Example #6
0
pdf_font_desc *
pdf_load_type3_font(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict)
{
	char buf[256];
	char *estrings[256];
	pdf_font_desc *fontdesc = NULL;
	pdf_obj *encoding;
	pdf_obj *widths;
	pdf_obj *charprocs;
	pdf_obj *obj;
	int first, last;
	int i, k, n;
	fz_rect bbox;
	fz_matrix matrix;
	fz_context *ctx = xref->ctx;

	fz_var(fontdesc);

	fz_try(ctx)
	{
		obj = pdf_dict_gets(dict, "Name");
		if (pdf_is_name(obj))
			fz_strlcpy(buf, pdf_to_name(obj), sizeof buf);
		else
			sprintf(buf, "Unnamed-T3");

		fontdesc = pdf_new_font_desc(ctx);

		obj = pdf_dict_gets(dict, "FontMatrix");
		pdf_to_matrix(ctx, obj, &matrix);

		obj = pdf_dict_gets(dict, "FontBBox");
		fz_transform_rect(pdf_to_rect(ctx, obj, &bbox), &matrix);

		fontdesc->font = fz_new_type3_font(ctx, buf, &matrix);
		fontdesc->size += sizeof(fz_font) + 256 * (sizeof(fz_buffer*) + sizeof(float));

		fz_set_font_bbox(ctx, fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1);

		/* Encoding */

		for (i = 0; i < 256; i++)
			estrings[i] = NULL;

		encoding = pdf_dict_gets(dict, "Encoding");
		if (!encoding)
		{
			fz_throw(ctx, "syntaxerror: Type3 font missing Encoding");
		}

		if (pdf_is_name(encoding))
			pdf_load_encoding(estrings, pdf_to_name(encoding));

		if (pdf_is_dict(encoding))
		{
			pdf_obj *base, *diff, *item;

			base = pdf_dict_gets(encoding, "BaseEncoding");
			if (pdf_is_name(base))
				pdf_load_encoding(estrings, pdf_to_name(base));

			diff = pdf_dict_gets(encoding, "Differences");
			if (pdf_is_array(diff))
			{
				n = pdf_array_len(diff);
				k = 0;
				for (i = 0; i < n; i++)
				{
					item = pdf_array_get(diff, i);
					if (pdf_is_int(item))
						k = pdf_to_int(item);
					if (pdf_is_name(item) && k >= 0 && k < nelem(estrings))
						estrings[k++] = pdf_to_name(item);
				}
			}
		}

		fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);

		pdf_load_to_unicode(xref, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode"));

		/* Widths */

		pdf_set_default_hmtx(ctx, fontdesc, 0);

		first = pdf_to_int(pdf_dict_gets(dict, "FirstChar"));
		last = pdf_to_int(pdf_dict_gets(dict, "LastChar"));

		if (first < 0 || last > 255 || first > last)
			first = last = 0;

		widths = pdf_dict_gets(dict, "Widths");
		if (!widths)
		{
			fz_throw(ctx, "syntaxerror: Type3 font missing Widths");
		}

		for (i = first; i <= last; i++)
		{
			float w = pdf_to_real(pdf_array_get(widths, i - first));
			w = fontdesc->font->t3matrix.a * w * 1000;
			fontdesc->font->t3widths[i] = w * 0.001f;
			pdf_add_hmtx(ctx, fontdesc, i, i, w);
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Resources -- inherit page resources if the font doesn't have its own */

		fontdesc->font->t3freeres = pdf_t3_free_resources;
		fontdesc->font->t3resources = pdf_dict_gets(dict, "Resources");
		if (!fontdesc->font->t3resources)
			fontdesc->font->t3resources = rdb;
		if (fontdesc->font->t3resources)
			pdf_keep_obj(fontdesc->font->t3resources);
		if (!fontdesc->font->t3resources)
			fz_warn(ctx, "no resource dictionary for type 3 font!");

		fontdesc->font->t3doc = xref;
		fontdesc->font->t3run = pdf_run_glyph_func;

		/* CharProcs */

		charprocs = pdf_dict_gets(dict, "CharProcs");
		if (!charprocs)
		{
			fz_throw(ctx, "syntaxerror: Type3 font missing CharProcs");
		}

		for (i = 0; i < 256; i++)
		{
			if (estrings[i])
			{
				obj = pdf_dict_gets(charprocs, estrings[i]);
				if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)))
				{
					fontdesc->font->t3procs[i] = pdf_load_stream(xref, pdf_to_num(obj), pdf_to_gen(obj));
					fontdesc->size += fontdesc->font->t3procs[i]->cap;
					fontdesc->size += 0; // TODO: display list size calculation
				}
			}
		}
	}
	fz_catch(ctx)
	{
		if (fontdesc)
			pdf_drop_font(ctx, fontdesc);
		fz_throw(ctx, "cannot load type3 font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}
	return fontdesc;
}