Пример #1
0
/*
 * Decode a PDF string object into a newly allocated, zero-terminated
 * array of 16-bit code units.
 *
 * Input starting with the bytes FE FF is read as big-endian 16-bit units,
 * input starting with FF FE as little-endian 16-bit units; anything else is
 * mapped byte by byte through the pdf_doc_encoding table.
 * The result is allocated with fz_malloc_array.
 */
unsigned short *
pdf_to_ucs2(fz_context *ctx, pdf_obj *src)
{
    unsigned char *in = (unsigned char *) pdf_to_str_buf(src);
    unsigned short *out, *start;
    int len = pdf_to_str_len(src);
    int pos;

    if (len >= 2 && ((in[0] == 254 && in[1] == 255) || (in[0] == 255 && in[1] == 254)))
    {
        /* Offsets of the high and low byte inside each 16-bit unit. */
        int hi = (in[0] == 254) ? 0 : 1;
        int lo = 1 - hi;

        out = start = fz_malloc_array(ctx, (len - 2) / 2 + 1, sizeof(short));
        /* Skip the two marker bytes; a trailing odd byte is ignored. */
        for (pos = 2; pos + 1 < len; pos += 2)
            *out++ = in[pos + hi] << 8 | in[pos + lo];
    }
    else
    {
        /* One output unit per input byte, plus the terminator. */
        out = start = fz_malloc_array(ctx, len + 1, sizeof(short));
        for (pos = 0; pos < len; pos++)
            *out++ = pdf_doc_encoding[in[pos]];
    }

    *out = '\0';
    return start;
}
Пример #2
0
/*
 * Decode a PDF string object into a caller-supplied array of 16-bit code
 * units and zero-terminate it.
 *
 * The buffer must have room for at least (pdf_to_str_len(src) + 1) units,
 * i.e. (pdf_to_str_len(src) + 1) * 2 bytes.
 * Input starting with FE FF / FF FE is read as big-/little-endian 16-bit
 * units; anything else is mapped through the pdf_doc_encoding table.
 */
void
pdf_to_ucs2_buf(unsigned short *buffer, pdf_obj *src)
{
    unsigned char *in = (unsigned char *) pdf_to_str_buf(src);
    unsigned short *out = buffer;
    int len = pdf_to_str_len(src);
    int pos;

    if (len >= 2 && ((in[0] == 254 && in[1] == 255) || (in[0] == 255 && in[1] == 254)))
    {
        /* Offsets of the high and low byte inside each 16-bit unit. */
        int hi = (in[0] == 254) ? 0 : 1;
        int lo = 1 - hi;

        /* Skip the two marker bytes; a trailing odd byte is ignored. */
        for (pos = 2; pos + 1 < len; pos += 2)
            *out++ = in[pos + hi] << 8 | in[pos + lo];
    }
    else
    {
        for (pos = 0; pos < len; pos++)
            *out++ = pdf_doc_encoding[in[pos]];
    }

    *out = '\0';
}
Пример #3
0
/*
 * Decode a PDF string object into a newly allocated, zero-terminated
 * UTF-8 string.
 *
 * Input starting with FE FF / FF FE is read as big-/little-endian 16-bit
 * units; anything else is mapped byte by byte through pdf_doc_encoding.
 * Each branch makes two passes: one to measure the output with fz_runelen,
 * one to write it with fz_runetochar. The result is allocated with fz_malloc.
 */
char *
pdf_to_utf8(fz_context *ctx, pdf_obj *src)
{
    unsigned char *in = (unsigned char *) pdf_to_str_buf(src);
    char *out, *start;
    int len = pdf_to_str_len(src);
    int need = 0;
    int rune;
    int pos;

    if (len >= 2 && ((in[0] == 254 && in[1] == 255) || (in[0] == 255 && in[1] == 254)))
    {
        /* Offsets of the high and low byte inside each 16-bit unit. */
        int hi = (in[0] == 254) ? 0 : 1;
        int lo = 1 - hi;

        /* Pass 1: measure. */
        for (pos = 2; pos + 1 < len; pos += 2)
        {
            rune = in[pos + hi] << 8 | in[pos + lo];
            need += fz_runelen(rune);
        }

        out = start = fz_malloc(ctx, need + 1);

        /* Pass 2: encode. */
        for (pos = 2; pos + 1 < len; pos += 2)
        {
            rune = in[pos + hi] << 8 | in[pos + lo];
            out += fz_runetochar(out, rune);
        }
    }
    else
    {
        /* Pass 1: measure. */
        for (pos = 0; pos < len; pos++)
            need += fz_runelen(pdf_doc_encoding[in[pos]]);

        out = start = fz_malloc(ctx, need + 1);

        /* Pass 2: encode. */
        for (pos = 0; pos < len; pos++)
        {
            rune = pdf_doc_encoding[in[pos]];
            out += fz_runetochar(out, rune);
        }
    }

    *out = '\0';
    return start;
}
Пример #4
0
/*
 * Build an Indexed colorspace from a PDF colorspace array.
 *
 * Elements 1, 2 and 3 of the array supply the base colorspace, the highest
 * index value and the lookup table (a string or an indirect stream).
 *
 * NOTE(review): the end of this function (catch handler, return) is not
 * visible in this excerpt.
 */
static fz_colorspace *
load_indexed(pdf_document *doc, pdf_obj *array)
{
	fz_context *ctx = doc->ctx;
	pdf_obj *baseobj = pdf_array_get(array, 1);
	pdf_obj *highobj = pdf_array_get(array, 2);
	pdf_obj *lookupobj = pdf_array_get(array, 3);
	fz_colorspace *base = NULL;
	fz_colorspace *cs;
	int i, n, high;
	unsigned char *lookup = NULL;

	fz_var(base);
	fz_var(lookup);

	fz_try(ctx)
	{
		base = pdf_load_colorspace(doc, baseobj);

		/* The highest index is clamped to 0..255. */
		high = pdf_to_int(highobj);
		high = fz_clampi(high, 0, 255);
		/* The table holds base->n bytes for each of the (high + 1) entries. */
		n = base->n * (high + 1);
		lookup = fz_malloc_array(ctx, 1, n);

		/* A string table must supply at least n bytes; extra bytes are ignored. */
		if (pdf_is_string(lookupobj) && pdf_to_str_len(lookupobj) >= n)
		{
			unsigned char *buf = (unsigned char *) pdf_to_str_buf(lookupobj);
			for (i = 0; i < n; i++)
				lookup[i] = buf[i];
		}
		else if (pdf_is_indirect(lookupobj))
		{
			fz_stream *file = NULL;

			fz_var(file);

			fz_try(ctx)
			{
				file = pdf_open_stream(doc, pdf_to_num(lookupobj), pdf_to_gen(lookupobj));
				/* A short read is tolerated: the rest of the table is zero-filled. */
				i = fz_read(file, lookup, n);
				if (i < n)
					memset(lookup+i, 0, n-i);
			}
			fz_always(ctx)
			{
				fz_close(file);
			}
			fz_catch(ctx)
			{
				fz_rethrow_message(ctx, "cannot open colorspace lookup table (%d 0 R)", pdf_to_num(lookupobj));
			}
		}
		else
		{
			/* NOTE(review): this branch is not inside a catch handler, yet it
			 * calls fz_rethrow_message; confirm whether fz_throw was intended. */
			fz_rethrow_message(ctx, "cannot parse colorspace lookup table");
		}

		cs = fz_new_indexed_colorspace(ctx, base, high, lookup);
	}
Пример #5
0
/*
 * Print a PDF object to stderr.
 *
 * A NULL object prints the fallback text dflt; a string object is printed
 * through safe_print_pdf_string; any other object goes through
 * pdf_print_obj with 1 as its last argument.
 */
static void
safe_print_pdf_obj(fz_context *ctx, pdf_obj *obj, const char *dflt)
{
	if (obj == NULL)
	{
		fprintf(stderr, "%s", dflt);
		return;
	}

	if (!pdf_is_string(ctx, obj))
	{
		pdf_print_obj(ctx, fz_stderr(ctx), obj, 1);
		return;
	}

	safe_print_pdf_string(ctx, (unsigned char *)pdf_to_str_buf(ctx, obj), pdf_to_str_len(ctx, obj));
}
Пример #6
0
/*
 * Look up the entry `name` (a path understood by pdf_dict_getp) in `obj`
 * and return it as a wide string, or nullptr when the entry is missing.
 *
 * The lookup result is checked before anything is allocated; previously a
 * buffer was allocated even for a missing entry and then leaked when
 * nullptr was returned.
 *
 * NOTE(review): the cast to unsigned short * assumes wchar_t is 16 bits
 * wide (as on Windows) - confirm for other targets.
 * NOTE(review): ownership of `buf` after pdf_clean_string() is not visible
 * here; this keeps the original hand-off unchanged.
 */
static wchar_t *get_prop_str(fz_context* ctx, pdf_obj *obj, const char *name)
{
	pdf_obj* p_obj = pdf_dict_getp(ctx, obj, name);
	if (!p_obj)
		return nullptr;

	/* One unit per source byte is the upper bound, plus the terminator. */
	int len = pdf_to_str_len(ctx, p_obj) + 1;

	wchar_t *buf = AllocArray<wchar_t>(len);
	if (!buf)
		return nullptr;
	pdf_to_ucs2_buf(ctx, (unsigned short *)buf, p_obj);

	return pdf_clean_string(buf);
}
Пример #7
0
/*
 * Return a newly allocated, zero-terminated copy of the bytes held by obj.
 *
 * obj may be a string object or a stream; for any other object NULL is
 * returned. The copy is allocated with fz_malloc. If an error is thrown
 * the partial copy is freed and the error is rethrown; a loaded stream
 * buffer is dropped on every path.
 */
static char *get_string_or_stream(pdf_document *doc, pdf_obj *obj)
{
	fz_context *ctx = doc->ctx;
	fz_buffer *stream_data = NULL;
	char *bytes = NULL;
	char *result = NULL;
	int nbytes = 0;

	fz_var(stream_data);
	fz_var(result);
	fz_try(ctx)
	{
		if (pdf_is_string(obj))
		{
			nbytes = pdf_to_str_len(obj);
			bytes = pdf_to_str_buf(obj);
		}
		else if (pdf_is_stream(doc, pdf_to_num(obj), pdf_to_gen(obj)))
		{
			stream_data = pdf_load_stream(doc, pdf_to_num(obj), pdf_to_gen(obj));
			nbytes = fz_buffer_storage(ctx, stream_data, (unsigned char **)&bytes);
		}

		/* Neither a string nor a stream: leave the result NULL. */
		if (bytes != NULL)
		{
			result = fz_malloc(ctx, nbytes + 1);
			memcpy(result, bytes, nbytes);
			result[nbytes] = 0;
		}
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, stream_data);
	}
	fz_catch(ctx)
	{
		fz_free(ctx, result);
		fz_rethrow(ctx);
	}

	return result;
}
Пример #8
0
/*
 * Load a CID-keyed font from its font dictionary.
 *
 * dict       - the font dictionary; BaseFont, CIDSystemInfo, FontDescriptor,
 *              CIDToGIDMap, DW, W, DW2 and W2 are read from it.
 * encoding   - either a CMap name or an indirect reference to an embedded CMap.
 * to_unicode - handed to pdf_load_to_unicode.
 *
 * Returns the new font descriptor. On any error the partially built
 * descriptor is dropped and a "cannot load cid font" error is thrown.
 */
static pdf_font_desc *
load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
{
	pdf_obj *widths;
	pdf_obj *descriptor;
	pdf_font_desc *fontdesc = NULL;
	FT_Face face;
	int kind;
	char collection[256];
	char *basefont;
	int i, k, fterr;
	pdf_obj *obj;
	int dw;
	fz_context *ctx = xref->ctx;

	fz_var(fontdesc);

	fz_try(ctx)
	{
		/* Get font name and CID collection */

		basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));

		{
			pdf_obj *cidinfo;
			char tmpstr[64];
			int tmplen;

			cidinfo = pdf_dict_gets(dict, "CIDSystemInfo");
			if (!cidinfo)
				fz_throw(ctx, "cid font is missing info");

			/* collection = "<Registry>-<Ordering>", each part truncated to fit tmpstr */
			obj = pdf_dict_gets(cidinfo, "Registry");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcpy(collection, tmpstr, sizeof collection);

			fz_strlcat(collection, "-", sizeof collection);

			obj = pdf_dict_gets(cidinfo, "Ordering");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcat(collection, tmpstr, sizeof collection);
		}

		/* Load font file */

		fontdesc = pdf_new_font_desc(ctx);

		descriptor = pdf_dict_gets(dict, "FontDescriptor");
		if (!descriptor)
			fz_throw(ctx, "syntaxerror: missing font descriptor");
		pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont);

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		if (pdf_is_name(encoding))
		{
			if (!strcmp(pdf_to_name(encoding), "Identity-H"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2);
			else if (!strcmp(pdf_to_name(encoding), "Identity-V"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2);
			else
				fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding));
		}
		else if (pdf_is_indirect(encoding))
		{
			fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding);
		}
		else
		{
			fz_throw(ctx, "syntaxerror: font missing encoding");
		}
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);

		pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));

		if (kind == TRUETYPE)
		{
			pdf_obj *cidtogidmap;

			cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap");
			if (pdf_is_indirect(cidtogidmap))
			{
				fz_buffer *buf;

				buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap));

				/* The stream buffer must be dropped even when the table
				 * allocation below throws, otherwise it leaks. */
				fz_try(ctx)
				{
					/* The stream is a sequence of big-endian 16-bit glyph ids. */
					fontdesc->cid_to_gid_len = (buf->len) / 2;
					fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short));
					fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
					for (i = 0; i < fontdesc->cid_to_gid_len; i++)
						fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];
				}
				fz_always(ctx)
				{
					fz_drop_buffer(ctx, buf);
				}
				fz_catch(ctx)
				{
					fz_rethrow(ctx);
				}
			}

			/* if truetype font is external, cidtogidmap should not be identity */
			/* so we map from cid to unicode and then map that through the (3 1) */
			/* unicode cmap to get a glyph id */
			else if (fontdesc->font->ft_substitute)
			{
				fterr = FT_Select_Charmap(face, ft_encoding_unicode);
				if (fterr)
				{
					fz_throw(ctx, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
				}

				if (!strcmp(collection, "Adobe-CNS1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
				else if (!strcmp(collection, "Adobe-GB1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan2"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
				else if (!strcmp(collection, "Adobe-Korea1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
			}
		}

		pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode);

		/* Horizontal */

		dw = 1000;
		obj = pdf_dict_gets(dict, "DW");
		if (obj)
			dw = pdf_to_int(obj);
		pdf_set_default_hmtx(ctx, fontdesc, dw);

		widths = pdf_dict_gets(dict, "W");
		if (widths)
		{
			int c0, c1, w, n, m;

			/* Entries are either "c0 [w0 w1 ...]" (2 items) or "c0 c1 w" (3 items). */
			n = pdf_array_len(widths);
			for (i = 0; i < n; )
			{
				c0 = pdf_to_int(pdf_array_get(widths, i));
				obj = pdf_array_get(widths, i + 1);
				if (pdf_is_array(obj))
				{
					m = pdf_array_len(obj);
					for (k = 0; k < m; k++)
					{
						w = pdf_to_int(pdf_array_get(obj, k));
						pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
					}
					i += 2;
				}
				else
				{
					c1 = pdf_to_int(obj);
					w = pdf_to_int(pdf_array_get(widths, i + 2));
					pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
					i += 3;
				}
			}
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Vertical */

		if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
		{
			int dw2y = 880;
			int dw2w = -1000;

			obj = pdf_dict_gets(dict, "DW2");
			if (obj)
			{
				dw2y = pdf_to_int(pdf_array_get(obj, 0));
				dw2w = pdf_to_int(pdf_array_get(obj, 1));
			}

			pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);

			widths = pdf_dict_gets(dict, "W2");
			if (widths)
			{
				int c0, c1, w, x, y, n;

				/* Entries are either "c0 [w x y ...]" (2 items) or "c0 c1 w x y" (5 items). */
				n = pdf_array_len(widths);
				for (i = 0; i < n; )
				{
					c0 = pdf_to_int(pdf_array_get(widths, i));
					obj = pdf_array_get(widths, i + 1);
					if (pdf_is_array(obj))
					{
						int m = pdf_array_len(obj);
						for (k = 0; k * 3 < m; k ++)
						{
							w = pdf_to_int(pdf_array_get(obj, k * 3 + 0));
							x = pdf_to_int(pdf_array_get(obj, k * 3 + 1));
							y = pdf_to_int(pdf_array_get(obj, k * 3 + 2));
							pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
						}
						i += 2;
					}
					else
					{
						c1 = pdf_to_int(obj);
						w = pdf_to_int(pdf_array_get(widths, i + 2));
						x = pdf_to_int(pdf_array_get(widths, i + 3));
						y = pdf_to_int(pdf_array_get(widths, i + 4));
						pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
						i += 5;
					}
				}
			}

			pdf_end_vmtx(ctx, fontdesc);
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_font(ctx, fontdesc);
		fz_throw(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}

	return fontdesc;
}
Пример #9
0
/*
 * Build an "Indexed" colorspace from a PDF colorspace array.
 *
 * Elements 1, 2 and 3 of the array supply the base colorspace, the highest
 * index value and the lookup table (a string or an indirect stream).
 *
 * NOTE(review): the end of this function (catch handler, return) is not
 * visible in this excerpt.
 */
static fz_colorspace *
load_indexed(pdf_document *xref, pdf_obj *array)
{
	struct indexed *idx = NULL;
	fz_context *ctx = xref->ctx;
	pdf_obj *baseobj = pdf_array_get(array, 1);
	pdf_obj *highobj = pdf_array_get(array, 2);
	pdf_obj *lookup = pdf_array_get(array, 3);
	fz_colorspace *base = NULL;
	fz_colorspace *cs = NULL;
	int i, n;

	fz_var(idx);
	fz_var(base);
	fz_var(cs);

	fz_try(ctx)
	{
		base = pdf_load_colorspace(xref, baseobj);
		/* "cannot load base colorspace (%d %d R)", pdf_to_num(baseobj), pdf_to_gen(baseobj) */

		idx = fz_malloc_struct(ctx, struct indexed);
		idx->lookup = NULL;
		idx->base = base;
		/* The highest index is clamped to 0..255. */
		idx->high = pdf_to_int(highobj);
		idx->high = CLAMP(idx->high, 0, 255);
		/* The table holds base->n bytes for each of the (high + 1) entries. */
		n = base->n * (idx->high + 1);
		idx->lookup = fz_malloc_array(ctx, 1, n);

		/* The new colorspace takes idx as its private data; free_indexed is
		 * installed as the hook that releases it. */
		cs = fz_new_colorspace(ctx, "Indexed", 1);
		cs->to_rgb = indexed_to_rgb;
		cs->free_data = free_indexed;
		cs->data = idx;
		cs->size += sizeof(*idx) + n + (base ? base->size : 0);

		/* A string table is accepted only when it is exactly n bytes long. */
		if (pdf_is_string(lookup) && pdf_to_str_len(lookup) == n)
		{
			unsigned char *buf = (unsigned char *) pdf_to_str_buf(lookup);
			for (i = 0; i < n; i++)
				idx->lookup[i] = buf[i];
		}
		else if (pdf_is_indirect(lookup))
		{
			fz_stream *file = NULL;

			fz_try(ctx)
			{
				file = pdf_open_stream(xref, pdf_to_num(lookup), pdf_to_gen(lookup));
			}
			fz_catch(ctx)
			{
				fz_throw(ctx, "cannot open colorspace lookup table (%d 0 R)", pdf_to_num(lookup));
			}

			/* Only a negative result is treated as failure; a short read
			 * leaves the remainder of the table as allocated.
			 * NOTE(review): if fz_read itself throws, `file` does not appear
			 * to be closed - confirm. */
			i = fz_read(file, idx->lookup, n);
			if (i < 0)
			{
				fz_close(file);
				fz_throw(ctx, "cannot read colorspace lookup table (%d 0 R)", pdf_to_num(lookup));
			}

			fz_close(file);
		}
		else
		{
			fz_throw(ctx, "cannot parse colorspace lookup table");
		}
	}
Пример #10
0
/*
 * Convert a Unicode/PdfDocEncoding string (or the contents of a stream)
 * into a newly allocated, zero-terminated UTF-8 string.
 *
 * src may be a string object or a stream; any other object yields an empty
 * string. Input starting with FE FF / FF FE is read as big-/little-endian
 * 16-bit units; anything else is mapped through pdf_doc_encoding, with
 * pdf_replace_undefined applied to every code point.
 *
 * The result is allocated with fz_malloc. Errors are rethrown after the
 * stream buffer (if any) has been dropped.
 */
char *
pdf_to_utf8(pdf_document *doc, pdf_obj *src)
{
    fz_context *ctx = doc->ctx;
    fz_buffer *strmbuf = NULL;
    unsigned char *srcptr = NULL;
    char *dstptr = NULL, *dst = NULL;
    int srclen = 0;
    int dstlen = 0;
    int ucs;
    int i;

    fz_var(strmbuf);
    fz_try(ctx)
    {
        if (pdf_is_string(src))
        {
            srcptr = (unsigned char *) pdf_to_str_buf(src);
            srclen = pdf_to_str_len(src);
        }
        else if (pdf_is_stream(doc, pdf_to_num(src), pdf_to_gen(src)))
        {
            strmbuf = pdf_load_stream(doc, pdf_to_num(src), pdf_to_gen(src));
            srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr);
        }
        else
        {
            /* Neither string nor stream: produce an empty result. */
            srclen = 0;
        }

        if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
        {
            /* Big-endian 16-bit units: measure, allocate, then encode. */
            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] << 8 | srcptr[i+1];
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] << 8 | srcptr[i+1];
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
        else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
        {
            /* Little-endian 16-bit units: measure, allocate, then encode. */
            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] | srcptr[i+1] << 8;
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] | srcptr[i+1] << 8;
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
        else
        {
            /* The size must be computed from the same (replaced) code point
             * that is written below; measuring the unreplaced value could
             * under-allocate when the replacement encodes to more bytes. */
            for (i = 0; i < srclen; i++)
            {
                ucs = pdf_replace_undefined(pdf_doc_encoding[srcptr[i]]);
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 0; i < srclen; i++)
            {
                ucs = pdf_replace_undefined(pdf_doc_encoding[srcptr[i]]);
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
    }
    fz_always(ctx)
    {
        /* The source bytes may live in this buffer, so it is only dropped
         * once conversion has finished. */
        fz_drop_buffer(ctx, strmbuf);
    }
    fz_catch(ctx)
    {
        fz_rethrow(ctx);
    }

    *dstptr = '\0';
    return dst;
}
Пример #11
0
/*
 * Return the length in bytes (6 or 8) of a language escape sequence that
 * starts at s[i], or 0 when there is none. n is the total length of s.
 */
static size_t
utf16be_language_escape_len(const unsigned char *s, size_t n, size_t i)
{
	if (i + 6 <= n &&
		s[i+0] == 0 && s[i+1] == 27 &&
		s[i+4] == 0 && s[i+5] == 27)
		return 6;
	if (i + 8 <= n &&
		s[i+0] == 0 && s[i+1] == 27 &&
		s[i+6] == 0 && s[i+7] == 27)
		return 8;
	return 0;
}

/*
 * Convert a Unicode/PdfDocEncoding string (or the contents of a stream)
 * into a newly allocated, zero-terminated UTF-8 string.
 *
 * src may be a string object or a stream; any other object yields an empty
 * string. Input starting with FE FF is decoded with rune_from_utf16be,
 * skipping language escape sequences; anything else is mapped byte by byte
 * through pdf_doc_encoding. The result is allocated with fz_malloc.
 */
char *
pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src)
{
	fz_buffer *stmbuf = NULL;
	unsigned char *in;
	char *out, *start;
	size_t len;
	size_t need = 0;
	size_t pos;
	size_t esc;
	int rune;

	fz_var(stmbuf);
	fz_try(ctx)
	{
		if (pdf_is_string(ctx, src))
		{
			in = (unsigned char *) pdf_to_str_buf(ctx, src);
			len = pdf_to_str_len(ctx, src);
		}
		else if (pdf_is_stream(ctx, src))
		{
			stmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src));
			len = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&in);
		}
		else
		{
			len = 0;
		}

		/* UTF-16BE */
		if (len >= 2 && in[0] == 254 && in[1] == 255)
		{
			/* Pass 1: measure the output, ignoring language escapes. */
			pos = 2;
			while (pos + 2 <= len)
			{
				esc = utf16be_language_escape_len(in, len, pos);
				if (esc)
				{
					pos += esc;
				}
				else
				{
					pos += rune_from_utf16be(&rune, in + pos, in + len);
					need += fz_runelen(rune);
				}
			}

			out = start = fz_malloc(ctx, need + 1);

			/* Pass 2: encode, walking the input exactly as above. */
			pos = 2;
			while (pos + 2 <= len)
			{
				esc = utf16be_language_escape_len(in, len, pos);
				if (esc)
				{
					pos += esc;
				}
				else
				{
					pos += rune_from_utf16be(&rune, in + pos, in + len);
					out += fz_runetochar(out, rune);
				}
			}
		}

		/* PDFDocEncoding */
		else
		{
			for (pos = 0; pos < len; pos++)
				need += fz_runelen(pdf_doc_encoding[in[pos]]);

			out = start = fz_malloc(ctx, need + 1);

			for (pos = 0; pos < len; pos++)
			{
				rune = pdf_doc_encoding[in[pos]];
				out += fz_runetochar(out, rune);
			}
		}
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, stmbuf);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	*out = '\0';
	return start;
}
Пример #12
0
/*
 * Load a CID-keyed font from its font dictionary.
 *
 * dict       - the font dictionary; BaseFont, CIDSystemInfo, FontDescriptor,
 *              Subtype, CIDToGIDMap, DW, W, DW2 and W2 are read from it.
 * encoding   - either a CMap name or an indirect reference to an embedded CMap.
 * to_unicode - handed to pdf_load_to_unicode; a failure there only
 *              produces a warning.
 *
 * Returns the new font descriptor. On any other error the partially built
 * descriptor is dropped and the error is rethrown with a
 * "cannot load cid font" message.
 */
static pdf_font_desc *
load_cid_font(pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
{
	pdf_obj *widths;
	pdf_obj *descriptor;
	pdf_font_desc *fontdesc = NULL;
	FT_Face face;
	int kind;
	char collection[256];
	char *basefont;
	int i, k, fterr;
	pdf_obj *obj;
	int dw;
	fz_context *ctx = doc->ctx;

	fz_var(fontdesc);

	fz_try(ctx)
	{
		/* Get font name and CID collection */

		basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));

		{
			pdf_obj *cidinfo;
			char tmpstr[64];
			int tmplen;

			cidinfo = pdf_dict_gets(dict, "CIDSystemInfo");
			if (!cidinfo)
				fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info");

			/* collection = "<Registry>-<Ordering>", each part truncated to fit tmpstr */
			obj = pdf_dict_gets(cidinfo, "Registry");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcpy(collection, tmpstr, sizeof collection);

			fz_strlcat(collection, "-", sizeof collection);

			obj = pdf_dict_gets(cidinfo, "Ordering");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcat(collection, tmpstr, sizeof collection);
		}

		/* Load font file */

		fontdesc = pdf_new_font_desc(ctx);

		descriptor = pdf_dict_gets(dict, "FontDescriptor");
		if (!descriptor)
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor");
		pdf_load_font_descriptor(fontdesc, doc, descriptor, collection, basefont, 1, 1);

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		if (pdf_is_name(encoding))
		{
			if (!strcmp(pdf_to_name(encoding), "Identity-H"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2);
			else if (!strcmp(pdf_to_name(encoding), "Identity-V"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2);
			else
				fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding));
		}
		else if (pdf_is_indirect(encoding))
		{
			fontdesc->encoding = pdf_load_embedded_cmap(doc, encoding);
		}
		else
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding");
		}
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);

		pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));

		if (kind == TRUETYPE ||
			/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1565 */
			!strcmp(pdf_to_name(pdf_dict_gets(dict, "Subtype")), "CIDFontType2") ||
			/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1997 */
			pdf_is_indirect(pdf_dict_gets(dict, "CIDToGIDMap")))
		{
			pdf_obj *cidtogidmap;

			cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap");
			if (pdf_is_indirect(cidtogidmap))
			{
				fz_buffer *buf;

				buf = pdf_load_stream(doc, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap));

				/* The stream buffer must be dropped even when the table
				 * allocation below throws, otherwise it leaks. */
				fz_try(ctx)
				{
					/* The stream is a sequence of big-endian 16-bit glyph ids. */
					fontdesc->cid_to_gid_len = (buf->len) / 2;
					fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short));
					fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
					for (i = 0; i < fontdesc->cid_to_gid_len; i++)
						fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];
				}
				fz_always(ctx)
				{
					fz_drop_buffer(ctx, buf);
				}
				fz_catch(ctx)
				{
					fz_rethrow(ctx);
				}
			}

			/* if truetype font is external, cidtogidmap should not be identity */
			/* so we map from cid to unicode and then map that through the (3 1) */
			/* unicode cmap to get a glyph id */
			else if (fontdesc->font->ft_substitute)
			{
				fterr = FT_Select_Charmap(face, ft_encoding_unicode);
				if (fterr)
				{
					fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
				}

				if (!strcmp(collection, "Adobe-CNS1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
				else if (!strcmp(collection, "Adobe-GB1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan2"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
				else if (!strcmp(collection, "Adobe-Korea1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
				/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2318 */
				else if (!strcmp(collection, "Adobe-Identity") && fontdesc->font->ft_file)
					fontdesc->font->ft_substitute = 0;
			}
		}

		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1961 */
		/* A broken ToUnicode CMap is not fatal: warn and carry on. */
		fz_try(ctx)
		{
			pdf_load_to_unicode(doc, fontdesc, NULL, collection, to_unicode);
		}
		fz_catch(ctx)
		{
			fz_warn(ctx, "cannot load ToUnicode CMap");
		}

		/* If we have an identity encoding, we're supposed to use the glyph ids directly.
		 * If we only have a substitute font, that won't work.
		 * Make a last ditch attempt by using
		 * the ToUnicode table if it exists to map via the substitute font's cmap. */
		if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute)
		{
			fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
			if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap)
				fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
		}

		/* Horizontal */

		dw = 1000;
		obj = pdf_dict_gets(dict, "DW");
		if (obj)
			dw = pdf_to_int(obj);
		pdf_set_default_hmtx(ctx, fontdesc, dw);

		widths = pdf_dict_gets(dict, "W");
		if (widths)
		{
			int c0, c1, w, n, m;

			/* Entries are either "c0 [w0 w1 ...]" (2 items) or "c0 c1 w" (3 items). */
			n = pdf_array_len(widths);
			for (i = 0; i < n; )
			{
				c0 = pdf_to_int(pdf_array_get(widths, i));
				obj = pdf_array_get(widths, i + 1);
				if (pdf_is_array(obj))
				{
					m = pdf_array_len(obj);
					for (k = 0; k < m; k++)
					{
						w = pdf_to_int(pdf_array_get(obj, k));
						pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
					}
					i += 2;
				}
				else
				{
					c1 = pdf_to_int(obj);
					w = pdf_to_int(pdf_array_get(widths, i + 2));
					pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
					i += 3;
				}
			}
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Vertical */

		if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
		{
			int dw2y = 880;
			int dw2w = -1000;

			obj = pdf_dict_gets(dict, "DW2");
			if (obj)
			{
				dw2y = pdf_to_int(pdf_array_get(obj, 0));
				dw2w = pdf_to_int(pdf_array_get(obj, 1));
			}

			pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);

			widths = pdf_dict_gets(dict, "W2");
			if (widths)
			{
				int c0, c1, w, x, y, n;

				/* Entries are either "c0 [w x y ...]" (2 items) or "c0 c1 w x y" (5 items). */
				n = pdf_array_len(widths);
				for (i = 0; i < n; )
				{
					c0 = pdf_to_int(pdf_array_get(widths, i));
					obj = pdf_array_get(widths, i + 1);
					if (pdf_is_array(obj))
					{
						int m = pdf_array_len(obj);
						for (k = 0; k * 3 < m; k ++)
						{
							w = pdf_to_int(pdf_array_get(obj, k * 3 + 0));
							x = pdf_to_int(pdf_array_get(obj, k * 3 + 1));
							y = pdf_to_int(pdf_array_get(obj, k * 3 + 2));
							pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
						}
						i += 2;
					}
					else
					{
						c1 = pdf_to_int(obj);
						w = pdf_to_int(pdf_array_get(widths, i + 2));
						x = pdf_to_int(pdf_array_get(widths, i + 3));
						y = pdf_to_int(pdf_array_get(widths, i + 4));
						pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
						i += 5;
					}
				}
			}

			pdf_end_vmtx(ctx, fontdesc);
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_font(ctx, fontdesc);
		fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}

	return fontdesc;
}