/*
 * Load predefined CMap from system.
 */
fz_error
pdf_loadsystemcmap(pdf_cmap **cmapp, char *cmapname)
{
    fz_error error;
    pdf_cmap *usecmap;
    pdf_cmap *cmap;
    int i;

    pdf_logfont("loading system cmap %s\n", cmapname);

    for (i = 0; pdf_cmaptable[i]; i++)
    {
	if (!strcmp(cmapname, pdf_cmaptable[i]->cmapname))
	{
	    cmap = pdf_cmaptable[i];
	    if (cmap->usecmapname[0] && !cmap->usecmap)
	    {
		error = pdf_loadsystemcmap(&usecmap, cmap->usecmapname);
		if (error)
		    return fz_rethrow(error, "could not load usecmap: %s", cmap->usecmapname);
		pdf_setusecmap(cmap, usecmap);
	    }
	    *cmapp = cmap;
	    return fz_okay;
	}
    }

    return fz_throw("no builtin cmap file: %s", cmapname);
}
Пример #2
0
/*
 * Load predefined CMap from system.
 */
fz_error
pdf_loadsystemcmap(pdf_cmap **cmapp, char *cmapname)
{
	fz_error error;
	fz_stream *stream;
	pdf_cmap *usecmap;
	pdf_cmap *cmap;
	unsigned char *data;
	unsigned int len;
	int i;

	pdf_logfont("loading system cmap %s\n", cmapname);

	error = pdf_getcmapbuffer(cmapname, &data, &len);
	if(error)
		return fz_rethrow(error, "no builtin cmap file: %s", cmapname);

	stream = fz_openrmemory(data, len);
	error = pdf_parsecmap(&cmap, stream);
	fz_dropstream(stream);
	if(error)
		return fz_rethrow(error, "cannot parse cmap data");

	if (cmap->usecmapname[0] && !cmap->usecmap)
	{
		pdf_logfont("charmap depends on other charmap (%s), loading it\n", cmap->usecmapname);
		error = pdf_loadsystemcmap(&usecmap, cmap->usecmapname);
		if (error)
			return fz_rethrow(error, "could not load usecmap: %s", cmap->usecmapname);
		pdf_setusecmap(cmap, usecmap);
	}

	*cmapp = cmap;
	return fz_okay;
}
Пример #3
0
fz_error
pdf_loadtounicode(pdf_fontdesc *font, pdf_xref *xref,
                  char **strings, char *collection, fz_obj *cmapstm)
{
    fz_error error = fz_okay;
    pdf_cmap *cmap;
    int cid;
    int ucsbuf[8];
    int ucslen;
    int i;

    if (pdf_isstream(xref, fz_tonum(cmapstm), fz_togen(cmapstm)))
    {
        pdf_logfont("tounicode embedded cmap\n");

        error = pdf_loadembeddedcmap(&cmap, xref, cmapstm);
        if (error)
            return fz_rethrow(error, "cannot load embedded cmap (%d %d R)", fz_tonum(cmapstm), fz_togen(cmapstm));

        font->tounicode = pdf_newcmap();

        for (i = 0; i < (strings ? 256 : 65536); i++)
        {
            cid = pdf_lookupcmap(font->encoding, i);
            if (cid >= 0)
            {
                ucslen = pdf_lookupcmapfull(cmap, i, ucsbuf);
                if (ucslen == 1)
                    pdf_maprangetorange(font->tounicode, cid, cid, ucsbuf[0]);
                if (ucslen > 1)
                    pdf_maponetomany(font->tounicode, cid, ucsbuf, ucslen);
            }
        }

        pdf_sortcmap(font->tounicode);

        pdf_dropcmap(cmap);
    }

    else if (collection)
    {
        pdf_logfont("tounicode cid collection (%s)\n", collection);

        error = fz_okay;

        if (!strcmp(collection, "Adobe-CNS1"))
            error = pdf_loadsystemcmap(&font->tounicode, "Adobe-CNS1-UCS2");
        else if (!strcmp(collection, "Adobe-GB1"))
            error = pdf_loadsystemcmap(&font->tounicode, "Adobe-GB1-UCS2");
        else if (!strcmp(collection, "Adobe-Japan1"))
            error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan1-UCS2");
        else if (!strcmp(collection, "Adobe-Japan2"))
            error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan2-UCS2"); /* where's this? */
        else if (!strcmp(collection, "Adobe-Korea1"))
            error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Korea1-UCS2");

        if (error)
            return fz_rethrow(error, "cannot load tounicode system cmap %s-UCS2", collection);
    }

    if (strings)
    {
        pdf_logfont("tounicode strings\n");

        /* TODO one-to-many mappings */

        font->ncidtoucs = 256;
        font->cidtoucs = fz_calloc(256, sizeof(unsigned short));

        for (i = 0; i < 256; i++)
        {
            if (strings[i])
                font->cidtoucs[i] = pdf_lookupagl(strings[i]);
            else
                font->cidtoucs[i] = '?';
        }
    }

    if (!font->tounicode && !font->cidtoucs)
    {
        pdf_logfont("tounicode could not be loaded\n");
        /* TODO: synthesize a ToUnicode if it's a freetype font with
        * cmap and/or post tables or if it has glyph names. */
    }

    return fz_okay;
}
Пример #4
0
static fz_error
loadcidfont(pdf_fontdesc **fontdescp, pdf_xref *xref, fz_obj *dict, fz_obj *encoding, fz_obj *tounicode)
{
	fz_error error;
	fz_obj *widths;
	fz_obj *descriptor;
	pdf_fontdesc *fontdesc;
	FT_Face face;
	fz_bbox bbox;
	int kind;
	char collection[256];
	char *basefont;
	int i, k, fterr;
	fz_obj *obj;
	int dw;

	/* Get font name and CID collection */

	basefont = fz_toname(fz_dictgets(dict, "BaseFont"));

	{
		fz_obj *cidinfo;
		char tmpstr[64];
		int tmplen;

		cidinfo = fz_dictgets(dict, "CIDSystemInfo");
		if (!cidinfo)
			return fz_throw("cid font is missing info");

		obj = fz_dictgets(cidinfo, "Registry");
		tmplen = MIN(sizeof tmpstr - 1, fz_tostrlen(obj));
		memcpy(tmpstr, fz_tostrbuf(obj), tmplen);
		tmpstr[tmplen] = '\0';
		fz_strlcpy(collection, tmpstr, sizeof collection);

		fz_strlcat(collection, "-", sizeof collection);

		obj = fz_dictgets(cidinfo, "Ordering");
		tmplen = MIN(sizeof tmpstr - 1, fz_tostrlen(obj));
		memcpy(tmpstr, fz_tostrbuf(obj), tmplen);
		tmpstr[tmplen] = '\0';
		fz_strlcat(collection, tmpstr, sizeof collection);
	}

	/* Load font file */

	fontdesc = pdf_newfontdesc();

	pdf_logfont("load cid font (%d %d R) ptr=%p {\n", fz_tonum(dict), fz_togen(dict), fontdesc);
	pdf_logfont("basefont %s\n", basefont);
	pdf_logfont("collection %s\n", collection);

	descriptor = fz_dictgets(dict, "FontDescriptor");
	if (descriptor)
		error = pdf_loadfontdescriptor(fontdesc, xref, descriptor, collection, basefont);
	else
		error = fz_throw("syntaxerror: missing font descriptor");
	if (error)
		goto cleanup;

	face = fontdesc->font->ftface;
	kind = ftkind(face);

	bbox.x0 = (face->bbox.xMin * 1000) / face->units_per_EM;
	bbox.y0 = (face->bbox.yMin * 1000) / face->units_per_EM;
	bbox.x1 = (face->bbox.xMax * 1000) / face->units_per_EM;
	bbox.y1 = (face->bbox.yMax * 1000) / face->units_per_EM;

	pdf_logfont("ft bbox [%d %d %d %d]\n", bbox.x0, bbox.y0, bbox.x1, bbox.y1);

	if (bbox.x0 == bbox.x1)
		fz_setfontbbox(fontdesc->font, -1000, -1000, 2000, 2000);
	else
		fz_setfontbbox(fontdesc->font, bbox.x0, bbox.y0, bbox.x1, bbox.y1);

	/* Encoding */

	error = fz_okay;
	if (fz_isname(encoding))
	{
		pdf_logfont("encoding /%s\n", fz_toname(encoding));
		if (!strcmp(fz_toname(encoding), "Identity-H"))
			fontdesc->encoding = pdf_newidentitycmap(0, 2);
		else if (!strcmp(fz_toname(encoding), "Identity-V"))
			fontdesc->encoding = pdf_newidentitycmap(1, 2);
		else
			error = pdf_loadsystemcmap(&fontdesc->encoding, fz_toname(encoding));
	}
	else if (fz_isindirect(encoding))
	{
		pdf_logfont("encoding %d %d R\n", fz_tonum(encoding), fz_togen(encoding));
		error = pdf_loadembeddedcmap(&fontdesc->encoding, xref, encoding);
	}
	else
	{
		error = fz_throw("syntaxerror: font missing encoding");
	}
	if (error)
		goto cleanup;

	pdf_setfontwmode(fontdesc, pdf_getwmode(fontdesc->encoding));
	pdf_logfont("wmode %d\n", pdf_getwmode(fontdesc->encoding));

	if (kind == TRUETYPE)
	{
		fz_obj *cidtogidmap;

		cidtogidmap = fz_dictgets(dict, "CIDToGIDMap");
		if (fz_isindirect(cidtogidmap))
		{
			fz_buffer *buf;

			pdf_logfont("cidtogidmap stream\n");

			error = pdf_loadstream(&buf, xref, fz_tonum(cidtogidmap), fz_togen(cidtogidmap));
			if (error)
				goto cleanup;

			fontdesc->ncidtogid = (buf->len) / 2;
			fontdesc->cidtogid = fz_malloc(fontdesc->ncidtogid * sizeof(unsigned short));
			for (i = 0; i < fontdesc->ncidtogid; i++)
				fontdesc->cidtogid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];

			fz_dropbuffer(buf);
		}

		/* if truetype font is external, cidtogidmap should not be identity */
		/* so we map from cid to unicode and then map that through the (3 1) */
		/* unicode cmap to get a glyph id */
		else if (fontdesc->font->ftsubstitute)
		{
			pdf_logfont("emulate ttf cidfont\n");

			fterr = FT_Select_Charmap(face, ft_encoding_unicode);
			if (fterr)
			{
				error = fz_throw("fonterror: no unicode cmap when emulating CID font: %s", ft_errorstring(fterr));
				goto cleanup;
			}

			if (!strcmp(collection, "Adobe-CNS1"))
				error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-CNS1-UCS2");
			else if (!strcmp(collection, "Adobe-GB1"))
				error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-GB1-UCS2");
			else if (!strcmp(collection, "Adobe-Japan1"))
				error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Japan1-UCS2");
			else if (!strcmp(collection, "Adobe-Japan2"))
				error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Japan2-UCS2");
			else if (!strcmp(collection, "Adobe-Korea1"))
				error = pdf_loadsystemcmap(&fontdesc->tottfcmap, "Adobe-Korea1-UCS2");
			else
				error = fz_okay;

			if (error)
			{
				error = fz_rethrow(error, "cannot load system cmap %s", collection);
				goto cleanup;
			}
		}
	}

	error = pdf_loadtounicode(fontdesc, xref, nil, collection, tounicode);
	if (error)
		goto cleanup;

	/* Horizontal */

	dw = 1000;
	obj = fz_dictgets(dict, "DW");
	if (obj)
		dw = fz_toint(obj);
	pdf_setdefaulthmtx(fontdesc, dw);

	widths = fz_dictgets(dict, "W");
	if (widths)
	{
		int c0, c1, w;

		for (i = 0; i < fz_arraylen(widths); )
		{
			c0 = fz_toint(fz_arrayget(widths, i));
			obj = fz_arrayget(widths, i + 1);
			if (fz_isarray(obj))
			{
				for (k = 0; k < fz_arraylen(obj); k++)
				{
					w = fz_toint(fz_arrayget(obj, k));
					pdf_addhmtx(fontdesc, c0 + k, c0 + k, w);
				}
				i += 2;
			}
			else
			{
				c1 = fz_toint(obj);
				w = fz_toint(fz_arrayget(widths, i + 2));
				pdf_addhmtx(fontdesc, c0, c1, w);
				i += 3;
			}
		}
	}

	pdf_endhmtx(fontdesc);

	/* Vertical */

	if (pdf_getwmode(fontdesc->encoding) == 1)
	{
		int dw2y = 880;
		int dw2w = -1000;

		obj = fz_dictgets(dict, "DW2");
		if (obj)
		{
			dw2y = fz_toint(fz_arrayget(obj, 0));
			dw2w = fz_toint(fz_arrayget(obj, 1));
		}

		pdf_setdefaultvmtx(fontdesc, dw2y, dw2w);

		widths = fz_dictgets(dict, "W2");
		if (widths)
		{
			int c0, c1, w, x, y;

			for (i = 0; i < fz_arraylen(widths); )
			{
				c0 = fz_toint(fz_arrayget(widths, i));
				obj = fz_arrayget(widths, i + 1);
				if (fz_isarray(obj))
				{
					for (k = 0; k < fz_arraylen(obj); k += 3)
					{
						w = fz_toint(fz_arrayget(obj, k + 0));
						x = fz_toint(fz_arrayget(obj, k + 1));
						y = fz_toint(fz_arrayget(obj, k + 2));
						pdf_addvmtx(fontdesc, c0 + k, c0 + k, x, y, w);
					}
					i += 2;
				}
				else
				{
					c1 = fz_toint(obj);
					w = fz_toint(fz_arrayget(widths, i + 2));
					x = fz_toint(fz_arrayget(widths, i + 3));
					y = fz_toint(fz_arrayget(widths, i + 4));
					pdf_addvmtx(fontdesc, c0, c1, x, y, w);
					i += 5;
				}
			}
		}

		pdf_endvmtx(fontdesc);
	}

	pdf_logfont("}\n");

	*fontdescp = fontdesc;
	return fz_okay;

cleanup:
	pdf_dropfont(fontdesc);
	return fz_rethrow(error, "cannot load cid font (%d %d R)", fz_tonum(dict), fz_togen(dict));
}
/*
 * Load CMap stream in PDF file
 */
fz_error
pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmref)
{
    fz_error error = fz_okay;
    fz_obj *stmobj;
    fz_stream *file = nil;
    pdf_cmap *cmap = nil;
    pdf_cmap *usecmap;
    fz_obj *wmode;
    fz_obj *obj;

    if ((*cmapp = pdf_finditem(xref->store, PDF_KCMAP, stmref)))
    {
	pdf_keepcmap(*cmapp);
	return fz_okay;
    }

    pdf_logfont("load embedded cmap (%d %d R) {\n", fz_tonum(stmref), fz_togen(stmref));

    stmobj = fz_resolveindirect(stmref);

    error = pdf_openstream(&file, xref, fz_tonum(stmref), fz_togen(stmref));
    if (error)
    {
	error = fz_rethrow(error, "cannot open cmap stream");
	goto cleanup;
    }

    error = pdf_parsecmap(&cmap, file);
    if (error)
    {
	error = fz_rethrow(error, "cannot parse cmap stream");
	goto cleanup;
    }

    fz_dropstream(file);

    wmode = fz_dictgets(stmobj, "WMode");
    if (fz_isint(wmode))
    {
	pdf_logfont("wmode %d\n", wmode);
	pdf_setwmode(cmap, fz_toint(wmode));
    }

    obj = fz_dictgets(stmobj, "UseCMap");
    if (fz_isname(obj))
    {
	pdf_logfont("usecmap /%s\n", fz_toname(obj));
	error = pdf_loadsystemcmap(&usecmap, fz_toname(obj));
	if (error)
	{
	    error = fz_rethrow(error, "cannot load system usecmap '%s'", fz_toname(obj));
	    goto cleanup;
	}
	pdf_setusecmap(cmap, usecmap);
	pdf_dropcmap(usecmap);
    }
    else if (fz_isindirect(obj))
    {
	pdf_logfont("usecmap (%d %d R)\n", fz_tonum(obj), fz_togen(obj));
	error = pdf_loadembeddedcmap(&usecmap, xref, obj);
	if (error)
	{
	    error = fz_rethrow(error, "cannot load embedded usecmap");
	    goto cleanup;
	}
	pdf_setusecmap(cmap, usecmap);
	pdf_dropcmap(usecmap);
    }

    pdf_logfont("}\n");

    error = pdf_storeitem(xref->store, PDF_KCMAP, stmref, cmap);
    if (error)
    {
	error = fz_rethrow(error, "cannot store cmap resource");
	goto cleanup;
    }

    *cmapp = cmap;
    return fz_okay;

cleanup:
    if (file)
	fz_dropstream(file);
    if (cmap)
	pdf_dropcmap(cmap);
    return error; /* already rethrown */
}