Ejemplo n.º 1
0
/*
 * Build a freshly allocated, NUL-terminated UTF-8 copy of the string held
 * by src.
 *
 * The source bytes are interpreted as:
 *   - UTF-16 big-endian when they start with FE FF,
 *   - UTF-16 little-endian when they start with FF FE,
 *   - otherwise one code point per byte, looked up in pdf_doc_encoding.
 *
 * Each 16-bit unit is converted on its own; a trailing odd byte is ignored.
 * The result is obtained from fz_malloc(ctx, ...).
 */
char *
pdf_to_utf8(fz_context *ctx, pdf_obj *src)
{
    unsigned char *in = (unsigned char *) pdf_to_str_buf(src);
    int inlen = pdf_to_str_len(src);
    int big_endian = inlen >= 2 && in[0] == 0xFE && in[1] == 0xFF;
    int little_endian = inlen >= 2 && in[0] == 0xFF && in[1] == 0xFE;
    int need = 0;
    char *out, *p;
    int k;

    if (big_endian || little_endian)
    {
        /* Offsets of the high and low byte inside every 2-byte unit. */
        int hi = big_endian ? 0 : 1;
        int lo = 1 - hi;

        /* Pass 1: measure. The byte-order mark (first 2 bytes) is skipped. */
        for (k = 2; k + 1 < inlen; k += 2)
            need += fz_runelen(in[k + hi] << 8 | in[k + lo]);

        p = out = fz_malloc(ctx, need + 1);

        /* Pass 2: encode. */
        for (k = 2; k + 1 < inlen; k += 2)
            p += fz_runetochar(p, in[k + hi] << 8 | in[k + lo]);
    }
    else
    {
        /* No byte-order mark: map every byte through pdf_doc_encoding. */
        for (k = 0; k < inlen; k++)
            need += fz_runelen(pdf_doc_encoding[in[k]]);

        p = out = fz_malloc(ctx, need + 1);

        for (k = 0; k < inlen; k++)
            p += fz_runetochar(p, pdf_doc_encoding[in[k]]);
    }

    *p = '\0';
    return out;
}
Ejemplo n.º 2
0
/*
 * Encode the NUL-terminated wide string s as UTF-8.
 *
 * Returns a buffer obtained from malloc() holding the NUL-terminated
 * result, or NULL if the allocation fails.
 */
char *
fz_utf8_from_wchar(const wchar_t *s)
{
	const wchar_t *wp;
	char *out;
	char *p;
	int size = 1; /* starts at 1 to leave room for the terminator */

	/* Pass 1: total number of UTF-8 bytes required. */
	for (wp = s; *wp; wp++)
		size += fz_runelen(*wp);

	out = malloc(size);
	if (out == NULL)
		return NULL;

	/* Pass 2: encode each wide character in turn. */
	p = out;
	for (wp = s; *wp; wp++)
		p += fz_runetochar(p, *wp);
	*p = 0;

	return out;
}
Ejemplo n.º 3
0
/*
 * Convert a Unicode/PDFDocEncoding text string into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * src may be a string object, or a stream whose loaded contents are used
 * as the string bytes; any other object yields an empty string.
 *
 * Bytes starting with FE FF are read as UTF-16 big-endian, bytes starting
 * with FF FE as UTF-16 little-endian (each 16-bit unit is converted on its
 * own; a trailing odd byte is ignored). Otherwise every byte is mapped
 * through pdf_doc_encoding and then pdf_replace_undefined.
 *
 * The result is obtained from fz_malloc. If loading the stream or
 * allocating fails, the stream buffer is dropped and the error is rethrown.
 */
char *
pdf_to_utf8(pdf_document *doc, pdf_obj *src)
{
    fz_context *ctx = doc->ctx;
    fz_buffer *strmbuf = NULL;
    unsigned char *srcptr = NULL;
    char *dstptr, *dst;
    int srclen;
    int dstlen = 0;
    int ucs;
    int i;

    fz_var(strmbuf);
    fz_try(ctx)
    {
        if (pdf_is_string(src))
        {
            srcptr = (unsigned char *) pdf_to_str_buf(src);
            srclen = pdf_to_str_len(src);
        }
        else if (pdf_is_stream(doc, pdf_to_num(src), pdf_to_gen(src)))
        {
            /* srcptr points into strmbuf, which stays alive until fz_always. */
            strmbuf = pdf_load_stream(doc, pdf_to_num(src), pdf_to_gen(src));
            srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr);
        }
        else
        {
            /* Neither string nor stream: srcptr stays NULL and is never read. */
            srclen = 0;
        }

        /* UTF-16 big-endian (byte-order mark FE FF) */
        if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
        {
            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] << 8 | srcptr[i+1];
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] << 8 | srcptr[i+1];
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
        /* UTF-16 little-endian (byte-order mark FF FE) */
        else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
        {
            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] | srcptr[i+1] << 8;
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 2; i + 1 < srclen; i += 2)
            {
                ucs = srcptr[i] | srcptr[i+1] << 8;
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
        /* PDFDocEncoding */
        else
        {
            /*
             * Measure exactly what the second loop writes: the length must
             * be taken AFTER pdf_replace_undefined, because a substituted
             * code point may need more UTF-8 bytes than the raw table value
             * and would otherwise overrun the buffer.
             */
            for (i = 0; i < srclen; i++)
            {
                ucs = pdf_replace_undefined(pdf_doc_encoding[srcptr[i]]);
                dstlen += fz_runelen(ucs);
            }

            dstptr = dst = fz_malloc(ctx, dstlen + 1);

            for (i = 0; i < srclen; i++)
            {
                ucs = pdf_replace_undefined(pdf_doc_encoding[srcptr[i]]);
                dstptr += fz_runetochar(dstptr, ucs);
            }
        }
    }
    fz_always(ctx)
    {
        fz_drop_buffer(ctx, strmbuf);
    }
    fz_catch(ctx)
    {
        fz_rethrow(ctx);
    }

    /* Only reached when the try block completed, so dst/dstptr are set. */
    *dstptr = '\0';
    return dst;
}
Ejemplo n.º 4
0
/*
 * Return the size in bytes (6 or 8) of a language escape code located at
 * s[pos], or 0 if there is none. An escape is recognised as the byte pair
 * 00 1B at pos followed by a second 00 1B either 4 or 6 bytes later, all
 * within the first len bytes of s.
 */
static size_t
pdf_utf16be_lang_escape_size(const unsigned char *s, size_t pos, size_t len)
{
	if (pos + 6 > len)
		return 0;
	if (s[pos+0] != 0 || s[pos+1] != 27)
		return 0;
	if (s[pos+4] == 0 && s[pos+5] == 27)
		return 6;
	if (pos + 8 <= len && s[pos+6] == 0 && s[pos+7] == 27)
		return 8;
	return 0;
}

/*
 * Convert a Unicode/PDFDocEncoding text string into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * src may be a string object, or a stream whose loaded contents are used
 * as the string bytes; any other object yields an empty string.
 *
 * Bytes starting with FE FF are decoded as UTF-16BE via rune_from_utf16be,
 * with language escape codes skipped. Otherwise every byte is mapped
 * through pdf_doc_encoding.
 *
 * The result is obtained from fz_malloc. If loading the stream or
 * allocating fails, the stream buffer is dropped and the error is rethrown.
 */
char *
pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src)
{
	fz_buffer *stmbuf = NULL;
	unsigned char *in;
	char *out, *p;
	size_t inlen;
	size_t need = 0;
	size_t pos;
	size_t esc;
	int rune;

	fz_var(stmbuf);
	fz_try(ctx)
	{
		if (pdf_is_string(ctx, src))
		{
			in = (unsigned char *) pdf_to_str_buf(ctx, src);
			inlen = pdf_to_str_len(ctx, src);
		}
		else if (pdf_is_stream(ctx, src))
		{
			/* in points into stmbuf, which stays alive until fz_always. */
			stmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src));
			inlen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&in);
		}
		else
		{
			/* Nothing to convert; with a zero length, in is never read. */
			inlen = 0;
		}

		if (inlen >= 2 && in[0] == 254 && in[1] == 255)
		{
			/* UTF-16BE, pass 1: measure the output, skipping escapes. */
			for (pos = 2; pos + 2 <= inlen; )
			{
				esc = pdf_utf16be_lang_escape_size(in, pos, inlen);
				if (esc != 0)
				{
					pos += esc;
				}
				else
				{
					pos += rune_from_utf16be(&rune, in + pos, in + inlen);
					need += fz_runelen(rune);
				}
			}

			p = out = fz_malloc(ctx, need + 1);

			/* UTF-16BE, pass 2: same walk, this time emitting UTF-8. */
			for (pos = 2; pos + 2 <= inlen; )
			{
				esc = pdf_utf16be_lang_escape_size(in, pos, inlen);
				if (esc != 0)
				{
					pos += esc;
				}
				else
				{
					pos += rune_from_utf16be(&rune, in + pos, in + inlen);
					p += fz_runetochar(p, rune);
				}
			}
		}
		else
		{
			/* PDFDocEncoding: one table lookup per input byte. */
			for (pos = 0; pos < inlen; pos++)
				need += fz_runelen(pdf_doc_encoding[in[pos]]);

			p = out = fz_malloc(ctx, need + 1);

			for (pos = 0; pos < inlen; pos++)
			{
				rune = pdf_doc_encoding[in[pos]];
				p += fz_runetochar(p, rune);
			}
		}
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, stmbuf);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	*p = '\0';
	return out;
}