Пример #1
0
/*
 * Conversion to PDFDoc/EBCDIC or UTF-16/[EBCDIC-]UTF-8
 *
 * text / len       input string; a len of 0 means "use strlen(text)".
 * hypertextformat, hypertextencoding, codepage
 *                  describe the input; an encoding vector is only looked
 *                  up when hypertextencoding is non-negative.
 * outlen           receives the length reported by pdc_convert_string
 *                  (set to 0 when text is NULL).
 * oututf8          if true and the first conversion yielded UTF-16BE,
 *                  the result is converted once more to PDC_UTF8.
 * verbose          handed through to pdc_convert_string.
 *
 * Returns the buffer produced by pdc_convert_string, or NULL if text is
 * NULL. NOTE(review): the buffer is presumably owned by the caller and
 * must be released with pdc_free - confirm against callers.
 */
char *
pdf_convert_hypertext(PDF *p, const char *text, int len,
    pdc_text_format hypertextformat, pdc_encoding hypertextencoding,
    int codepage, int *outlen, pdc_bool oututf8, pdc_bool verbose)
{
    pdc_encodingvector *srcvec = NULL;
    pdc_encodingvector *dstvec;
    pdc_byte *converted = NULL;
    pdc_byte *utf8text = NULL;
    pdc_text_format resultformat = pdc_utf16be;
    pdc_text_format utf8format = PDC_UTF8;
    int flags = PDC_CONV_WITHBOM | PDC_CONV_TRYBYTES;

    *outlen = 0;

    if (text == NULL)
        return NULL;

    if (len == 0)
        len = (int) strlen(text);

    /* encoding vector of the incoming text, if it has one */
    if (hypertextencoding >= 0)
        srcvec = pdc_get_encoding_vector(p->pdc, hypertextencoding);

    /* target byte encoding: PDFDocEncoding */
    dstvec = pdc_get_encoding_vector(p->pdc, pdc_pdfdoc);

    /* first pass: UTF-16BE, or bytes in the target encoding */
    pdf_set_convertflags(p, &flags);
    if (pdc_logg_is_enabled(p->pdc, 3, trc_text))
        flags |= PDC_CONV_LOGGING;

    pdc_convert_string(p->pdc, hypertextformat, codepage, srcvec,
                       (pdc_byte *) text, len,
                       &resultformat, dstvec, &converted, outlen,
                       flags, verbose);

    /* nothing more to do unless UTF-8 was requested for a Unicode result */
    if (!oututf8 || resultformat != pdc_utf16be)
        return (char *) converted;

    /* second pass: UTF-16BE -> UTF-8, replacing the intermediate buffer */
    flags = PDC_CONV_WITHBOM;
    if (pdc_logg_is_enabled(p->pdc, 3, trc_text))
        flags |= PDC_CONV_LOGGING;

    pdc_convert_string(p->pdc, resultformat, 0, NULL, converted, *outlen,
                       &utf8format, NULL, &utf8text, outlen,
                       flags, verbose);
    pdc_free(p->pdc, converted);

    return (char *) utf8text;
}
Пример #2
0
/*
 * Converts a string that pdc_is_utf8_bytecode() recognizes as UTF-8 to
 * UTF-16BE, with the PDFDocEncoding vector supplied as output encoding.
 * Any other string is passed through untouched.
 *
 * Note the two different results: in the pass-through case the function
 * returns the caller's own `text` pointer (const cast away) and sets
 * *outlen to inlen; in the conversion case it returns the buffer filled
 * in by pdc_convert_string and *outlen is the length reported by it.
 */
char *
pdf_convert_pdfstring(PDF *p, const char *text, int inlen, int convflags,
                      int *outlen)
{
    pdc_text_format srcformat = PDC_UTF8;
    pdc_text_format dstformat = pdc_utf16be;
    pdc_encodingvector *pdfdocvec;
    pdc_byte *converted = NULL;

    /* not UTF-8: hand the input back as it is */
    if (!pdc_is_utf8_bytecode(text))
    {
        *outlen = inlen;
        return (char *) text;
    }

    pdfdocvec = pdc_get_encoding_vector(p->pdc, pdc_pdfdoc);

    /* let the PDF object adjust the caller-supplied conversion flags */
    pdf_set_convertflags(p, &convflags);

    pdc_convert_string(p->pdc, srcformat, 0, NULL,
                       (pdc_byte *) text, inlen,
                       &dstformat, pdfdocvec, &converted, outlen,
                       convflags, pdc_true);

    return (char *) converted;
}
Пример #3
0
/*
 * pdc_fopen_logg opens a file. The function expects a UTF-8 encoded file name.
 * (see function pdc_convert_filename), if define PDC_UNICODE_FILENAME is set.
 *
 * With PDC_UNICODE_FILENAME the name (UTF-8, or UTF-16BE if detected by
 * pdc_is_utf16be_unicode) is converted with pdc_convert_string; a byte
 * result is opened with fopen(), a UTF-16 result with _wfopen().
 * Without it, the first three bytes of the name are skipped when
 * pdc_is_utf8_bytecode() matches (presumably the UTF-8 BOM) and the rest
 * is passed to fopen().
 *
 * The outcome is reported through pdc_logg_openclose() in both cases.
 *
 * Returns the opened stream, or NULL if the file could not be opened.
 * In the wide-character branch NULL is also returned, without attempting
 * to open the file, when `mode` does not fit the internal mode buffer.
 */
FILE *
pdc_fopen_logg(pdc_core *pdc, const char *filename, const char *mode)
{
    FILE *fp = NULL;
    int i = 0;


#if defined(PDC_UNICODE_FILENAME)

    pdc_byte *outfilename = NULL;
    pdc_text_format nameformat = PDC_UTF8;
    pdc_text_format targetnameformat = pdc_utf16;
    int len = (int) pdc_strlen(filename);
    int outlen = 0;

    if (pdc_is_utf16be_unicode(filename))
        nameformat = pdc_utf16be;

    /* convert filename from UTF-8 / UTF-16BE to UTF-16 or Latin-1 */
    pdc_convert_string(pdc, nameformat, 0, NULL, (pdc_byte *) filename, len,
                       &targetnameformat, NULL, &outfilename, &outlen,
                       PDC_CONV_TRYBYTES | PDC_CONV_NOBOM, pdc_true);

    if (targetnameformat == pdc_bytes)
    {
        fp = fopen((const char *) outfilename, mode);
    }
    else
    {
        /* room for the usual modes plus an optional ", ccs=..." suffix */
        wchar_t wmode[32];
        const int maxlen = (int) (sizeof wmode / sizeof wmode[0]) - 1;

        len = (int) strlen(mode);

        /* never write past wmode: an oversized mode fails the open
         * (fp stays NULL) instead of overflowing the stack buffer
         */
        if (len <= maxlen)
        {
            /* widen via unsigned char so bytes >= 0x80 don't sign-extend */
            for (i = 0; i < len; i++)
                wmode[i] = (wchar_t) (unsigned char) mode[i];
            wmode[len] = 0;

            fp = _wfopen((wchar_t *) outfilename, wmode);
        }
    }

    pdc_free(pdc, outfilename);

#else
    (void) pdc;

    /* due to honorlang, codeset of LANG: UTF-8 */
    if (pdc_is_utf8_bytecode(filename))
        i = 3;

    fp = fopen(&filename[i], mode);
#endif

    pdc_logg_openclose(pdc, fp, pdc_true);



    return fp;
}
Пример #4
0
/*
 * Fetches the first value of option `keyword` from `resopts` and converts
 * it to a zero-terminated byte string for use as a file name.
 *
 * The value is treated as UTF-8 if pdc_is_lastopt_utf8() says so;
 * otherwise it is treated as bytes in encoding `enc` (a negative `enc`
 * other than pdc_unicode / pdc_cid is replaced by the "auto" hypertext
 * encoding). The conversion targets the WinAnsi encoding vector with
 * PDC_CONV_TRYBYTES; if it still yields UTF-16, an error is raised for
 * compatibility levels below PDC_1_7 and the string is narrowed in place
 * to WinAnsi bytes, with a period substituted for every character that
 * has no WinAnsi code.
 *
 * Returns the converted name, or NULL if the option is not set.
 * NOTE(review): the buffer comes from pdc_convert_string with
 * PDC_CONV_NEWALLOC, so the caller presumably has to free it - confirm.
 */
char *
pdf_get_opt_filename(PDF *p, const char *keyword, pdc_resopt *resopts,
                     pdc_encoding enc, int codepage)
{
    pdc_bool logg1 = pdc_logg_is_enabled(p->pdc, 1, trc_optlist);
    pdc_bool logg3 = pdc_logg_is_enabled(p->pdc, 3, trc_text);
    pdc_byte *filename = NULL;
    char **strlist;

    if (pdc_get_optvalues(keyword, resopts, NULL, &strlist))
    {
        pdc_encodingvector *inev = NULL, *outev = NULL;
        pdc_text_format intextformat = pdc_bytes;
        pdc_text_format outtextformat = pdc_utf16; /* sic! */
        int convflags = PDC_CONV_NOBOM | PDC_CONV_TRYBYTES | PDC_CONV_NEWALLOC;
        pdc_bool isutf8;
        int ic, outlen;

        /* whole option list or string list is in UTF-8 */
        isutf8 = pdc_is_lastopt_utf8(resopts);

        if (!isutf8)
        {
            if (enc < 0 && enc != pdc_unicode && enc != pdc_cid)
                enc = pdf_get_hypertextencoding(p, "auto", &codepage,
                                                pdc_true);
            if (enc >= 0)
                inev = pdc_get_encoding_vector(p->pdc, enc);
        }
        else
        {
            intextformat = PDC_UTF8;
        }

        if (logg1)
        {
            if (isutf8)
            {
                pdc_logg(p->pdc, "\tOption \"%s\" is "PDC_UTF8_STRG" encoded\n",
                         keyword);
            }
            else
            {
                pdc_logg(p->pdc, "\tOption \"%s\" is %s encoded\n",
                         keyword, pdc_get_user_encoding(p->pdc, enc));
            }
        }

        outev = pdc_get_encoding_vector(p->pdc, pdc_winansi);

        if (logg3)
            convflags |= PDC_CONV_LOGGING;
        pdf_set_convertflags(p, &convflags);

        pdc_convert_string(p->pdc, intextformat, codepage, inev,
                    (pdc_byte *) strlist[0], (int) strlen(strlist[0]),
                    &outtextformat, outev, &filename, &outlen,
                    convflags, pdc_true);

        if (outtextformat == pdc_utf16)
        {
            pdc_ushort *unifilename = (pdc_ushort *) filename;
            int code;

            if (p->compatibility < PDC_1_7)
                pdc_error(p->pdc, PDC_E_IO_UNSUPP_PDFUNINAME, 0, 0, 0, 0);

            /* we must replace non-WinAnsi characters by period
             * and omit the BOM to get a WinAnsi string.
             *
             * The string is narrowed in place: byte ic is written only
             * after UTF-16 unit ic (bytes 2*ic, 2*ic+1) has been read.
             */
            outlen /= 2;
            for (ic = 0; ic < outlen; ic++)
            {
                /* assumed to yield the WinAnsi byte code of the character,
                 * or a value <= 0 if the encoding has none - TODO confirm
                 * against the definition of pdc_get_encoding_bytecode
                 */
                code = pdc_get_encoding_bytecode(p->pdc, outev,
                                                 unifilename[ic]);
                if (code <= 0)
                    code = PDC_UNICODE_PERIOD;

                /* store the WinAnsi code, not the truncated Unicode value:
                 * for characters above U+00FF the low byte of the Unicode
                 * value is an unrelated character
                 */
                filename[ic] = (pdc_byte) code;
            }
            filename[ic] = 0;
        }

        if (logg3)
            pdc_logg_hexdump(p->pdc, "output filename", "\t\t",
                             (char *) filename, strlen((char *) filename));
    }

    return (char *) filename;
}
Пример #5
0
/*
 * Fetches all values of option `keyword` from `resopts` and, where a
 * conversion applies, replaces each value in the option's string list by
 * the result of pdc_convert_string (requested format UTF-16BE with BOM;
 * for hypertext PDFDocEncoding bytes are tried first).
 *
 * A conversion applies if `ishypertext` is set, if the option list is
 * UTF-8 (pdc_is_lastopt_utf8), or if an input encoding vector was found
 * for `enc`. For non-hypertext with enc == pdc_invalidenc the function
 * returns 0; if `fieldname` is given it first cleans up the option list
 * and raises PDF_E_FF_FONTMISSING.
 *
 * On success the first string is stored in *text if `text` is non-NULL,
 * otherwise the whole list is stored in *textlist. If `fieldname` is
 * given, the list is additionally saved via pdc_save_lastopt and handed
 * to pdf_insert_stringlist.
 *
 * Returns the number of values reported by pdc_get_optvalues.
 */
int
pdf_get_opt_textlist(PDF *p, const char *keyword, pdc_resopt *resopts,
                     pdc_encoding enc, int codepage, pdc_bool ishypertext,
                     const char *fieldname, char **text, char ***textlist)
{
    pdc_bool logoptions = pdc_logg_is_enabled(p->pdc, 1, trc_optlist);
    pdc_encodingvector *srcvec = NULL;
    pdc_encodingvector *dstvec = NULL;
    pdc_text_format srcformat;
    pdc_text_format dstformat;
    pdc_byte *current;
    pdc_bool isutf8;
    pdc_bool doconvert;
    char **values;
    int flags = PDC_CONV_WITHBOM;
    int count, idx, convlen;

    count = pdc_get_optvalues(keyword, resopts, NULL, &values);
    if (count == 0)
        return 0;

    /* whole option list or string list is in UTF-8 */
    isutf8 = pdc_is_lastopt_utf8(resopts);

    /* pick the input / output encoding vectors */
    if (ishypertext)
    {
        if (!isutf8)
        {
            if (enc < 0 && enc != pdc_unicode && enc != pdc_cid)
                enc = pdf_get_hypertextencoding(p, "auto", &codepage,
                                                pdc_true);
            if (enc >= 0)
                srcvec = pdc_get_encoding_vector(p->pdc, enc);
        }

        dstvec = pdc_get_encoding_vector(p->pdc, pdc_pdfdoc);

        /* conversion to PDFDocEncoding if possible */
        flags |= PDC_CONV_TRYBYTES;
    }
    else if (enc == pdc_invalidenc)
    {
        if (fieldname)
        {
            pdc_cleanup_optionlist(p->pdc, resopts);
            pdc_error(p->pdc, PDF_E_FF_FONTMISSING, fieldname, 0, 0, 0);
        }
        return 0;
    }
    else if (enc >= 0 && !isutf8)
    {
        /* bug #2069: always conversion to UTF-16BE */
        srcvec = pdc_get_encoding_vector(p->pdc, enc);
    }

    if (logoptions)
    {
        if (isutf8)
            pdc_logg(p->pdc, "\tOption \"%s\" is "PDC_UTF8_STRG" encoded\n",
                     keyword);
        else
            pdc_logg(p->pdc, "\tOption \"%s\" is %s encoded\n",
                     keyword, pdc_get_user_encoding(p->pdc, enc));
    }

    /* neither operand changes inside the loop, so decide once */
    doconvert = ishypertext || isutf8 || srcvec != NULL;
    srcformat = isutf8 ? PDC_UTF8 : pdc_bytes;

    for (idx = 0; doconvert && idx < count; idx++)
    {
        current = (pdc_byte *) values[idx];

        if (pdc_logg_is_enabled(p->pdc, 3, trc_text))
            flags |= PDC_CONV_LOGGING;
        pdf_set_convertflags(p, &flags);

        /* reset the requested format; the call may overwrite it */
        dstformat = pdc_utf16be;
        pdc_convert_string(p->pdc, srcformat, codepage, srcvec,
                    current, (int) strlen((char *) current),
                    &dstformat, dstvec, &current, &convlen,
                    flags, pdc_true);

        /* swap the original list entry for the converted string */
        pdc_free(p->pdc, values[idx]);
        values[idx] = (char *) current;
    }

    if (text)
        *text = values[0];
    else
        *textlist = values;

    if (fieldname)
    {
        values = (char **) pdc_save_lastopt(resopts, PDC_OPT_SAVEALL);
        pdf_insert_stringlist(p, values, count);
    }

    return count;
}