/*
 * Conversion to PDFDoc/EBCDIC or UTF-16/[EBCDIC-]UTF-8
 *
 * The text is first passed through pdc_convert_string() with UTF-16BE as
 * the requested target format, PDFDocEncoding as the output encoding
 * vector and the PDC_CONV_TRYBYTES flag set. If oututf8 is set and the
 * result of that step is UTF-16BE, a second pass converts it to PDC_UTF8
 * and the intermediate buffer is released.
 *
 * text               input string; NULL yields a NULL result
 * len                length of text; 0 means "use strlen(text)"
 * hypertextformat    text format of the input
 * hypertextencoding  input encoding; only values >= 0 are looked up
 * codepage           passed through to pdc_convert_string()
 * outlen             receives the length reported by the last conversion
 *                    (0 if text is NULL)
 * oututf8            request UTF-8 instead of UTF-16BE output
 * verbose            passed through to pdc_convert_string()
 *
 * Returns the buffer delivered by pdc_convert_string().
 * NOTE(review): the result is presumably owned by the caller and to be
 * released with pdc_free() - confirm against the callers.
 */
char *
pdf_convert_hypertext(PDF *p, const char *text, int len,
    pdc_text_format hypertextformat, pdc_encoding hypertextencoding,
    int codepage, int *outlen, pdc_bool oututf8, pdc_bool verbose)
{
    pdc_encodingvector *srcev = NULL;
    pdc_encodingvector *docev = NULL;
    pdc_byte *result = NULL;
    pdc_byte *utf8text = NULL;
    pdc_text_format resultformat = pdc_utf16be;
    pdc_text_format utf8format = PDC_UTF8;
    int flags = PDC_CONV_WITHBOM | PDC_CONV_TRYBYTES;

    *outlen = 0;

    if (text == NULL)
        return NULL;

    if (len == 0)
        len = (int) strlen(text);

    /* encoding vector of the incoming text, if it has one */
    if (hypertextencoding >= 0)
        srcev = pdc_get_encoding_vector(p->pdc, hypertextencoding);

    /* encoding vector for PDFDocEncoding */
    docev = pdc_get_encoding_vector(p->pdc, pdc_pdfdoc);

    /* first pass: UTF-16BE or PDFDocEncoding / EBCDIC */
    pdf_set_convertflags(p, &flags);
    if (pdc_logg_is_enabled(p->pdc, 3, trc_text))
        flags |= PDC_CONV_LOGGING;

    pdc_convert_string(p->pdc, hypertextformat, codepage, srcev,
                       (pdc_byte *) text, len,
                       &resultformat, docev, &result, outlen,
                       flags, verbose);

    /* nothing more to do unless UTF-8 was requested for Unicode output */
    if (!oututf8 || resultformat != pdc_utf16be)
        return (char *) result;

    /* second pass: UTF-16BE to UTF-8 */
    flags = PDC_CONV_WITHBOM;
    if (pdc_logg_is_enabled(p->pdc, 3, trc_text))
        flags |= PDC_CONV_LOGGING;

    pdc_convert_string(p->pdc, resultformat, 0, NULL,
                       result, *outlen,
                       &utf8format, NULL, &utf8text, outlen,
                       flags, verbose);

    /* the UTF-16BE intermediate is no longer needed */
    pdc_free(p->pdc, result);

    return (char *) utf8text;
}
/*
 * Converts a string for which pdc_is_utf8_bytecode() reports true from
 * PDC_UTF8 via pdc_convert_string(), requesting UTF-16BE with
 * PDFDocEncoding as the output encoding vector. Any other string is
 * handed back unchanged.
 *
 * text       input string
 * inlen      length of text
 * convflags  conversion flags; completed by pdf_set_convertflags()
 * outlen     receives the output length (inlen for the pass-through case)
 *
 * Returns either the buffer delivered by pdc_convert_string() or the
 * original text pointer (cast to char *), so the caller must compare
 * the result with text before releasing it.
 */
char *
pdf_convert_pdfstring(PDF *p, const char *text, int inlen, int convflags,
                      int *outlen)
{
    pdc_byte *converted = NULL;
    pdc_text_format srcformat = PDC_UTF8;
    pdc_text_format dstformat = pdc_utf16be;
    pdc_encodingvector *docev;

    /* pass-through: no conversion, no new buffer */
    if (!pdc_is_utf8_bytecode(text))
    {
        *outlen = inlen;
        return (char *) text;
    }

    docev = pdc_get_encoding_vector(p->pdc, pdc_pdfdoc);

    pdf_set_convertflags(p, &convflags);
    pdc_convert_string(p->pdc, srcformat, 0, NULL,
                       (pdc_byte *) text, inlen,
                       &dstformat, docev, &converted, outlen,
                       convflags, pdc_true);

    return (char *) converted;
}
/*
 * pdc_fopen_logg opens a file and reports the result to
 * pdc_logg_openclose().
 *
 * If the define PDC_UNICODE_FILENAME is set, the function expects a UTF-8
 * or UTF-16BE encoded file name (see function pdc_convert_filename). The
 * name is converted with pdc_convert_string(); if the result is of format
 * pdc_bytes it is opened with fopen(), otherwise with _wfopen().
 *
 * Without PDC_UNICODE_FILENAME the name is passed to fopen() directly,
 * skipping the first three bytes if pdc_is_utf8_bytecode() reports true
 * (presumably a UTF-8 BOM - confirm against pdc_is_utf8_bytecode).
 *
 * filename  name of the file to open
 * mode      fopen()-style mode string
 *
 * Returns the stream, or NULL if the file could not be opened. In the
 * wide-character path a mode string that does not fit into the internal
 * buffer also yields NULL instead of overflowing that buffer.
 */
FILE *
pdc_fopen_logg(pdc_core *pdc, const char *filename, const char *mode)
{
    FILE *fp = NULL;
    int i = 0;

#if defined(PDC_UNICODE_FILENAME)

    pdc_byte *outfilename = NULL;
    pdc_text_format nameformat = PDC_UTF8;
    pdc_text_format targetnameformat = pdc_utf16;
    int len = (int) pdc_strlen(filename);
    int outlen = 0;

    if (pdc_is_utf16be_unicode(filename))
        nameformat = pdc_utf16be;

    /* convert filename from UTF-8 / UTF-16BE to UTF-16 or Latin-1 */
    pdc_convert_string(pdc, nameformat, 0, NULL, (pdc_byte *) filename, len,
                       &targetnameformat, NULL, &outfilename, &outlen,
                       PDC_CONV_TRYBYTES | PDC_CONV_NOBOM, pdc_true);

    if (targetnameformat == pdc_bytes)
    {
        fp = fopen((const char *) outfilename, mode);
    }
    else
    {
        wchar_t wmode[8];
        size_t modelen = strlen(mode);

        /*
         * Widen the mode string only if it fits including the terminator.
         * An over-long mode leaves fp at NULL, so the call is reported
         * as a failed open rather than writing past the end of wmode.
         */
        if (modelen < sizeof wmode / sizeof wmode[0])
        {
            for (i = 0; i < (int) modelen; i++)
                wmode[i] = (wchar_t) mode[i];
            wmode[modelen] = 0;

            fp = _wfopen((wchar_t *) outfilename, wmode);
        }
    }

    pdc_free(pdc, outfilename);

#else

    (void) pdc;

    /* due to honorlang, codeset of LANG: UTF-8 */
    if (pdc_is_utf8_bytecode(filename))
        i = 3;

    fp = fopen(&filename[i], mode);

#endif

    pdc_logg_openclose(pdc, fp, pdc_true);

    return fp;
}
/*
 * Fetches the first value of option "keyword" from resopts and converts
 * it to a file name string.
 *
 * The value is converted with pdc_convert_string(), requesting pdc_utf16
 * with the WinAnsi encoding vector and PDC_CONV_TRYBYTES. If the result
 * is still UTF-16, it is reduced in place to a byte string in which
 * every character without a WinAnsi code is replaced by a period; for
 * p->compatibility < PDC_1_7 this case raises
 * PDC_E_IO_UNSUPP_PDFUNINAME instead.
 *
 * keyword   option name
 * resopts   parsed option list
 * enc       encoding of the option value; for negative values other than
 *           pdc_unicode and pdc_cid it is determined through
 *           pdf_get_hypertextencoding(p, "auto", ...). Ignored if the
 *           option list is UTF-8.
 * codepage  passed through to the conversion
 *
 * Returns the converted file name, or NULL if the option has no value.
 * NOTE(review): the buffer is requested with PDC_CONV_NEWALLOC, so it is
 * presumably owned by the caller - confirm.
 */
char *
pdf_get_opt_filename(PDF *p, const char *keyword, pdc_resopt *resopts,
                     pdc_encoding enc, int codepage)
{
    pdc_bool logg1 = pdc_logg_is_enabled(p->pdc, 1, trc_optlist);
    pdc_bool logg3 = pdc_logg_is_enabled(p->pdc, 3, trc_text);
    pdc_byte *filename = NULL;
    char **strlist;

    if (pdc_get_optvalues(keyword, resopts, NULL, &strlist))
    {
        pdc_encodingvector *inev = NULL, *outev = NULL;
        pdc_text_format intextformat = pdc_bytes;
        pdc_text_format outtextformat = pdc_utf16; /* sic! */
        int convflags = PDC_CONV_NOBOM | PDC_CONV_TRYBYTES |
                        PDC_CONV_NEWALLOC;
        pdc_bool isutf8;
        int ic, outlen;

        /* whole option list or string list is in UTF-8 */
        isutf8 = pdc_is_lastopt_utf8(resopts);

        if (!isutf8)
        {
            if (enc < 0 && enc != pdc_unicode && enc != pdc_cid)
                enc = pdf_get_hypertextencoding(p, "auto", &codepage,
                                                pdc_true);
            if (enc >= 0)
                inev = pdc_get_encoding_vector(p->pdc, enc);
        }
        else
        {
            intextformat = PDC_UTF8;
        }

        if (logg1)
        {
            if (isutf8)
            {
                pdc_logg(p->pdc, "\tOption \"%s\" is "PDC_UTF8_STRG" encoded\n",
                         keyword);
            }
            else
            {
                pdc_logg(p->pdc, "\tOption \"%s\" is %s encoded\n",
                         keyword, pdc_get_user_encoding(p->pdc, enc));
            }
        }

        outev = pdc_get_encoding_vector(p->pdc, pdc_winansi);

        if (logg3)
            convflags |= PDC_CONV_LOGGING;
        pdf_set_convertflags(p, &convflags);

        pdc_convert_string(p->pdc, intextformat, codepage, inev,
                           (pdc_byte *) strlist[0],
                           (int) strlen(strlist[0]),
                           &outtextformat, outev, &filename, &outlen,
                           convflags, pdc_true);

        if (outtextformat == pdc_utf16)
        {
            pdc_ushort uv, *unifilename = (pdc_ushort *) filename;
            int code;

            if (p->compatibility < PDC_1_7)
                pdc_error(p->pdc, PDC_E_IO_UNSUPP_PDFUNINAME, 0, 0, 0, 0);

            /* we must replace non-WinAnsi characters by period
             * and omit the BOM to get a WinAnsi string.
             *
             * The buffer is narrowed in place: byte ic is written only
             * after 16-bit unit ic (bytes 2*ic and 2*ic+1) has been read,
             * so a write never overtakes a pending read.
             */
            outlen /= 2;
            for (ic = 0; ic < outlen; ic++)
            {
                uv = unifilename[ic];
                code = pdc_get_encoding_bytecode(p->pdc, outev, uv);
                if (code <= 0)
                    code = PDC_UNICODE_PERIOD;

                /*
                 * Store the WinAnsi byte code, not the truncated Unicode
                 * value: for WinAnsi slots 0x80-0x9F the two differ
                 * (e.g. U+20AC has code 0x80, but its low byte is 0xAC).
                 * NOTE(review): assumes pdc_get_encoding_bytecode()
                 * returns the byte slot of uv in outev - confirm.
                 */
                filename[ic] = (pdc_byte) code;
            }
            filename[ic] = 0;
        }

        if (logg3)
            pdc_logg_hexdump(p->pdc, "output filename", "\t\t",
                             (char *) filename, strlen((char *) filename));
    }

    return (char *) filename;
}
/*
 * Fetches all values of option "keyword" from resopts and, where a
 * conversion applies, replaces each value by the result of
 * pdc_convert_string() with UTF-16BE (with BOM) as the requested format.
 *
 * A conversion applies if the option is hypertext, if the option list is
 * UTF-8, or if an input encoding vector could be determined. For
 * hypertext the PDFDocEncoding vector is supplied as output encoding and
 * PDC_CONV_TRYBYTES is set.
 *
 * keyword      option name
 * resopts      parsed option list
 * enc          encoding of the option values
 * codepage     passed through to the conversion
 * ishypertext  selects hypertext handling of enc (see above)
 * fieldname    if not NULL: an invalid enc for non-hypertext raises
 *              PDF_E_FF_FONTMISSING, and on success the string list is
 *              saved via pdc_save_lastopt() and handed to
 *              pdf_insert_stringlist()
 * text         if not NULL, receives the first string
 * textlist     receives the whole list if text is NULL
 *
 * Returns the number of values reported by pdc_get_optvalues(), or 0 if
 * the option is not hypertext and enc is pdc_invalidenc.
 */
int
pdf_get_opt_textlist(PDF *p, const char *keyword, pdc_resopt *resopts,
                     pdc_encoding enc, int codepage, pdc_bool ishypertext,
                     const char *fieldname, char **text, char ***textlist)
{
    pdc_bool logoptions = pdc_logg_is_enabled(p->pdc, 1, trc_optlist);
    pdc_encodingvector *srcev = NULL;
    pdc_encodingvector *dstev = NULL;
    pdc_text_format srcformat;
    pdc_text_format dstformat;
    pdc_byte *converted = NULL;
    pdc_bool isutf8;
    pdc_bool needconv;
    char **values;
    int flags = PDC_CONV_WITHBOM;
    int nvalues, idx, outlen;

    nvalues = pdc_get_optvalues(keyword, resopts, NULL, &values);
    if (nvalues == 0)
        return 0;

    /* whole option list or string list is in UTF-8 */
    isutf8 = pdc_is_lastopt_utf8(resopts);

    /* determine input and output encoding vectors */
    if (ishypertext)
    {
        if (!isutf8)
        {
            if (enc < 0 && enc != pdc_unicode && enc != pdc_cid)
                enc = pdf_get_hypertextencoding(p, "auto", &codepage,
                                                pdc_true);
            if (enc >= 0)
                srcev = pdc_get_encoding_vector(p->pdc, enc);
        }

        dstev = pdc_get_encoding_vector(p->pdc, pdc_pdfdoc);

        /* conversion to PDFDocEncoding if possible */
        flags |= PDC_CONV_TRYBYTES;
    }
    else if (enc == pdc_invalidenc)
    {
        if (fieldname)
        {
            pdc_cleanup_optionlist(p->pdc, resopts);
            pdc_error(p->pdc, PDF_E_FF_FONTMISSING, fieldname, 0, 0, 0);
        }
        return 0;
    }
    else if (enc >= 0 && !isutf8)
    {
        /* bug #2069: always conversion to UTF-16BE */
        srcev = pdc_get_encoding_vector(p->pdc, enc);
    }

    if (logoptions)
    {
        if (isutf8)
        {
            pdc_logg(p->pdc, "\tOption \"%s\" is "PDC_UTF8_STRG" encoded\n",
                     keyword);
        }
        else
        {
            pdc_logg(p->pdc, "\tOption \"%s\" is %s encoded\n",
                     keyword, pdc_get_user_encoding(p->pdc, enc));
        }
    }

    /* neither the predicate nor the input format changes per value */
    needconv = (ishypertext || isutf8 || srcev != NULL) ? pdc_true
                                                        : pdc_false;
    srcformat = isutf8 ? PDC_UTF8 : pdc_bytes;

    if (needconv)
    {
        for (idx = 0; idx < nvalues; idx++)
        {
            if (pdc_logg_is_enabled(p->pdc, 3, trc_text))
                flags |= PDC_CONV_LOGGING;
            pdf_set_convertflags(p, &flags);

            /* the call may overwrite the format, so reset it each time */
            dstformat = pdc_utf16be;
            pdc_convert_string(p->pdc, srcformat, codepage, srcev,
                               (pdc_byte *) values[idx],
                               (int) strlen(values[idx]),
                               &dstformat, dstev, &converted, &outlen,
                               flags, pdc_true);

            /* swap the original value for its converted counterpart */
            pdc_free(p->pdc, values[idx]);
            values[idx] = (char *) converted;
        }
    }

    if (text)
        *text = values[0];
    else
        *textlist = values;

    if (fieldname)
    {
        values = (char **) pdc_save_lastopt(resopts, PDC_OPT_SAVEALL);
        pdf_insert_stringlist(p, values, nvalues);
    }

    return nvalues;
}