/*
 * Make sure a charset is registered for the group's code page and
 * store it in group->charset.
 *
 * Does nothing when forced_charset is set, or when getCharset() already
 * returns an entry for group->codepage (group->charset is left as it
 * was in both cases).
 *
 * Otherwise the charset name for the code page is resolved, passed
 * through check_charset() (which updates source_csname), read with
 * read_charset() and registered with addCharset(). If the lookup that
 * follows still yields nothing, the default charset is used.
 *
 * NOTE(review): the previous value of group->charset is not released
 * here; an earlier free() of it was disabled. Ownership is not visible
 * from this function - confirm before re-enabling.
 */
static void rtfSetCharset(RTFGroupData *group)
{
	char *saved_input = input_buffer;
	const char *cs_name;

	/* A forced charset or an already known code page: nothing to do. */
	if (forced_charset || getCharset(group->codepage)) {
		return;
	}

	cs_name = charset_from_codepage(group->codepage);
	check_charset(&source_csname, cs_name);

	/* input_buffer is cleared while the charset is read and registered,
	 * and restored before returning. */
	input_buffer = NULL;
	addCharset(read_charset(source_csname), group->codepage);

	group->charset = getCharset(group->codepage);
	if (!group->charset) {
		/* Registration did not produce an entry: use the default. */
		group->charset = getDefaultCharset();
	}

	input_buffer = saved_input;
}
/*
 * Guesses the character set of the given bytes and returns its name.
 *
 * If validateUTF8() accepts the data, the bytes are scanned up to the
 * first '\0':
 *   - any byte above 0x7F          -> "UTF-8" (scan stops);
 *   - ESC $ B  or  ESC $ @         -> "ISO-2022-JP" (scan continues, so a
 *                                     later match can still override it);
 *   - ESC $ A,  ESC $ ( C,  ESC $ ( D -> "ISO-2022-JP-2" (scan stops);
 *   - ESC $ ) C                    -> "ISO-2022-KR" (scan stops);
 *   - any other ESC $ sequence stops the scan without a result.
 * If the scan finds nothing, getDefaultCharset() is used.
 *
 * If no charset has been chosen at that point (the data is not valid
 * UTF-8, or the default charset is empty), the choice is made by the
 * locale-specific detectors selected through localeNum.
 *
 * The scan uses its own cursor, so the detectors always receive a
 * pointer to the first byte, and it stops when ESC is directly followed
 * by the terminating '\0' instead of reading past the terminator.
 */
const QString detectCharset (const QByteArray byteArray)
{
    const char* text = byteArray.constData();
    std::string charset = "";

    if (validateUTF8 (byteArray))
    {
        // Separate cursor: "text" must keep pointing at the start of the
        // data for the locale-based detectors below.
        const char* cursor = text;
        uint8_t c;
        while ((c = *cursor++) != '\0')
        {
            if (c > 0x7F)
            {
                charset = "UTF-8";
                break;
            }
            if (c == 0x1B) /* ESC */
            {
                c = *cursor++;
                // ESC was the last byte: the terminator has just been
                // consumed, so looping again would read out of bounds.
                if (c == '\0')
                    break;
                if (c == '$')
                {
                    c = *cursor++;
                    switch (c)
                    {
                    case 'B': // JIS X 0208-1983
                    case '@': // JIS X 0208-1978
                        charset = "ISO-2022-JP";
                        continue; // keep scanning the rest of the data
                    case 'A': // GB2312-1980
                        charset = "ISO-2022-JP-2";
                        break;
                    case '(':
                        c = *cursor++;
                        switch (c)
                        {
                        case 'C': // KSC5601-1987
                        case 'D': // JIS X 0212-1990
                            charset = "ISO-2022-JP-2";
                        }
                        break;
                    case ')':
                        c = *cursor++;
                        if (c == 'C')
                            charset = "ISO-2022-KR"; // KSC5601-1987
                    }
                    // Every "ESC $" sequence except B/@ ends the scan. If the
                    // terminator was consumed above, nothing is read after it.
                    break;
                }
            }
        }
        if (charset.empty())
            charset = getDefaultCharset();
    }

    if (charset.empty())
    {
        switch (localeNum)
        {
        case LATIN1:
            /* Windows-1252 */
            charset = detectCharsetLatin (text);
            break;
        case LATINC:
        case LATINC_UA:
        case LATINC_TJ:
            /* Cyrillic */
            charset = detectCharsetCyrillic (text);
            break;
        case LATINA:
            /* MS Windows Arabic */
            charset = detectCharsetWinArabic (text);
            break;
        case CHINESE_CN:
        case CHINESE_TW:
        case CHINESE_HK:
            charset = detectCharsetChinese (text);
            break;
        case JAPANESE:
            charset = detectCharsetJapanese (text);
            break;
        case KOREAN:
            charset = detectCharsetKorean (text);
            break;
        case VIETNAMESE:
        case THAI:
        case GEORGIAN:
            charset = encodingItem[OPENI18N];
            break;
        default:
            if (getDefaultCharset() != "UTF-8")
                charset = getDefaultCharset();
            else if (detect_noniso (text))
                charset = encodingItem[CODEPAGE];
            else
                charset = encodingItem[OPENI18N];
            // Last resort when the selected table entry is empty.
            if (charset.empty())
                charset = encodingItem[IANA];
        }
    }

    return QString::fromStdString (charset);
}