/*
 * Guess the character encoding of a byte buffer.
 *
 * log:      receives error messages when ENCA rejects the language.
 * buf:      the bytes to analyse.
 * language: ENCA language code; NULL or "" selects the neutral language "__".
 *
 * Returns "UTF-8" when our own validation accepts the buffer, otherwise the
 * iconv-style charset name reported by ENCA, or NULL if ENCA could not
 * identify the charset or does not know the requested language. The returned
 * pointer is whatever enca_charset_name() hands out; it is not allocated here.
 */
static const char *enca_guess(struct mp_log *log, bstr buf,
                              const char *language)
{
    // Do our own UTF-8 detection, because ENCA seems to get it wrong sometimes
    // (suggested by divVerent). Explicitly allow cut-off UTF-8.
    if (bstr_validate_utf8(buf) > -8)
        return "UTF-8";

    if (!language || !language[0])
        language = "__"; // neutral language

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (!analyser) {
        // Allocation failed for this language code: report it together with
        // the list of codes ENCA does accept.
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t langcnt = 0;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // Index with size_t so it is compared against langcnt without a
        // signed/unsigned conversion.
        for (size_t i = 0; i < langcnt; i++) {
            mp_err(log, " %s", languages[i]);
        }
        mp_err(log, "\n");
        free(languages);
        return NULL;
    }

    const char *detected_cp = NULL;
    enca_set_termination_strictness(analyser, 0);
    EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
    const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
    // Only accept a name when ENCA actually identified a charset.
    if (tmp && enc.charset != ENCA_CS_UNKNOWN)
        detected_cp = tmp;
    enca_analyser_free(analyser);
    return detected_cp;
}
/*
 * Guess the character encoding of a byte buffer using ENCA.
 *
 * log:      receives error messages when ENCA rejects the language.
 * buf:      the bytes to analyse.
 * language: ENCA language code; NULL or "" selects the neutral language "__".
 *
 * Returns the iconv-style charset name reported by ENCA, or NULL if ENCA
 * could not identify the charset or does not know the requested language.
 * The returned pointer is whatever enca_charset_name() hands out; it is not
 * allocated here.
 */
static const char *enca_guess(struct mp_log *log, bstr buf,
                              const char *language)
{
    if (!language || !language[0])
        language = "__"; // neutral language

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (!analyser) {
        // Allocation failed for this language code: report it together with
        // the list of codes ENCA does accept.
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t langcnt = 0;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // Index with size_t so it is compared against langcnt without a
        // signed/unsigned conversion.
        for (size_t i = 0; i < langcnt; i++) {
            mp_err(log, " %s", languages[i]);
        }
        mp_err(log, "\n");
        free(languages);
        return NULL;
    }

    const char *detected_cp = NULL;
    enca_set_termination_strictness(analyser, 0);
    EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
    const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
    // Only accept a name when ENCA actually identified a charset.
    if (tmp && enc.charset != ENCA_CS_UNKNOWN)
        detected_cp = tmp;
    enca_analyser_free(analyser);
    return detected_cp;
}
/*
 * DWIM
 *
 * Derive the libenca tuning parameters (significant-character count,
 * termination strictness, filtering) from how much of the file is buffered.
 */
static void
dwim_libenca_options(EncaAnalyser an, const File *file)
{
    /*
     * The significant-character count follows n / (n/limit + 1/slope):
     * it rises with derivative `slope` at n = 0 and approaches `limit`
     * as n grows without bound.
     */
    const double slope = 0.005;
    const double limit = 15.0;
    ssize_t nbytes = file->buffer->pos;
    size_t significant = 1;     /* value used for an empty buffer */

    if (nbytes)
        significant = ceil((double)nbytes/(nbytes/limit + 1.0/slope));
    enca_set_significant(an, significant);

    /* Correct termination is required only when the buffer holds the
     * whole file. */
    enca_set_termination_strictness(an, file->size == nbytes ? 1 : 0);

    enca_set_filtering(an, significant > 2);
}