コード例 #1
0
ファイル: charset_conv.c プロジェクト: htc550605125/mpv
// Guess the character set of buf.
//
//  log:      receives the error output when ENCA rejects the language
//  buf:      raw bytes to examine
//  language: language hint handed to ENCA; NULL or an empty string selects
//            the neutral language "__"
//
// Returns "UTF-8" if the buffer passes the UTF-8 check below, otherwise the
// charset name ENCA reports (requested in ENCA_NAME_STYLE_ICONV form), or
// NULL if no analyser could be allocated for the language or ENCA reported
// ENCA_CS_UNKNOWN.
static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
{
    // Do our own UTF-8 detection, because ENCA seems to get it wrong sometimes
    // (suggested by divVerent). Explicitly allow cut-off UTF-8.
    if (bstr_validate_utf8(buf) > -8)
        return "UTF-8";

    if (!language || !language[0])
        language = "__"; // neutral language

    const char *detected_cp = NULL;

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (analyser) {
        enca_set_termination_strictness(analyser, 0);
        EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
        const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
        // Only accept a name when the charset was actually recognized.
        if (tmp && enc.charset != ENCA_CS_UNKNOWN)
            detected_cp = tmp;
        enca_analyser_free(analyser);
    } else {
        // Allocation failed: report the language and list what ENCA supports.
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t langcnt;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // Index with size_t so the comparison against langcnt is not a
        // signed/unsigned mix.
        for (size_t i = 0; i < langcnt; i++)
            mp_err(log, " %s", languages[i]);
        mp_err(log, "\n");
        // Only the array itself is released here, not the strings in it.
        free(languages);
    }

    return detected_cp;
}
コード例 #2
0
ファイル: charset_conv.c プロジェクト: 0x0all/mpv
// Guess the character set of buf with ENCA.
//
//  log:      receives the error output when ENCA rejects the language
//  buf:      raw bytes to examine
//  language: language hint handed to ENCA; NULL or an empty string selects
//            the neutral language "__"
//
// Returns the charset name ENCA reports (requested in ENCA_NAME_STYLE_ICONV
// form), or NULL if no analyser could be allocated for the language or ENCA
// reported ENCA_CS_UNKNOWN.
static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
{
    if (!language || !language[0])
        language = "__"; // neutral language

    const char *detected_cp = NULL;

    EncaAnalyser analyser = enca_analyser_alloc(language);
    if (analyser) {
        enca_set_termination_strictness(analyser, 0);
        EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len);
        const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV);
        // Only accept a name when the charset was actually recognized.
        if (tmp && enc.charset != ENCA_CS_UNKNOWN)
            detected_cp = tmp;
        enca_analyser_free(analyser);
    } else {
        // Allocation failed: report the language and list what ENCA supports.
        mp_err(log, "ENCA doesn't know language '%s'\n", language);
        size_t langcnt;
        const char **languages = enca_get_languages(&langcnt);
        mp_err(log, "ENCA supported languages:");
        // Index with size_t so the comparison against langcnt is not a
        // signed/unsigned mix.
        for (size_t i = 0; i < langcnt; i++)
            mp_err(log, " %s", languages[i]);
        mp_err(log, "\n");
        // Only the array itself is released here, not the strings in it.
        free(languages);
    }

    return detected_cp;
}
コード例 #3
0
ファイル: enca.c プロジェクト: ShiftMediaProject/enca
/*
 * DWIM
 *
 * Pick values for the libenca tuning parameters (significant character
 * count, termination strictness, filtering) of analyser `an', based on how
 * much of `file' is currently buffered.
 */
static void
dwim_libenca_options(EncaAnalyser an, const File *file)
{
  const double mu = 0.005;  /* slope of the curve below near size 0 */
  const double m = 15.0;  /* value the curve approaches for huge sizes */
  ssize_t size = file->buffer->pos;
  size_t sgnf = 1;  /* kept as is when nothing is buffered */

  /* The number of significant characters: size/(size/m + 1/mu), rounded up */
  if (size)
    sgnf = ceil((double)size/(size/m + 1.0/mu));
  enca_set_significant(an, sgnf);

  /* Require correct termination only when the buffer holds the whole file;
   * the comparison evaluates to exactly 1 or 0. */
  enca_set_termination_strictness(an, file->size == size);

  /* Filtering is switched on once more than two characters are required. */
  enca_set_filtering(an, sgnf > 2);
}