Esempio n. 1
0
// Reports whether user_cp is a magic auto-detection selector instead of a
// real iconv codepage name. When this returns true, the caller has to run
// mp_charset_guess() on the input data to learn the actual codepage.
// The decision is made on the first field split_colon() extracts from
// user_cp: it has to match "enca" or "guess" (compared with bstrcasecmp0()).
bool mp_charset_requires_guess(const char *user_cp)
{
    bstr parts[2] = {{0}};
    split_colon(user_cp, 2, parts);

    bstr detector = parts[0];
    if (bstrcasecmp0(detector, "enca") == 0)
        return true;
    if (bstrcasecmp0(detector, "guess") == 0)
        return true;
    return false;
}
Esempio n. 2
0
// Reports whether user_cp is a magic auto-detection selector instead of a
// real iconv codepage name. When this returns true, the caller has to run
// mp_charset_guess() on the input data to learn the actual codepage.
// Recognized selectors (first field of user_cp, compared with
// bstrcasecmp0()): "enca", "guess", and "utf8"/"utf-8" when further fields
// follow.
bool mp_charset_requires_guess(const char *user_cp)
{
    bstr parts[2] = {{0}};
    int count = split_colon(user_cp, 2, parts);
    bstr head = parts[0];

    if (bstrcasecmp0(head, "enca") == 0 || bstrcasecmp0(head, "guess") == 0)
        return true;

    // A bare "utf8" is simply the UTF-8 codepage. Only the "utf8:..." form
    // (split_colon() returning more than 1) asks for UTF-8 by default plus a
    // codepage to use when the input turns out not to be UTF-8.
    if (count <= 1)
        return false;

    return bstrcasecmp0(head, "utf-8") == 0 || bstrcasecmp0(head, "utf8") == 0;
}
Esempio n. 3
0
// Determines the charset to use for buf, based on the user_cp selector.
// - If mp_charset_requires_guess(user_cp) is false, user_cp is returned
//   as-is and buf is never inspected.
// - If buf passes the built-in UTF-8 check, "UTF-8" is returned.
// - Otherwise the detector named by the first field of user_cp is run (if
//   compiled in). When it yields nothing, the fallback taken from user_cp is
//   used instead.
// - If there is still no result, "UTF-8-BROKEN" is returned, unless flags
//   contains MP_STRICT_UTF8, in which case the result is NULL.
const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
                             int flags)
{
    if (!mp_charset_requires_guess(user_cp))
        return user_cp;

    // UTF-8 is checked by hand before asking any detector, because at least
    // ENCA seems to get it wrong sometimes (suggested by divVerent).
    int utf8_status = bstr_validate_utf8(buf);
    if (utf8_status >= 0)
        return "UTF-8";
    // With MP_ICONV_ALLOW_CUTOFF, a negative result above -8 is still
    // accepted as UTF-8.
    if (utf8_status > -8 && (flags & MP_ICONV_ALLOW_CUTOFF))
        return "UTF-8";

    bstr fields[3] = {{0}};
    split_colon(user_cp, 3, fields);

    bstr detector = fields[0];
    // The last field is used directly as a C string (expected to be
    // 0-terminated already).
    const char *fallback = fields[2].start;

    // The detectors take a C string, so the middle field is copied out.
    char lang[100];
    snprintf(lang, sizeof(lang), "%.*s", BSTR_P(fields[1]));

    const char *detected = NULL;

#if HAVE_ENCA
    if (bstrcasecmp0(detector, "enca") == 0)
        detected = enca_guess(log, buf, lang);
#endif
#if HAVE_LIBGUESS
    if (bstrcasecmp0(detector, "guess") == 0)
        detected = libguess_guess(log, buf, lang);
#endif

    // For "utf8:<cp>" the fallback is the second field whenever no third
    // field was given (must be 0-terminated already in that case).
    int utf8_selected = bstrcasecmp0(detector, "utf8") == 0 ||
                        bstrcasecmp0(detector, "utf-8") == 0;
    if (utf8_selected && !fallback)
        fallback = fields[1].start;

    if (!detected) {
        detected = fallback;
        const char *shown = (detected && detected[0]) ? detected
                                                      : "broken UTF-8/Latin1";
        mp_dbg(log, "Detection with %.*s failed: fallback to %s\n",
               BSTR_P(detector), shown);
    } else {
        mp_dbg(log, "%.*s detected charset: '%s'\n",
               BSTR_P(detector), detected);
    }

    if (detected)
        return detected;

    return (flags & MP_STRICT_UTF8) ? NULL : "UTF-8-BROKEN";
}
Esempio n. 4
0
// Looks up str in af_fmtstr_table (names compared with bstrcasecmp0()) and
// returns the format value of the first matching entry. The table is walked
// until an entry with a NULL name is reached. Returns 0 if nothing matches.
int af_str2fmt_short(bstr str)
{
    int idx = 0;
    while (af_fmtstr_table[idx].name) {
        if (bstrcasecmp0(str, af_fmtstr_table[idx].name) == 0)
            return af_fmtstr_table[idx].format;
        idx++;
    }
    return 0;
}
Esempio n. 5
0
File: tags.c Progetto: AddictXQ/mpv
// Returns the value stored for key in tags, or NULL if there is no such key.
// Keys are compared with bstrcasecmp0(); the first matching entry wins.
// The returned pointer is the entry stored in tags->values, not a copy.
char *mp_tags_get_bstr(struct mp_tags *tags, bstr key)
{
    int idx = 0;
    while (idx < tags->num_keys) {
        if (bstrcasecmp0(key, tags->keys[idx]) == 0)
            return tags->values[idx];
        idx++;
    }
    return NULL;
}
Esempio n. 6
0
// Determines the charset to use for buf, based on the user_cp selector.
// If mp_charset_requires_guess(user_cp) is false, user_cp is returned as-is
// and buf is never inspected. Otherwise the detector named by the first field
// of user_cp is run (if compiled in). When it yields nothing, the third field
// of user_cp is returned as fallback, which is NULL if no such field exists.
const char *mp_charset_guess(bstr buf, const char *user_cp)
{
    if (!mp_charset_requires_guess(user_cp))
        return user_cp;

    bstr fields[3] = {{0}};
    split_colon(user_cp, 3, fields);

    bstr detector = fields[0];
    // The last field is used directly as a C string (expected to be
    // 0-terminated already).
    const char *fallback = fields[2].start;

    // The detectors take a C string, so the middle field is copied out.
    char lang[100];
    snprintf(lang, sizeof(lang), "%.*s", BSTR_P(fields[1]));

    const char *detected = NULL;

#ifdef CONFIG_ENCA
    if (bstrcasecmp0(detector, "enca") == 0)
        detected = enca_guess(buf, lang);
#endif
#ifdef CONFIG_LIBGUESS
    if (bstrcasecmp0(detector, "guess") == 0)
        detected = libguess_guess(buf, lang);
#endif

    if (detected) {
        mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n",
               BSTR_P(detector), detected);
        return detected;
    }

    // Nothing detected: report the fallback (a missing or empty one is
    // logged as "no conversion") and hand it back unchanged.
    const char *shown = (fallback && fallback[0]) ? fallback : "no conversion";
    mp_msg(MSGT_SUBREADER, MSGL_DBG2,
           "Detection with %.*s failed: fallback to %s\n",
           BSTR_P(detector), shown);
    return fallback;
}
Esempio n. 7
0
File: format.c Progetto: kax4/mpv
// Converts str to an audio format value. Two spellings are accepted:
// - a string starting with "0x" that bstrtoll() consumes completely in base
//   16 and whose value passes af_fmt_valid();
// - a name from af_fmtstr_table (compared with bstrcasecmp0()).
// Returns -1 if neither applies.
int af_str2fmt_short(bstr str)
{
    if (bstr_startswith0(str, "0x")) {
        bstr tail;
        int numeric = bstrtoll(str, &tail, 16);
        // Accept the number only if no unparsed characters are left over.
        if (tail.len == 0) {
            if (af_fmt_valid(numeric))
                return numeric;
        }
    }

    int idx = 0;
    while (af_fmtstr_table[idx].name) {
        if (bstrcasecmp0(str, af_fmtstr_table[idx].name) == 0)
            return af_fmtstr_table[idx].format;
        idx++;
    }

    return -1;
}
Esempio n. 8
0
File: tags.c Progetto: AddictXQ/mpv
// Stores value under key in tags. If an entry whose key matches (compared
// with bstrcasecmp0()) already exists, its old value is freed and replaced.
// Otherwise both arrays grow by one element and a new entry is appended.
// Key and value strings are created with bstrto0() using tags as context.
void mp_tags_set_bstr(struct mp_tags *tags, bstr key, bstr value)
{
    // Find the first entry with a matching key; slot ends up equal to
    // tags->num_keys if there is none.
    int slot = 0;
    while (slot < tags->num_keys && bstrcasecmp0(key, tags->keys[slot]) != 0)
        slot++;

    if (slot < tags->num_keys) {
        talloc_free(tags->values[slot]);
        tags->values[slot] = bstrto0(tags, value);
        return;
    }

    MP_RESIZE_ARRAY(tags, tags->keys,   tags->num_keys + 1);
    MP_RESIZE_ARRAY(tags, tags->values, tags->num_keys + 1);
    tags->keys[tags->num_keys]   = bstrto0(tags, key);
    tags->values[tags->num_keys] = bstrto0(tags, value);
    tags->num_keys += 1;
}
Esempio n. 9
0
// Parses a PLS playlist from p.
// The first non-empty (stripped) line has to be "[playlist]" (compared with
// bstrcasecmp0()), otherwise -1 is returned. If p->probing is set, the
// function stops right after that check and returns 0. Otherwise every
// remaining line of the form key=value whose key starts with "File" (checked
// with bstr_case_startswith()) has its value passed to pl_add(). Returns 0.
static int parse_pls(struct pl_parser *p)
{
    // Skip leading empty lines to find the header.
    bstr header = {0};
    for (;;) {
        if (header.len || pl_eof(p))
            break;
        header = bstr_strip(pl_get_line(p));
    }

    if (bstrcasecmp0(header, "[playlist]") != 0)
        return -1;
    if (p->probing)
        return 0;

    while (!pl_eof(p)) {
        bstr entry = bstr_strip(pl_get_line(p));
        bstr key, value;
        if (!bstr_split_tok(entry, "=", &key, &value))
            continue;
        if (bstr_case_startswith(key, bstr0("File")))
            pl_add(p, value);
    }
    return 0;
}
Esempio n. 10
0
// Runs charset auto-detection on the input buffer, and returns the charset
// that should be used.
// If user_cp doesn't refer to any known auto-detection (for example because
// it's a real iconv codepage), user_cp is returned without even looking at
// the buf data.
// "auto" selects uchardet or ENCA, depending on what is compiled in, and
// otherwise behaves like "UTF-8:UTF-8-BROKEN". In auto mode ms_bom_guess()
// is consulted first, and its result (if any) takes precedence.
// If detection yields nothing, the fallback given in user_cp is used. If
// there is no fallback either, "UTF-8-BROKEN" is returned, unless flags
// contains MP_STRICT_UTF8, in which case NULL is returned.
// The return value may (but doesn't have to) be allocated under talloc_ctx.
const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf,
                             const char *user_cp, int flags)
{
    if (!mp_charset_requires_guess(user_cp))
        return user_cp;

    // Map "auto" to the best detector available in this build.
    bool use_auto = strcasecmp(user_cp, "auto") == 0;
    if (use_auto) {
#if HAVE_UCHARDET
        user_cp = "uchardet";
#elif HAVE_ENCA
        user_cp = "enca";
#else
        user_cp = "UTF-8:UTF-8-BROKEN";
#endif
    }

    bstr params[3] = {{0}};
    split_colon(user_cp, 3, params);

    bstr type = params[0];
    // The detectors take a C string, so the middle field is copied out.
    char lang[100];
    snprintf(lang, sizeof(lang), "%.*s", BSTR_P(params[1]));
    const char *fallback = params[2].start; // last item, already 0-terminated

    const char *res = NULL;

    if (use_auto) {
        res = ms_bom_guess(buf);
        // Renaming the type keeps all detectors below from running, so the
        // BOM-based result is not overwritten.
        if (res)
            type = bstr0("auto");
    }

#if HAVE_ENCA
    if (bstrcasecmp0(type, "enca") == 0)
        res = enca_guess(log, buf, lang);
#endif
#if HAVE_LIBGUESS
    if (bstrcasecmp0(type, "guess") == 0)
        res = libguess_guess(log, buf, lang);
#endif
#if HAVE_UCHARDET
    if (bstrcasecmp0(type, "uchardet") == 0)
        res = mp_uchardet(talloc_ctx, log, buf);
#endif

    if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) {
        // "utf8:<cp>": the second field is the fallback if no third one was
        // given.
        if (!fallback)
            fallback = params[1].start; // must be already 0-terminated
        int r = bstr_validate_utf8(buf);
        // With MP_ICONV_ALLOW_CUTOFF, a negative result above -8 is still
        // accepted as UTF-8.
        if (r >= 0 || (r > -8 && (flags & MP_ICONV_ALLOW_CUTOFF)))
            res = "utf-8";
    }

    if (res) {
        mp_dbg(log, "%.*s detected charset: '%s'\n", BSTR_P(type), res);
    } else {
        res = fallback;
        mp_dbg(log, "Detection with %.*s failed: fallback to %s\n",
               BSTR_P(type), res && res[0] ? res : "broken UTF-8/Latin1");
    }

    if (!res && !(flags & MP_STRICT_UTF8))
        res = "UTF-8-BROKEN";

    // res is still NULL here if MP_STRICT_UTF8 is set and neither detection
    // nor a fallback produced a charset. NULL must not be passed to "%s"
    // (undefined behavior), so that case gets its own message.
    if (res) {
        mp_verbose(log, "Using charset '%s'.\n", res);
    } else {
        mp_verbose(log, "No charset could be determined.\n");
    }
    return res;
}