// Converts string from the given codepage to a wide string and replaces
// the HTML entities it contains (numeric ones such as "&#65;" or "&#x41;"
// and named ones) with the characters they stand for.
// An '&' that doesn't introduce a recognized entity is kept as is.
// Returns NULL if the codepage conversion doesn't yield a string.
// caller needs to free() the result
WCHAR *DecodeHtmlEntitites(const char *string, UINT codepage)
{
    WCHAR *fixed = str::conv::FromCodePage(string, codepage);
    // NOTE(review): FromCodePage presumably returns NULL on failure (or for
    // a NULL input); without this guard the loop below would dereference it
    if (!fixed)
        return fixed;

    // the string is decoded in place: every step below consumes at least
    // one source character per character written, so dst never passes src
    WCHAR *dst = fixed;
    const WCHAR *src = fixed;

    while (*src) {
        if (*src != '&') {
            *dst++ = *src++;
            continue;
        }
        // skip the '&' (it's written back below if no entity follows)
        src++;
        // numeric entities
        int unicode;
        if (str::Parse(src, L"#%d;", &unicode) ||
            str::Parse(src, L"#x%x;", &unicode)) {
            *dst++ = IntToChar(unicode);
            // a successful parse implies there's a terminating ';'
            src = str::FindChar(src, ';') + 1;
            continue;
        }

        // named entities
        int rune = -1;
        const WCHAR *entityEnd = src;
        while (iswalnum(*entityEnd))
            entityEnd++;
        if (entityEnd != src) {
            size_t entityLen = entityEnd - src;
            rune = HtmlEntityNameToRune(src, entityLen);
        }
        if (-1 != rune) {
            *dst++ = IntToChar(rune);
            src = entityEnd;
            // the terminating ';' is optional for named entities
            if (*src == ';')
                ++src;
        } else {
            // not an entity: keep the '&'; the text following it
            // is copied unchanged by the next loop iterations
            *dst++ = '&';
        }
    }
    *dst = '\0';

    return fixed;
}
// Example #2
// 0
// Resolves the HTML entity whose text starts at s, i.e. right after the
// introducing '&' (for "&foo;", s points at "foo;"). len is the maximum
// number of characters that may be read from s (4 in case of "foo;").
// On success, the entity's value is stored in rune and a pointer to the
// first character after the entity is returned.
// On failure, NULL is returned; rune is -1 if no name is present or the
// name isn't a known entity.
const char *ResolveHtmlEntity(const char *s, size_t len, int& rune)
{
    // numeric entities: try the decimal form first, then the hexadecimal one
    if (const char *numEnd = str::Parse(s, len, "#%d%?;", &rune))
        return numEnd;
    if (const char *numEnd = str::Parse(s, len, "#x%x%?;", &rune))
        return numEnd;

    // find the end of a potential named entity (a run of alphanumerics)
    const char *const limit = s + len;
    const char *nameEnd = s;
    while (nameEnd < limit && isalnum((unsigned char)*nameEnd))
        ++nameEnd;

    // no name at all
    if (nameEnd == s) {
        rune = -1;
        return NULL;
    }

    rune = HtmlEntityNameToRune(s, nameEnd - s);
    if (rune == -1)
        return NULL;

    // skip the trailing semicolon - if there is one
    if (nameEnd < limit && *nameEnd == ';')
        ++nameEnd;
    return nameEnd;
}