Esempio n. 1
0
/*
 * Build a new octet string from `html` by walking it character by
 * character: anything starting with '<' is handed to the comment/tag
 * skippers, anything starting with '&' is handed to
 * convert_html_entity(), and every other character is copied verbatim.
 * The result is then passed through octstr_shrink_blanks() and
 * octstr_strip_blanks() before being returned.
 *
 * The returned string is freshly created with octstr_create(); `html`
 * is only read through octstr_len()/octstr_get_char() in this function.
 */
Octstr *html_to_sms(Octstr *html)
{
    Octstr *text;
    long pos, total;
    int ch;

    text = octstr_create("");
    total = octstr_len(html);

    /*
     * The helpers below receive &pos; the loop relies on them to move
     * the position forward past whatever they consumed.
     */
    for (pos = 0; pos < total; ) {
        ch = octstr_get_char(html, pos);

        if (ch == '<') {
            if (html_comment_begins(html, pos))
                skip_html_comment(html, &pos);
            else
                skip_html_tag(html, &pos);
        } else if (ch == '&') {
            convert_html_entity(text, html, &pos);
        } else {
            /* Ordinary character: copy it and step forward by one. */
            octstr_append_char(text, ch);
            pos++;
        }
    }

    octstr_shrink_blanks(text);
    octstr_strip_blanks(text);
    return text;
}
Esempio n. 2
0
/*
 * Copy the text in [start, end) to `buf`, replacing HTML character
 * references with their UTF-8 encoding, and NUL-terminate the result.
 *
 * Decoding stops at `end` or at the first NUL byte, whichever comes
 * first. Three reference forms are recognised after an '&':
 *   - "#x" followed by hex digits   (passed to convert_html_entity, base 16)
 *   - "#"  followed by decimal digits (passed to convert_html_entity, base 10)
 *   - a letter followed by alphanumerics (passed to find_html_entity)
 * A trailing ';' is consumed when present but is not required. When the
 * helper reports failure (negative result) the '&' is emitted literally
 * and the text following it is copied through unchanged.
 *
 * No byte at or beyond `end` is ever read: every dereference of the
 * input cursor is preceded by a comparison against `end`.
 *
 * `buf` is written without any bound check here, so the caller must
 * supply a large enough buffer.
 * NOTE(review): ucs4_to_utf8_char() is assumed to return the position
 * just past the bytes it wrote — confirm against its definition.
 */
static void
parse_html_decode(
        unsigned char *buf,
        const unsigned char *start,
        const unsigned char *end)
{
    const unsigned char *p = start;
    unsigned char *out = buf;

    /* Test the bound first so that *end itself is never dereferenced. */
    while (p != end && *p) {
        if (*p != '&') {
            *out++ = *p++;
            continue;
        }
        ++p;
        if (p == end || !*p) {
            /* A lone '&' at the very end of the input. */
            *out++ = '&';
            continue;
        }
        if (*p == '#' && p + 1 != end && p[1] == 'x') {
            /* Hexadecimal numeric reference: &#xHHHH; */
            const unsigned char *q = p;
            p += 2;
            while (p != end && isxdigit(*p)) {
                ++p;
            }
            int e = convert_html_entity(q + 2, p, 16);
            if (e >= 0) {
                out = ucs4_to_utf8_char(out, e);
                if (p != end && *p == ';') {
                    ++p;
                }
            } else {
                /* Not decodable: emit '&' and re-scan from the '#'. */
                *out++ = '&';
                p = q;
            }
        } else if (*p == '#') {
            /* Decimal numeric reference: &#DDDD; */
            const unsigned char *q = p;
            ++p;
            while (p != end && isdigit(*p)) {
                ++p;
            }
            int e = convert_html_entity(q + 1, p, 10);
            if (e >= 0) {
                out = ucs4_to_utf8_char(out, e);
                if (p != end && *p == ';') {
                    ++p;
                }
            } else {
                *out++ = '&';
                p = q;
            }
        } else if (isalpha(*p)) {
            /*
             * Named reference: the name is looked up with
             * find_html_entity().
             * NOTE(review): the comment previously here read "entity
             * expansion is not performed..." — confirm what it meant.
             */
            const unsigned char *q = p;
            while (p != end && isalnum(*p)) {
                ++p;
            }
            int e = find_html_entity(q, p);
            if (e >= 0) {
                out = ucs4_to_utf8_char(out, e);
                if (p != end && *p == ';') {
                    ++p;
                }
            } else {
                /* Unknown name: emit '&' and copy the name as text. */
                *out++ = '&';
                p = q;
            }
        } else {
            /* '&' followed by something that cannot start a reference. */
            *out++ = '&';
            continue;
        }
    }
    *out = 0;
}