示例#1
0
wxString wxHtmlEntitiesParser::Parse(const wxString& input) const
{
    wxString output;

    const wxString::const_iterator end(input.end());
    wxString::const_iterator c(input.begin());
    wxString::const_iterator last(c);

    for ( ; c < end; ++c )
    {
        if (*c == wxT('&'))
        {
            if ( output.empty() )
                output.reserve(input.length());

            if (c - last > 0)
                output.append(last, c);
            if ( ++c == end )
                break;

            wxString entity;
            const wxString::const_iterator ent_s = c;
            wxChar entity_char;

            for ( ; c != end; ++c )
            {
                wxChar ch = *c;
                if ( !((ch >= wxT('a') && ch <= wxT('z')) ||
                       (ch >= wxT('A') && ch <= wxT('Z')) ||
                       (ch >= wxT('0') && ch <= wxT('9')) ||
                        ch == wxT('_') || ch == wxT('#')) )
                    break;
            }

            entity.append(ent_s, c);
            if (c == end || *c != wxT(';')) --c;
            last = c+1;
            entity_char = GetEntityChar(entity);
            if (entity_char)
                output << entity_char;
            else
            {
                output.append(ent_s-1, c+1);
                wxLogTrace(wxTRACE_HTML_DEBUG,
                           "Unrecognized HTML entity: '%s'",
                           entity);
            }
        }
    }
    if ( last == input.begin() ) // common case: no entity
        return input;
    if ( last != end )
        output.append(last, end);
    return output;
}
示例#2
0
wxString wx28HtmlEntitiesParser::Parse(const wxString& input)
{
    const wxChar *c, *last;
    const wxChar *in_str = input.c_str();
    wxString output;

    for (c = in_str, last = in_str; *c != wxT('\0'); c++)
    {
        if (*c == wxT('&'))
        {
            if ( output.empty() )
                output.reserve(input.length());

            if (c - last > 0)
                output.append(last, c - last);
            if ( *++c == wxT('\0') )
                break;

            wxString entity;
            const wxChar *ent_s = c;
            wxChar entity_char;

            for (; (*c >= wxT('a') && *c <= wxT('z')) ||
                   (*c >= wxT('A') && *c <= wxT('Z')) ||
                   (*c >= wxT('0') && *c <= wxT('9')) ||
                   *c == wxT('_') || *c == wxT('#'); c++) {}
            entity.append(ent_s, c - ent_s);
            if (*c != wxT(';')) c--;
            last = c+1;
            entity_char = GetEntityChar(entity);
            if (entity_char)
                output << entity_char;
            else
            {
                output.append(ent_s-1, c-ent_s+2);
                wxLogTrace(wxTRACE_HTML_DEBUG,
                           wxT("Unrecognized HTML entity: '%s'"),
                           entity.c_str());
            }
        }
    }
    if (last == in_str) // common case: no entity
        return input;
    if (*last != wxT('\0'))
        output.append(last);
    return output;
}
void Decode(std::wstring& str, int opt)
{
	// (opt <= 0 || opt > 3) : Do nothing.
	// (opt == 1)            : Decode both numeric character references and character entity references.
	// (opt == 2)            : Decode only numeric character references.
	// (opt == 3)            : Decode only character entity references.

	if (opt >= 1 && opt <= 3)
	{
		std::wstring::size_type start = 0;

		while ((start = str.find(L'&', start)) != std::wstring::npos)
		{
			std::wstring::size_type end, pos;

			if ((end = str.find(L';', start)) == std::wstring::npos) break;
			pos = start + 1;

			if (pos == end)  // &; - skip
			{
				start = end + 1;
				continue;
			}
			else if ((end - pos) > 10)  // name (or number) is too long
			{
				++start;
				continue;
			}

			if (str[pos] == L'#')  // Numeric character reference
			{
				if (opt == 3 ||    // Decode only character entity references,
					++pos == end)  // &#; - skip
				{
					start = end + 1;
					continue;
				}

				int base;
				if (str[pos] == L'x' || str[pos] == L'X')
				{
					if (++pos == end)  // &#x; or &#X; - skip
					{
						start = end + 1;
						continue;
					}
					base = 16;
				}
				else
				{
					base = 10;
				}

				std::wstring num(str, pos, end - pos);
				WCHAR* pch = nullptr;
				errno = 0;
				long ch = wcstol(num.c_str(), &pch, base);
				if (pch == nullptr || *pch != L'\0' || errno == ERANGE || ch <= 0 || ch >= 0xFFFE)  // invalid character
				{
					start = pos;
					continue;
				}
				str.replace(start, end - start + 1, 1, (WCHAR)ch);
				++start;
			}
			else  // Character entity reference
			{
				if (opt == 2)  // Decode only numeric character references - skip
				{
					start = end + 1;
					continue;
				}

				std::wstring name(str, pos, end - pos);

				WCHAR ch = GetEntityChar(name);
				if (ch)
				{
					str.replace(start, end - start + 1, 1, ch);
				}
				++start;
			}
		}
	}
}