Example #1
0
// Builds the tag tree for the source range [begin_pos, end_pos) and appends
// the pieces of text found between the tags to m_TextPieces.
//
// cur       - tag whose contents are being parsed, or NULL when parsing at
//             the top level; in the latter case the created tags are chained
//             as siblings starting from m_Tags
// begin_pos - index in m_Source of the first character to parse
// end_pos   - index in m_Source one past the last character to parse
// cache     - tags cache handed to every wx28HtmlTag created here and to the
//             recursive calls
void wx28HtmlParser::CreateDOMSubTree(wx28HtmlTag *cur,
                                    int begin_pos, int end_pos,
                                    wx28HtmlTagsCache *cache)
{
    if (end_pos <= begin_pos) return;

    wxChar c;
    int i = begin_pos;
    int textBeginning = begin_pos;

    // If the tag contains CDATA text, we include the text between beginning
    // and ending tag verbatim. Setting i=end_pos will skip to the very
    // end of this function where text piece is added, bypassing any child
    // tags parsing (CDATA element can't have child elements by definition):
    if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str()))
    {
        i = end_pos;
    }

    while (i < end_pos)
    {
        c = m_Source.GetChar(i);

        if (c == wxT('<'))
        {
            // add text to m_TextPieces:
            if (i - textBeginning > 0)
                m_TextPieces->Add(
                    wx28HtmlTextPiece(textBeginning, i - textBeginning));

            // if it is a comment, skip it:
            if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') &&
                                 m_Source.GetChar(i+2) == wxT('-') &&
                                 m_Source.GetChar(i+3) == wxT('-'))
            {
                // The text preceding the comment has just been added above,
                // so move the text start up to the comment itself. Without
                // this, a comment that is never closed before end_pos left
                // textBeginning untouched and the text already added was
                // added a second time by the "remaining text" code after the
                // loop. With it, only the unterminated comment (from its
                // "<!--" on) ends up in that last text piece.
                textBeginning = i;

                // Comments begin with "<!--" and end with "--[ \t\r\n]*>"
                // according to HTML 4.0
                int dashes = 0;
                i += 4;
                while (i < end_pos)
                {
                    c = m_Source.GetChar(i++);
                    if ((c == wxT(' ') || c == wxT('\n') ||
                        c == wxT('\r') || c == wxT('\t')) && dashes >= 2) {}
                    else if (c == wxT('>') && dashes >= 2)
                    {
                        // comment properly closed: text resumes right after
                        // its closing '>'
                        textBeginning = i;
                        break;
                    }
                    else if (c == wxT('-'))
                        dashes++;
                    else
                        dashes = 0;
                }
            }

            // add another tag to the tree:
            else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/'))
            {
                wx28HtmlTag *chd;
                if (cur)
                    chd = new wx28HtmlTag(cur, m_Source,
                                        i, end_pos, cache, m_entitiesParser);
                else
                {
                    chd = new wx28HtmlTag(NULL, m_Source,
                                        i, end_pos, cache, m_entitiesParser);
                    if (!m_Tags)
                    {
                        // if this is the first tag to be created make the root
                        // m_Tags point to it:
                        m_Tags = chd;
                    }
                    else
                    {
                        // if there is already a root tag add this tag as
                        // the last sibling:
                        chd->m_Prev = m_Tags->GetLastSibling();
                        chd->m_Prev->m_Next = chd;
                    }
                }

                if (chd->HasEnding())
                {
                    // parse the contents of the new tag recursively, then
                    // continue from the position the tag reports as the end
                    // of its ending tag
                    CreateDOMSubTree(chd,
                                     chd->GetBeginPos(), chd->GetEndPos1(),
                                     cache);
                    i = chd->GetEndPos2();
                }
                else
                    i = chd->GetBeginPos();

                textBeginning = i;
            }

            // ... or skip ending tag:
            else
            {
                while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++;
                textBeginning = i+1;
            }
        }
        else i++;
    }

    // add remaining text to m_TextPieces:
    if (end_pos - textBeginning > 0)
        m_TextPieces->Add(
            wx28HtmlTextPiece(textBeginning, end_pos - textBeginning));
}
Example #2
0
// Parses the part of the source delimited by [begin_pos, end_pos): each
// opening tag found becomes a wxHtmlTag (a child of cur, or a top-level tag
// chained from m_Tags when cur is NULL) and each run of text between tags is
// appended to m_TextPieces.
void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
                                    const wxString::const_iterator& begin_pos,
                                    const wxString::const_iterator& end_pos,
                                    wxHtmlTagsCache *cache)
{
    // nothing to do for an empty (or inverted) range
    if (!(begin_pos < end_pos))
        return;

    wxString::const_iterator textStart = begin_pos;

    // A CDATA element can't have child elements, so its whole contents is a
    // single piece of text: start the scan at end_pos, which makes the loop
    // below a no-op and leaves only the final "remaining text" step.
    const bool isCDATA = cur != NULL && wxIsCDATAElement(cur->GetName());
    wxString::const_iterator cursor = isCDATA ? end_pos : begin_pos;

    while (cursor < end_pos)
    {
        const wxChar c = *cursor;

        // ordinary character: it belongs to the current run of text
        if (c != wxT('<'))
        {
            ++cursor;
            continue;
        }

        // a '<' terminates the current run of text, store it if not empty
        if (cursor > textStart)
            m_TextPieces->push_back(wxHtmlTextPiece(textStart, cursor));

        // comments are skipped entirely
        if ( SkipCommentTag(cursor, m_Source->end()) )
        {
            // step over the closing '>' as well
            ++cursor;
            textStart = cursor;
            continue;
        }

        // anything that doesn't start with "</" is a new tag for the tree
        if (cursor < end_pos-1 && *(cursor+1) != wxT('/'))
        {
            wxHtmlTag * const child =
                new wxHtmlTag(cur, m_Source,
                              cursor, end_pos, cache, m_entitiesParser);

            if (cur == NULL)
            {
                // top-level tags are kept in a sibling chain rooted at m_Tags
                if (m_Tags)
                {
                    child->m_Prev = m_Tags->GetLastSibling();
                    child->m_Prev->m_Next = child;
                }
                else
                {
                    // very first tag created: it becomes the root
                    m_Tags = child;
                }
            }

            if (child->HasEnding())
            {
                // descend into the tag's contents, then resume after it
                CreateDOMSubTree(child,
                                 child->GetBeginIter(), child->GetEndIter1(),
                                 cache);
                cursor = child->GetEndIter2();
            }
            else
            {
                cursor = child->GetBeginIter();
            }

            textStart = cursor;
            continue;
        }

        // otherwise this is an ending tag: skip up to its closing '>'
        while (cursor < end_pos && *cursor != wxT('>'))
            ++cursor;

        // text resumes after the '>', unless the range ended before one
        textStart = cursor;
        if (cursor < end_pos)
            ++textStart;
    }

    // whatever follows the last tag is text too
    if (end_pos > textStart)
        m_TextPieces->push_back(wxHtmlTextPiece(textStart, end_pos));
}
Example #3
0
// Scans the whole source once and records every tag found in it (comments
// excepted) in the cache: the position of the tag's opening '<' and, once a
// matching ending tag has been seen, the positions of that ending tag.
//
// source - the HTML text to scan; the positions stored in the cache are
//          iterators into this string
wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
{
    m_Cache = new wxHtmlTagsCacheData;
    m_CachePos = 0;

    // upper-cased name of the tag being examined; longer names are truncated
    // to fit
    wxChar tagBuffer[256];

    const wxString::const_iterator end = source.end();
    for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
    {
        if (*pos != wxT('<'))
            continue;

        // possible tag start found:

        // don't cache comment tags
        if ( wxHtmlParser::SkipCommentTag(pos, end) )
            continue;

        // Remember the starting tag position.
        wxString::const_iterator stpos = pos++;

        // And look for the ending one.
        int i;
        for ( i = 0;
              pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
              *pos != wxT('>') && !wxIsspace(*pos);
              ++i, ++pos )
        {
            tagBuffer[i] = (wxChar)wxToupper(*pos);
        }
        tagBuffer[i] = wxT('\0');

        while (pos < end && *pos != wxT('>'))
            ++pos;

        if ( pos == end )
        {
            // We didn't find a closing bracket, this is not a valid tag after
            // all. Notice that we need to roll back pos to avoid creating an
            // invalid iterator when "++pos" is done in the loop statement.
            --pos;

            continue;
        }

        // We have a valid tag, add it to the cache.
        size_t tg = Cache().size();
        Cache().push_back(wxHtmlCacheItem());
        Cache()[tg].Key = stpos;
        Cache()[tg].Name = new wxChar[i+1];
        memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));

        if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
        {
            Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
            // find matching begin tag: walk back to the closest tag of the
            // same name (tagBuffer+1 skips the leading '/') that has no
            // ending tag yet
            for (i = tg; i >= 0; i--)
            {
                if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
                {
                    Cache()[i].type = wxHtmlCacheItem::Type_Normal;
                    Cache()[i].End1 = stpos;
                    Cache()[i].End2 = pos + 1;
                    break;
                }
            }
        }
        else
        {
            Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;

            if (wxIsCDATAElement(tagBuffer))
            {
                // store the orig pos in case we are missing the closing
                // tag (see below)
                const wxString::const_iterator old_pos = pos;
                bool foundCloseTag = false;

                // find next matching tag
                int tag_len = wxStrlen(tagBuffer);
                while (pos < end)
                {
                    // find the ending tag
                    while (pos + 1 < end &&
                           (*pos != '<' || *(pos+1) != '/'))
                        ++pos;
                    if (*pos == '<')
                        ++pos;

                    // see if it matches
                    int match_pos = 0;
                    while (pos < end && match_pos < tag_len )
                    {
                        wxChar c = *pos;
                        if ( c == '>' || c == '<' )
                            break;

                        // cast to wxChar needed to suppress warning in
                        // Unicode build
                        if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
                        {
                            ++match_pos;
                        }
                        else if (c == wxT(' ') || c == wxT('\n') ||
                            c == wxT('\r') || c == wxT('\t'))
                        {
                            // need to skip over these
                        }
                        else
                        {
                            match_pos = 0;
                        }
                        ++pos;
                    }

                    // found a match
                    if (match_pos == tag_len)
                    {
                        // go back to just before the closing tag so that the
                        // "++pos" of the outer loop moves on to it (this
                        // arithmetic assumes the name directly follows "</")
                        pos = pos - tag_len - 3;
                        foundCloseTag = true;
                        break;
                    }
                    else // keep looking for the closing tag
                    {
                        // The matching loop above may have stopped with pos
                        // already at end; only step forward while there is
                        // something left, as incrementing pos past end would
                        // create an invalid iterator.
                        if (pos < end)
                            ++pos;
                    }
                }
                if (!foundCloseTag)
                {
                    // we didn't find closing tag; this means the markup
                    // is incorrect and the best thing we can do is to
                    // ignore the unclosed tag and continue parsing as if
                    // it didn't exist:
                    pos = old_pos;
                }
            }
        }
    }

    // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
    for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
          i != Cache().end(); ++i )
    {
        wxDELETEA(i->Name);
    }
}
Example #4
0
// Scans the whole source once and stores one cache item per '<' found in it:
// the index of the '<' (Key) and the indices describing the matching ending
// tag (End1/End2). End1 == End2 == -1 marks a tag for which no ending tag has
// been found, End1 == End2 == -2 marks an ending tag.
wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString& source)
{
    const wxChar *text = source.c_str();
    int textLen = source.length();
    wxChar upperName[256];  // upper-cased tag name, truncated if too long

    m_Cache = NULL;
    m_CacheSize = 0;
    m_CachePos = 0;

    for (int pos = 0; pos < textLen; pos++)
    {
        // only a '<' can start a tag
        if (text[pos] != wxT('<'))
            continue;

        // the cache array grows in steps of CACHE_INCREMENT items
        if (m_CacheSize % CACHE_INCREMENT == 0)
            m_Cache = (wx28HtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wx28HtmlCacheItem));
        const int slot = m_CacheSize++;
        const int tagStart = pos++;
        m_Cache[slot].Key = tagStart;

        // collect the tag name: everything up to '>' or white space
        int nameLen = 0;
        while (pos < textLen && nameLen < (int)WXSIZEOF(upperName) - 1 &&
               text[pos] != wxT('>') && !wxIsspace(text[pos]))
        {
            upperName[nameLen] = (wxChar)wxToupper(text[pos]);
            nameLen++;
            pos++;
        }
        upperName[nameLen] = wxT('\0');

        m_Cache[slot].Name = new wxChar[nameLen+1];
        memcpy(m_Cache[slot].Name, upperName, (nameLen+1)*sizeof(wxChar));

        // move on to the '>' closing the tag
        while (pos < textLen && text[pos] != wxT('>'))
            pos++;

        if (text[tagStart+1] == wxT('/'))
        {
            // Ending tag: attach it to the closest preceding tag of the same
            // name still lacking one (upperName+1 skips the leading '/').
            m_Cache[slot].End1 = m_Cache[slot].End2 = -2;
            for (int k = slot; k >= 0; k--)
            {
                if (m_Cache[k].End1 != -1)
                    continue;
                if (wxStrcmp(m_Cache[k].Name, upperName+1) != 0)
                    continue;

                m_Cache[k].End1 = tagStart;
                m_Cache[k].End2 = pos + 1;
                break;
            }
            continue;
        }

        // Opening tag: no ending tag known for it yet.
        m_Cache[slot].End1 = m_Cache[slot].End2 = -1;

        if (!wxIsCDATAElement(upperName))
            continue;

        // For a CDATA element look for its closing tag right away, skipping
        // over everything in between. Remember where we are in case there is
        // no closing tag at all (see below).
        const wxInt32 resumePos = pos;
        bool foundCloseTag = false;

        const int nameChars = wxStrlen(upperName);
        while (pos < textLen)
        {
            // advance to the next "</" (or to the last character)
            while (pos + 1 < textLen &&
                   !(text[pos] == '<' && text[pos+1] == '/'))
                ++pos;
            if (text[pos] == '<')
                ++pos;

            // compare what follows with the element name
            int matched = 0;
            while (pos < textLen && matched < nameChars &&
                   text[pos] != '>' && text[pos] != '<')
            {
                const wxChar ch = text[pos];
                const bool isBlank = ch == wxT(' ') || ch == wxT('\n') ||
                                     ch == wxT('\r') || ch == wxT('\t');

                // cast to wxChar needed to suppress warning in Unicode build
                if ((wxChar)wxToupper(ch) == upperName[matched])
                    ++matched;
                else if (!isBlank)
                    matched = 0;    // mismatch, start over (blanks are skipped)

                ++pos;
            }

            if (matched == nameChars)
            {
                // step back to just before the closing tag, the loop
                // increment then moves on to it
                pos = pos - nameChars - 3;
                foundCloseTag = true;
                break;
            }

            // not the one we want, keep looking
            ++pos;
        }

        if (!foundCloseTag)
        {
            // No closing tag means the markup is broken; the best we can do
            // is to carry on parsing as if the unclosed tag didn't exist.
            pos = resumePos;
        }
    }

    // The names were only needed for matching the ending tags above, release
    // them now.
    for (int idx = 0; idx < m_CacheSize; idx++)
    {
        delete[] m_Cache[idx].Name;
        m_Cache[idx].Name = NULL;
    }
}