void wx28HtmlParser::CreateDOMSubTree(wx28HtmlTag *cur, int begin_pos, int end_pos, wx28HtmlTagsCache *cache) { if (end_pos <= begin_pos) return; wxChar c; int i = begin_pos; int textBeginning = begin_pos; // If the tag contains CDATA text, we include the text between beginning // and ending tag verbosely. Setting i=end_pos will skip to the very // end of this function where text piece is added, bypassing any child // tags parsing (CDATA element can't have child elements by definition): if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str())) { i = end_pos; } while (i < end_pos) { c = m_Source.GetChar(i); if (c == wxT('<')) { // add text to m_TextPieces: if (i - textBeginning > 0) m_TextPieces->Add( wx28HtmlTextPiece(textBeginning, i - textBeginning)); // if it is a comment, skip it: if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') && m_Source.GetChar(i+2) == wxT('-') && m_Source.GetChar(i+3) == wxT('-')) { // Comments begin with "<!--" and end with "--[ \t\r\n]*>" // according to HTML 4.0 int dashes = 0; i += 4; while (i < end_pos) { c = m_Source.GetChar(i++); if ((c == wxT(' ') || c == wxT('\n') || c == wxT('\r') || c == wxT('\t')) && dashes >= 2) {} else if (c == wxT('>') && dashes >= 2) { textBeginning = i; break; } else if (c == wxT('-')) dashes++; else dashes = 0; } } // add another tag to the tree: else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/')) { wx28HtmlTag *chd; if (cur) chd = new wx28HtmlTag(cur, m_Source, i, end_pos, cache, m_entitiesParser); else { chd = new wx28HtmlTag(NULL, m_Source, i, end_pos, cache, m_entitiesParser); if (!m_Tags) { // if this is the first tag to be created make the root // m_Tags point to it: m_Tags = chd; } else { // if there is already a root tag add this tag as // the last sibling: chd->m_Prev = m_Tags->GetLastSibling(); chd->m_Prev->m_Next = chd; } } if (chd->HasEnding()) { CreateDOMSubTree(chd, chd->GetBeginPos(), chd->GetEndPos1(), cache); i = chd->GetEndPos2(); } else i = chd->GetBeginPos(); 
textBeginning = i; } // ... or skip ending tag: else { while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++; textBeginning = i+1; } } else i++; } // add remaining text to m_TextPieces: if (end_pos - textBeginning > 0) m_TextPieces->Add( wx28HtmlTextPiece(textBeginning, end_pos - textBeginning)); }
void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, const wxString::const_iterator& begin_pos, const wxString::const_iterator& end_pos, wxHtmlTagsCache *cache) { if (end_pos <= begin_pos) return; wxChar c; wxString::const_iterator i = begin_pos; wxString::const_iterator textBeginning = begin_pos; // If the tag contains CDATA text, we include the text between beginning // and ending tag verbosely. Setting i=end_pos will skip to the very // end of this function where text piece is added, bypassing any child // tags parsing (CDATA element can't have child elements by definition): if (cur != NULL && wxIsCDATAElement(cur->GetName())) { i = end_pos; } while (i < end_pos) { c = *i; if (c == wxT('<')) { // add text to m_TextPieces: if (i > textBeginning) m_TextPieces->push_back(wxHtmlTextPiece(textBeginning, i)); // if it is a comment, skip it: if ( SkipCommentTag(i, m_Source->end()) ) { textBeginning = i = i + 1; // skip closing '>' too } // add another tag to the tree: else if (i < end_pos-1 && *(i+1) != wxT('/')) { wxHtmlTag *chd; if (cur) chd = new wxHtmlTag(cur, m_Source, i, end_pos, cache, m_entitiesParser); else { chd = new wxHtmlTag(NULL, m_Source, i, end_pos, cache, m_entitiesParser); if (!m_Tags) { // if this is the first tag to be created make the root // m_Tags point to it: m_Tags = chd; } else { // if there is already a root tag add this tag as // the last sibling: chd->m_Prev = m_Tags->GetLastSibling(); chd->m_Prev->m_Next = chd; } } if (chd->HasEnding()) { CreateDOMSubTree(chd, chd->GetBeginIter(), chd->GetEndIter1(), cache); i = chd->GetEndIter2(); } else i = chd->GetBeginIter(); textBeginning = i; } // ... or skip ending tag: else { while (i < end_pos && *i != wxT('>')) ++i; textBeginning = i < end_pos ? i+1 : i; } } else ++i; } // add remaining text to m_TextPieces: if (end_pos > textBeginning) m_TextPieces->push_back(wxHtmlTextPiece(textBeginning, end_pos)); }
// Scans the whole source once and builds the cache of tag positions.
//
// Every "<...>" found (except comments, and except a trailing '<' without a
// closing '>') gets one cache item whose Key is the position of its '<'.
// Items are created as Type_NoMatchingEndingTag; when an ending tag with the
// same (upper-cased) name is met later, the nearest preceding such item
// becomes Type_Normal with End1 pointing at the ending tag's '<' and End2
// just after its '>'. Ending tags themselves are stored as Type_EndingTag.
//
// The Name members are only needed while matching tags and are released
// before the constructor returns.
wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
{
    m_Cache = new wxHtmlTagsCacheData;
    m_CachePos = 0;

    // upper-cased name of the tag being examined, truncated if too long
    wxChar tagBuffer[256];

    const wxString::const_iterator end = source.end();
    for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
    {
        if (*pos != wxT('<'))
            continue;

        // possible tag start found:

        // don't cache comment tags
        if ( wxHtmlParser::SkipCommentTag(pos, end) )
            continue;

        // Remember the starting tag position.
        wxString::const_iterator stpos = pos++;

        // And look for the ending one, collecting the tag name (which for
        // an ending tag includes the leading '/') on the way.
        int i;
        for ( i = 0;
              pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
              *pos != wxT('>') && !wxIsspace(*pos);
              ++i, ++pos )
        {
            tagBuffer[i] = (wxChar)wxToupper(*pos);
        }
        tagBuffer[i] = wxT('\0');

        // skip the attributes, if any
        while (pos < end && *pos != wxT('>'))
            ++pos;

        if ( pos == end )
        {
            // We didn't find a closing bracket, this is not a valid tag after
            // all. Notice that we need to roll back pos to avoid creating an
            // invalid iterator when "++pos" is done in the loop statement.
            --pos;
            continue;
        }

        // We have a valid tag, add it to the cache.

        size_t tg = Cache().size();
        Cache().push_back(wxHtmlCacheItem());
        Cache()[tg].Key = stpos;
        Cache()[tg].Name = new wxChar[i+1];
        memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));

        if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
        {
            Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
            // find matching begin tag (tagBuffer+1 skips the leading '/'):
            for (i = tg; i >= 0; i--)
            {
                if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) &&
                    (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
                {
                    Cache()[i].type = wxHtmlCacheItem::Type_Normal;
                    Cache()[i].End1 = stpos;
                    Cache()[i].End2 = pos + 1;
                    break;
                }
            }
        }
        else
        {
            Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;

            if (wxIsCDATAElement(tagBuffer))
            {
                // store the orig pos in case we are missing the closing
                // tag (see below)
                const wxString::const_iterator old_pos = pos;
                bool foundCloseTag = false;

                // find next matching tag
                int tag_len = wxStrlen(tagBuffer);
                while (pos < end)
                {
                    // find the ending tag
                    while (pos + 1 < end &&
                           (*pos != '<' || *(pos+1) != '/'))
                        ++pos;
                    if (*pos == '<')
                        ++pos;

                    // see if it matches
                    int match_pos = 0;
                    while (pos < end && match_pos < tag_len )
                    {
                        wxChar c = *pos;
                        if ( c == '>' || c == '<' )
                            break;

                        // cast to wxChar needed to suppress warning in
                        // Unicode build
                        if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
                        {
                            ++match_pos;
                        }
                        else if (c == wxT(' ') || c == wxT('\n') ||
                                 c == wxT('\r') || c == wxT('\t'))
                        {
                            // need to skip over these
                        }
                        else
                        {
                            match_pos = 0;
                        }
                        ++pos;
                    }

                    // found a match
                    if (match_pos == tag_len)
                    {
                        // Rewind to just before the "</" so that the "++pos"
                        // of the outer loop lands on the '<' of the closing
                        // tag and it is then processed as a normal ending
                        // tag (this arithmetic assumes that no white space
                        // was skipped while matching the name).
                        pos = pos - tag_len - 3;
                        foundCloseTag = true;
                        break;
                    }
                    else // keep looking for the closing tag
                    {
                        // The scans above may have stopped because the end
                        // of the source was reached; never step beyond it,
                        // incrementing a past-the-end iterator is not
                        // allowed even if it is not dereferenced afterwards.
                        if (pos < end)
                            ++pos;
                    }
                }
                if (!foundCloseTag)
                {
                    // we didn't find closing tag; this means the markup
                    // is incorrect and the best thing we can do is to
                    // ignore the unclosed tag and continue parsing as if
                    // it didn't exist:
                    pos = old_pos;
                }
            }
        }
    }

    // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
    for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
          i != Cache().end(); ++i )
    {
        wxDELETEA(i->Name);
    }
}
// Scans the whole source once and fills the cache of tag positions.
//
// One item is created for every '<' met. Its Key is the offset of that '<'.
// For an opening tag End1/End2 start as -1 and, once an ending tag with the
// same (upper-cased) name is found, become the offset of the ending tag's
// '<' and the offset just after its '>'. Items of ending tags themselves
// have End1 == End2 == -2.
//
// The tag names are only needed during the scan and are freed before the
// constructor returns.
wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString& source)
{
    const wxChar *src = source.c_str();
    int lng = source.length();
    wxChar tagBuffer[256];

    m_Cache = NULL;
    m_CacheSize = 0;
    m_CachePos = 0;

    for (int pos = 0; pos < lng; pos++)
    {
        if (src[pos] != wxT('<'))
            continue;

        // A tag starts here. The array grows by CACHE_INCREMENT items each
        // time the previous chunk has been used up.
        if (m_CacheSize % CACHE_INCREMENT == 0)
        {
            m_Cache = (wx28HtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wx28HtmlCacheItem));
        }

        const int tagIndex = m_CacheSize++;
        const int tagStart = pos++;
        m_Cache[tagIndex].Key = tagStart;

        // Collect the upper-cased tag name; it stops at white space or '>'
        // and is truncated to what fits into the buffer.
        int nameLen = 0;
        while ( pos < lng &&
                nameLen < (int)WXSIZEOF(tagBuffer) - 1 &&
                src[pos] != wxT('>') &&
                !wxIsspace(src[pos]) )
        {
            tagBuffer[nameLen] = (wxChar)wxToupper(src[pos]);
            nameLen++;
            pos++;
        }
        tagBuffer[nameLen] = wxT('\0');

        wxChar *nameCopy = new wxChar[nameLen+1];
        memcpy(nameCopy, tagBuffer, (nameLen+1)*sizeof(wxChar));
        m_Cache[tagIndex].Name = nameCopy;

        // Skip the rest of the tag up to (but not including) its '>'.
        while (pos < lng && src[pos] != wxT('>'))
            pos++;

        if (src[tagStart+1] == wxT('/'))
        {
            // Ending tag: mark it as such and attach it to the closest
            // preceding opening tag of the same name that is still
            // unmatched (tagBuffer+1 skips the leading '/').
            m_Cache[tagIndex].End1 = m_Cache[tagIndex].End2 = -2;

            for (int k = tagIndex; k >= 0; k--)
            {
                if (m_Cache[k].End1 != -1)
                    continue;
                if (wxStrcmp(m_Cache[k].Name, tagBuffer+1) != 0)
                    continue;

                m_Cache[k].End1 = tagStart;
                m_Cache[k].End2 = pos + 1;
                break;
            }
            continue;
        }

        // Opening tag, no ending tag known for it yet.
        m_Cache[tagIndex].End1 = m_Cache[tagIndex].End2 = -1;

        if (!wxIsCDATAElement(tagBuffer))
            continue;

        // CDATA element: look ahead for its closing tag so that any '<'
        // inside its content is not cached as a tag. Remember where we are
        // in case the closing tag is missing.
        wxInt32 resumePos = pos;
        bool closingTagFound = false;
        int tag_len = wxStrlen(tagBuffer);

        while (pos < lng)
        {
            // move to the next "</" (or to the last character)
            while (pos + 1 < lng &&
                   !(src[pos] == '<' && src[pos+1] == '/'))
                ++pos;
            if (src[pos] == '<')
                ++pos;

            // compare what follows with the element name
            int matched = 0;
            while (pos < lng && matched < tag_len)
            {
                const wxChar ch = src[pos];
                if (ch == '>' || ch == '<')
                    break;

                // cast to wxChar needed to suppress warning in Unicode build
                if ((wxChar)wxToupper(ch) == tagBuffer[matched])
                {
                    ++matched;
                }
                else if (!(ch == wxT(' ') || ch == wxT('\n') ||
                           ch == wxT('\r') || ch == wxT('\t')))
                {
                    // a different character restarts the comparison, white
                    // space is simply skipped
                    matched = 0;
                }
                ++pos;
            }

            if (matched == tag_len)
            {
                // Go back to just before the "</": the increment of the
                // outer loop then lands on the '<' of the closing tag.
                pos = pos - tag_len - 3;
                closingTagFound = true;
                break;
            }

            // not the tag we want, keep looking
            ++pos;
        }

        if (!closingTagFound)
        {
            // The markup is broken: the best we can do is to ignore the
            // unclosed tag and continue parsing as if it didn't exist.
            pos = resumePos;
        }
    }

    // The names were needed only for matching the tags, free them now.
    for (wx28HtmlCacheItem *item = m_Cache, *itemsEnd = m_Cache + m_CacheSize;
         item != itemsEnd;
         ++item)
    {
        delete[] item->Name;
        item->Name = NULL;
    }
}