int CMarkupSTL::x_RemoveElem( int iPos ) { // Remove element and all contained elements // Return new position // int iPosParent = m_aPos[iPos].iElemParent; // Find previous sibling and bypass removed element // This leaves orphan positions in m_aPos array int iPosLook = m_aPos[iPosParent].iElemChild; int iPosPrev = 0; while ( iPosLook != iPos ) { iPosPrev = iPosLook; iPosLook = m_aPos[iPosLook].iElemNext; } if ( iPosPrev ) m_aPos[iPosPrev].iElemNext = m_aPos[iPos].iElemNext; else m_aPos[iPosParent].iElemChild = m_aPos[iPos].iElemNext; // Remove from document // Links have been changed to go around removed element // But element position and links are still valid int nAfterEnd = m_aPos[iPos].nEndR + 1; TokenPos token( m_csDoc ); token.nNext = nAfterEnd; if ( ! x_FindToken(token) || token.szDoc[token.nL] == _T('<') ) nAfterEnd = token.nL; int nLen = nAfterEnd - m_aPos[iPos].nStartL; x_DocChange( m_aPos[iPos].nStartL, nLen, CStdString() ); x_Adjust( iPos, - nLen, true ); return iPosPrev; }
int CMarkupSTL::x_FindElem( int iPosParent, int iPos, const char * szPath ) { // If szPath is NULL or empty, go to next sibling element // Otherwise go to next sibling element with matching path // if ( iPos ) iPos = m_aPos[iPos].iElemNext; else iPos = m_aPos[iPosParent].iElemChild; // Finished here if szPath not specified if ( szPath == NULL || !szPath[0] ) return iPos; // Search TokenPos token( m_csDoc ); while ( iPos ) { // Compare tag name token.nNext = m_aPos[iPos].nStartL + 1; x_FindToken( token ); // Locate tag name if ( token.Match(szPath) ) return iPos; iPos = m_aPos[iPos].iElemNext; } return 0; }
CStdString CMarkupSTL::x_GetTagName( int iPos ) const
{
	// Return the tag name of the element at iPos.
	// An empty string is returned when iPos is 0 or no token is found
	// after the element's left angle bracket.
	TokenPos token( m_csDoc );
	token.nNext = m_aPos[iPos].nStartL + 1;

	// iPos is tested first, so the tokenizer is only run for a real element
	if ( iPos && x_FindToken( token ) )
	{
		// The token is the tag name; return that substring of the document
		return x_GetToken( token );
	}
	return _T("");
}
INXString CMarkup::x_GetTagName( int iPos ) const
{
	// Return the tag name of the element at iPos.
	// An empty INXString is returned when iPos is 0 or no token is found
	// after the element's left angle bracket.
	TokenPos token( (char*)m_csDoc.c_str() );
	token.nNext = m_aPos[iPos].nStartL + 1;

	// iPos is tested first, so the tokenizer is only run for a real element
	if ( iPos && x_FindToken( token ) )
	{
		// The token is the tag name; return that substring of the document
		return x_GetToken( token );
	}

	INXString csEmpty("");
	return csEmpty;
}
CStdString CMarkupSTL::GetChildSubDoc() const
{
	// Return the document text of the current child element.
	// An empty string is returned when there is no current child position.
	if ( ! m_iPosChild )
		return _T("");

	const int nFrom = m_aPos[m_iPosChild].nStartL;
	int nTo = m_aPos[m_iPosChild].nEndR + 1;

	// Look at what follows the child's end tag: if no further token is found,
	// or the next token opens a tag, extend the range up to that token's left offset.
	TokenPos token( m_csDoc );
	token.nNext = nTo;
	if ( ! x_FindToken(token) || m_csDoc[token.nL] == _T('<') )
		nTo = token.nL;

	return m_csDoc.Mid( nFrom, nTo - nFrom );
}
bool CMarkup::x_FindAttrib( CMarkup::TokenPos& token, char* szAttrib ) const
{
	// Scan the tokens of a tag, starting from the caller-supplied token position.
	//   szAttrib NULL/"" : stop at the next attribute name; returns true with
	//                      token positioned on that name
	//   otherwise        : look for the attribute named szAttrib; returns true
	//                      with token positioned on its string value
	// Returns false if the tag ends ('>', '/' or '?') or the tokens run out first.
	int nNameIndex = 0;   // token index at which the requested name matched (0 = not yet)
	int nIndex = 0;       // index of the current token within this scan

	while ( x_FindToken(token) )
	{
		if ( token.bIsString )
		{
			// A string token two places after the matched name (name, '=', value)
			if ( nNameIndex && nIndex == nNameIndex + 2 )
				return true;
		}
		else
		{
			const char cChar = m_csDoc.GetAt(token.nL);

			// End of the tag reached: attribute not found
			if ( cChar == '>' || cChar == '/' || cChar == '?' )
				return false;

			// Equal signs are skipped. Index 0 is never treated as an attribute
			// name (presumably it is the tag name - confirm against callers).
			if ( cChar != '=' && ! nNameIndex && nIndex )
			{
				// No name requested: any attribute name will do
				if ( ! szAttrib || ! szAttrib[0] )
					return true;

				// Remember where the requested name was seen
				if ( token.Match(szAttrib) )
					nNameIndex = nIndex;
			}
		}
		++nIndex;
	}

	// Ran out of tokens
	return false;
}
CStdString CMarkupSTL::x_GetData( int iPos ) const
{
	// Return the data held between the start and end tag of the element at iPos.
	// Elements that have child elements, or that are empty elements,
	// yield an empty string.
	if ( m_aPos[iPos].iElemChild || m_aPos[iPos].IsEmptyElement() )
		return _T("");

	// Check whether the content begins with a CDATA section
	TokenPos token( m_csDoc );
	token.nNext = m_aPos[iPos].nStartR+1;
	const bool bStartsWithCDATA =
		x_FindToken( token )
		&& m_csDoc[token.nL] == _T('<')
		&& token.nL + 11 < m_aPos[iPos].nEndL
		&& _tcsncmp( &token.szDoc[token.nL+1], _T("![CDATA["), 8 ) == 0;

	if ( bStartsWithCDATA )
	{
		// Only accept the section if its terminator lies before the end tag
		const int nEndCDATA = m_csDoc.Find( _T("]]>"), token.nNext );
		if ( nEndCDATA != -1 && nEndCDATA < m_aPos[iPos].nEndL )
		{
			// Skip the 9 characters of "<![CDATA[" and return the raw section body
			return m_csDoc.Mid( token.nL+9, nEndCDATA-token.nL-9 );
		}
	}

	// Otherwise hand the span between the tags to x_TextFromDoc
	return x_TextFromDoc( m_aPos[iPos].nStartR+1, m_aPos[iPos].nEndL-1 );
}
int CMarkupSTL::x_ParseNode( CMarkupSTL::TokenPos& token )
{
	// Classify the node that starts at token.nNext.
	// Returns the node type (0 if no node is found) and, for non-element
	// nodes, leaves token.nNext on the character after the node.
	// token.nR is not determined when no node is found or the node is an element.
	// White space between elements counts as a text-like (whitespace) node.
	const char * szDoc = token.szDoc;
	token.nL = token.nNext;

	if ( szDoc[token.nL] != '<' )
	{
		// End of document: nothing to report
		if ( ! szDoc[token.nL] )
			return 0;

		// Did not start with '<', so it is text or white space
		if ( ! x_FindToken(token) )
			return MNT_WHITESPACE;
		if ( szDoc[token.nL] == _T('<') )
		{
			// The first token is the next tag: only white space came before it
			token.nNext = token.nL;
			return MNT_WHITESPACE;
		}

		// Real text: look for the '<' that ends it
		x_FindChar( token.szDoc, token.nNext, _T('<') );
		return MNT_TEXT;
	}

	// Started with '<', which could be:
	//   <!--...-->       comment
	//   <!DOCTYPE ...>   dtd
	//   <?target ...?>   processing instruction
	//   <![CDATA[...]]>  cdata section
	//   <NAME ...>       element
	// At least two more characters are needed to tell them apart.
	if ( ! szDoc[token.nL+1] || ! szDoc[token.nL+2] )
		return 0;

	int nTypeFound = 0;
	const char * szEndOfNode = NULL;   // terminator to search for, when one applies
	const _TCHAR cFirstChar = szDoc[token.nL+1];

	switch ( cFirstChar )
	{
	case _T('?'):
		nTypeFound = MNT_PROCESSING_INSTRUCTION;
		szEndOfNode = _T("?>");
		break;

	case _T('!'):
		{
			const _TCHAR cSecondChar = szDoc[token.nL+2];
			if ( cSecondChar == _T('[') )
			{
				nTypeFound = MNT_CDATA_SECTION;
				szEndOfNode = _T("]]>");
			}
			else if ( cSecondChar == _T('-') )
			{
				nTypeFound = MNT_COMMENT;
				szEndOfNode = _T("-->");
			}
			else
			{
				// A document type has to be tokenized because it may contain
				// quoted strings and nested square brackets; it ends at the
				// first '>' seen outside both.
				bool bClosed = false;
				int nBrackets = 0;
				while ( ! bClosed && x_FindToken(token) )
				{
					if ( token.bIsString )
						continue;
					const _TCHAR cChar = szDoc[token.nL];
					if ( cChar == _T('[') )
						++nBrackets;
					else if ( cChar == _T(']') )
						--nBrackets;
					else if ( nBrackets == 0 && cChar == _T('>') )
						bClosed = true;
				}
				if ( ! bClosed )
					return 0;
				nTypeFound = MNT_DOCUMENT_TYPE;
			}
		}
		break;

	case _T('/'):
		// An end tag means there is no further node inside the parent element
		return 0;

	default:
		nTypeFound = MNT_ELEMENT;
		break;
	}

	// For nodes with a fixed terminator, move past it
	if ( szEndOfNode )
	{
		const char * pEnd = _tcsstr( &szDoc[token.nNext], szEndOfNode );
		if ( ! pEnd )
			return 0; // not well-formed
		token.nNext = (pEnd - szDoc) + _tcslen(szEndOfNode);
	}

	return nTypeFound;
}
int CMarkupSTL::x_ParseElem( int iPosParent ) { // This is either called by SetDoc, x_AddSubDoc, or itself recursively // m_aPos[iPosParent].nEndL is where to start parsing for the child element // This returns the new position if a tag is found, otherwise zero // In all cases we need to get a new ElemPos, but release it if unused // int iPos = x_GetFreePos(); m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL; m_aPos[iPos].iElemParent = iPosParent; m_aPos[iPos].iElemChild = 0; m_aPos[iPos].iElemNext = 0; // Start Tag // A loop is used to ignore all remarks tags and special tags // i.e. <?xml version="1.0"?>, and <!-- comment here --> // So any tag beginning with ? or ! is ignored // Loop past ignored tags TokenPos token( m_csDoc ); token.nNext = m_aPos[iPosParent].nEndL; CStdString csName; while ( csName.IsEmpty() ) { // Look for left angle bracket of start tag m_aPos[iPos].nStartL = token.nNext; if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nStartL, _T('<') ) ) return x_ParseError( _T("Element tag not found") ); // Set parent's End tag to start looking from here (or later) m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL; // Determine whether this is an element, or bypass other type of node token.nNext = m_aPos[iPos].nStartL + 1; if ( x_FindToken( token ) ) { if ( token.bIsString ) return x_ParseError( _T("Tag starts with quote") ); _TCHAR cFirstChar = m_csDoc[token.nL]; if ( cFirstChar == _T('?') || cFirstChar == _T('!') ) { token.nNext = m_aPos[iPos].nStartL; if ( ! x_ParseNode(token) ) return x_ParseError( _T("Invalid node") ); } else if ( cFirstChar != _T('/') ) { csName = x_GetToken( token ); // Look for end of tag if ( ! x_FindChar(token.szDoc, token.nNext, _T('>')) ) return x_ParseError( _T("End of tag not found") ); } else return x_ReleasePos(); // probably end tag of parent } else return x_ParseError( _T("Abrupt end within tag") ); } m_aPos[iPos].nStartR = token.nNext; // Is ending mark within start tag, i.e. empty element? 
if ( m_csDoc[m_aPos[iPos].nStartR-1] == _T('/') ) { // Empty element // Close tag left is set to ending mark, and right to open tag right m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1; m_aPos[iPos].nEndR = m_aPos[iPos].nStartR; } else // look for end tag { // Element probably has contents // Determine where to start looking for left angle bracket of end tag // This is done by recursively parsing the contents of this element int iInner, iInnerPrev = 0; m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1; while ( (iInner = x_ParseElem( iPos )) > 0 ) { // Set links to iInner if ( iInnerPrev ) m_aPos[iInnerPrev].iElemNext = iInner; else m_aPos[iPos].iElemChild = iInner; iInnerPrev = iInner; // Set offset to reflect child m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1; } if ( iInner == -1 ) return -1; // Look for left angle bracket of end tag if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nEndL, _T('<') ) ) return x_ParseError( _T("End tag of %s element not found"), csName ); // Look through tokens of end tag token.nNext = m_aPos[iPos].nEndL + 1; int nTokenCount = 0; while ( x_FindToken( token ) ) { ++nTokenCount; if ( ! token.bIsString ) { // Is first token not an end slash mark? if ( nTokenCount == 1 && m_csDoc[token.nL] != _T('/') ) return x_ParseError( _T("Expecting end tag of element %s"), csName ); else if ( nTokenCount == 2 && ! token.Match(csName) ) return x_ParseError( _T("End tag does not correspond to %s"), csName ); // Else is it a right angle bracket? else if ( m_csDoc[token.nL] == _T('>') ) break; } } // Was a right angle bracket not found? if ( ! token.szDoc[token.nL] || nTokenCount < 2 ) return x_ParseError( _T("End tag not completed for element %s"), csName ); m_aPos[iPos].nEndR = token.nL; } // Successfully parsed element (and contained elements) return iPos; }
void CMarkupSTL::x_LocateNew( int iPosParent, int& iPosRel, int& nOffset, int nLength, int nFlags ) { // Determine where to insert new element or node // bool bInsert = (nFlags&1)?true:false; bool bHonorWhitespace = (nFlags&2)?true:false; int nStartL; if ( nLength ) { // Located at a non-element node if ( bInsert ) nStartL = nOffset; else nStartL = nOffset + nLength; } else if ( iPosRel ) { // Located at an element if ( bInsert ) // precede iPosRel nStartL = m_aPos[iPosRel].nStartL; else // follow iPosRel nStartL = m_aPos[iPosRel].nEndR + 1; } else if ( m_aPos[iPosParent].IsEmptyElement() ) { // Parent has no separate end tag, so split empty element nStartL = m_aPos[iPosParent].nStartR; } else { if ( bInsert ) // after start tag nStartL = m_aPos[iPosParent].nStartR + 1; else // before end tag nStartL = m_aPos[iPosParent].nEndL; } // Go up to start of next node, unless its splitting an empty element if ( ! bHonorWhitespace && ! m_aPos[iPosParent].IsEmptyElement() ) { TokenPos token( m_csDoc ); token.nNext = nStartL; if ( ! x_FindToken(token) || m_csDoc[token.nL] == _T('<') ) nStartL = token.nL; } // Determine iPosBefore int iPosBefore = 0; if ( iPosRel ) { if ( bInsert ) { // Is iPosRel past first sibling? int iPosPrev = m_aPos[iPosParent].iElemChild; if ( iPosPrev != iPosRel ) { // Find previous sibling of iPosRel while ( m_aPos[iPosPrev].iElemNext != iPosRel ) iPosPrev = m_aPos[iPosPrev].iElemNext; iPosBefore = iPosPrev; } } else { iPosBefore = iPosRel; } } else if ( m_aPos[iPosParent].iElemChild ) { if ( ! bInsert ) { // Find last element under iPosParent int iPosLast = m_aPos[iPosParent].iElemChild; int iPosNext = iPosLast; while ( iPosNext ) { iPosLast = iPosNext; iPosNext = m_aPos[iPosNext].iElemNext; } iPosBefore = iPosLast; } } nOffset = nStartL; iPosRel = iPosBefore; }