/**
 * Read the next lexical token from the parser's input.
 *
 * Leading white space is skipped first.  The returned token points into
 * the parser's buffer (no copy is made) and carries its length and type.
 *
 * @param pXML the parser state; its read index is advanced
 * @return the token; on end of input the result is
 * { nullptr, 0, eTokenError }
 */
static XML::NextToken
XML::GetNextToken(Parser *pXML)
{
  // Skip leading white space; a NUL result means no character was obtained
  const TCHAR first = FindNonWhiteSpace(pXML);
  if (gcc_unlikely(first == 0))
    return { nullptr, 0, eTokenError };

  XML::NextToken token;

  // The token begins one position behind the current read index
  token.pStr = &pXML->lpXML[pXML->nIndex - 1];

  size_t token_length = 1;
  bool scan_text = false;

  if (first == _T('\'') || first == _T('\"')) {
    // Quoted text: look for the matching closing quote
    token.type = eTokenQuotedText;

    // Remember where to resume if the quote turns out to be unterminated
    const unsigned resume_index = pXML->nIndex;

    bool closed = false;
    for (TCHAR c = GetNextChar(pXML); c != 0; c = GetNextChar(pXML)) {
      ++token_length;

      if (c == first) {
        closed = true;
        break;
      }

      // A tag start aborts the search for the closing quote
      if (c == _T('<'))
        break;
    }

    if (closed) {
      // Skip trailing white space, then un-read the character that
      // follows it so the next call sees it again
      if (FindNonWhiteSpace(pXML))
        pXML->nIndex--;
    } else {
      // No closing quote: rewind and treat the input as plain text
      pXML->nIndex = resume_index;
      scan_text = true;
    }
  } else if (first == _T('=')) {
    // Equals (used with attribute values)
    token.type = eTokenEquals;
  } else if (first == _T('>')) {
    // Close tag
    token.type = eTokenCloseTag;
  } else if (first == _T('<')) {
    // Peek at the following character to tell '</' and '<?' from '<'
    const TCHAR following = pXML->lpXML[pXML->nIndex];

    if (following == _T('/')) {
      // Tag end '</': consume the second character as well
      GetNextChar(pXML);
      token.type = eTokenTagEnd;
      token_length = 2;
    } else if (following == _T('?')) {
      // XML declaration '<?': consume the second character as well
      GetNextChar(pXML);
      token.type = eTokenDeclaration;
      token_length = 2;
    } else {
      // Plain tag start
      token.type = eTokenTagStart;
    }
  } else if (first == _T('/')) {
    // Either a short hand end tag '/>' or the beginning of a text token
    if (pXML->lpXML[pXML->nIndex] == _T('>')) {
      GetNextChar(pXML);
      token.type = eTokenShortHandClose;
      token_length = 2;
    } else {
      scan_text = true;
    }
  } else {
    // Any other character starts a text token
    scan_text = true;
  }

  if (scan_text) {
    token.type = eTokenText;
    token_length = 1;

    for (;;) {
      const TCHAR c = GetNextChar(pXML);

      // End of input or white space ends the text; the white space
      // character is not counted and is not un-read
      if (c == 0 || IsWhitespaceOrNull(c))
        break;

      // '<', '>', '=' and the short hand end tag '/>' terminate the text
      const bool terminator =
        c == _T('<') || c == _T('>') || c == _T('=') ||
        (c == _T('/') && pXML->lpXML[pXML->nIndex] == _T('>'));

      if (terminator) {
        // Un-read the terminator so the next call starts on it
        pXML->nIndex--;
        break;
      }

      // Everything else (including a lone '/') belongs to the text
      ++token_length;
    }
  }

  token.length = token_length;
  return token;
}
/**
 * Read the next lexical token from the parser's input.
 *
 * Leading white space is skipped first.  The returned token points into
 * the parser's buffer (no copy is made).
 *
 * @param pXML the parser state; its read index is advanced
 * @param pcbToken receives the number of characters in the token
 * (0 on end of input)
 * @param pType receives the token type (eTokenError on end of input)
 * @return the token; its pStr is a null pointer on end of input
 */
static NextToken
GetNextToken(XML *pXML, size_t *pcbToken, enum TokenTypeTag *pType)
{
  NextToken token;

  // Skip leading white space; a NUL result means no character was obtained
  const TCHAR lead = FindNonWhiteSpace(pXML);
  if (gcc_unlikely(lead == 0)) {
    *pcbToken = 0;
    *pType = eTokenError;
    token.pStr = nullptr;
    return token;
  }

  // The token begins one position behind the current read index
  token.pStr = &pXML->lpXML[pXML->nIndex - 1];

  size_t length = 1;
  bool scan_text = false;

  switch (lead) {
  case _T('\''):
  case _T('\"'): {
    // Quoted text: look for the matching closing quote
    *pType = eTokenQuotedText;

    // Remember where the scan started in case the quote is unterminated
    const unsigned start_index = pXML->nIndex;

    bool closed = false;
    for (TCHAR c = getNextChar(pXML); c != 0; c = getNextChar(pXML)) {
      ++length;

      if (c == lead) {
        closed = true;
        break;
      }

      // A tag start aborts the search for the closing quote
      if (c == _T('<'))
        break;
    }

    if (closed) {
      // Skip trailing white space, then un-read the character that
      // follows it so the next call sees it again
      if (FindNonWhiteSpace(pXML))
        pXML->nIndex--;
    } else {
      // No closing quote: rewind to the quote character, read it again
      // and treat the input as plain text
      pXML->nIndex = start_index - 1;
      getNextChar(pXML);
      scan_text = true;
    }

    break;
  }

  case _T('='):
    // Equals (used with attribute values)
    *pType = eTokenEquals;
    break;

  case _T('>'):
    // Close tag
    *pType = eTokenCloseTag;
    break;

  case _T('<'): {
    // Peek at the following character to tell '</' and '<?' from '<'
    const TCHAR following = pXML->lpXML[pXML->nIndex];

    if (following == _T('/')) {
      // Tag end '</': consume the second character as well
      getNextChar(pXML);
      *pType = eTokenTagEnd;
      length = 2;
    } else if (following == _T('?')) {
      // XML declaration '<?': consume the second character as well
      getNextChar(pXML);
      *pType = eTokenDeclaration;
      length = 2;
    } else {
      // Plain tag start
      *pType = eTokenTagStart;
    }

    break;
  }

  case _T('/'):
    // Either a short hand end tag '/>' or the beginning of a text token
    if (pXML->lpXML[pXML->nIndex] == _T('>')) {
      getNextChar(pXML);
      *pType = eTokenShortHandClose;
      length = 2;
    } else {
      scan_text = true;
    }

    break;

  default:
    // Any other character starts a text token
    scan_text = true;
    break;
  }

  if (scan_text) {
    *pType = eTokenText;
    length = 1;

    for (;;) {
      const TCHAR c = getNextChar(pXML);

      // End of input ends the text
      if (c == 0)
        break;

      // White space ends the text; it is not counted and not un-read
      if (c == _T('\n') || c == _T(' ') || c == _T('\t') || c == _T('\r'))
        break;

      // '<', '>', '=' and the short hand end tag '/>' terminate the text
      const bool terminator =
        c == _T('<') || c == _T('>') || c == _T('=') ||
        (c == _T('/') && pXML->lpXML[pXML->nIndex] == _T('>'));

      if (terminator) {
        // Un-read the terminator so the next call starts on it
        pXML->nIndex--;
        break;
      }

      // Everything else (including a lone '/') belongs to the text
      ++length;
    }
  }

  *pcbToken = length;
  return token;
}