bool XmlDocument::parseHeader() { skipNextWhiteSpace(); if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<?xml"), 5) == 0) { const String::CharPointerType headerEnd (CharacterFunctions::find (input, CharPointer_ASCII ("?>"))); if (headerEnd.isEmpty()) return false; #if JUCE_DEBUG const String encoding (String (input, headerEnd) .fromFirstOccurrenceOf ("encoding", false, true) .fromFirstOccurrenceOf ("=", false, false) .fromFirstOccurrenceOf ("\"", false, false) .upToFirstOccurrenceOf ("\"", false, false).trim()); /* If you load an XML document with a non-UTF encoding type, it may have been loaded wrongly.. Since all the files are read via the normal juce file streams, they're treated as UTF-8, so by the time it gets to the parser, the encoding will have been lost. Best plan is to stick to utf-8 or if you have specific files to read, use your own code to convert them to a unicode String, and pass that to the XML parser. */ jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-")); #endif input = headerEnd + 2; skipNextWhiteSpace(); } return true; }
void XmlDocument::skipHeader() { const int headerStart = input.indexOf (CharPointer_UTF8 ("<?xml")); if (headerStart >= 0) { const int headerEnd = (input + headerStart).indexOf (CharPointer_UTF8 ("?>")); if (headerEnd < 0) return; #if JUCE_DEBUG const String header (input + headerStart, (size_t) (headerEnd - headerStart)); const String encoding (header.fromFirstOccurrenceOf ("encoding", false, true) .fromFirstOccurrenceOf ("=", false, false) .fromFirstOccurrenceOf ("\"", false, false) .upToFirstOccurrenceOf ("\"", false, false).trim()); /* If you load an XML document with a non-UTF encoding type, it may have been loaded wrongly.. Since all the files are read via the normal juce file streams, they're treated as UTF-8, so by the time it gets to the parser, the encoding will have been lost. Best plan is to stick to utf-8 or if you have specific files to read, use your own code to convert them to a unicode String, and pass that to the XML parser. */ jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-")); #endif input += headerEnd + 2; } skipNextWhiteSpace(); const int docTypeIndex = input.indexOf (CharPointer_UTF8 ("<!DOCTYPE")); if (docTypeIndex < 0) return; input += docTypeIndex + 9; const String::CharPointerType docType (input); int n = 1; while (n > 0) { const juce_wchar c = readNextChar(); if (outOfData) return; if (c == '<') ++n; else if (c == '>') --n; } dtdText = String (docType, (size_t) (input.getAddress() - (docType.getAddress() + 1))).trim(); }
/*  Reads everything between an element's opening tag and its matching close tag,
    appending each child (sub-elements, CDATA sections and text blocks) to 'parent'.

    The loop ends when a "</" close tag is consumed, when a nested element can't be
    read, or when the input runs out - in which case an error is recorded with
    setLastError() and outOfData is set.
*/
void XmlDocument::readChildElements (XmlElement& parent)
{
    LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);

    for (;;)
    {
        // Remember where we were before skipping whitespace, so that a text block
        // can be re-read from here with its leading whitespace intact.
        const String::CharPointerType preWhitespaceInput (input);
        skipNextWhiteSpace();

        if (outOfData)
        {
            setLastError ("unmatched tags", false);
            break;
        }

        if (*input == '<')
        {
            const juce_wchar c1 = input[1];

            if (c1 == '/')
            {
                // our close tag..
                // (the tag name itself isn't checked here - we just move past the next '>')
                const int closeTag = input.indexOf ((juce_wchar) '>');

                if (closeTag >= 0)
                    input += closeTag + 1;

                break;
            }

            if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_ASCII ("[CDATA["), 7) == 0)
            {
                // 9 == length of "<![CDATA["
                input += 9;
                const String::CharPointerType inputStart (input);

                for (;;)
                {
                    const juce_wchar c0 = *input;

                    if (c0 == 0)
                    {
                        // Hit the end of the text before finding "]]>" - nothing is appended.
                        setLastError ("unterminated CDATA section", false);
                        outOfData = true;
                        break;
                    }
                    else if (c0 == ']' && input[1] == ']' && input[2] == '>')
                    {
                        // The CDATA content becomes a text element, taken verbatim.
                        childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
                        input += 3;
                        break;
                    }

                    ++input;
                }
            }
            else
            {
                // this is some other element, so parse and add it..
                if (XmlElement* const n = readNextElement (true))
                    childAppender.append (n);
                else
                    break;
            }
        }
        else  // must be a character block
        {
            input = preWhitespaceInput; // roll back to include the leading whitespace
            MemoryOutputStream textElementContent;

            // When empty text elements aren't being ignored, the block is always kept;
            // otherwise it's only kept once something non-whitespace has been seen.
            bool contentShouldBeUsed = ! ignoreEmptyTextElements;

            for (;;)
            {
                const juce_wchar c = *input;

                if (c == '<')
                {
                    // A "<!--" comment in the middle of the text is skipped, and the
                    // text on either side of it is joined into the same block.
                    if (input[1] == '!'
                         && input[2] == '-'
                         && input[3] == '-')
                    {
                        input += 4;
                        const int closeComment = input.indexOf (CharPointer_ASCII ("-->"));

                        if (closeComment < 0)
                        {
                            setLastError ("unterminated comment", false);
                            outOfData = true;
                            return;
                        }

                        input += closeComment + 3;
                        continue;
                    }

                    // Any other '<' ends the text block.
                    break;
                }

                if (c == 0)
                {
                    setLastError ("unmatched tags", false);
                    outOfData = true;
                    return;
                }

                if (c == '&')
                {
                    String entity;
                    readEntity (entity);

                    if (entity.startsWithChar ('<') && entity [1] != 0)
                    {
                        // The entity's text looks like markup, so temporarily point the
                        // parser at it, read any elements it contains as children, and
                        // then restore the real read position and out-of-data state.
                        const String::CharPointerType oldInput (input);
                        const bool oldOutOfData = outOfData;

                        input = entity.getCharPointer();
                        outOfData = false;

                        while (XmlElement* n = readNextElement (true))
                            childAppender.append (n);

                        input = oldInput;
                        outOfData = oldOutOfData;
                    }
                    else
                    {
                        textElementContent << entity;
                        contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
                    }
                }
                else
                {
                    // Copy a run of plain characters up to the next '<' or '&'.
                    for (;; ++input)
                    {
                        juce_wchar nextChar = *input;

                        if (nextChar == '\r')
                        {
                            // A '\r' is written out as '\n'; when it's followed by a '\n',
                            // it's dropped so that the pair produces a single '\n'.
                            nextChar = '\n';

                            if (input[1] == '\n')
                                continue;
                        }

                        if (nextChar == '<' || nextChar == '&')
                            break;

                        if (nextChar == 0)
                        {
                            setLastError ("unmatched tags", false);
                            outOfData = true;
                            return;
                        }

                        textElementContent.appendUTF8Char (nextChar);
                        contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
                    }
                }
            }

            if (contentShouldBeUsed)
                childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
        }
    }
}
/*  Reads one element starting at the current read position.

    Returns nullptr if the input has run out, if the next non-whitespace character
    isn't '<', or if no tag name follows the '<'. Otherwise returns a newly allocated
    element holding whatever was read; when a syntax problem is met part-way through,
    the error is recorded with setLastError() and the partially-read element is
    still returned.

    If alsoParseSubElements is true, the element's children are read as well.
*/
XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
{
    skipNextWhiteSpace();

    if (outOfData || *input != '<')
        return nullptr;

    ++input;
    String::CharPointerType tagNameEnd (XmlIdentifierChars::findEndOfToken (input));

    if (tagNameEnd == input)
    {
        // Nothing usable straight after the '<' - tolerate a gap before giving up.
        skipNextWhiteSpace();
        tagNameEnd = XmlIdentifierChars::findEndOfToken (input);

        if (tagNameEnd == input)
        {
            setLastError ("tag name missing", false);
            return nullptr;
        }
    }

    XmlElement* const element = new XmlElement (input, tagNameEnd);
    input = tagNameEnd;

    LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (element->attributes);

    // Each pass through this loop either consumes one attribute or finishes the tag.
    for (;;)
    {
        skipNextWhiteSpace();
        const juce_wchar c = *input;

        // Self-closing tag: "/>"
        if (c == '/' && input[1] == '>')
        {
            input += 2;
            return element;
        }

        // End of the opening tag: the element's content follows.
        if (c == '>')
        {
            ++input;

            if (alsoParseSubElements)
                readChildElements (*element);

            return element;
        }

        if (! XmlIdentifierChars::isIdentifierChar (c))
        {
            if (! outOfData)
                setLastError ("illegal character found in " + element->getTagName() + ": '" + c + "'", false);

            return element;
        }

        // Anything else should be the start of an attribute name.
        const String::CharPointerType attNameStart (input);
        const String::CharPointerType attNameEnd (XmlIdentifierChars::findEndOfToken (input));

        if (attNameEnd == input)
            return element;

        input = attNameEnd;
        skipNextWhiteSpace();

        if (readNextChar() != '=')
        {
            setLastError ("expected '=' after attribute '"
                            + String (attNameStart, attNameEnd) + "'", false);
            return element;
        }

        skipNextWhiteSpace();
        const juce_wchar quoteChar = *input;

        // The value has to be quoted; if it isn't, reading of this tag stops here.
        if (quoteChar != '"' && quoteChar != '\'')
            return element;

        XmlElement::XmlAttributeNode* const newAtt
            = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);

        readQuotedString (newAtt->value);
        attributeAppender.append (newAtt);
    }
}
/*  Reads the content of an element - nested elements, CDATA sections and text -
    and appends each item to parent's list of children.

    Reading stops once a "</" close tag has been consumed, when a nested element
    can't be read, or when the input runs out (which is reported via setLastError()).
*/
void XmlDocument::readChildElements (XmlElement* parent)
{
    LinkedListPointer<XmlElement>::Appender childAppender (parent->firstChildElement);

    for (;;)
    {
        // Kept so that a text block can be re-read including its leading whitespace.
        const String::CharPointerType beforeWhitespace (input);
        skipNextWhiteSpace();

        if (outOfData)
        {
            setLastError ("unmatched tags", false);
            return;
        }

        if (*input != '<')
        {
            // ---- a block of text ----
            input = beforeWhitespace;
            String text;

            for (juce_wchar c = *input; c != '<'; c = *input)
            {
                if (c == 0)
                {
                    setLastError ("unmatched tags", false);
                    outOfData = true;
                    return;
                }

                if (c == '&')
                {
                    String entity;
                    readEntity (entity);

                    const bool entityLooksLikeMarkup = entity.startsWithChar ('<') && entity [1] != 0;

                    if (! entityLooksLikeMarkup)
                    {
                        text += entity;
                        continue;
                    }

                    // Temporarily parse from the entity's own text, adding any elements
                    // found there as children, then put the parser state back.
                    const String::CharPointerType savedInput (input);
                    const bool savedOutOfData = outOfData;

                    input = entity.getCharPointer();
                    outOfData = false;

                    while (XmlElement* const n = readNextElement (true))
                        childAppender.append (n);

                    input = savedInput;
                    outOfData = savedOutOfData;
                    continue;
                }

                // Gather a run of ordinary characters, ending at the next '<' or '&'.
                const String::CharPointerType runStart (input);
                size_t runLength = 0;

                for (;;)
                {
                    const juce_wchar next = *input;

                    if (next == 0)
                    {
                        setLastError ("unmatched tags", false);
                        outOfData = true;
                        return;
                    }

                    if (next == '<' || next == '&')
                        break;

                    ++input;
                    ++runLength;
                }

                text.appendCharPointer (runStart, runLength);
            }

            const bool discardAsEmpty = ignoreEmptyTextElements && ! text.containsNonWhitespaceChars();

            if (! discardAsEmpty)
                childAppender.append (XmlElement::createTextElement (text));

            continue;
        }

        // ---- something starting with '<' ----
        if (input[1] == '/')
        {
            // This is the close tag: move past its '>' (if there is one) and finish.
            const int closeTag = input.indexOf ((juce_wchar) '>');

            if (closeTag >= 0)
                input += closeTag + 1;

            return;
        }

        const bool isCData = input[1] == '!'
                              && input[2] == '['
                              && input[3] == 'C'
                              && input[4] == 'D'
                              && input[5] == 'A'
                              && input[6] == 'T'
                              && input[7] == 'A'
                              && input[8] == '[';

        if (! isCData)
        {
            // An ordinary nested element.
            XmlElement* const child = readNextElement (true);

            if (child == nullptr)
                return;

            childAppender.append (child);
            continue;
        }

        // 9 == length of "<![CDATA["
        input += 9;
        const String::CharPointerType cdataStart (input);
        size_t cdataLength = 0;

        for (;;)
        {
            if (*input == 0)
            {
                setLastError ("unterminated CDATA section", false);
                outOfData = true;
                break;
            }

            if (input[0] == ']' && input[1] == ']' && input[2] == '>')
            {
                input += 3;
                break;
            }

            ++input;
            ++cdataLength;
        }

        // Whatever was collected is added as text, even if the section was unterminated.
        childAppender.append (XmlElement::createTextElement (String (cdataStart, cdataLength)));
    }
}
/*  Reads the next element found in the input.

    Anything between the current position and the next '<' is skipped. Returns
    nullptr if the input has run out, if there's no '<' left, or if no tag name
    follows the '<'. Otherwise returns a newly allocated element; if a syntax
    problem is met while reading its attributes, the error is recorded with
    setLastError() and the partially-read element is still returned.

    If alsoParseSubElements is true, the element's children are read as well.
*/
XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
{
    XmlElement* node = nullptr;

    skipNextWhiteSpace();
    if (outOfData)
        return nullptr;

    const int openBracket = input.indexOf ((juce_wchar) '<');

    if (openBracket >= 0)
    {
        input += openBracket + 1;
        int tagLen = findNextTokenLength();

        if (tagLen == 0)
        {
            // no tag name - but allow for a gap after the '<' before giving an error
            skipNextWhiteSpace();
            tagLen = findNextTokenLength();

            if (tagLen == 0)
            {
                setLastError ("tag name missing", false);
                return node;
            }
        }

        node = new XmlElement (String (input, (size_t) tagLen));
        input += tagLen;
        LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);

        // look for attributes
        for (;;)
        {
            skipNextWhiteSpace();

            const juce_wchar c = *input;

            // empty tag..
            if (c == '/' && input[1] == '>')
            {
                input += 2;
                break;
            }

            // parse the guts of the element..
            if (c == '>')
            {
                ++input;

                if (alsoParseSubElements)
                    readChildElements (node);

                break;
            }

            // get an attribute..
            if (XmlIdentifierChars::isIdentifierChar (c))
            {
                const int attNameLen = findNextTokenLength();

                if (attNameLen > 0)
                {
                    const String::CharPointerType attNameStart (input);
                    input += attNameLen;

                    skipNextWhiteSpace();

                    if (readNextChar() == '=')
                    {
                        skipNextWhiteSpace();

                        const juce_wchar nextChar = *input;

                        if (nextChar == '"' || nextChar == '\'')
                        {
                            XmlElement::XmlAttributeNode* const newAtt
                                = new XmlElement::XmlAttributeNode (String (attNameStart, (size_t) attNameLen),
                                                                    String::empty);

                            readQuotedString (newAtt->value);
                            attributeAppender.append (newAtt);
                            continue;
                        }
                    }
                    else
                    {
                        // An attribute name with no '=' after it used to end the tag
                        // silently; report it so the caller can tell the input is malformed.
                        setLastError ("expected '=' after attribute '"
                                        + String (attNameStart, (size_t) attNameLen) + "'", false);
                        return node;
                    }
                }
            }
            else
            {
                if (! outOfData)
                    setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
            }

            break;
        }
    }

    return node;
}