void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag) { // We're called with some amount of pre-parsing. That is, some of "this" // element is in "tag". Go ahead and stream to the closing ">" while( in->good() ) { int c = in->get(); if ( c <= 0 ) { TiXmlDocument* document = GetDocument(); if ( document ) document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } (*tag) += (char) c ; if ( c == '>' ) break; } if ( tag->length() < 3 ) return; // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. // If not, identify and stream. if ( tag->at( tag->length() - 1 ) == '>' && tag->at( tag->length() - 2 ) == '/' ) { // All good! return; } else if ( tag->at( tag->length() - 1 ) == '>' ) { // There is more. Could be: // text // cdata text (which looks like another node) // closing tag // another node. for ( ;; ) { StreamWhiteSpace( in, tag ); // Do we have text? if ( in->good() && in->peek() != '<' ) { // Yep, text. TiXmlText text( "" ); text.StreamIn( in, tag ); // What follows text is a closing tag or another node. // Go around again and figure it out. continue; } // We now have either a closing tag...or another node. // We should be at a "<", regardless. if ( !in->good() ) return; assert( in->peek() == '<' ); int tagIndex = (int) tag->length(); bool closingTag = false; bool firstCharFound = false; for( ;; ) { if ( !in->good() ) return; int c = in->peek(); if ( c <= 0 ) { TiXmlDocument* document = GetDocument(); if ( document ) document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } if ( c == '>' ) break; *tag += (char) c; in->get(); // Early out if we find the CDATA id. if ( c == '[' && tag->size() >= 9 ) { size_t len = tag->size(); const char* start = tag->c_str() + len - 9; if ( strcmp( start, "<![CDATA[" ) == 0 ) { assert( !closingTag ); break; } } if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) { firstCharFound = true; if ( c == '/' ) closingTag = true; } } // If it was a closing tag, then read in the closing '>' to clean up the input stream. // If it was not, the streaming will be done by the tag. if ( closingTag ) { if ( !in->good() ) return; int c = in->get(); if ( c <= 0 ) { TiXmlDocument* document = GetDocument(); if ( document ) document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } assert( c == '>' ); *tag += (char) c; // We are done, once we've found our closing tag. return; } else { // If not a closing tag, id it, and stream. const char* tagloc = tag->c_str() + tagIndex; TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); if ( !node ) return; node->StreamIn( in, tag ); delete node; node = 0; // No return: go around from the beginning: text, closing tag, or node. } } } }
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) { TiXmlNode* returnNode = 0; p = SkipWhiteSpace( p, encoding ); if( !p || !*p || *p != '<' ) { return 0; } TiXmlDocument* doc = GetDocument(); p = SkipWhiteSpace( p, encoding ); if ( !p || !*p ) { return 0; } // What is this thing? // - Elements start with a letter or underscore, but xml is reserved. // - Comments: <!-- // - Decleration: <?xml // - Everthing else is unknown to tinyxml. // const char* xmlHeader = { "<?xml" }; const char* commentHeader = { "<!--" }; const char* dtdHeader = { "<!" }; const char* cdataHeader = { "<![CDATA[" }; if ( StringEqual( p, xmlHeader, true, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Declaration\n" ); #endif returnNode = new TiXmlDeclaration(); } else if ( StringEqual( p, commentHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Comment\n" ); #endif returnNode = new TiXmlComment(); } else if ( StringEqual( p, cdataHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing CDATA\n" ); #endif TiXmlText* text = new TiXmlText( "" ); text->SetCDATA( true ); returnNode = text; } else if ( StringEqual( p, dtdHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Unknown(1)\n" ); #endif returnNode = new TiXmlUnknown(); } else if ( IsAlpha( *(p+1), encoding ) || *(p+1) == '_' ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Element\n" ); #endif returnNode = new TiXmlElement( "" ); } else { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Unknown(2)\n" ); #endif returnNode = new TiXmlUnknown(); } if ( returnNode ) { // Set the parent, so it can report errors returnNode->parent = this; } else { if ( doc ) doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN ); } return returnNode; }
const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { TiXmlDocument* document = GetDocument(); // Read in text and elements in any order. const char* pWithWhiteSpace = p; p = SkipWhiteSpace( p, encoding ); while ( p && *p ) { if ( *p != '<' ) { // Take what we have, make a text element. TiXmlText* textNode = new TiXmlText( "" ); if ( !textNode ) { if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding ); return 0; } if ( TiXmlBase::IsWhiteSpaceCondensed() ) { p = textNode->Parse( p, data, encoding ); } else { // Special case: we want to keep the white space // so that leading spaces aren't removed. p = textNode->Parse( pWithWhiteSpace, data, encoding ); } if ( !textNode->Blank() ) LinkEndChild( textNode ); else delete textNode; } else { // We hit a '<' // Have we hit a new element or an end tag? This could also be // a TiXmlText in the "CDATA" style. if ( StringEqual( p, "</", false, encoding ) ) { return p; } else { TiXmlNode* node = Identify( p, encoding ); if ( node ) { p = node->Parse( p, data, encoding ); LinkEndChild( node ); } else { return 0; } } } pWithWhiteSpace = p; p = SkipWhiteSpace( p, encoding ); } if ( !p ) { if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); } return p; }
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { p = SkipWhiteSpace( p, encoding ); TiXmlDocument* document = GetDocument(); if ( !p || !*p ) { if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding ); return 0; } if ( data ) { data->Stamp( p, encoding ); location = data->Cursor(); } if ( *p != '<' ) { if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding ); return 0; } p = SkipWhiteSpace( p+1, encoding ); // Read the name. const char* pErr = p; p = ReadName( p, &value, encoding ); if ( !p || !*p ) { if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding ); return 0; } TIXML_STRING endTag ("</"); endTag += value; endTag += ">"; // Check for and read attributes. Also look for an empty // tag or an end tag. while ( p && *p ) { pErr = p; p = SkipWhiteSpace( p, encoding ); if ( !p || !*p ) { if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); return 0; } if ( *p == '/' ) { ++p; // Empty tag. if ( *p != '>' ) { if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding ); return 0; } return (p+1); } else if ( *p == '>' ) { // Done with attributes (if there were any.) // Read the value -- which can include other // elements -- read the end tag, and return. ++p; p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens. if ( !p || !*p ) { // We were looking for the end tag, but found nothing. // Fix for [ 1663758 ] Failure to report error on bad XML if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); return 0; } // We should find the end tag now if ( StringEqual( p, endTag.c_str(), false, encoding ) ) { p += endTag.length(); return p; } else { if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); return 0; } } else { // Try to read an attribute: TiXmlAttribute* attrib = new TiXmlAttribute(); if ( !attrib ) { if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding ); return 0; } attrib->SetDocument( document ); pErr = p; p = attrib->Parse( p, data, encoding ); if ( !p || !*p ) { if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); delete attrib; return 0; } // Handle the strange case of double attributes: #ifdef TIXML_USE_STL TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() ); #else TiXmlAttribute* node = attributeSet.Find( attrib->Name() ); #endif if ( node ) { node->SetValue( attrib->Value() ); delete attrib; return 0; } attributeSet.Add( attrib ); } } return p; }
TiXmlNode* TiXmlNode::Identify( const TCHAR* p ) { TiXmlNode* returnNode = 0; p = SkipWhiteSpace( p ); if( !p || !*p || *p != '<' ) { return 0; } TiXmlDocument* doc = GetDocument(); p = SkipWhiteSpace( p ); if ( !p || !*p ) { return 0; } // What is this thing? // - Elements start with a letter or underscore, but xml is reserved. // - Comments: <!-- // - Decleration: <?xml // - Everthing else is unknown to tinyxml. // const TCHAR* xmlHeader = { TEXT("<?xml") }; const TCHAR* commentHeader = { TEXT("<!--") }; if ( StringEqual( p, xmlHeader, true ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Declaration\n" ); #endif returnNode = new TiXmlDeclaration(); } else if ( isalpha( *(p+1) ) || *(p+1) == '_' ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Element\n" ); #endif returnNode = new TiXmlElement( TEXT("") ); } else if ( StringEqual( p, commentHeader, false ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Comment\n" ); #endif returnNode = new TiXmlComment(); } else { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Unknown\n" ); #endif returnNode = new TiXmlUnknown(); } if ( returnNode ) { // Set the parent, so it can report errors returnNode->parent = this; } else { if ( doc ) doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0 ); } return returnNode; }
// We're expecting a tag; identify what kind here and allocate the // appropriate type of node (but don't parse yet). We'll return null if // we can't find a "<" as the next non-whitespace character, or if we // find the "<" but it is followed by nothing but whitespace. Note that // we're only looking at the beginning of the tag, we won't check to see // if it is terminated properly. TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) { TiXmlNode* returnNode = 0; p = SkipWhiteSpace( p, encoding ); if( !p || !*p || *p != '<' ) { return 0; // couldn't find the tag-opening "<" } TiXmlDocument* doc = GetDocument(); p = SkipWhiteSpace( p, encoding ); if ( !p || !*p ) { return 0; // a lone "<" at the end of the document } // What is this thing? // - Elements: < X where X is a letter or underscore; // white space optional between "<" and "X". // - Comments: <!-- // - Declaration: <?xml // - CDATA Text: <![CDATA[ // - All other tags are unknown to tinyxml. // const char* xmlHeader = { "<?xml" }; const char* commentHeader = { "<!--" }; const char* dtdHeader = { "<!" }; // treated as "unknown" const char* cdataHeader = { "<![CDATA[" }; if ( StringEqual( p, xmlHeader, true, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Declaration\n" ); #endif returnNode = new TiXmlDeclaration(); } else if ( StringEqual( p, commentHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Comment\n" ); #endif returnNode = new TiXmlComment(); } else if ( StringEqual( p, cdataHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing CDATA\n" ); #endif TiXmlText* text = new TiXmlText( "" ); text->SetCDATA( true ); returnNode = text; } else if ( StringEqual( p, dtdHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Unknown(1)\n" ); #endif returnNode = new TiXmlUnknown(); } else if ( IsAlpha( *(p+1), encoding ) || *(p+1) == '_' ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Element\n" ); #endif returnNode = new TiXmlElement( "" ); } else { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Unknown(2)\n" ); #endif returnNode = new TiXmlUnknown(); } if ( returnNode ) { // Set the parent, so it can report errors returnNode->parent = this; } else { if ( doc ) doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN ); } return returnNode; }