const char* TTiXmlDocument::Parse( const char* p, TTiXmlParsingData* prevData, TTiXmlEncoding encoding ) { ClearError(); // Parse away, at the document level. Since a document // contains nothing but other tags, most of what happens // here is skipping white space. if ( !p || !*p ) { SetError( TTiXml_ERROR_DOCUMENT_EMPTY, 0, 0, TTiXml_ENCODING_UNKNOWN ); return 0; } // Note that, for a document, this needs to come // before the while space skip, so that parsing // starts from the pointer we are given. location.Clear(); if ( prevData ) { location.row = prevData->cursor.row; location.col = prevData->cursor.col; } else { location.row = 0; location.col = 0; } TTiXmlParsingData data( p, TabSize(), location.row, location.col ); location = data.Cursor(); if ( encoding == TTiXml_ENCODING_UNKNOWN ) { // Check for the Microsoft UTF-8 lead bytes. const unsigned char* pU = (const unsigned char*)p; if ( *(pU+0) && *(pU+0) == TTiXml_UTF_LEAD_0 && *(pU+1) && *(pU+1) == TTiXml_UTF_LEAD_1 && *(pU+2) && *(pU+2) == TTiXml_UTF_LEAD_2 ) { encoding = TTiXml_ENCODING_UTF8; useMicrosoftBOM = true; } } p = SkipWhiteSpace( p, encoding ); if ( !p ) { SetError( TTiXml_ERROR_DOCUMENT_EMPTY, 0, 0, TTiXml_ENCODING_UNKNOWN ); return 0; } while ( p && *p ) { TTiXmlNode* node = Identify( p, encoding ); if ( node ) { p = node->Parse( p, &data, encoding ); LinkEndChild( node ); } else { break; } // Did we get encoding info? if ( encoding == TTiXml_ENCODING_UNKNOWN && node->ToDeclaration() ) { TTiXmlDeclaration* dec = node->ToDeclaration(); const char* enc = dec->Encoding(); assert( enc ); if ( *enc == 0 ) encoding = TTiXml_ENCODING_UTF8; else if ( StringEqual( enc, "UTF-8", true, TTiXml_ENCODING_UNKNOWN ) ) encoding = TTiXml_ENCODING_UTF8; else if ( StringEqual( enc, "UTF8", true, TTiXml_ENCODING_UNKNOWN ) ) encoding = TTiXml_ENCODING_UTF8; // incorrect, but be nice else encoding = TTiXml_ENCODING_LEGACY; } p = SkipWhiteSpace( p, encoding ); } // Was this empty? if ( !firstChild ) { SetError( TTiXml_ERROR_DOCUMENT_EMPTY, 0, 0, encoding ); return 0; } // All is well. return p; }
const char* TTiXmlElement::ReadValue( const char* p, TTiXmlParsingData* data, TTiXmlEncoding encoding ) { TTiXmlDocument* document = GetDocument(); // Read in text and elements in any order. const char* pWithWhiteSpace = p; p = SkipWhiteSpace( p, encoding ); while ( p && *p ) { if ( *p != '<' ) { // Take what we have, make a text element. TTiXmlText* textNode = new TTiXmlText( "" ); if ( !textNode ) { if ( document ) document->SetError( TTiXml_ERROR_OUT_OF_MEMORY, 0, 0, encoding ); return 0; } if ( TTiXmlBase::IsWhiteSpaceCondensed() ) { p = textNode->Parse( p, data, encoding ); } else { // Special case: we want to keep the white space // so that leading spaces aren't removed. p = textNode->Parse( pWithWhiteSpace, data, encoding ); } if ( !textNode->Blank() ) LinkEndChild( textNode ); else delete textNode; } else { // We hit a '<' // Have we hit a new element or an end tag? This could also be // a TTiXmlText in the "CDATA" style. if ( StringEqual( p, "</", false, encoding ) ) { return p; } else { TTiXmlNode* node = Identify( p, encoding ); if ( node ) { p = node->Parse( p, data, encoding ); LinkEndChild( node ); } else { return 0; } } } pWithWhiteSpace = p; p = SkipWhiteSpace( p, encoding ); } if ( !p ) { if ( document ) document->SetError( TTiXml_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); } return p; }