const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
{
	TiXmlDocument* document = GetDocument();

	// Read in text and elements in any order.
	const char* pWithWhiteSpace = p;
	p = SkipWhiteSpace( p, encoding );

	while ( p && *p )
	{
		if ( *p != '<' )
		{
			// Take what we have, make a text element.
			TiXmlText* textNode = new TiXmlText( "" );

			if ( !textNode )
			{
			    return 0;
			}

			if ( TiXmlBase::IsWhiteSpaceCondensed() )
			{
				p = textNode->Parse( p, data, encoding );
			}
			else
			{
				// Special case: we want to keep the white space
				// so that leading spaces aren't removed.
				p = textNode->Parse( pWithWhiteSpace, data, encoding );
			}

			if ( !textNode->Blank() )
				LinkEndChild( textNode );
			else
				delete textNode;
		} 
		else 
		{
			// We hit a '<'
			// Have we hit a new element or an end tag? This could also be
			// a TiXmlText in the "CDATA" style.
			if ( StringEqual( p, "</", false, encoding ) )
			{
				return p;
			}
			else
			{
				TiXmlNode* node = Identify( p, encoding );
				if ( node )
				{
					p = node->Parse( p, data, encoding );
					LinkEndChild( node );
				}				
				else
				{
					return 0;
				}
			}
		}
		pWithWhiteSpace = p;
		p = SkipWhiteSpace( p, encoding );
	}

	if ( !p )
	{
		if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
	}	
	return p;
}
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
{
	ClearError();

	// Parse away, at the document level. Since a document
	// contains nothing but other tags, most of what happens
	// here is skipping white space.
	if ( !p || !*p )
	{
		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
		return 0;
	}

	// Note that, for a document, this needs to come
	// before the while space skip, so that parsing
	// starts from the pointer we are given.
	location.Clear();
	if ( prevData )
	{
		location.row = prevData->cursor.row;
		location.col = prevData->cursor.col;
	}
	else
	{
		location.row = 0;
		location.col = 0;
	}
	TiXmlParsingData data( p, TabSize(), location.row, location.col );
	location = data.Cursor();

	if ( encoding == TIXML_ENCODING_UNKNOWN )
	{
		// Check for the Microsoft UTF-8 lead bytes.
		const unsigned char* pU = (const unsigned char*)p;
		if (	*(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
			 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
			 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
		{
			encoding = TIXML_ENCODING_UTF8;
			useMicrosoftBOM = true;
		}
	}

    p = SkipWhiteSpace( p, encoding );
	if ( !p )
	{
		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
		return 0;
	}

	while ( p && *p )
	{
		TiXmlNode* node = Identify( p, encoding );
		if ( node )
		{
			p = node->Parse( p, &data, encoding );
			LinkEndChild( node );
		}
		else
		{
			break;
		}

		// Did we get encoding info?
		if (    encoding == TIXML_ENCODING_UNKNOWN
			 && node->ToDeclaration() )
		{
			TiXmlDeclaration* dec = node->ToDeclaration();
			const char* enc = dec->Encoding();
			assert( enc );

			if ( *enc == 0 )
				encoding = TIXML_ENCODING_UTF8;
			else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
				encoding = TIXML_ENCODING_UTF8;
			else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
				encoding = TIXML_ENCODING_UTF8;	// incorrect, but be nice
			else 
				encoding = TIXML_ENCODING_LEGACY;
		}

		p = SkipWhiteSpace( p, encoding );
	}

	// Was this empty?
	if ( !firstChild ) {
		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
		return 0;
	}

	// All is well.
	return p;
}
示例#3
0
const char* TiXmlDocument::Parse
  ( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
{
    ClearError();

    // Parse away, at the document level. Since a document
    // contains nothing but other tags, most of what happens
    // here is skipping white space.
    // sherm 100319: I changed this so that untagged top-level text is
    // parsed as a Text node rather than a parsing error. CDATA text was
    // already allowed at the top level so this seems more consistent.
    if ( !p || !*p )
    {
        SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
        return 0;
    }

    // Note that, for a document, this needs to come
    // before the while space skip, so that parsing
    // starts from the pointer we are given.
    location.Clear();
    if ( prevData )
    {
        location.row = prevData->cursor.row;
        location.col = prevData->cursor.col;
    }
    else
    {
        location.row = 0;
        location.col = 0;
    }
    TiXmlParsingData data( p, TabSize(), location.row, location.col );
    location = data.Cursor();

    if ( encoding == TIXML_ENCODING_UNKNOWN )
    {
        // Check for the Microsoft UTF-8 lead bytes.
        const unsigned char* pU = (const unsigned char*)p;
        if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
             && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
             && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
        {
            encoding = TIXML_ENCODING_UTF8;
            useMicrosoftBOM = true;
        }
    }

    // Remember the start of white space in case we end up reading a text
    // element in a "keep white space" mode.
    const char* pWithWhiteSpace = p;
    p = SkipWhiteSpace( p, encoding );
    if ( !p )
    {
        SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
        return 0;
    }

    // sherm 100319: ignore all but the first Declaration
    bool haveSeenDeclaration = false;
    while ( p && *p )
    {
        TiXmlNode* node = 0;
        if ( *p != '<' )
        {   // sherm 100319: I added this case by stealing the code from
            // Element parsing; see above comment.
            // Take what we have, make a text element.
            TiXmlText* textNode = new TiXmlText( "" );

            if ( !textNode )
            {
                SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
                return 0;
            }

            if ( TiXmlBase::IsWhiteSpaceCondensed() )
            {
                p = textNode->Parse( p, &data, encoding );
            }
            else
            {
                // Special case: we want to keep the white space
                // so that leading spaces aren't removed.
                p = textNode->Parse( pWithWhiteSpace, &data, encoding );
            }

            if ( !textNode->Blank() ) {
                LinkEndChild( textNode );
                node = textNode;
            }
            else
                delete textNode;
        }
        else // We saw a '<', now identify what kind of tag it is.
        {
            TiXmlNode* node = Identify( p, encoding );
            if ( node )
            {
                p = node->Parse( p, &data, encoding );
                if (node->ToDeclaration()) {
                    if (haveSeenDeclaration) {
                        delete node; node=0; // ignore duplicate Declaration
                    } else
                        haveSeenDeclaration = true;
                }
                if (node)
                    LinkEndChild( node );
            }
            else
            {
                // If Identify fails then no further parsing is possible.
                break;
            }
        }

        // Did we get encoding info?
        if (    encoding == TIXML_ENCODING_UNKNOWN
             && node && node->ToDeclaration() )
        {
            TiXmlDeclaration* dec = node->ToDeclaration();
            const char* enc = dec->Encoding();
            assert( enc );

            if ( *enc == 0 )
                encoding = TIXML_ENCODING_UTF8;
            else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
                encoding = TIXML_ENCODING_UTF8;
            else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
                encoding = TIXML_ENCODING_UTF8;    // incorrect, but be nice
            else
                encoding = TIXML_ENCODING_LEGACY;
        }

        pWithWhiteSpace = p;
        p = SkipWhiteSpace( p, encoding );
    }

    // Was this empty?
    if ( !firstChild ) {
        SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
        return 0;
    }

    // All is well.
    return p;
}