bool TiXmlDocument::LoadFile( const TCHAR* filename ) { // Delete the existing data: Clear(); location.Clear(); // There was a really terrifying little bug here. The code: // value = filename // in the STL case, cause the assignment method of the std::generic_string to // be called. What is strange, is that the std::generic_string had the same // address as it's c_str() method, and so bad things happen. Looks // like a bug in the Microsoft STL implementation. // See STL_STRING_BUG above. // Fixed with the StringToBuffer class. value = filename; FILE* file = generic_fopen( value.c_str (), TEXT("r") ); if ( file ) { // Get the file size, so we can pre-allocate the generic_string. HUGE speed impact. long length = 0; fseek( file, 0, SEEK_END ); length = ftell( file ); fseek( file, 0, SEEK_SET ); // Strange case, but good to handle up front. if ( length == 0 ) { fclose( file ); return false; } // If we have a file, assume it is all one big XML file, and read it in. // The document parser may decide the document ends sooner than the entire file, however. TIXML_STRING data; data.reserve( length ); const int BUF_SIZE = 2048; TCHAR buf[BUF_SIZE]; while( generic_fgets( buf, BUF_SIZE, file ) ) { data += buf; } fclose( file ); Parse( data.c_str(), 0 ); if ( Error() ) return false; else return true; } SetError( TIXML_ERROR_OPENING_FILE, 0, 0 ); return false; }
bool TiXmlDocument::LoadFile( FILE* file, TiXmlEncoding encoding ) { if ( !file ) { SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } // Delete the existing data: Clear(); location.Clear(); // Get the file size, so we can pre-allocate the string. HUGE speed impact. long length = 0; fseek( file, 0, SEEK_END ); length = ftell( file ); fseek( file, 0, SEEK_SET ); if ( length == -1 ) { // Some more serious error. Like openning the direcotory :-) return false; } // Strange case, but good to handle up front. if ( length == 0 ) { SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } // If we have a file, assume it is all one big XML file, and read it in. // The document parser may decide the document ends sooner than the entire file, however. TIXML_STRING data; data.reserve( length ); // Subtle bug here. TinyXml did use fgets. But from the XML spec: // 2.11 End-of-Line Handling // <snip> // <quote> // ...the XML processor MUST behave as if it normalized all line breaks in external // parsed entities (including the document entity) on input, before parsing, by translating // both the two-character sequence #xD #xA and any #xD that is not followed by #xA to // a single #xA character. // </quote> // // It is not clear fgets does that, and certainly isn't clear it works cross platform. // Generally, you expect fgets to translate from the convention of the OS to the c/unix // convention, and not work generally. /* while( fgets( buf, sizeof(buf), file ) ) { data += buf; } */ char* buf = new char[ length+1 ]; buf[0] = 0; if ( fread( buf, length, 1, file ) != 1 ) { delete [] buf; SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } const char* lastPos = buf; const char* p = buf; buf[length] = 0; while( *p ) { assert( p < (buf+length) ); if ( *p == 0xa ) { // Newline character. No special rules for this. Append all the characters // since the last string, and include the newline. data.append( lastPos, (p-lastPos+1) ); // append, include the newline ++p; // move past the newline lastPos = p; // and point to the new buffer (may be 0) assert( p <= (buf+length) ); } else if ( *p == 0xd ) { // Carriage return. Append what we have so far, then // handle moving forward in the buffer. if ( (p-lastPos) > 0 ) { data.append( lastPos, p-lastPos ); // do not add the CR } data += (char)0xa; // a proper newline if ( *(p+1) == 0xa ) { // Carriage return - new line sequence p += 2; lastPos = p; assert( p <= (buf+length) ); } else { // it was followed by something else...that is presumably characters again. ++p; lastPos = p; assert( p <= (buf+length) ); } } else { ++p; } } // Handle any left over characters. if ( p-lastPos ) { data.append( lastPos, p-lastPos ); } delete [] buf; buf = 0; Parse( data.c_str(), 0, encoding ); if ( Error() ) return false; else return true; }
bool TiXmlDocument::LoadBuffer(char* buf, long length, TiXmlEncoding encoding) { // Delete the existing data: Clear(); location.Clear(); // Strange case, but good to handle up front. if ( length <= 0 ) { SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } // If we have a file, assume it is all one big XML file, and read it in. // The document parser may decide the document ends sooner than the entire file, however. TIXML_STRING data; data.reserve( length ); const char* lastPos = buf; const char* p = buf; while( *p ) { assert( p < (buf+length) ); if ( *p == 0xa ) { // Newline character. No special rules for this. Append all the characters // since the last string, and include the newline. data.append( lastPos, (p-lastPos+1) ); // append, include the newline ++p; // move past the newline lastPos = p; // and point to the new buffer (may be 0) assert( p <= (buf+length) ); } else if ( *p == 0xd ) { // Carriage return. Append what we have so far, then // handle moving forward in the buffer. if ( (p-lastPos) > 0 ) { data.append( lastPos, p-lastPos ); // do not add the CR } data += (char)0xa; // a proper newline if ( *(p+1) == 0xa ) { // Carriage return - new line sequence p += 2; lastPos = p; assert( p <= (buf+length) ); } else { // it was followed by something else...that is presumably characters again. ++p; lastPos = p; assert( p <= (buf+length) ); } } else { ++p; } } // Handle any left over characters. if ( p-lastPos ) { data.append( lastPos, p-lastPos ); } Parse( data.c_str(), 0, encoding ); if ( Error() ) return false; else return true; }
bool TiXmlDocument::PHYSFS_LoadFile( const std::string& file, TiXmlEncoding encoding ) { PHYSFS_file* f; if(!(f = PHYSFS_openRead(file.c_str()))) { SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } PHYSFS_sint64 length = PHYSFS_fileLength(f); char* buf = new char[(size_t)length+1]; PHYSFS_sint64 length_read = PHYSFS_read(f, buf, 1, (PHYSFS_uint32) length); PHYSFS_close(f); if (length_read != length) { delete[] buf; return false; } // Delete the existing data: Clear(); location.Clear(); // Subtle bug here. TinyXml did use physfs. But from the XML spec: // 2.11 End-of-Line Handling // <snip> // <quote> // ...the XML processor MUST behave as if it normalized all line breaks in external // parsed entities (including the document entity) on input, before parsing, by translating // both the two-character sequence #xD #xA and any #xD that is not followed by #xA to // a single #xA character. // </quote> // // It is not clear physfs does that, and certainly isn't clear it works cross platform. TIXML_STRING data; data.reserve( (unsigned int) length ); const char* lastPos = buf; const char* p = buf; buf[length] = 0; length++; //+ 0 char while( *p ) { assert( p < (buf+length) ); if ( *p == 0xa ) { // Newline character. No special rules for this. Append all the characters // since the last string, and include the newline. data.append( lastPos, (p-lastPos+1) ); // append, include the newline ++p; // move past the newline lastPos = p; // and point to the new buffer (may be 0) assert( p <= (buf+length) ); } else if ( *p == 0xd ) { // Carriage return. Append what we have so far, then // handle moving forward in the buffer. if ( (p-lastPos) > 0 ) { data.append( lastPos, p-lastPos ); // do not add the CR } data += (char)0xa; // a proper newline if ( *(p+1) == 0xa ) { // Carriage return - new line sequence p += 2; lastPos = p; assert( p <= (buf+length) ); } else { // it was followed by something else...that is presumably characters again. ++p; lastPos = p; assert( p <= (buf+length) ); } } else { ++p; } } // Handle any left over characters. if ( p-lastPos ) { data.append( lastPos, p-lastPos ); } delete [] buf; buf = 0; Parse( data.c_str(), 0, encoding ); if ( Error() ) return false; else return true; }
bool TiXmlDocument::LoadFile( const char* filename, TiXmlEncoding encoding ) { // Delete the existing data: Clear(); location.Clear(); // There was a really terrifying little bug here. The code: // value = filename // in the STL case, cause the assignment method of the std::string to // be called. What is strange, is that the std::string had the same // address as it's c_str() method, and so bad things happen. Looks // like a bug in the Microsoft STL implementation. // See STL_STRING_BUG above. // Fixed with the StringToBuffer class. value = filename; // reading in binary mode so that tinyxml can normalize the EOL FILE* file = fopen( value.c_str (), "rb" ); if ( file ) { // Get the file size, so we can pre-allocate the string. HUGE speed impact. long length = 0; fseek( file, 0, SEEK_END ); length = ftell( file ); fseek( file, 0, SEEK_SET ); // Strange case, but good to handle up front. if ( length == 0 ) { fclose( file ); return false; } // If we have a file, assume it is all one big XML file, and read it in. // The document parser may decide the document ends sooner than the entire file, however. TIXML_STRING data; data.reserve( length ); // Subtle bug here. TinyXml did use fgets. But from the XML spec: // 2.11 End-of-Line Handling // <snip> // <quote> // ...the XML processor MUST behave as if it normalized all line breaks in external // parsed entities (including the document entity) on input, before parsing, by translating // both the two-character sequence #xD #xA and any #xD that is not followed by #xA to // a single #xA character. // </quote> // // It is not clear fgets does that, and certainly isn't clear it works cross platform. // Generally, you expect fgets to translate from the convention of the OS to the c/unix // convention, and not work generally. /* while( fgets( buf, sizeof(buf), file ) ) { data += buf; } */ char* buf = new char[ length+1 ]; buf[0] = 0; if ( fread( buf, length, 1, file ) != 1 ) { //if ( fread( buf, 1, length, file ) != (size_t)length ) { SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); fclose( file ); return false; } fclose( file ); const char* lastPos = buf; const char* p = buf; buf[length] = 0; while( *p ) { assert( p < (buf+length) ); if ( *p == 0xa ) { // Newline character. No special rules for this. Append all the characters // since the last string, and include the newline. data.append( lastPos, p-lastPos+1 ); // append, include the newline ++p; // move past the newline lastPos = p; // and point to the new buffer (may be 0) assert( p <= (buf+length) ); } else if ( *p == 0xd ) { // Carriage return. Append what we have so far, then // handle moving forward in the buffer. if ( (p-lastPos) > 0 ) { data.append( lastPos, p-lastPos ); // do not add the CR } data += (char)0xa; // a proper newline if ( *(p+1) == 0xa ) { // Carriage return - new line sequence p += 2; lastPos = p; assert( p <= (buf+length) ); } else { // it was followed by something else...that is presumably characters again. ++p; lastPos = p; assert( p <= (buf+length) ); } } else { ++p; } } // Handle any left over characters. if ( p-lastPos ) { data.append( lastPos, p-lastPos ); } delete [] buf; buf = 0; Parse( data.c_str(), 0, encoding ); if ( Error() ) return false; else return true; } SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; }
bool TiXmlDocument::LoadFile( FILE* file, TiXmlEncoding encoding ) { if ( !file ) { SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } // Delete the existing data: Clear(); location.Clear(); // Get the file size, so we can pre-allocate the string. HUGE speed impact. long length = 0; fseek( file, 0, SEEK_END ); length = ftell( file ); fseek( file, 0, SEEK_SET ); // Strange case, but good to handle up front. if ( length <= 0 ) { SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } // If we have a file, assume it is all one big XML file, and read it in. // The document parser may decide the document ends sooner than the entire file, however. TIXML_STRING data; data.reserve( length ); // Subtle bug here. TinyXml did use fgets. But from the XML spec: // 2.11 End-of-Line Handling // <snip> // <quote> // ...the XML processor MUST behave as if it normalized all line breaks in external // parsed entities (including the document entity) on input, before parsing, by translating // both the two-character sequence #xD #xA and any #xD that is not followed by #xA to // a single #xA character. // </quote> // // It is not clear fgets does that, and certainly isn't clear it works cross platform. // Generally, you expect fgets to translate from the convention of the OS to the c/unix // convention, and not work generally. /* while( fgets( buf, sizeof(buf), file ) ) { data += buf; } */ #define BLANK_LINE_COMMENT_MAGIC "##BLANK-LINE##" bool bUseBlankLineMagic = true; char* buf = new char[ length+1 ]; buf[0] = 0; if ( fread( buf, length, 1, file ) != 1 ) { delete [] buf; SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } const char* lastPos = buf; const char* p = buf; bool bInComment = false; bool bInTag = false; bool bEmptyLine = false; int iNewLineCount = 0; bool bOnlyWhiteSpaceChars = false; buf[length] = 0; while( *p ) { assert( p < (buf+length) ); if ( *p == 0xa || *p == 0xd ) { if ( bEmptyLine && !bInTag && !bInComment && bUseBlankLineMagic ) iNewLineCount++; bEmptyLine = true; } if ( *p == 0xa ) { // Newline character. No special rules for this. Append all the characters // since the last string, and include the newline. data.append( lastPos, (p-lastPos+1) ); // append, include the newline ++p; // move past the newline lastPos = p; // and point to the new buffer (may be 0) assert( p <= (buf+length) ); } else if ( *p == 0xd ) { // Carriage return. Append what we have so far, then // handle moving forward in the buffer. if ( (p-lastPos) > 0 ) { data.append( lastPos, p-lastPos ); // do not add the CR } data += (char)0xa; // a proper newline if ( *(p+1) == 0xa ) { // Carriage return - new line sequence p += 2; lastPos = p; assert( p <= (buf+length) ); } else { // it was followed by something else...that is presumably characters again. ++p; lastPos = p; assert( p <= (buf+length) ); } } else if ( *p == ' ' || *p == '\t' ) { // White space ++p; } else { if ( strncmp ( p, "<!--", 4 ) == 0 ) bInComment = true; // Entering comment else if ( strncmp ( p, "-->", 3 ) == 0 ) bInComment = false; // Leaving comment if ( strncmp ( p, "<", 1 ) == 0 ) { bInTag = true; // Entering tag // If preceeding text contains only white space, save the blank lines as comments if ( bOnlyWhiteSpaceChars ) { for ( int i = 0 ; i < iNewLineCount ; i++ ) { data.append( "<!--" BLANK_LINE_COMMENT_MAGIC "-->" ); } bOnlyWhiteSpaceChars = false; iNewLineCount = 0; } } else if ( strncmp ( p, ">", 1 ) == 0 ) { bInTag = false; // Leaving tag // Start of possible white space area containing blank lines bOnlyWhiteSpaceChars = true; iNewLineCount = 0; } else bOnlyWhiteSpaceChars = false; bEmptyLine = false; ++p; } } // Handle any left over characters. if ( p-lastPos ) { data.append( lastPos, p-lastPos ); } delete [] buf; buf = 0; Parse( data.c_str(), 0, encoding ); if ( Error() ) return false; else return true; }
bool TiXmlDocument::LoadFile( const TCHAR* filename ) { // Delete the existing data: Clear(); location.Clear(); // There was a really terrifying little bug here. The code: // value = filename // in the STL case, cause the assignment method of the std::generic_string to // be called. What is strange, is that the std::generic_string had the same // address as it's c_str() method, and so bad things happen. Looks // like a bug in the Microsoft STL implementation. // See STL_STRING_BUG above. // Fixed with the StringToBuffer class. value = filename; FILE* file = generic_fopen( value.c_str (), TEXT("r") ); if ( file ) { // Get the file size, so we can pre-allocate the generic_string. HUGE speed impact. long length = 0; fseek( file, 0, SEEK_END ); length = ftell( file ); fseek( file, 0, SEEK_SET ); // Strange case, but good to handle up front. if ( length == 0 ) { fclose( file ); return false; } // If we have a file, assume it is all one big XML file, and read it in. // The document parser may decide the document ends sooner than the entire file, however. TIXML_STRING data; data.reserve( length ); const int BUF_SIZE = 2048; TCHAR buf[BUF_SIZE]; while( generic_fgets( buf, BUF_SIZE, file ) ) { data += buf; } fclose( file ); //input is in UTF-8, so transformation is needed to UTF-16 used by windows for TCHAR in unicode mode std::vector<char> inputdataInUTF8(data.size()+1); //+1 for the null termination size_t datalength = wcstombs(inputdataInUTF8.data(), data.c_str(), data.size()); int transformedDataCharCount = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)inputdataInUTF8.data(), -1, nullptr, 0); std::vector<wchar_t> transformedData(transformedDataCharCount+1); //+1 for the null termination transformedDataCharCount = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)inputdataInUTF8.data(), -1, transformedData.data(), transformedDataCharCount); if(transformedDataCharCount > 0) { //replace the original data with the new tranformed one, on success ot transformation otherwise go with old style data data.clear(); data = transformedData.data(); } Parse( data.c_str(), 0 ); if ( Error() ) return false; else return true; } SetError( TIXML_ERROR_OPENING_FILE, 0, 0 ); return false; }
bool TiXmlDocument::LoadFile( FILE* file, TiXmlEncoding encoding ) { if ( !file ) { SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } Clear(); location.Clear(); long length = 0; fseek( file, 0, SEEK_END ); length = ftell( file ); fseek( file, 0, SEEK_SET ); if ( length <= 0 ) { SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } TIXML_STRING data; data.reserve( length ); char* buf = new char[ length+1 ]; buf[0] = 0; if ( fread( buf, length, 1, file ) != 1 ) { delete [] buf; SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; } const char* lastPos = buf; const char* p = buf; buf[length] = 0; while( *p ) { assert( p < (buf+length) ); if ( *p == 0xa ) { data.append( lastPos, (p-lastPos+1) ); ++p; lastPos = p; assert( p <= (buf+length) ); } else if ( *p == 0xd ) { if ( (p-lastPos) > 0 ) { data.append( lastPos, p-lastPos ); } data += (char)0xa; if ( *(p+1) == 0xa ) { p += 2; lastPos = p; assert( p <= (buf+length) ); } else { ++p; lastPos = p; assert( p <= (buf+length) ); } } else { ++p; } } if ( p-lastPos ) { data.append( lastPos, p-lastPos ); } delete [] buf; buf = 0; Parse( data.c_str(), 0, encoding ); if ( Error() ) return false; else return true; }
bool TiXmlDocument::LoadFile( const char* filename, TiXmlEncoding encoding ) { // Delete the existing data: Clear(); location.Clear(); // There was a really terrifying little bug here. The code: // value = filename // in the STL case, cause the assignment method of the std::string to // be called. What is strange, is that the std::string had the same // address as it's c_str() method, and so bad things happen. Looks // like a bug in the Microsoft STL implementation. // See STL_STRING_BUG above. // Fixed with the StringToBuffer class. value = filename; FILE* file = fopen( value.c_str (), "r" ); if ( file ) { // Get the file size, so we can pre-allocate the string. HUGE speed impact. long length = 0; fseek( file, 0, SEEK_END ); length = ftell( file ); fseek( file, 0, SEEK_SET ); // Strange case, but good to handle up front. if ( length == 0 ) { fclose( file ); return false; } // If we have a file, assume it is all one big XML file, and read it in. // The document parser may decide the document ends sooner than the entire file, however. TIXML_STRING data; data.reserve( length ); const int BUF_SIZE = 2048; char buf[BUF_SIZE]; while(int byteRead=fread( buf,1,BUF_SIZE-1, file ) ) { buf[byteRead]=0 ; char *src=buf ; char *dst=buf ; while (*src) { if ((*src!='\n')&&(*src!='\r')) *dst++=*src ; src++ ; } ; *dst=0 ; data += buf; } fclose( file ); Parse( data.c_str(), 0, encoding ); if ( Error() ) return false; else return true; } SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN ); return false; }