// Reads one 16-bit unsigned integer from the attached input stream.
//
// The two bytes are treated as big-endian when m_be_order is set and as
// little-endian otherwise; the matching swap macro converts them to host
// byte order.
//
// Returns the decoded value. Bytes the stream does not deliver stay zero, so
// a failed read yields 0 instead of an indeterminate value.
wxUint16 wxDataInputStream::Read16()
{
    // Zero-initialised on purpose: if Read() stores fewer than 2 bytes
    // (error, end of data) we must not return uninitialised stack contents.
    wxUint16 i16 = 0;
    m_input->Read(&i16, 2);

    if (m_be_order)
        return wxUINT16_SWAP_ON_LE(i16);
    else
        return wxUINT16_SWAP_ON_BE(i16);
}
// Loads a 16-bit unsigned value from the input stream.
//
// When CanLoad() reports false the stream is left untouched and 0 is
// returned. Otherwise sizeof(wxUint16) bytes are read and passed through
// wxUINT16_SWAP_ON_LE before being handed back to the caller.
wxUint16 wxArchive::LoadUint16()
{
    if (!CanLoad())
        return 0;

    wxUint16 raw = 0;
    m_idstr.Read((void *)&raw, sizeof(raw));
    return wxUINT16_SWAP_ON_LE(raw);
}
// Stolen from https://github.com/etexteditor/e/blob/master/src/Strings.cpp // and: https://github.com/etexteditor/e/blob/master/src/Utf.cpp // Copyright (c) 2009, Alexander Stigsen, e-texteditor.com (All rights reserved) // http://www.e-texteditor.com/ bool EncodingDetector::DetectEncodingEx(const wxByte* buffer, size_t size) { if (!buffer || size == 0) return false; const wxByte* buff_ptr = buffer; const wxByte* buff_end = &buffer[size]; wxFontEncoding enc = wxFONTENCODING_DEFAULT; // Check if the buffer starts with a BOM (Byte Order Marker) if (size >= 2) { if (size >= 4 && memcmp(buffer, "\xFF\xFE\x00\x00", 4) == 0) { enc = wxFONTENCODING_UTF32LE; m_BOMSizeInBytes = 4; m_UseBOM = true; } else if (size >= 4 && memcmp(buffer, "\xFE\xFF\x00\x00", 4) == 0) { // FE FF 00 00 UCS-4, unusual octet order BOM (3412) // X-ISO-10646-UCS-4-3412 can not (yet) be handled by wxWidgets enc = (wxFontEncoding)-1; } else if (size >= 4 && memcmp(buffer, "\x00\x00\xFE\xFF", 4) == 0) { enc = wxFONTENCODING_UTF32BE; m_BOMSizeInBytes = 4; m_UseBOM = true; } else if (size >= 4 && memcmp(buffer, "\x00\x00\xFF\xFE", 4) == 0) { // 00 00 FF FE UCS-4, unusual octet order BOM (2143) // X-ISO-10646-UCS-4-2143 can not (yet) be handled by wxWidgets enc = (wxFontEncoding)-1; } else if ( memcmp(buffer, "\xFF\xFE", 2) == 0) { enc = wxFONTENCODING_UTF16LE; m_BOMSizeInBytes = 2; m_UseBOM = true; } else if ( memcmp(buffer, "\xFE\xFF", 2) == 0) { enc = wxFONTENCODING_UTF16BE; m_BOMSizeInBytes = 2; m_UseBOM = true; } else if (size >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) { enc = wxFONTENCODING_UTF8; m_BOMSizeInBytes = 3; m_UseBOM = true; } else if (size >= 5 && memcmp(buffer, "\x2B\x2F\x76\x38\x2D", 5) == 0) { enc = wxFONTENCODING_UTF7; m_BOMSizeInBytes = 5; m_UseBOM = true; } buff_ptr += m_BOMSizeInBytes; } // If the file starts with a leading < (less) sign, it is probably an XML file // and we can determine the encoding by how the sign is encoded. 
if (enc == wxFONTENCODING_DEFAULT && size >= 2) { if (size >= 4 && memcmp(buffer, "\x3C\x00\x00\x00", 4) == 0) enc = wxFONTENCODING_UTF32LE; else if (size >= 4 && memcmp(buffer, "\x00\x00\x00\x3C", 4) == 0) enc = wxFONTENCODING_UTF32BE; else if ( memcmp(buffer, "\x3C\x00", 2) == 0) enc = wxFONTENCODING_UTF16LE; else if ( memcmp(buffer, "\x00\x3C", 2) == 0) enc = wxFONTENCODING_UTF16BE; } // Unicode Detection if (enc == wxFONTENCODING_DEFAULT) { unsigned int null_byte_count = 0; unsigned int utf_bytes = 0; unsigned int good_utf_count = 0; unsigned int bad_utf_count = 0; unsigned int bad_utf32_count = 0; unsigned int bad_utf16_count = 0; unsigned int nl_utf32le_count = 0; unsigned int nl_utf32be_count = 0; unsigned int nl_utf16le_count = 0; unsigned int nl_utf16be_count = 0; while (buff_ptr != buff_end) { if (*buff_ptr == 0) ++null_byte_count; // Detect UTF-8 by scanning for invalid sequences if (utf_bytes == 0) { if ((*buff_ptr & 0xC0) == 0x80 || *buff_ptr == 0) ++bad_utf_count; else { const char c = *buff_ptr; utf_bytes = 5; // invalid length if ((c & 0x80) == 0x00) utf_bytes = 1; else if ((c & 0xE0) == 0xC0) utf_bytes = 2; else if ((c & 0xF0) == 0xE0) utf_bytes = 3; else if ((c & 0xF8) == 0xF0) utf_bytes = 4; if (utf_bytes > 3) { ++bad_utf_count; utf_bytes = 0; } } } else if ((*buff_ptr & 0xC0) == 0x80) { --utf_bytes; if (utf_bytes == 0) ++good_utf_count; } else { ++bad_utf_count; utf_bytes = 0; } // Detect UTF-32 by scanning for newlines (and lack of null chars) if ((wxUIntPtr)buff_ptr % 4 == 0 && buff_ptr+4 <= buff_end) { if (*((wxUint32*)buff_ptr) == 0 ) ++bad_utf32_count; if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_BE(0x0A)) ++nl_utf32le_count; if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_LE(0x0A)) ++nl_utf32be_count; } // Detect UTF-16 by scanning for newlines (and lack of null chars) if ((wxUIntPtr)buff_ptr % 2 == 0 && buff_ptr+4 <= buff_end) { if (*((wxUint16*)buff_ptr) == 0) ++bad_utf16_count; if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_BE(0x0A)) 
++nl_utf16le_count; if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_LE(0x0A)) ++nl_utf16be_count; } ++buff_ptr; } if (bad_utf_count == 0) enc = wxFONTENCODING_UTF8; else if (bad_utf32_count == 0 && nl_utf32le_count > size / 400) enc = wxFONTENCODING_UTF32LE; else if (bad_utf32_count == 0 && nl_utf32be_count > size / 400) enc = wxFONTENCODING_UTF32BE; else if (bad_utf16_count == 0 && nl_utf16le_count > size / 200) enc = wxFONTENCODING_UTF16LE; else if (bad_utf16_count == 0 && nl_utf16be_count > size / 200) enc = wxFONTENCODING_UTF16BE; else if (null_byte_count) return false; // Maybe this is a binary file? } if (enc != wxFONTENCODING_DEFAULT) { m_Encoding = enc; // Success. return true; } // If we can't detect encoding and it does not contain null bytes // just ignore it and try backup-procedures (Mozilla) later... return false; }
wxDataInputStream::wxDataInputStream(wxInputStream& s, const wxMBConv& conv) : m_input(&s), m_be_order(false), m_conv(conv.Clone()) #else wxDataInputStream::wxDataInputStream(wxInputStream& s) : m_input(&s), m_be_order(false) #endif { } wxDataInputStream::~wxDataInputStream() { #if wxUSE_UNICODE delete m_conv; #endif // wxUSE_UNICODE } #if wxHAS_INT64 wxUint64 wxDataInputStream::Read64() { wxUint64 tmp; Read64(&tmp, 1); return tmp; } #endif // wxHAS_INT64 wxUint32 wxDataInputStream::Read32() { wxUint32 i32; m_input->Read(&i32, 4); if (m_be_order) return wxUINT32_SWAP_ON_LE(i32); else return wxUINT32_SWAP_ON_BE(i32); } wxUint16 wxDataInputStream::Read16() { wxUint16 i16; m_input->Read(&i16, 2); if (m_be_order) return wxUINT16_SWAP_ON_LE(i16); else return wxUINT16_SWAP_ON_BE(i16); } wxUint8 wxDataInputStream::Read8() { wxUint8 buf; m_input->Read(&buf, 1); return (wxUint8)buf; } double wxDataInputStream::ReadDouble() { #if wxUSE_APPLE_IEEE char buf[10]; m_input->Read(buf, 10); return ConvertFromIeeeExtended((const wxInt8 *)buf); #else return 0.0; #endif }
/* Converts a block of raw PCM bytes into normalised float samples.
 *
 * Parameters:
 *   bits16    - true: samples are 16 bits wide; false: 8 bits wide
 *   sign      - true: samples are signed; false: unsigned (re-centred by -1.0)
 *   stereo    - true: samples are interleaved; they are de-interleaved so
 *               that even-indexed samples end up in data1 and odd-indexed
 *               samples in data2
 *   bigendian - byte order of 16-bit samples (ignored for 8-bit data)
 *   offset    - for 16-bit data only: skip leading bytes before decoding
 *               (3 bytes for little-endian stereo, 1 byte otherwise)
 *   rawData   - input bytes
 *   dataSize  - number of bytes available at rawData
 *   data1     - output; must have room for every decoded sample, because
 *               all samples are written here before stereo de-interleaving
 *   data2     - output for the second channel; only written when stereo
 *   len1,len2 - receive the number of samples stored in data1 / data2
 *               (len2 is 0 for mono input)
 *
 * Signed samples are scaled by 1/32768 (16-bit) or 1/128 (8-bit); unsigned
 * samples use the same scale and are then shifted down by 1.0.
 */
void Extract(bool bits16, bool sign, bool stereo, bool bigendian, bool offset,
             char *rawData, int dataSize,
             float *data1, float *data2, int *len1, int *len2)
{
   int rawCount = 0;
   int dataCount1 = 0;
   int dataCount2 = 0;
   int i;

   *len1 = 0;
   *len2 = 0;

   if (offset && bits16) {
      /* Special case so as to not flip stereo channels during analysis */
      if (stereo && !bigendian) {
         rawData += 3;
         dataSize -= 3;
      }
      else {
         rawData++;
         dataSize--;
      }
   }

   if (bits16) {
      /* Each sample is assembled from its two bytes instead of being read
       * through a casted short pointer: the offset handling above moves
       * rawData to an odd address, where a 16-bit load is misaligned, and
       * the cast would break aliasing rules anyway. Building the value from
       * bytes also makes the result independent of the host byte order. */
      const unsigned char *bytes = reinterpret_cast<const unsigned char *>(rawData);
      const int hiIndex = bigendian ? 0 : 1;
      const int loIndex = 1 - hiIndex;

      while (rawCount + 1 < dataSize) {
         const unsigned int word =
            (static_cast<unsigned int>(bytes[rawCount + hiIndex]) << 8) |
             static_cast<unsigned int>(bytes[rawCount + loIndex]);

         if (sign) {
            /* Two's complement: values with the top bit set are negative */
            const int value = (word >= 0x8000u)
               ? static_cast<int>(word) - 0x10000
               : static_cast<int>(word);
            data1[dataCount1] = static_cast<float>(value / 32768.0);
         }
         else {
            data1[dataCount1] = static_cast<float>(word / 32768.0 - 1.0);
         }

         dataCount1++;
         rawCount += 2;
      }
   }
   else {
      /* 8-bit */
      if (sign) {
         while (rawCount < dataSize) {
            /* 8-bit signed */
            data1[dataCount1++] =
               (*(signed char *) (&rawData[rawCount++])) / 128.0;
         }
      }
      else {
         while (rawCount < dataSize) {
            /* 8-bit unsigned */
            data1[dataCount1++] =
               (*(unsigned char *) &rawData[rawCount++]) / 128.0 - 1.0;
         }
      }
   }

   if (stereo) {
      /* De-interleave in place: reading index 2*i is never behind the
       * write index i, so no sample is overwritten before it is used. */
      dataCount1 /= 2;
      for(i=0; i<dataCount1; i++) {
         data2[i] = data1[2*i+1];
         data1[i] = data1[2*i];
      }
      dataCount2 = dataCount1;
   }

   *len1 = dataCount1;
   *len2 = dataCount2;
}
bool DetectTextEncoding(const char* buffer, size_t len, wxFontEncoding& encoding, unsigned int& BOM_len) { wxASSERT(buffer); if (!buffer || len == 0) return false; const char* buff_ptr = buffer; const char* buff_end = &buffer[len]; wxFontEncoding enc = wxFONTENCODING_DEFAULT; // Check if the buffer starts with a BOM (Byte Order Marker) if (len >= 2) { if (len >= 4 && memcmp(buffer, "\xFF\xFE\x00\x00", 4) == 0) {enc = wxFONTENCODING_UTF32LE; BOM_len = 4;} else if (len >= 4 && memcmp(buffer, "\x00\x00\xFE\xFF", 4) == 0) {enc = wxFONTENCODING_UTF32BE; BOM_len = 4;} else if (memcmp(buffer, "\xFF\xFE", 2) == 0) {enc = wxFONTENCODING_UTF16LE; BOM_len = 2;} else if (memcmp(buffer, "\xFE\xFF", 2) == 0) {enc = wxFONTENCODING_UTF16BE; BOM_len = 2;} else if (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) {enc = wxFONTENCODING_UTF8; BOM_len = 3;} else if (len >= 5 && memcmp(buffer, "\x2B\x2F\x76\x38\x2D", 5) == 0) {enc = wxFONTENCODING_UTF7; BOM_len = 5;} buff_ptr += BOM_len; } // If the file starts with a leading < (less) sign, it is probably an XML file // and we can determine the encoding by how the sign is encoded. 
if (enc == wxFONTENCODING_DEFAULT && len >= 2) { if (len >= 4 && memcmp(buffer, "\x3C\x00\x00\x00", 4) == 0) enc = wxFONTENCODING_UTF32LE; else if (len >= 4 && memcmp(buffer, "\x00\x00\x00\x3C", 4) == 0) enc = wxFONTENCODING_UTF32BE; else if (memcmp(buffer, "\x3C\x00", 2) == 0) enc = wxFONTENCODING_UTF16LE; else if (memcmp(buffer, "\x00\x3C", 2) == 0) enc = wxFONTENCODING_UTF16BE; } // Unicode Detection if (enc == wxFONTENCODING_DEFAULT) { unsigned int null_byte_count = 0; unsigned int utf_bytes = 0; unsigned int good_utf_count = 0; unsigned int bad_utf_count = 0; unsigned int bad_utf32_count = 0; unsigned int bad_utf16_count = 0; unsigned int nl_utf32le_count = 0; unsigned int nl_utf32be_count = 0; unsigned int nl_utf16le_count = 0; unsigned int nl_utf16be_count = 0; while (buff_ptr != buff_end) { if (*buff_ptr == 0) ++null_byte_count; // Detect UTF-8 by scanning for invalid sequences if (utf_bytes == 0) { if ((*buff_ptr & 0xC0) == 0x80 || *buff_ptr == 0) ++bad_utf_count; else { utf_bytes = utf8_len(*buff_ptr) - 1; if (utf_bytes > 3) { ++bad_utf_count; utf_bytes = 0; } } } else if ((*buff_ptr & 0xC0) == 0x80) { --utf_bytes; if (utf_bytes == 0) ++good_utf_count; } else { ++bad_utf_count; utf_bytes = 0; } // Detect UTF-32 by scanning for newlines (and lack of null chars) if ((uintptr_t)buff_ptr % 4 == 0 && buff_ptr+4 <= buff_end) { if (*((wxUint32*)buff_ptr) == 0) ++bad_utf32_count; if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_BE(0x0A)) ++nl_utf32le_count; if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_LE(0x0A)) ++nl_utf32be_count; } // Detect UTF-16 by scanning for newlines (and lack of null chars) if ((uintptr_t)buff_ptr % 2 == 0 && buff_ptr+4 <= buff_end) { if (*((wxUint16*)buff_ptr) == 0) ++bad_utf16_count; if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_BE(0x0A)) ++nl_utf16le_count; if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_LE(0x0A)) ++nl_utf16be_count; } ++buff_ptr; } if (bad_utf_count == 0) enc = wxFONTENCODING_UTF8; else if (bad_utf32_count == 0 && 
nl_utf32le_count > len / 400) enc = wxFONTENCODING_UTF32LE; else if (bad_utf32_count == 0 && nl_utf32be_count > len / 400) enc = wxFONTENCODING_UTF32BE; else if (bad_utf16_count == 0 && nl_utf16le_count > len / 200) enc = wxFONTENCODING_UTF16LE; else if (bad_utf16_count == 0 && nl_utf16be_count > len / 200) enc = wxFONTENCODING_UTF16BE; else if (null_byte_count) return false; // Maybe this is a binary file? } // If we can't detect encoding and it does not contain null bytes just set it to the default encoding. if (enc == wxFONTENCODING_DEFAULT) enc = wxFONTENCODING_SYSTEM; encoding = enc; return true; }
// Stores a 16-bit unsigned value in the output stream.
//
// Nothing is written when CanStore() reports false. Otherwise the value is
// passed through wxUINT16_SWAP_ON_LE and its sizeof(wxUint16) bytes are
// written out.
void wxArchive::SaveUint16(wxUint16 value)
{
    if (!CanStore())
        return;

    wxUint16 swapped = wxUINT16_SWAP_ON_LE(value);
    m_odstr.Write(&swapped, sizeof(swapped));
}