Ejemplo n.º 1
0
// Reads one unsigned 16-bit value (2 bytes) from the underlying input stream.
//
// When m_be_order is set the bytes are swapped on little-endian hosts
// (wxUINT16_SWAP_ON_LE); otherwise they are swapped on big-endian hosts
// (wxUINT16_SWAP_ON_BE).
//
// Returns the converted value. The result of Read() is not checked here.
wxUint16 wxDataInputStream::Read16()
{
    // Start from zero so that a Read() which stores fewer than 2 bytes never
    // makes this function return an indeterminate value.
    wxUint16 i16 = 0;

    m_input->Read(&i16, 2);

    if (m_be_order)
        return wxUINT16_SWAP_ON_LE(i16);
    else
        return wxUINT16_SWAP_ON_BE(i16);
}
Ejemplo n.º 2
0
// Loads an unsigned 16-bit value from the input stream.
//
// Returns 0 without touching the stream when CanLoad() reports false.
// Otherwise sizeof(wxUint16) bytes are read and the result is passed through
// wxUINT16_SWAP_ON_LE (byte swap on little-endian hosts) before returning.
wxUint16 wxArchive::LoadUint16()
{
	// Guard: nothing may be read right now
	if(!CanLoad())
		return 0;

	// Zero first, so a short read still yields a defined value
	wxUint16 raw = 0;
	void *dest = (void *)&raw;
	m_idstr.Read(dest, sizeof(wxUint16));

	return wxUINT16_SWAP_ON_LE(raw);
}
Ejemplo n.º 3
0
// Stolen from  https://github.com/etexteditor/e/blob/master/src/Strings.cpp
//        and:  https://github.com/etexteditor/e/blob/master/src/Utf.cpp
// Copyright (c) 2009, Alexander Stigsen, e-texteditor.com (All rights reserved)
// http://www.e-texteditor.com/
bool EncodingDetector::DetectEncodingEx(const wxByte* buffer, size_t size)
{
    if (!buffer || size == 0) return false;

    const wxByte*  buff_ptr = buffer;
    const wxByte*  buff_end = &buffer[size];
    wxFontEncoding enc      = wxFONTENCODING_DEFAULT;

    // Check if the buffer starts with a BOM (Byte Order Marker)
    if (size >= 2)
    {
        if      (size >= 4 && memcmp(buffer, "\xFF\xFE\x00\x00", 4) == 0)
        {
            enc = wxFONTENCODING_UTF32LE;
            m_BOMSizeInBytes = 4;
            m_UseBOM = true;
        }
        else if (size >= 4 && memcmp(buffer, "\xFE\xFF\x00\x00", 4) == 0)
        {
            // FE FF 00 00  UCS-4, unusual octet order BOM (3412)
            // X-ISO-10646-UCS-4-3412 can not (yet) be handled by wxWidgets
            enc = (wxFontEncoding)-1;
        }
        else if (size >= 4 && memcmp(buffer, "\x00\x00\xFE\xFF", 4) == 0)
        {
            enc = wxFONTENCODING_UTF32BE;
            m_BOMSizeInBytes = 4;
            m_UseBOM = true;
        }
        else if (size >= 4 && memcmp(buffer, "\x00\x00\xFF\xFE", 4) == 0)
        {
            // 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
            // X-ISO-10646-UCS-4-2143 can not (yet) be handled by wxWidgets
            enc = (wxFontEncoding)-1;
        }
        else if (             memcmp(buffer, "\xFF\xFE", 2) == 0)
        {
            enc = wxFONTENCODING_UTF16LE;
            m_BOMSizeInBytes = 2;
            m_UseBOM = true;
        }
        else if (             memcmp(buffer, "\xFE\xFF", 2) == 0)
        {
            enc = wxFONTENCODING_UTF16BE;
            m_BOMSizeInBytes = 2;
            m_UseBOM = true;
        }
        else if (size >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0)
        {
            enc = wxFONTENCODING_UTF8;
            m_BOMSizeInBytes = 3;
            m_UseBOM = true;
        }
        else if (size >= 5 && memcmp(buffer, "\x2B\x2F\x76\x38\x2D", 5) == 0)
        {
            enc = wxFONTENCODING_UTF7;
            m_BOMSizeInBytes = 5;
            m_UseBOM = true;
        }

        buff_ptr += m_BOMSizeInBytes;
    }

    // If the file starts with a leading < (less) sign, it is probably an XML file
    // and we can determine the encoding by how the sign is encoded.
    if (enc == wxFONTENCODING_DEFAULT && size >= 2)
    {
        if      (size >= 4 && memcmp(buffer, "\x3C\x00\x00\x00", 4) == 0) enc = wxFONTENCODING_UTF32LE;
        else if (size >= 4 && memcmp(buffer, "\x00\x00\x00\x3C", 4) == 0) enc = wxFONTENCODING_UTF32BE;
        else if (             memcmp(buffer, "\x3C\x00",         2) == 0) enc = wxFONTENCODING_UTF16LE;
        else if (             memcmp(buffer, "\x00\x3C",         2) == 0) enc = wxFONTENCODING_UTF16BE;
    }

    // Unicode Detection
    if (enc == wxFONTENCODING_DEFAULT)
    {
        unsigned int null_byte_count  = 0;
        unsigned int utf_bytes        = 0;
        unsigned int good_utf_count   = 0;
        unsigned int bad_utf_count    = 0;
        unsigned int bad_utf32_count  = 0;
        unsigned int bad_utf16_count  = 0;
        unsigned int nl_utf32le_count = 0;
        unsigned int nl_utf32be_count = 0;
        unsigned int nl_utf16le_count = 0;
        unsigned int nl_utf16be_count = 0;

        while (buff_ptr != buff_end)
        {
            if (*buff_ptr == 0) ++null_byte_count;

            // Detect UTF-8 by scanning for invalid sequences
            if (utf_bytes == 0)
            {
                if ((*buff_ptr & 0xC0) == 0x80 || *buff_ptr == 0)
                    ++bad_utf_count;
                else
                {
                    const char c = *buff_ptr;
                    utf_bytes = 5; // invalid length
                    if      ((c & 0x80) == 0x00) utf_bytes = 1;
                    else if ((c & 0xE0) == 0xC0) utf_bytes = 2;
                    else if ((c & 0xF0) == 0xE0) utf_bytes = 3;
                    else if ((c & 0xF8) == 0xF0) utf_bytes = 4;
                    if (utf_bytes > 3)
                    {
                        ++bad_utf_count;
                        utf_bytes = 0;
                    }
                }
            }
            else if ((*buff_ptr & 0xC0) == 0x80)
            {
                --utf_bytes;
                if (utf_bytes == 0)
                    ++good_utf_count;
            }
            else
            {
                ++bad_utf_count;
                utf_bytes = 0;
            }

            // Detect UTF-32 by scanning for newlines (and lack of null chars)
            if ((wxUIntPtr)buff_ptr % 4 == 0 && buff_ptr+4 <= buff_end)
            {
                if (*((wxUint32*)buff_ptr) == 0                        ) ++bad_utf32_count;
                if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_BE(0x0A)) ++nl_utf32le_count;
                if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_LE(0x0A)) ++nl_utf32be_count;
            }

            // Detect UTF-16 by scanning for newlines (and lack of null chars)
            if ((wxUIntPtr)buff_ptr % 2 == 0 && buff_ptr+4 <= buff_end)
            {
                if (*((wxUint16*)buff_ptr) == 0)                         ++bad_utf16_count;
                if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_BE(0x0A)) ++nl_utf16le_count;
                if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_LE(0x0A)) ++nl_utf16be_count;
            }

            ++buff_ptr;
        }

        if      (bad_utf_count   == 0)                                  enc = wxFONTENCODING_UTF8;
        else if (bad_utf32_count == 0 && nl_utf32le_count > size / 400) enc = wxFONTENCODING_UTF32LE;
        else if (bad_utf32_count == 0 && nl_utf32be_count > size / 400) enc = wxFONTENCODING_UTF32BE;
        else if (bad_utf16_count == 0 && nl_utf16le_count > size / 200) enc = wxFONTENCODING_UTF16LE;
        else if (bad_utf16_count == 0 && nl_utf16be_count > size / 200) enc = wxFONTENCODING_UTF16BE;
        else if (null_byte_count)
            return false; // Maybe this is a binary file?
    }

    if (enc != wxFONTENCODING_DEFAULT)
    {
        m_Encoding = enc; // Success.
        return true;
    }

    // If we can't detect encoding and it does not contain null bytes
    // just ignore it and try backup-procedures (Mozilla) later...
    return false;
}
Ejemplo n.º 4
0
// Constructors. Both variants store the address of the given input stream in
// m_input and start with m_be_order == false. The first variant additionally
// stores a clone of the supplied converter in m_conv.
// NOTE(review): the #if that pairs with the #else/#endif below lies outside
// this excerpt; presumably it is wxUSE_UNICODE, which also guards the
// deletion of m_conv in the destructor - confirm.
wxDataInputStream::wxDataInputStream(wxInputStream& s, const wxMBConv& conv)
  : m_input(&s), m_be_order(false), m_conv(conv.Clone())
#else
wxDataInputStream::wxDataInputStream(wxInputStream& s)
  : m_input(&s), m_be_order(false)
#endif
{
}

// Destructor. In builds with wxUSE_UNICODE the converter held in m_conv is
// deleted here; m_input is not released by this function.
wxDataInputStream::~wxDataInputStream()
{
#if wxUSE_UNICODE
    delete m_conv;
#endif // wxUSE_UNICODE
}

#if wxHAS_INT64
// Reads a single unsigned 64-bit value by delegating to the array overload
// Read64(wxUint64*, size_t) with a count of 1, and returns it.
wxUint64 wxDataInputStream::Read64()
{
  // Zero-initialised so the return value is defined even if the array
  // overload does not store anything into it.
  wxUint64 tmp = 0;
  Read64(&tmp, 1);
  return tmp;
}
#endif // wxHAS_INT64

// Reads one unsigned 32-bit value (4 bytes) from the underlying input stream.
//
// When m_be_order is set the bytes are swapped on little-endian hosts
// (wxUINT32_SWAP_ON_LE); otherwise they are swapped on big-endian hosts
// (wxUINT32_SWAP_ON_BE). The result of Read() is not checked here.
wxUint32 wxDataInputStream::Read32()
{
  // Start from zero so that a Read() which stores fewer than 4 bytes never
  // makes this function return an indeterminate value.
  wxUint32 i32 = 0;

  m_input->Read(&i32, 4);

  if (m_be_order)
    return wxUINT32_SWAP_ON_LE(i32);
  else
    return wxUINT32_SWAP_ON_BE(i32);
}

// Reads one unsigned 16-bit value (2 bytes) from the underlying input stream.
//
// When m_be_order is set the bytes are swapped on little-endian hosts
// (wxUINT16_SWAP_ON_LE); otherwise they are swapped on big-endian hosts
// (wxUINT16_SWAP_ON_BE). The result of Read() is not checked here.
wxUint16 wxDataInputStream::Read16()
{
  // Start from zero so that a Read() which stores fewer than 2 bytes never
  // makes this function return an indeterminate value.
  wxUint16 i16 = 0;

  m_input->Read(&i16, 2);

  if (m_be_order)
    return wxUINT16_SWAP_ON_LE(i16);
  else
    return wxUINT16_SWAP_ON_BE(i16);
}

// Reads one unsigned byte from the underlying input stream and returns it.
// No byte-order handling is needed for a single byte. The result of Read() is
// not checked here.
wxUint8 wxDataInputStream::Read8()
{
  // Zero-initialised so the return value is defined even when Read() stores
  // nothing into the buffer.
  wxUint8 buf = 0;

  m_input->Read(&buf, 1);
  return buf;
}

// Reads a floating point value from the underlying input stream.
//
// With wxUSE_APPLE_IEEE enabled, 10 bytes are read and converted with
// ConvertFromIeeeExtended(). Without it nothing is read from the stream and
// 0.0 is returned.
double wxDataInputStream::ReadDouble()
{
#if wxUSE_APPLE_IEEE
  // Zero-filled so that the converter never sees indeterminate bytes when
  // Read() stores fewer than 10 bytes.
  char buf[10] = { 0 };

  m_input->Read(buf, 10);
  return ConvertFromIeeeExtended((const wxInt8 *)buf);
#else
  return 0.0;
#endif
}
Ejemplo n.º 5
0
/* Converts a block of raw PCM bytes into floating point samples.
 *
 * Parameters:
 *   bits16    - true: samples are 16 bits wide; false: 8 bits wide.
 *   sign      - true: samples are signed; false: unsigned (re-centred by
 *               subtracting 1.0 after scaling).
 *   stereo    - true: samples are interleaved pairs; data1 receives the
 *               even-indexed samples and data2 the odd-indexed ones.
 *   bigendian - byte order of 16-bit samples (ignored for 8-bit data).
 *   offset    - for 16-bit data only: skip leading bytes before decoding
 *               (3 bytes for little-endian stereo, otherwise 1 byte).
 *   rawData   - input bytes.
 *   dataSize  - number of bytes available in rawData.
 *   data1     - output buffer; must have room for every decoded sample
 *               because it is also used as scratch space before the stereo
 *               split.
 *   data2     - output buffer for the second channel; only written when
 *               stereo is true.
 *   len1,len2 - receive the number of samples stored in data1 and data2
 *               (len2 is 0 for mono input).
 *
 * 16-bit samples are assembled from individual bytes, so the result does not
 * depend on the host byte order and no misaligned 16-bit load is performed,
 * even after the odd-sized offset skip. A trailing odd byte of 16-bit data
 * and a trailing unpaired sample of stereo data are ignored.
 */
void Extract(bool bits16,
             bool sign,
             bool stereo,
             bool bigendian,
             bool offset,
             char *rawData, int dataSize,
             float *data1, float *data2, int *len1, int *len2)
{
   int rawCount = 0;
   int dataCount1 = 0;
   int dataCount2 = 0;
   int i;

   *len1 = 0;
   *len2 = 0;

   if (offset && bits16) {
      /* Special case so as to not flip stereo channels during analysis */
      if (stereo && !bigendian) {
         rawData += 3;
         dataSize -= 3;
      }
      else {
         rawData++;
         dataSize--;
      }
   }

   if (bits16) {
      while (rawCount + 1 < dataSize) {
         /* Build the 16-bit word byte by byte in the requested order */
         const unsigned int first  = (unsigned char) rawData[rawCount];
         const unsigned int second = (unsigned char) rawData[rawCount + 1];
         const unsigned int word   = bigendian ? ((first << 8) | second)
                                               : ((second << 8) | first);

         if (sign) {
            /* Two's complement: values with the top bit set are negative */
            const int value = (word >= 0x8000u) ? (int) word - 0x10000
                                                : (int) word;
            data1[dataCount1] = value / 32768.0;
         }
         else {
            data1[dataCount1] = word / 32768.0 - 1.0;
         }

         dataCount1++;
         rawCount += 2;
      }
   }
   else {
      /* 8-bit */
      if (sign) {
         while (rawCount < dataSize) {
            /* 8-bit signed */
            data1[dataCount1++] =
               (*(signed char *) (&rawData[rawCount++])) / 128.0;
         }
      }
      else {
         while (rawCount < dataSize) {
            /* 8-bit unsigned */
            data1[dataCount1++] =
               (*(unsigned char *) &rawData[rawCount++]) / 128.0 - 1.0;
         }
      }
   }

   if (stereo) {
      /* De-interleave in place: data1[i] is overwritten only after both
         samples of pair i have been read (2*i >= i for all i >= 0). */
      dataCount1 /= 2;
      for(i=0; i<dataCount1; i++) {
         data2[i] = data1[2*i+1];
         data1[i] = data1[2*i];
      }
      dataCount2 = dataCount1;
   }

   *len1 = dataCount1;
   *len2 = dataCount2;
}
Ejemplo n.º 6
0
bool DetectTextEncoding(const char* buffer, size_t len, wxFontEncoding& encoding, unsigned int& BOM_len) {
	wxASSERT(buffer);
	if (!buffer || len == 0) return false;

	const char* buff_ptr = buffer;
	const char* buff_end = &buffer[len];
	wxFontEncoding enc = wxFONTENCODING_DEFAULT;

	// Check if the buffer starts with a BOM (Byte Order Marker)
	if (len >= 2) {
		if (len >= 4 && memcmp(buffer, "\xFF\xFE\x00\x00", 4) == 0) {enc = wxFONTENCODING_UTF32LE; BOM_len = 4;}
		else if (len >= 4 && memcmp(buffer, "\x00\x00\xFE\xFF", 4) == 0) {enc = wxFONTENCODING_UTF32BE; BOM_len = 4;}
		else if (memcmp(buffer, "\xFF\xFE", 2) == 0) {enc = wxFONTENCODING_UTF16LE; BOM_len = 2;}
		else if (memcmp(buffer, "\xFE\xFF", 2) == 0) {enc = wxFONTENCODING_UTF16BE; BOM_len = 2;}
		else if (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) {enc = wxFONTENCODING_UTF8; BOM_len = 3;}
		else if (len >= 5 && memcmp(buffer, "\x2B\x2F\x76\x38\x2D", 5) == 0) {enc = wxFONTENCODING_UTF7; BOM_len = 5;}

		buff_ptr += BOM_len;
	}

	// If the file starts with a leading < (less) sign, it is probably an XML file
	// and we can determine the encoding by how the sign is encoded.
	if (enc == wxFONTENCODING_DEFAULT && len >= 2) {
		if (len >= 4 && memcmp(buffer, "\x3C\x00\x00\x00", 4) == 0) enc = wxFONTENCODING_UTF32LE;
		else if (len >= 4 && memcmp(buffer, "\x00\x00\x00\x3C", 4) == 0) enc = wxFONTENCODING_UTF32BE;
		else if (memcmp(buffer, "\x3C\x00", 2) == 0) enc = wxFONTENCODING_UTF16LE;
		else if (memcmp(buffer, "\x00\x3C", 2) == 0) enc = wxFONTENCODING_UTF16BE;
	}

	// Unicode Detection
	if (enc == wxFONTENCODING_DEFAULT) {
		unsigned int null_byte_count = 0;
		unsigned int utf_bytes = 0;
		unsigned int good_utf_count = 0;
		unsigned int bad_utf_count = 0;
		unsigned int bad_utf32_count = 0;
		unsigned int bad_utf16_count = 0;
		unsigned int nl_utf32le_count = 0;
		unsigned int nl_utf32be_count = 0;
		unsigned int nl_utf16le_count = 0;
		unsigned int nl_utf16be_count = 0;

		while (buff_ptr != buff_end) {
			if (*buff_ptr == 0) ++null_byte_count;

			// Detect UTF-8 by scanning for invalid sequences
			if (utf_bytes == 0) {
				if ((*buff_ptr & 0xC0) == 0x80 || *buff_ptr == 0) ++bad_utf_count;
				else {
					utf_bytes = utf8_len(*buff_ptr) - 1;
					if (utf_bytes > 3) {
						++bad_utf_count;
						utf_bytes = 0;
					}
				}
			}
			else if ((*buff_ptr & 0xC0) == 0x80) {
				--utf_bytes;
				if (utf_bytes == 0) ++good_utf_count;
			}
			else {
				++bad_utf_count;
				utf_bytes = 0;
			}

			// Detect UTF-32 by scanning for newlines (and lack of null chars)
			if ((uintptr_t)buff_ptr % 4 == 0 && buff_ptr+4 <= buff_end) {
				if (*((wxUint32*)buff_ptr) == 0) ++bad_utf32_count;
				if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_BE(0x0A)) ++nl_utf32le_count;
				if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_LE(0x0A)) ++nl_utf32be_count;
			}

			// Detect UTF-16 by scanning for newlines (and lack of null chars)
			if ((uintptr_t)buff_ptr % 2 == 0  && buff_ptr+4 <= buff_end) {
				if (*((wxUint16*)buff_ptr) == 0) ++bad_utf16_count;
				if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_BE(0x0A)) ++nl_utf16le_count;
				if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_LE(0x0A)) ++nl_utf16be_count;
			}

			++buff_ptr;
		}

		if (bad_utf_count == 0) enc = wxFONTENCODING_UTF8;
		else if (bad_utf32_count == 0 && nl_utf32le_count > len / 400) enc = wxFONTENCODING_UTF32LE;
		else if (bad_utf32_count == 0 && nl_utf32be_count > len / 400) enc = wxFONTENCODING_UTF32BE;
		else if (bad_utf16_count == 0 && nl_utf16le_count > len / 200) enc = wxFONTENCODING_UTF16LE;
		else if (bad_utf16_count == 0 && nl_utf16be_count > len / 200) enc = wxFONTENCODING_UTF16BE;
		else if (null_byte_count) return false; // Maybe this is a binary file?
	}

	// If we can't detect encoding and it does not contain null bytes just set it to the default encoding.
	if (enc == wxFONTENCODING_DEFAULT)
		enc = wxFONTENCODING_SYSTEM;

	encoding = enc;
	return true;
}
Ejemplo n.º 7
0
// Stores an unsigned 16-bit value in the output stream.
//
// The value is passed through wxUINT16_SWAP_ON_LE (byte swap on little-endian
// hosts) and sizeof(wxUint16) bytes are written. Nothing is written when
// CanStore() reports false.
void wxArchive::SaveUint16(wxUint16 value)
{
	// Guard: storing is not possible right now
	if(!CanStore())
		return;

	wxUint16 swapped = wxUINT16_SWAP_ON_LE(value);
	m_odstr.Write(&swapped, sizeof(wxUint16));
}