// Reads a string stored as two-byte units from the reader.
//
// Reading stops at the first all-null pair (as judged by isNull()), at the
// first pair that readTwoChars() fails to deliver, or when the reader is
// exhausted.  The terminating pair is consumed but never stored.
//
// The leading pair is passed to isBOM(): a non-zero result means the pair is
// a byte order mark and is dropped; a zero result means it is ordinary text
// and is kept as read.  When isBOM() returned -1 (presumably the
// little-endian mark -- confirm against isBOM()), every following pair is
// stored with its two bytes swapped; otherwise pairs are stored as read.
String io::readUnicodeString(ID3_Reader& reader)
{
  String result;
  ID3_Reader::char_type first, second;

  // No readable leading pair, or an immediate terminator: empty string.
  const bool gotLeadingPair = readTwoChars(reader, first, second);
  if (!gotLeadingPair || isNull(first, second))
  {
    return result;
  }

  const int byteOrder = isBOM(first, second);
  const bool swapPairs = (byteOrder == -1);
  if (byteOrder == 0)
  {
    // Not a byte order mark, so the leading pair is real text.
    result += static_cast<char>(first);
    result += static_cast<char>(second);
  }

  // Short-circuit evaluation keeps the original ordering: check for end of
  // input, then read a pair, then test that pair for the terminator.
  while (!reader.atEnd() &&
         readTwoChars(reader, first, second) &&
         !isNull(first, second))
  {
    result += static_cast<char>(swapPairs ? second : first);
    result += static_cast<char>(swapPairs ? first : second);
  }
  return result;
}
// Reads up to `len` bytes from the reader and assembles them into an unsigned
// number, most significant byte first.
//
// Reading stops early if the reader runs out of data; the bytes read so far
// are still returned as the value.  If more bytes are read than fit in a
// uint32, the earliest (most significant) bytes are shifted out.
uint32 io::readBENumber(ID3_Reader& reader, size_t len)
{
  uint32 result = 0;
  ID3_Reader::size_type consumed = 0;

  while (consumed < len && !reader.atEnd())
  {
    // Mask to a single octet so a signed char_type cannot sign-extend.
    const uint32 octet = static_cast<uint32>(0xFF & reader.readChar());
    // Make room for the new low-order byte, then drop it in.
    result = (result << 8) | octet;
    ++consumed;
  }
  return result;
}
// Reads up to `len` bytes from the reader and assembles them into an unsigned
// number, least significant byte first.
//
// Reading stops early if the reader runs out of data; the bytes read so far
// are still returned as the value.
//
// When `len` exceeds sizeof(uint32) the surplus bytes are still consumed from
// the reader (so the read position advances by the same amount as before),
// but they are not folded into the result: they would represent bits above
// the width of uint32.  This mirrors the modulo-2^32 truncation performed by
// readBENumber() and avoids shifting by a count >= the operand width, which
// is undefined behaviour in C++.
uint32 io::readLENumber(ID3_Reader& reader, size_t len)
{
  uint32 val = 0;
  for (size_t i = 0; i < len; i++)
  {
    if (reader.atEnd())
    {
      break;
    }
    // Mask to a single octet so a signed char_type cannot sign-extend.
    const uint32 byte = static_cast<uint32>(0xFF & reader.readChar());
    if (i < sizeof(uint32))
    {
      // Byte i occupies bits [8*i, 8*i + 7] of the result.
      val += (byte << (i * 8));
    }
  }
  return val;
}
// Reads characters from the reader until a null character is read or the
// reader is exhausted.  The terminating null is consumed but is not part of
// the returned string.
String io::readString(ID3_Reader& reader)
{
  String result;
  bool terminated = false;

  while (!terminated && !reader.atEnd())
  {
    const ID3_Reader::char_type next = reader.readChar();
    if (next == '\0')
    {
      terminated = true;
    }
    else
    {
      result += static_cast<char>(next);
    }
  }
  return result;
}
// Reads up to `len` characters from the reader and returns them as a string.
//
// Data is pulled through a fixed 1024-element stack buffer, so arbitrarily
// long requests do not need a matching temporary allocation.  Fewer than
// `len` characters are returned if the reader reaches its end first.
String io::readText(ID3_Reader& reader, size_t len)
{
  const size_t CHUNK = 1024;
  ID3_Reader::char_type chunk[CHUNK];

  String text;
  text.reserve(len);

  for (size_t left = len; left > 0 && !reader.atEnd(); )
  {
    // Never ask for more than the buffer holds or the caller still wants.
    const size_t got = reader.readChars(chunk, min(left, CHUNK));
    text.append(reinterpret_cast<String::value_type *>(chunk), got);
    left -= got;
  }
  return text;
}
// Parses this field's text content from the reader.
//
// The field is cleared first, then one of four strategies is chosen:
//   1. Size() is non-zero     -> read exactly that many units and SetText().
//   2. ID3FF_LIST flag is set -> read encoded strings until the reader is
//                                exhausted, AddText()-ing each one.
//   3. ID3FF_CSTR flag is set -> read a single encoded string and SetText().
//   4. otherwise              -> read everything that remains in the reader
//                                and AddText() it.
// All reads use the field's current encoding.  On return the field is marked
// as unchanged and the function always reports success.
bool ID3_FieldImpl::ParseText(ID3_Reader& reader)
{
  ID3D_NOTICE( "ID3_Field::ParseText(): reader.getBeg() = " << reader.getBeg() );
  ID3D_NOTICE( "ID3_Field::ParseText(): reader.getCur() = " << reader.getCur() );
  ID3D_NOTICE( "ID3_Field::ParseText(): reader.getEnd() = " << reader.getEnd() );
  this->Clear();

  ID3_TextEnc encoding = this->GetEncoding();
  const size_t fixedLen = this->Size();

  if (fixedLen != 0)
  {
    // Case 1: the field has a predetermined length.
    ID3D_NOTICE( "ID3_Field::ParseText(): fixed size string" );
    String fixedText = readEncodedText(reader, fixedLen, encoding);
    this->SetText(fixedText);
    ID3D_NOTICE( "ID3_Field::ParseText(): fixed size string = " << fixedText );
  }
  else if (_flags & ID3FF_LIST)
  {
    // Case 2: a list is always the final field of its frame, so every
    // remaining character in the reader belongs to it.
    ID3D_NOTICE( "ID3_Field::ParseText(): text list" );
    while (!reader.atEnd())
    {
      String item = readEncodedString(reader, encoding);
      this->AddText(item);
      ID3D_NOTICE( "ID3_Field::ParseText(): adding string = " << item );
    }
  }
  else if (_flags & ID3FF_CSTR)
  {
    // Case 3: a single string read via readEncodedString().
    ID3D_NOTICE( "ID3_Field::ParseText(): null terminated string" );
    String cstrText = readEncodedString(reader, encoding);
    this->SetText(cstrText);
    ID3D_NOTICE( "ID3_Field::ParseText(): null terminated string = " << cstrText );
  }
  else
  {
    // Case 4: no terminator and no fixed size; the text simply runs to the
    // end of the reader.
    ID3D_NOTICE( "ID3_Field::ParseText(): last field string" );
    String tailText = readEncodedText(reader, reader.remainingBytes(), encoding);
    this->AddText(tailText);
    ID3D_NOTICE( "ID3_Field::ParseText(): last field string = " << tailText );
  }

  // Freshly parsed data does not count as a modification.
  _changed = false;
  return true;
}
// Reads up to `len` bytes of raw data from the reader and returns them as a
// binary string.
//
// Data is pulled through a fixed 1024-element stack buffer.  Fewer than `len`
// bytes are returned if the reader reaches its end first.
BString io::readBinary(ID3_Reader& reader, size_t len)
{
  const size_t CHUNK = 1024;
  ID3_Reader::char_type chunk[CHUNK];

  BString data;
  data.reserve(len);

  size_t left = len;
  while (!reader.atEnd() && left > 0)
  {
    // Never ask for more than the buffer holds or the caller still wants.
    const size_t want = min(left, CHUNK);
    const size_t got = reader.readChars(chunk, want);
    data.append(reinterpret_cast<BString::value_type *>(chunk), got);
    left -= got;
  }

  return data;
}
Example #8
0
// Parses this field's integer value from the reader.
//
// Returns false, leaving the field untouched, when the reader has no data
// left.  Otherwise the field is cleared, a big-endian number is read and
// stored via Set(), the field is marked as unchanged, and true is returned.
// The number of bytes read is the field's Size() if that is non-zero, and
// sizeof(uint32) otherwise.
bool ID3_FieldImpl::ParseInteger(ID3_Reader& reader)
{
  ID3D_NOTICE( "ID3_FieldImpl::ParseInteger(): beg = " << reader.getBeg() );
  ID3D_NOTICE( "ID3_FieldImpl::ParseInteger(): cur = " << reader.getCur() );
  ID3D_NOTICE( "ID3_FieldImpl::ParseInteger(): end = " << reader.getEnd() );

  if (reader.atEnd())
  {
    // Nothing to parse.
    return false;
  }

  this->Clear();

  // Fall back to a full uint32 when the field declares no fixed width.
  size_t width = this->Size();
  if (width == 0)
  {
    width = sizeof(uint32);
  }

  this->Set(io::readBENumber(reader, width));
  // Freshly parsed data does not count as a modification.
  _changed = false;
  return true;
}
// Reads a number packed as seven significant bits per byte, most significant
// byte first, from up to sizeof(uint32) bytes of the reader (this looks like
// the ID3v2 "synchsafe" integer layout -- confirm against the tag spec).
//
// The top bit of every byte is ignored.  Reading stops early if the reader
// runs out of data, in which case the bits gathered so far are returned.  The
// result is clamped to the largest value representable in
// 7 * sizeof(uint32) bits.
uint32 io::readUInt28(ID3_Reader& reader)
{
  const unsigned short payloadBits = 7;
  const uint32 ceiling = MASK(payloadBits * sizeof(uint32));

  uint32 packed = 0;
  size_t bytesSeen = 0;
  while (bytesSeen < sizeof(uint32) && !reader.atEnd())
  {
    // Keep only the low seven bits of the byte...
    const uint32 low7 = static_cast<uint32>(reader.readChar()) & MASK(payloadBits);
    // ...and append them below the bits collected so far.
    packed = (packed << payloadBits) | low7;
    ++bytesSeen;
  }

  // Normally all four bytes have been consumed by this point.
  return min(packed, ceiling);
}
// Reads a UTF-16 string stored as two-byte units from the reader.
//
// Reading stops at the first all-null pair (as judged by isNull()), at the
// first pair that readTwoChars() fails to deliver, or when the reader is
// exhausted.  The terminating pair is consumed but never stored.
//
// The leading pair is passed to isBOM().  A non-zero result is taken as the
// byte order of the text and the pair itself is dropped.  A zero result means
// there is no byte order mark: the pair is kept as text and the byte order is
// guessed from it (see the long comment below).  Pairs judged little-endian
// (byte order -1) are stored with their bytes swapped; all others are stored
// as read.
String io::readUnicodeString(ID3_Reader& reader)
{
  String unicode;
  ID3_Reader::char_type ch1, ch2;
  if (!readTwoChars(reader, ch1, ch2) || isNull(ch1, ch2))
  {
    return unicode;
  }
  int bom = isBOM(ch1, ch2);
  // Effective byte order: taken from the BOM when present, guessed otherwise.
  int bo_actual = bom;

  if (!bom)
  {
    ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Unicode has no BOM" );

    // The string is UTF-16 (Unicode) but with no Byte Order Marker (BOM).
    // Even though the Unicode standard says that big-endian should be assumed in the absence of
    // a BOM, it also says that this can be overridden by other concerns.  Some Windows software
    // authors appear to have interpreted this as meaning that Wintel's little-endianism
    // may override the presumption of big-endianism.  Others assume that it does not.
    // Files may, therefore, contain either, and neither big- nor little-endian is a safe
    // assumption.
    // For western alphabets, most characters are represented by a zero as the most significant
    // byte.  A zero as the second byte, therefore, indicates strongly that the string is
    // little-endian.  There are only five cases in which this is not true - where the byte
    // pair is:
    //    00 00 - Null (reversible, "non-endian", terminates string)
    //    01 00 - Latin capital letter A with macron
    //    02 00 - Latin capital letter A with double grave
    //    03 00 - Combining grave accent
    //    04 00 - Cyrillic capital letter IE with grave (U+0400)
    // The corresponding reversed characters are:
    //    00 01 - Start of Heading
    //    00 02 - Start of Text
    //    00 03 - End of Text
    //    00 04 - End of Transmission
    // None of these reversed characters are likely to occur in ID3 strings.
    // We can therefore safely improve on the big-endian assumption for strings without BOM
    // by recognising that if the second byte is zero, and the first byte is greater than 04,
    // then the string must be little-endian.
    // This modification does not address the missing BOM problem completely, because incorrectly
    // non-BOMd little-endian strings using non-western alphabets will still not be detected.
    // However, this method will not cause any "false positives" resulting in big-endian strings
    // being incorrectly reversed.

    // Strictly greater than 4: the pair 04 00 is one of the big-endian
    // exceptions listed above (U+0400) and must not be byte-swapped.
    if ( ( ch1 > 4 ) && ( ch2 == 0) )
      // Probably little-endian
      {
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Second char is zero: Probably little-endian" );
      bo_actual = -1;
      unicode += static_cast<char>(ch2);
      unicode += static_cast<char>(ch1);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Little-endian data read and stored as: " << static_cast<int>(ch2) << " " << static_cast<int>(ch1) );
      }
    else
      // Probably big-endian (this also covers the 01 00 .. 04 00 exceptions)
      {
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Second char is non-zero: Probably big-endian" );
      bo_actual = 1;
      unicode += static_cast<char>(ch1);
      unicode += static_cast<char>(ch2);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Big-endian data read and stored as: " << static_cast<int>(ch1) << " " << static_cast<int>(ch2) );
      }
  }

  while (!reader.atEnd())
  {
    if (!readTwoChars(reader, ch1, ch2) || isNull(ch1, ch2))
    {
      break;
    }
    if (bo_actual == -1)
    {
      // Little-endian source: swap so the stored bytes are most-significant first.
      unicode += static_cast<char>(ch2);
      unicode += static_cast<char>(ch1);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Little-endian data read and stored as: " << static_cast<int>(ch2) << " " << static_cast<int>(ch1) );
    }
    else
    {
      unicode += static_cast<char>(ch1);
      unicode += static_cast<char>(ch2);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Big-endian data read and stored as: " << static_cast<int>(ch1) << " " << static_cast<int>(ch2) );
    }
  }
  return unicode;
}