// Reads a two-byte-per-character string from the reader.
//
// Byte pairs are consumed until the reader is exhausted, a full pair can no
// longer be read, or a null pair is seen.  The first pair is checked with
// isBOM(): when it is not a byte order mark it is kept as the first
// character; when it is a mark it is dropped.  An isBOM() result of -1 makes
// every following pair be stored with its two bytes swapped.
//
// Returns the collected bytes (two per character), or an empty string when
// the very first pair is missing or null.
String io::readUnicodeString(ID3_Reader& reader)
{
  String result;
  ID3_Reader::char_type first, second;

  // Nothing to decode: truncated input or an immediately terminated string.
  if (!readTwoChars(reader, first, second) || isNull(first, second))
  {
    return result;
  }

  const int byteOrder = isBOM(first, second);
  if (byteOrder == 0)
  {
    // No byte order mark, so the first pair is real character data.
    result += static_cast<char>(first);
    result += static_cast<char>(second);
  }

  const bool swapBytes = (byteOrder == -1);
  while (!reader.atEnd() &&
         readTwoChars(reader, first, second) &&
         !isNull(first, second))
  {
    if (swapBytes)
    {
      result += static_cast<char>(second);
      result += static_cast<char>(first);
    }
    else
    {
      result += static_cast<char>(first);
      result += static_cast<char>(second);
    }
  }

  return result;
}
// Reads up to `len` bytes from the reader and assembles them into an unsigned
// number, most significant byte first.
//
// Reading stops early if the reader runs out of data; the bytes read so far
// are still returned.  When more bytes are read than fit in a uint32 the
// oldest (most significant) bytes are shifted out.
uint32 io::readBENumber(ID3_Reader& reader, size_t len)
{
  uint32 result = 0;
  ID3_Reader::size_type count = 0;
  while (count < len && !reader.atEnd())
  {
    const uint32 byte = static_cast<uint32>(0xFF & reader.readChar());
    // Make room for the next byte (multiply by 2^8) and drop it in.
    result = (result << 8) | byte;
    ++count;
  }
  return result;
}
// Reads up to `len` bytes from the reader and assembles them into an unsigned
// number, least significant byte first.
//
// Reading stops early if the reader runs out of data; the bytes read so far
// are still returned.  All `len` bytes are consumed from the reader, but only
// the first sizeof(uint32) of them contribute to the result: shifting a
// uint32 by its full width or more is undefined behaviour, so any further
// (more significant) bytes are read and discarded.
uint32 io::readLENumber(ID3_Reader& reader, size_t len)
{
  uint32 val = 0;
  for (size_t i = 0; i < len; ++i)
  {
    if (reader.atEnd())
    {
      break;
    }
    const uint32 byte = static_cast<uint32>(0xFF & reader.readChar());
    if (i < sizeof(uint32))
    {
      // Each byte lands in its own 8-bit lane, so OR-ing is equivalent to
      // adding.
      val |= byte << (i * 8);
    }
  }
  return val;
}
// Reads single-byte characters from the reader until a '\0' is consumed or
// the reader is exhausted.  The terminating '\0' is consumed but not included
// in the returned string.
String io::readString(ID3_Reader& reader)
{
  String result;
  for (;;)
  {
    if (reader.atEnd())
    {
      break;
    }
    const ID3_Reader::char_type c = reader.readChar();
    if (c == '\0')
    {
      break;
    }
    result += static_cast<char>(c);
  }
  return result;
}
// Reads up to `len` characters from the reader and returns them as a string.
//
// Data is pulled through a fixed 1024-element stack buffer.  Reading stops
// when `len` characters have been collected, when the reader reports that it
// is at its end, or when a read delivers no characters at all.  The last
// condition prevents an endless loop should the reader fail to make progress
// without reporting that it is at its end.
String io::readText(ID3_Reader& reader, size_t len)
{
  String str;
  str.reserve(len);
  const size_t SIZE = 1024;
  ID3_Reader::char_type buf[SIZE];
  size_t remaining = len;
  while (remaining > 0 && !reader.atEnd())
  {
    size_t numRead = reader.readChars(buf, min(remaining, SIZE));
    if (numRead == 0)
    {
      // No progress was made; bail out rather than spin forever.
      break;
    }
    remaining -= numRead;
    str.append(reinterpret_cast<String::value_type *>(buf), numRead);
  }
  return str;
}
// Parses this field's text from the reader, using the field's encoding.
//
// The field is cleared first, then exactly one of four strategies is used:
//   * the field reports a non-zero size: read that many units and set the text;
//   * the field has the ID3FF_LIST flag: read encoded strings until the
//     reader is exhausted, adding each one to the field;
//   * the field has the ID3FF_CSTR flag: read one encoded string and set it;
//   * otherwise: read everything left in the reader and add it as text.
// Afterwards the field is marked unchanged.  Always returns true.
bool ID3_FieldImpl::ParseText(ID3_Reader& reader)
{
  ID3D_NOTICE( "ID3_Field::ParseText(): reader.getBeg() = " << reader.getBeg() );
  ID3D_NOTICE( "ID3_Field::ParseText(): reader.getCur() = " << reader.getCur() );
  ID3D_NOTICE( "ID3_Field::ParseText(): reader.getEnd() = " << reader.getEnd() );

  this->Clear();

  ID3_TextEnc encoding = this->GetEncoding();
  size_t fixedLen = this->Size();
  const bool isList = (_flags & ID3FF_LIST) != 0;
  const bool isCString = (_flags & ID3FF_CSTR) != 0;

  if (fixedLen != 0)
  {
    ID3D_NOTICE( "ID3_Field::ParseText(): fixed size string" );
    // The field has a predetermined length.
    String fixedText = readEncodedText(reader, fixedLen, encoding);
    this->SetText(fixedText);
    ID3D_NOTICE( "ID3_Field::ParseText(): fixed size string = " << fixedText );
  }
  else if (isList)
  {
    ID3D_NOTICE( "ID3_Field::ParseText(): text list" );
    // A list is always the final field of a frame, so consume every
    // remaining character in the reader.
    for (;;)
    {
      if (reader.atEnd())
      {
        break;
      }
      String item = readEncodedString(reader, encoding);
      this->AddText(item);
      ID3D_NOTICE( "ID3_Field::ParseText(): adding string = " << item );
    }
  }
  else if (isCString)
  {
    ID3D_NOTICE( "ID3_Field::ParseText(): null terminated string" );
    String terminated = readEncodedString(reader, encoding);
    this->SetText(terminated);
    ID3D_NOTICE( "ID3_Field::ParseText(): null terminated string = " << terminated );
  }
  else
  {
    ID3D_NOTICE( "ID3_Field::ParseText(): last field string" );
    // The text is not null terminated; it runs to the end of the reader.
    String trailing = readEncodedText(reader, reader.remainingBytes(), encoding);
    this->AddText(trailing);
    ID3D_NOTICE( "ID3_Field::ParseText(): last field string = " << trailing );
  }

  _changed = false;
  return true;
}
// Reads up to `len` bytes from the reader and returns them as a binary string.
//
// Data is pulled through a fixed 1024-element stack buffer.  Reading stops
// when `len` bytes have been collected, when the reader reports that it is at
// its end, or when a read delivers no bytes at all.  The last condition
// prevents an endless loop should the reader fail to make progress without
// reporting that it is at its end.
BString io::readBinary(ID3_Reader& reader, size_t len)
{
  BString binary;
  binary.reserve(len);
  size_t remaining = len;
  const size_t SIZE = 1024;
  ID3_Reader::char_type buf[SIZE];
  while (!reader.atEnd() && remaining > 0)
  {
    size_t numRead = reader.readChars(buf, min(remaining, SIZE));
    if (numRead == 0)
    {
      // No progress was made; bail out rather than spin forever.
      break;
    }
    remaining -= numRead;
    binary.append(reinterpret_cast<BString::value_type *>(buf), numRead);
  }
  return binary;
}
// Parses a big-endian integer from the reader into this field.
//
// If the reader is already at its end the field is left untouched and false
// is returned.  Otherwise the field is cleared, a number is read using the
// field's size in bytes (or sizeof(uint32) when the size is zero), the field
// is set to that number and marked unchanged, and true is returned.
bool ID3_FieldImpl::ParseInteger(ID3_Reader& reader)
{
  ID3D_NOTICE( "ID3_FieldImpl::ParseInteger(): beg = " << reader.getBeg() );
  ID3D_NOTICE( "ID3_FieldImpl::ParseInteger(): cur = " << reader.getCur() );
  ID3D_NOTICE( "ID3_FieldImpl::ParseInteger(): end = " << reader.getEnd() );

  if (reader.atEnd())
  {
    return false;
  }

  this->Clear();

  const size_t fixedLen = this->Size();
  size_t byteCount = sizeof(uint32);
  if (fixedLen > 0)
  {
    byteCount = fixedLen;
  }

  this->Set(io::readBENumber(reader, byteCount));
  _changed = false;
  return true;
}
// Reads a number stored as up to sizeof(uint32) bytes with 7 significant bits
// per byte, most significant byte first.
//
// For each byte read, the value masked with MASK(7) is appended below the
// bits collected so far.  Reading stops early if the reader runs out of data.
// The result is capped at MASK(7 * sizeof(uint32)).
uint32 io::readUInt28(ID3_Reader& reader)
{
  const unsigned short BITSUSED = 7;
  const uint32 MAXVAL = MASK(BITSUSED * sizeof(uint32));

  uint32 result = 0;
  size_t consumed = 0;
  // Normally all four bytes are present; the end check only guards against
  // truncated input.
  while (consumed < sizeof(uint32) && !reader.atEnd())
  {
    // Shift the accumulated bits up and append the masked bits of this byte.
    result = (result << BITSUSED) |
             (static_cast<uint32>(reader.readChar()) & MASK(BITSUSED));
    ++consumed;
  }

  return min(result, MAXVAL);
}
// Reads a two-byte-per-character (UTF-16) string from the reader.
//
// Byte pairs are consumed until the reader is exhausted, a full pair can no
// longer be read, or a null pair is seen.  The first pair is checked with
// isBOM().  If it is a byte order mark it is dropped and its value selects
// the byte order.  If it is not, the byte order is guessed from that first
// pair (see below) and the pair is kept as the first character.  Pairs are
// stored byte-swapped when the byte order is -1 (little-endian) and as read
// otherwise.
//
// Returns the collected bytes (two per character), or an empty string when
// the very first pair is missing or null.
String io::readUnicodeString(ID3_Reader& reader)
{
  String unicode;
  ID3_Reader::char_type ch1, ch2;
  if (!readTwoChars(reader, ch1, ch2) || isNull(ch1, ch2))
  {
    return unicode;
  }

  int bom = isBOM(ch1, ch2);
  int bo_actual;
  if (!bom)
  {
    ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Unicode has no BOM" );
    // The string is UTF-16 (Unicode) but has no Byte Order Marker (BOM).
    // The Unicode standard says big-endian should be assumed in the absence
    // of a BOM, but also that this can be overridden by other concerns.  Some
    // Windows software authors appear to have taken this to mean that
    // little-endian may be written without a BOM; others have not.  Files may
    // therefore contain either, and neither assumption is safe.
    //
    // For western alphabets most characters have zero as the most significant
    // byte, so a zero *second* byte strongly suggests little-endian data.
    // The exceptions are first bytes 00..04:
    //   00 00 - Null (terminates the string either way)
    //   01 00 - U+0100 Latin capital letter A with macron
    //   02 00 - U+0200 Latin capital letter A with double grave
    //   03 00 - U+0300 Combining grave accent
    //   04 00 - U+0400 Cyrillic capital letter IE with grave
    // whose byte-reversed readings (U+0001..U+0004) are control characters
    // that are unlikely to occur in ID3 strings.  So the pair is treated as
    // little-endian only when the second byte is zero and the first byte is
    // strictly greater than 04; everything else keeps the big-endian default.
    //
    // This does not detect BOM-less little-endian strings in non-western
    // alphabets.
    // NOTE(review): a big-endian string whose first code unit has a zero low
    // byte and a high byte above 04 (e.g. U+4E00) also matches this test and
    // would be reversed.
    if ( ( ch1 > 4 ) && ( ch2 == 0 ) ) // Probably little-endian
    {
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Second char is zero: Probably little-endian" );
      bo_actual = -1;
      unicode += static_cast<char>(ch2);
      unicode += static_cast<char>(ch1);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Little-endian data read and stored as: " << static_cast<int>(ch2) << " " << static_cast<int>(ch1) );
    }
    else // Probably big-endian
    {
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Second char is non-zero: Probably big-endian" );
      bo_actual = 1;
      unicode += static_cast<char>(ch1);
      unicode += static_cast<char>(ch2);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Big-endian data read and stored as: " << static_cast<int>(ch1) << " " << static_cast<int>(ch2) );
    }
  }
  else
  {
    // A BOM was present: it is not stored, it only selects the byte order.
    bo_actual = bom;
  }

  while (!reader.atEnd())
  {
    if (!readTwoChars(reader, ch1, ch2) || isNull(ch1, ch2))
    {
      break;
    }
    if (bo_actual == -1)
    {
      unicode += static_cast<char>(ch2);
      unicode += static_cast<char>(ch1);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Little-endian data read and stored as: " << static_cast<int>(ch2) << " " << static_cast<int>(ch1) );
    }
    else
    {
      unicode += static_cast<char>(ch1);
      unicode += static_cast<char>(ch2);
      ID3D_NOTICE( "ID3_BOM::readUnicodeString(): Big-endian data read and stored as: " << static_cast<int>(ch1) << " " << static_cast<int>(ch2) );
    }
  }
  return unicode;
}