/**
Writes this code point to a binary stream as its UTF-8 byte sequence.
The encoded bytes are staged in a small local buffer first and then emitted
in order, one writeU8() call per byte.
@param stream the stream that receives the 1 to 4 encoded bytes
@throws VRangeException if the code point value has no 1..4 byte UTF-8 length
*/
void VCodePoint::writeToBinaryStream(VBinaryIOStream& stream) const {
    Vu8 utf8Bytes[4];
    int numBytes = 0;

    switch (VCodePoint::getUTF8LengthFromCodePointValue(mIntValue)) {
        case 1:
            // Single byte, binary 0xxxxxxx (7 used bits): the value is the byte.
            utf8Bytes[0] = (Vu8) mIntValue;
            numBytes = 1;
            break;

        case 2:
            utf8Bytes[0] = (Vu8) UTF8_BYTE_1_OF_2(mIntValue);
            utf8Bytes[1] = (Vu8) UTF8_BYTE_2_OF_2(mIntValue);
            numBytes = 2;
            break;

        case 3:
            utf8Bytes[0] = (Vu8) UTF8_BYTE_1_OF_3(mIntValue);
            utf8Bytes[1] = (Vu8) UTF8_BYTE_2_OF_3(mIntValue);
            utf8Bytes[2] = (Vu8) UTF8_BYTE_3_OF_3(mIntValue);
            numBytes = 3;
            break;

        case 4:
            utf8Bytes[0] = (Vu8) UTF8_BYTE_1_OF_4(mIntValue);
            utf8Bytes[1] = (Vu8) UTF8_BYTE_2_OF_4(mIntValue);
            utf8Bytes[2] = (Vu8) UTF8_BYTE_3_OF_4(mIntValue);
            utf8Bytes[3] = (Vu8) UTF8_BYTE_4_OF_4(mIntValue);
            numBytes = 4;
            break;

        default:
            // Nothing has been written to the stream at this point.
            throw VRangeException(VSTRING_FORMAT("VCodePoint::writeToBinaryStream() for an invalid UTF-8 code point 0x%X", mIntValue));
    }

    for (int i = 0; i < numBytes; ++i) {
        stream.writeU8(utf8Bytes[i]);
    }
}
/**
Builds a string holding this code point's UTF-8 byte sequence.
@return a VString to which the 1 to 4 encoded bytes have been appended
@throws VRangeException if the code point value has no 1..4 byte UTF-8 length
*/
VString VCodePoint::toString() const {
    // Background on the 0x40 (decimal 64, binary 01000000) arithmetic used to
    // split a number into 6-bit groups:
    //   n / 0x40          drops the low 6 bits
    //   n % 0x40          keeps only the low 6 bits
    //   n / 0x40 % 0x40   combines the two to yield the "next" 6 bits
    char utf8Chars[4];
    int numChars = 0;

    switch (VCodePoint::getUTF8LengthFromCodePointValue(mIntValue)) {
        case 1:
            // Single byte, binary 0xxxxxxx (7 used bits): the value is the byte.
            utf8Chars[0] = (char) mIntValue;
            numChars = 1;
            break;

        case 2:
            utf8Chars[0] = (char) UTF8_BYTE_1_OF_2(mIntValue);
            utf8Chars[1] = (char) UTF8_BYTE_2_OF_2(mIntValue);
            numChars = 2;
            break;

        case 3:
            utf8Chars[0] = (char) UTF8_BYTE_1_OF_3(mIntValue);
            utf8Chars[1] = (char) UTF8_BYTE_2_OF_3(mIntValue);
            utf8Chars[2] = (char) UTF8_BYTE_3_OF_3(mIntValue);
            numChars = 3;
            break;

        case 4:
            utf8Chars[0] = (char) UTF8_BYTE_1_OF_4(mIntValue);
            utf8Chars[1] = (char) UTF8_BYTE_2_OF_4(mIntValue);
            utf8Chars[2] = (char) UTF8_BYTE_3_OF_4(mIntValue);
            utf8Chars[3] = (char) UTF8_BYTE_4_OF_4(mIntValue);
            numChars = 4;
            break;

        default:
            throw VRangeException(VSTRING_FORMAT("VCodePoint::toString() for an invalid UTF-8 code point 0x%X", mIntValue));
    }

    VString result;
    for (int i = 0; i < numChars; ++i) {
        result += utf8Chars[i];
    }

    return result;
}
static bstring read_string(char * src, size_t length) { int utf8_char; char hex_num[7] = "0x0000"; char * end = src + length; bstring s = bfromcstralloc(length, ""); char * dst = bdata(s); if (* src == '"') src++; while (src < end) { if (* src != '\\') { * dst = * src; dst++; } else { src++; if (* src == '"') { * dst = '"'; dst++; } else if (* src == '\\') { * dst = '\\'; dst++; } else if (* src == '/') { * dst = '/'; dst++; } else if (* src == 'b') { * dst = '\b'; dst++; } else if (* src == 'f') { * dst = '\f'; dst++; } else if (* src == 'n') { * dst = '\n'; dst++; } else if (* src == 'r') { * dst = '\r'; dst++; } else if (* src == 't') { * dst = '\t'; dst++; } else if (* src == 'u') { memcpy(hex_num + 2, src + 1, 4); sscanf(hex_num, "%x", &utf8_char); if (utf8_char <= UTF8_2_BYTE_LIMIT) { * dst = UTF8_BYTE_1_OF_2(utf8_char); dst++; * dst = UTF8_BYTE_2_OF_2(utf8_char); dst++; } else { * dst = UTF8_BYTE_1_OF_3(utf8_char); dst++; * dst = UTF8_BYTE_2_OF_3(utf8_char); dst++; * dst = UTF8_BYTE_3_OF_3(utf8_char); dst++; } src += 4; } } src++; } s->slen = dst - bdata(s); return s; }