Esempio n. 1
0
/**
Writes this code point to a binary stream as its UTF-8 byte sequence.
The number of bytes (1 to 4) is whatever getUTF8LengthFromCodePointValue()
reports for the stored value; each byte is emitted with writeU8() in order.
@param  stream  the stream that receives the encoded bytes
@throws VRangeException if the reported length is not 1, 2, 3, or 4; nothing
        is written to the stream in that case
*/
void VCodePoint::writeToBinaryStream(VBinaryIOStream& stream) const {
    // Stage the encoded bytes locally, then push them to the stream in one pass.
    Vu8 encoded[4];
    int numBytes = 0;

    switch (VCodePoint::getUTF8LengthFromCodePointValue(mIntValue)) {

        case 1:
            // Single-byte form: the value itself is the byte (0xxxxxxx, 7 used bits).
            encoded[numBytes++] = (Vu8) mIntValue;
            break;

        case 2:
            encoded[numBytes++] = (Vu8) UTF8_BYTE_1_OF_2(mIntValue);
            encoded[numBytes++] = (Vu8) UTF8_BYTE_2_OF_2(mIntValue);
            break;

        case 3:
            encoded[numBytes++] = (Vu8) UTF8_BYTE_1_OF_3(mIntValue);
            encoded[numBytes++] = (Vu8) UTF8_BYTE_2_OF_3(mIntValue);
            encoded[numBytes++] = (Vu8) UTF8_BYTE_3_OF_3(mIntValue);
            break;

        case 4:
            encoded[numBytes++] = (Vu8) UTF8_BYTE_1_OF_4(mIntValue);
            encoded[numBytes++] = (Vu8) UTF8_BYTE_2_OF_4(mIntValue);
            encoded[numBytes++] = (Vu8) UTF8_BYTE_3_OF_4(mIntValue);
            encoded[numBytes++] = (Vu8) UTF8_BYTE_4_OF_4(mIntValue);
            break;

        default:
            // Any other length means the stored value cannot be encoded.
            throw VRangeException(VSTRING_FORMAT("VCodePoint::writeToBinaryStream() for an invalid UTF-8 code point 0x%X", mIntValue));
    }

    for (int i = 0; i < numBytes; ++i) {
        stream.writeU8(encoded[i]);
    }
}
Esempio n. 2
0
/**
Builds a VString holding the UTF-8 byte sequence for this code point.
The number of bytes (1 to 4) is whatever getUTF8LengthFromCodePointValue()
reports for the stored value; the bytes are appended to the result in order.
@return a string containing the 1 to 4 encoded bytes
@throws VRangeException if the reported length is not 1, 2, 3, or 4
*/
VString VCodePoint::toString() const {
    // Stage the encoded bytes locally, then append them to the result in one pass.
    char encoded[4];
    int numBytes = 0;

    switch (VCodePoint::getUTF8LengthFromCodePointValue(mIntValue)) {

        case 1:
            // Single-byte form: the value itself is the byte (0xxxxxxx, 7 used bits).
            encoded[numBytes++] = (char) mIntValue;
            break;

        case 2:
            encoded[numBytes++] = (char) UTF8_BYTE_1_OF_2(mIntValue);
            encoded[numBytes++] = (char) UTF8_BYTE_2_OF_2(mIntValue);
            break;

        case 3:
            encoded[numBytes++] = (char) UTF8_BYTE_1_OF_3(mIntValue);
            encoded[numBytes++] = (char) UTF8_BYTE_2_OF_3(mIntValue);
            encoded[numBytes++] = (char) UTF8_BYTE_3_OF_3(mIntValue);
            break;

        case 4:
            encoded[numBytes++] = (char) UTF8_BYTE_1_OF_4(mIntValue);
            encoded[numBytes++] = (char) UTF8_BYTE_2_OF_4(mIntValue);
            encoded[numBytes++] = (char) UTF8_BYTE_3_OF_4(mIntValue);
            encoded[numBytes++] = (char) UTF8_BYTE_4_OF_4(mIntValue);
            break;

        default:
            // Any other length means the stored value cannot be encoded.
            throw VRangeException(VSTRING_FORMAT("VCodePoint::toString() for an invalid UTF-8 code point 0x%X", mIntValue));
    }

    VString result;
    for (int i = 0; i < numBytes; ++i) {
        result += encoded[i];
    }

    return result;
}
Esempio n. 3
0
/* Returns the numeric value of one hexadecimal digit, or -1 if c is not one. */
static int read_string_hex_digit(char c)
{
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	return -1;
}

/* Parses exactly four hex digits starting at p (the caller guarantees all four
 * are readable). Returns the value 0..0xFFFF, or -1 if any is not a hex digit. */
static int read_string_hex4(const char * p)
{
	int value = 0;
	int i;

	for (i = 0; i < 4; i++) {
		int digit = read_string_hex_digit(p[i]);
		if (digit < 0) return -1;
		value = value * 16 + digit;
	}
	return value;
}

/* Writes code_point (0..0x10FFFF) at dst as UTF-8 and returns the position
 * just past the last byte written (1 to 4 bytes). */
static char * read_string_put_utf8(char * dst, int code_point)
{
	if (code_point <= 0x7F) {
		/* ASCII must stay a single byte; a 2-byte form would be an overlong,
		 * invalid UTF-8 sequence. */
		* dst++ = (char) code_point;
	} else if (code_point <= UTF8_2_BYTE_LIMIT) {
		* dst++ = UTF8_BYTE_1_OF_2(code_point);
		* dst++ = UTF8_BYTE_2_OF_2(code_point);
	} else if (code_point <= 0xFFFF) {
		* dst++ = UTF8_BYTE_1_OF_3(code_point);
		* dst++ = UTF8_BYTE_2_OF_3(code_point);
		* dst++ = UTF8_BYTE_3_OF_3(code_point);
	} else {
		/* Supplementary plane: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. Spelled out
		 * with bit operations so this does not depend on 4-byte helper macros. */
		* dst++ = (char) (0xF0 | (code_point >> 18));
		* dst++ = (char) (0x80 | ((code_point >> 12) & 0x3F));
		* dst++ = (char) (0x80 | ((code_point >> 6) & 0x3F));
		* dst++ = (char) (0x80 | (code_point & 0x3F));
	}
	return dst;
}

/*
 * Decodes the backslash escapes of a JSON-style string into a new bstring.
 *
 * src    - start of the text; one leading '"' is skipped if present.
 * length - number of bytes readable at src. Every remaining byte up to
 *          src + length is processed, including any closing quote.
 *
 * Handled escapes: \" \\ \/ \b \f \n \r \t and \uXXXX. A \uXXXX escape is
 * written as UTF-8; a high surrogate immediately followed by a \uXXXX low
 * surrogate is combined into one 4-byte sequence. A lone surrogate is still
 * written in the 3-byte form. Any other escape, including a \u that is not
 * followed by four hex digits inside the input, is dropped: the backslash and
 * the character after it produce no output. A backslash that is the very last
 * input byte ends decoding.
 *
 * Returns a NUL-terminated bstring owned by the caller, or NULL if length does
 * not fit the allocator's int size or the allocation fails.
 */
static bstring read_string(char * src, size_t length)
{
	char * end = src + length;
	/* One extra byte so the result can always be NUL-terminated. The decoded
	 * text is never longer than the input: every escape shrinks or keeps size. */
	int capacity = (int) (length + 1);
	bstring s;
	char * dst;

	/* Reject lengths that do not survive the conversion to int. */
	if (capacity <= 0 || (size_t) capacity != length + 1) return NULL;

	s = bfromcstralloc(capacity, "");
	if (s == NULL) return NULL;
	dst = bdata(s);

	if (src < end && * src == '"') src++;

	while (src < end) {

		if (* src != '\\') {
			* dst++ = * src++;
			continue;
		}

		src++;                  /* step over the backslash */
		if (src >= end) break;  /* dangling backslash at end of input */

		switch (* src) {
		case '"':
		case '\\':
		case '/':
			* dst++ = * src;
			break;
		case 'b':
			* dst++ = '\b';
			break;
		case 'f':
			* dst++ = '\f';
			break;
		case 'n':
			* dst++ = '\n';
			break;
		case 'r':
			* dst++ = '\r';
			break;
		case 't':
			* dst++ = '\t';
			break;
		case 'u': {
			/* The four digits occupy src[1..4]; all must lie before end. */
			int code_point = (end - src > 4) ? read_string_hex4(src + 1) : -1;

			if (code_point < 0) break;  /* malformed: drop "\u" like any unknown escape */
			src += 4;                   /* now on the last hex digit */

			/* High surrogate followed by "\uXXXX" with a low surrogate:
			 * merge both halves into a single supplementary code point. */
			if (code_point >= 0xD800 && code_point <= 0xDBFF
					&& end - src >= 7 && src[1] == '\\' && src[2] == 'u') {
				int low = read_string_hex4(src + 3);
				if (low >= 0xDC00 && low <= 0xDFFF) {
					code_point = 0x10000 + ((code_point - 0xD800) << 10) + (low - 0xDC00);
					src += 6;
				}
			}

			dst = read_string_put_utf8(dst, code_point);
			break;
		}
		default:
			/* Unknown escape: neither the backslash nor this character is emitted. */
			break;
		}
		src++;
	}

	/* bstrlib expects data[slen] to be '\0'. */
	* dst = '\0';
	s->slen = (int) (dst - bdata(s));
	return s;
}