Exemplo n.º 1
0
 // Decodes one UTF-8 sequence starting at *p and moves *p past it.
 //
 // p   - in/out cursor; on return it has been advanced by the index that
 //       CBU8_NEXT produced (presumably the number of bytes consumed --
 //       the macro is defined outside this file, confirm there).
 // end - one-past-the-last readable byte; the distance from the cursor
 //       to end is handed to the macro as the available length.
 //
 // Returns whatever value CBU8_NEXT stored for the decoded character.
 base_icu::UChar32 operator()(const char** p, const char* end)
 {
     const char* const start = *p;
     int consumed = 0;
     base_icu::UChar32 decoded;
     CBU8_NEXT(start, consumed, end - start, decoded);
     *p = start + consumed;
     return decoded;
 }
Exemplo n.º 2
0
	bool IsStringUTF8(const std::string& str) {
		const char *src = str.data();
		int32 src_len = static_cast<int32>(str.length());
		int32 char_index = 0;

		while (char_index < src_len) {
			int32 code_point;
			CBU8_NEXT(src, char_index, src_len, code_point);
			if (!IsValidCharacter(code_point))
				return false;
		}
		return true;
	}
bool ReadUnicodeCharacter(const char* src, int32 src_len,
                          int32* char_index, uint32* code_point)
{
    // U8_NEXT使用-1表示错误, 因此code_point使用有符号类型.
    // 函数出错时返回false, 因此code_point使用无符号类型.
    int32 cp;
    CBU8_NEXT(src, *char_index, src_len, cp);
    *code_point = static_cast<uint32>(cp);

    // 上面的ICU宏移到下一字符, 函数要求移到最后一个用掉的字符.
    (*char_index)--;

    // 验证解码值的合法性 .
    return IsValidCodepoint(cp);
}
Exemplo n.º 4
0
	// Decodes the character at str[*begin] using CBU8_NEXT.
	//
	// str            - buffer to read from.
	// begin          - in/out; on return it points at the last unit
	//                  consumed (the macro's resulting index minus one).
	// length         - length passed to the decoding macro as the bound.
	// code_point_out - out; the decoded value on success, or
	//                  kUnicodeReplacementCharacter on failure.
	//
	// Returns true when CBU_IS_UNICODE_CHAR accepts the decoded value,
	// false otherwise.
	bool ReadUTFChar(const char* str, int* begin, int length,
		unsigned* code_point_out) {
		// Signed so the macro can store -1 without a conversion warning.
		int decoded;
		CBU8_NEXT(str, *begin, length, decoded);
		*code_point_out = static_cast<unsigned>(decoded);

		// The macro stepped to the next character; back up so *begin
		// refers to the last one consumed.
		--(*begin);

		// Reject anything that is not a Unicode character and hand back
		// the replacement character instead.
		if (!(CBU_IS_UNICODE_CHAR(decoded))) {
			*code_point_out = kUnicodeReplacementCharacter;
			return false;
		}
		return true;
	}
Exemplo n.º 5
0
void TruncateUTF8ToByteSize(const std::string& input,
                            const size_t byte_size,
                            std::string* output)
{
    DCHECK(output);
    if(byte_size > input.length())
    {
        *output = input;
        return;
    }
    DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
    // 注意: 由于CBU8_NEXT使用的是int32.
    int32 truncation_length = static_cast<int32>(byte_size);
    int32 char_index = truncation_length - 1;
    const char* data = input.data();

    // 使用CBU8, 从阶段点由后向前移动查找合法的UTF8字符. 一旦找到
    // 一个UTF8字符, 截断到该字符作为输出.
    while(char_index >= 0)
    {
        int32 prev = char_index;
        uint32 code_point = 0;
        CBU8_NEXT(data, char_index, truncation_length, code_point);
        if(!base::IsValidCharacter(code_point) ||
            !base::IsValidCodepoint(code_point))
        {
            char_index = prev - 1;
        }
        else
        {
            break;
        }
    }

    if(char_index >= 0)
    {
        *output = input.substr(0, char_index);
    }
    else
    {
        output->clear();
    }
}
Exemplo n.º 6
0
	void TruncateUTF8ToByteSize(const std::string& input,
		const size_t byte_size,
		std::string* output) {
		DCHECK(output);
		if (byte_size > input.length()) {
			*output = input;
			return;
		}
		DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
		// Note: This cast is necessary because CBU8_NEXT uses int32s.
		int32 truncation_length = static_cast<int32>(byte_size);
		int32 char_index = truncation_length - 1;
		const char* data = input.data();

		// Using CBU8, we will move backwards from the truncation point
		// to the beginning of the string looking for a valid UTF8
		// character.  Once a full UTF8 character is found, we will
		// truncate the string to the end of that character.
		while (char_index >= 0) {
			int32 prev = char_index;
			base_icu::UChar32 code_point = 0;
			CBU8_NEXT(data, char_index, truncation_length, code_point);
			if (!IsValidCharacter(code_point) ||
				!IsValidCodepoint(code_point)) {
				char_index = prev - 1;
			}
			else {
				break;
			}
		}

		if (char_index >= 0)
			*output = input.substr(0, char_index);
		else
			output->clear();
	}