// Decodes the code-unit range [code_unit_itr, code_unit_end) one code point
// at a time and invokes code_point_func with each decoded code point, in
// input order.
//
// code_unit_itr is handed to utf8_decode_next at the position marked /*ref*/;
// the loop relies on that call moving the iterator toward code_unit_end.
//
// The function is noexcept: an exception escaping the decoder or
// code_point_func terminates the program instead of propagating.
void m1::utf8_decode_each(InputItr code_unit_itr, InputEnd code_unit_end, CodePointFunc code_point_func) noexcept
{
    for(; code_unit_itr != code_unit_end; )
    {
        // Keep the decoded value in a named const so the callback receives
        // a const lvalue.
        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr, code_unit_end);
        code_point_func(decoded);
    }
}
// Decodes the code-unit range [code_unit_itr, code_unit_end) one code point
// at a time and writes each decoded code point through out_itr, advancing
// out_itr after every write.
//
// No limit is applied on the output side; one value is written per decoded
// code point until the input range is exhausted.
//
// The function is noexcept: an exception escaping the decoder or the output
// iterator terminates the program instead of propagating.
void m1::utf8_decode_copy(InputItr code_unit_itr, InputEnd code_unit_end, OutputItr out_itr) noexcept
{
    for(; code_unit_itr != code_unit_end; )
    {
        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr, code_unit_end);

        // Store, then step the destination (kept as two separate operations).
        *out_itr = decoded;
        ++out_itr;
    }
}
// Decodes at most str_length code points from the code-unit range
// [code_unit_itr, code_unit_end) and invokes code_point_func with each one,
// in input order.
//
// Decoding stops as soon as either the input range is exhausted or
// str_length code points have been delivered, whichever happens first.
//
// The function is noexcept: an exception escaping the decoder or
// code_point_func terminates the program instead of propagating.
void m1::utf8_decode_each_n(InputItr code_unit_itr, InputEnd const code_unit_end, std::size_t str_length, CodePointFunc code_point_func) noexcept
{
    while(code_unit_itr != code_unit_end)
    {
        // The end-of-input test runs first, then the remaining budget is
        // checked; one unit of budget is consumed per decoded code point.
        if(str_length == 0)
        {
            break;
        }
        --str_length;

        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr, code_unit_end);
        code_point_func(decoded);
    }
}
// Decodes at most str_length code points from the code-unit range
// [code_unit_itr, code_unit_end) and writes each one through out_itr,
// advancing out_itr after every write.
//
// Decoding stops as soon as either the input range is exhausted or
// str_length code points have been written, whichever happens first.
//
// The function is noexcept: an exception escaping the decoder or the output
// iterator terminates the program instead of propagating.
void m1::utf8_decode_copy_n(InputItr code_unit_itr, InputEnd const code_unit_end, std::size_t str_length, OutputItr out_itr) noexcept
{
    while(code_unit_itr != code_unit_end)
    {
        // The end-of-input test runs first, then the remaining budget is
        // checked; one unit of budget is consumed per decoded code point.
        if(str_length == 0)
        {
            break;
        }
        --str_length;

        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr, code_unit_end);

        // Store, then step the destination (kept as two separate operations).
        *out_itr = decoded;
        ++out_itr;
    }
}
// Fetches the next value from the underlying decoder state (m_decode).
//
// Returns:
//   - the decoded value unchanged when it is non-negative;
//   - UTF8_END when the decoder reports UTF8_END;
//   - for any other negative result: '?' if m_loose is set, otherwise
//     UTF8_ERROR.
int UTF8To16Decoder::getNext() {
  int const decoded = utf8_decode_next(&m_decode);

  // Non-negative results are passed straight through.
  if (decoded >= 0) {
    return decoded;
  }

  /*** BEGIN Facebook: json_utf8_loose ***/
  if (decoded == UTF8_END) {
    return UTF8_END;
  }
  // Remaining negative results: loose mode substitutes a placeholder
  // character instead of reporting the error.
  if (m_loose) {
    return '?';
  }
  return UTF8_ERROR;
  /*** END Facebook: json_utf8_loose ***/
}
/*
 * Determines a single language code for `input` by decoding it code point
 * by code point and asking get_language() about each one that
 * should_skip() does not filter out.
 *
 * Returns VARNAM_LANG_CODE_UNKNOWN when:
 *   - handle or input is NULL,
 *   - the input is blank according to strbuf_is_blank(),
 *   - any examined code point maps to VARNAM_LANG_CODE_UNKNOWN, or
 *   - two examined code points map to different languages.
 * Otherwise returns the language of the last examined code point, which
 * is still VARNAM_LANG_CODE_UNKNOWN if every code point was skipped.
 *
 * Decoding stops at the first UTF8_END or UTF8_ERROR; whatever language
 * was established up to that point is returned.
 */
int varnam_detect_lang(varnam *handle, const char *input)
{
    strbuf *word;
    utf8_decoder decoder;
    int codepoint;
    int language = VARNAM_LANG_CODE_UNKNOWN;
    int prev_language = 0;   /* 0 means no language has been seen yet */

    if (handle == NULL || input == NULL)
        return VARNAM_LANG_CODE_UNKNOWN;

    /* Work on a pooled copy of the input. */
    word = get_pooled_string (handle);
    strbuf_add (word, input);
    if (strbuf_is_blank (word))
        return VARNAM_LANG_CODE_UNKNOWN;

    utf8_decode_init (word->buffer, (int) word->length, &decoder);

    while ((codepoint = utf8_decode_next (&decoder)) != UTF8_END
           && codepoint != UTF8_ERROR) {
        if (should_skip (codepoint))
            continue;

        language = get_language (codepoint);

        /*
         * Give up if this code point has no known language, or if it
         * disagrees with the previous one (characters from multiple
         * languages are mixed).
         */
        if (language == VARNAM_LANG_CODE_UNKNOWN
            || (prev_language != 0 && language != prev_language))
            return VARNAM_LANG_CODE_UNKNOWN;

        prev_language = language;
    }

    return language;
}