예제 #1
0
// Walks the code-unit range [code_unit_itr, code_unit_end), decoding one code
// point per iteration and handing it to code_point_func in input order.
//
// code_unit_itr   - current position in the code-unit input
// code_unit_end   - position at which decoding stops
// code_point_func - callable invoked once with each decoded code point
void m1::utf8_decode_each(InputItr code_unit_itr,
                          InputEnd code_unit_end,
                          CodePointFunc code_point_func) noexcept
{
    for(; code_unit_itr != code_unit_end; )
    {
        // The iterator is passed with a /*ref*/ marker; presumably
        // utf8_decode_next advances it past the consumed code units.
        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr,
                                                    code_unit_end);
        code_point_func(decoded);
    }
}
예제 #2
0
// Decodes the code-unit range [code_unit_itr, code_unit_end) and writes each
// resulting code point through out_itr, advancing out_itr after every write.
//
// code_unit_itr - current position in the code-unit input
// code_unit_end - position at which decoding stops
// out_itr       - destination that receives the decoded code points
void m1::utf8_decode_copy(InputItr code_unit_itr,
                          InputEnd code_unit_end,
                          OutputItr out_itr) noexcept
{
    // The output position is stepped in the loop's increment clause, i.e.
    // right after each write and before the next end-of-input check.
    for(; code_unit_itr != code_unit_end; ++out_itr)
    {
        // The iterator is passed with a /*ref*/ marker; presumably
        // utf8_decode_next advances it past the consumed code units.
        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr,
                                                    code_unit_end);
        *out_itr = decoded;
    }
}
예제 #3
0
// Decodes at most str_length code points from the code-unit range
// [code_unit_itr, code_unit_end), passing each one to code_point_func.
// Stops as soon as either the input is exhausted or the budget is used up.
//
// code_unit_itr   - current position in the code-unit input
// code_unit_end   - position at which decoding stops
// str_length      - maximum number of code points to decode
// code_point_func - callable invoked once with each decoded code point
void m1::utf8_decode_each_n(InputItr code_unit_itr,
                            InputEnd const code_unit_end,
                            std::size_t str_length,
                            CodePointFunc code_point_func) noexcept
{
    // str_length acts as the remaining budget; it is a by-value copy, so
    // counting it down here is invisible to the caller.
    for(; (code_unit_itr != code_unit_end) && (str_length > 0); --str_length)
    {
        // The iterator is passed with a /*ref*/ marker; presumably
        // utf8_decode_next advances it past the consumed code units.
        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr,
                                                    code_unit_end);
        code_point_func(decoded);
    }
}
예제 #4
0
// Decodes at most str_length code points from the code-unit range
// [code_unit_itr, code_unit_end) and writes each one through out_itr,
// advancing out_itr after every write. Stops as soon as either the input is
// exhausted or the budget is used up.
//
// code_unit_itr - current position in the code-unit input
// code_unit_end - position at which decoding stops
// str_length    - maximum number of code points to decode
// out_itr       - destination that receives the decoded code points
void m1::utf8_decode_copy_n(InputItr code_unit_itr,
                            InputEnd const code_unit_end,
                            std::size_t str_length,
                            OutputItr out_itr) noexcept
{
    // str_length acts as the remaining budget; it is a by-value copy, so
    // counting it down here is invisible to the caller.
    for(; (code_unit_itr != code_unit_end) && (str_length > 0); --str_length)
    {
        // The iterator is passed with a /*ref*/ marker; presumably
        // utf8_decode_next advances it past the consumed code units.
        code_point const decoded = utf8_decode_next(/*ref*/ code_unit_itr,
                                                    code_unit_end);
        *out_itr = decoded;
        ++out_itr;
    }
}
예제 #5
0
// Fetches the next value from the underlying decoder state (m_decode).
//
// Returns:
//   - the decoded value unchanged when it is non-negative;
//   - UTF8_END when the decoder reports UTF8_END;
//   - for any other negative result: '?' when m_loose is set (the
//     json_utf8_loose behaviour), otherwise UTF8_ERROR.
int UTF8To16Decoder::getNext() {
  const int decoded = utf8_decode_next(&m_decode);

  // Non-negative results are passed straight through.
  if (decoded >= 0) {
    return decoded;
  }

  // End of input is reported as-is, regardless of loose mode.
  if (decoded == UTF8_END) {
    return UTF8_END;
  }

  // Remaining negative results: substitute '?' in loose mode, otherwise
  // report the error.
  return m_loose ? '?' : UTF8_ERROR;
}
예제 #6
0
/*
 * Determines the language of `input` by inspecting its code points one by one.
 *
 * Returns VARNAM_LANG_CODE_UNKNOWN when:
 *   - handle or input is NULL,
 *   - the input is blank,
 *   - a non-skipped code point maps to VARNAM_LANG_CODE_UNKNOWN, or
 *   - two non-skipped code points map to different languages.
 *
 * Otherwise returns the language reported by get_language() for the
 * non-skipped code points seen before decoding stopped (on UTF8_END or
 * UTF8_ERROR). If every code point was skipped, the result is still
 * VARNAM_LANG_CODE_UNKNOWN.
 */
int
varnam_detect_lang(varnam *handle, const char *input)
{
    strbuf *text;
    utf8_decoder decoder;
    int cp;
    int detected = VARNAM_LANG_CODE_UNKNOWN;
    int previous = 0;

    if (!handle || !input)
        return VARNAM_LANG_CODE_UNKNOWN;

    /* Work on a pooled copy of the input. */
    text = get_pooled_string (handle);
    strbuf_add (text, input);
    if (strbuf_is_blank (text))
        return VARNAM_LANG_CODE_UNKNOWN;

    utf8_decode_init (text->buffer, (int) text->length, &decoder);

    /* Keep decoding until the decoder signals end of input or an error. */
    while ((cp = utf8_decode_next (&decoder)) != UTF8_END && cp != UTF8_ERROR)
    {
        if (should_skip (cp))
            continue;

        detected = get_language (cp);
        if (detected == VARNAM_LANG_CODE_UNKNOWN)
            return VARNAM_LANG_CODE_UNKNOWN;

        /* previous == 0 means no language has been recorded yet; any later
           mismatch means characters from several languages are mixed. */
        if (previous != 0 && detected != previous)
            return VARNAM_LANG_CODE_UNKNOWN;

        previous = detected;
    }

    return detected;
}