C++ (Cpp) utf_char2bytes 예제들

예제 #1

0

파일 보기

파일: keymap.c 프로젝트: kwon-young/neovim

char_u *add_char2buf(int c, char_u *s)
{
  char_u temp[MB_MAXBYTES + 1];
  const int len = utf_char2bytes(c, temp);
  for (int i = 0; i < len; ++i) {
    c = temp[i];
    // Need to escape K_SPECIAL and CSI like in the typeahead buffer.
    if (c == K_SPECIAL) {
      *s++ = K_SPECIAL;
      *s++ = KS_SPECIAL;
      *s++ = KE_FILLER;
    } else {
      *s++ = c;
    }
  }
  return s;
}

예제 #2

0

파일 보기

파일: json.c 프로젝트: KamarajuKusumanchi/vim

    static void
write_string(garray_T *gap, char_u *str)
{
    char_u	*res = str;
    char_u	numbuf[NUMBUFLEN];

    if (res == NULL)
	ga_concat(gap, (char_u *)"\"\"");
    else
    {
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
	vimconv_T   conv;
	char_u	    *converted = NULL;

	if (!enc_utf8)
	{
	    /* Convert the text from 'encoding' to utf-8, the JSON string is
	     * always utf-8. */
	    conv.vc_type = CONV_NONE;
	    convert_setup(&conv, p_enc, (char_u*)"utf-8");
	    if (conv.vc_type != CONV_NONE)
		converted = res = string_convert(&conv, res, NULL);
	    convert_setup(&conv, NULL, NULL);
	}
#endif
	ga_append(gap, '"');
	while (*res != NUL)
	{
	    int c;
#ifdef FEAT_MBYTE
	    /* always use utf-8 encoding, ignore 'encoding' */
	    c = utf_ptr2char(res);
#else
	    c = *res;
#endif

	    switch (c)
	    {
		case 0x08:
		    ga_append(gap, '\\'); ga_append(gap, 'b'); break;
		case 0x09:
		    ga_append(gap, '\\'); ga_append(gap, 't'); break;
		case 0x0a:
		    ga_append(gap, '\\'); ga_append(gap, 'n'); break;
		case 0x0c:
		    ga_append(gap, '\\'); ga_append(gap, 'f'); break;
		case 0x0d:
		    ga_append(gap, '\\'); ga_append(gap, 'r'); break;
		case 0x22: /* " */
		case 0x5c: /* \ */
		    ga_append(gap, '\\');
		    ga_append(gap, c);
		    break;
		default:
		    if (c >= 0x20)
		    {
#ifdef FEAT_MBYTE
			numbuf[utf_char2bytes(c, numbuf)] = NUL;
#else
			numbuf[0] = c;
			numbuf[1] = NUL;
#endif
			ga_concat(gap, numbuf);
		    }
		    else
		    {
			vim_snprintf((char *)numbuf, NUMBUFLEN,
							 "\\u%04lx", (long)c);
			ga_concat(gap, numbuf);
		    }
	    }
#ifdef FEAT_MBYTE
	    res += utf_ptr2len(res);
#else
	    ++res;
#endif
	}
	ga_append(gap, '"');
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
	vim_free(converted);
#endif
    }
}

예제 #3

0

파일 보기

파일: json.c 프로젝트: KamarajuKusumanchi/vim

    static int
json_decode_string(js_read_T *reader, typval_T *res, int quote)
{
    garray_T    ga;
    int		len;
    char_u	*p;
    int		c;
    varnumber_T	nr;

    if (res != NULL)
	ga_init2(&ga, 1, 200);

    p = reader->js_buf + reader->js_used + 1; /* skip over " or ' */
    while (*p != quote)
    {
	/* The JSON is always expected to be utf-8, thus use utf functions
	 * here. The string is converted below if needed. */
	if (*p == NUL || p[1] == NUL
#ifdef FEAT_MBYTE
		|| utf_ptr2len(p) < utf_byte2len(*p)
#endif
		)
	{
	    /* Not enough bytes to make a character or end of the string. Get
	     * more if possible. */
	    if (reader->js_fill == NULL)
		break;
	    len = (int)(reader->js_end - p);
	    reader->js_used = (int)(p - reader->js_buf);
	    if (!reader->js_fill(reader))
		break; /* didn't get more */
	    p = reader->js_buf + reader->js_used;
	    reader->js_end = reader->js_buf + STRLEN(reader->js_buf);
	    continue;
	}

	if (*p == '\\')
	{
	    c = -1;
	    switch (p[1])
	    {
		case '\\': c = '\\'; break;
		case '"': c = '"'; break;
		case 'b': c = BS; break;
		case 't': c = TAB; break;
		case 'n': c = NL; break;
		case 'f': c = FF; break;
		case 'r': c = CAR; break;
		case 'u':
		    if (reader->js_fill != NULL
				     && (int)(reader->js_end - p) < NUMBUFLEN)
		    {
			reader->js_used = (int)(p - reader->js_buf);
			if (reader->js_fill(reader))
			{
			    p = reader->js_buf + reader->js_used;
			    reader->js_end = reader->js_buf
						     + STRLEN(reader->js_buf);
			}
		    }
		    nr = 0;
		    len = 0;
		    vim_str2nr(p + 2, NULL, &len,
				     STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4);
		    p += len + 2;
		    if (0xd800 <= nr && nr <= 0xdfff
			    && (int)(reader->js_end - p) >= 6
			    && *p == '\\' && *(p+1) == 'u')
		    {
			varnumber_T	nr2 = 0;

			/* decode surrogate pair: \ud812\u3456 */
			len = 0;
			vim_str2nr(p + 2, NULL, &len,
				     STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4);
			if (0xdc00 <= nr2 && nr2 <= 0xdfff)
			{
			    p += len + 2;
			    nr = (((nr - 0xd800) << 10) |
				((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
			}
		    }
		    if (res != NULL)
		    {
#ifdef FEAT_MBYTE
			char_u	buf[NUMBUFLEN];
			buf[utf_char2bytes((int)nr, buf)] = NUL;
			ga_concat(&ga, buf);
#else
			ga_append(&ga, (int)nr);
#endif
		    }
		    break;
		default:
		    /* not a special char, skip over \ */
		    ++p;
		    continue;
	    }
	    if (c > 0)
	    {
		p += 2;
		if (res != NULL)
		    ga_append(&ga, c);
	    }
	}
	else
	{
#ifdef FEAT_MBYTE
	    len = utf_ptr2len(p);
#else
	    len = 1;
#endif
	    if (res != NULL)
	    {
		if (ga_grow(&ga, len) == FAIL)
		{
		    ga_clear(&ga);
		    return FAIL;
		}
		mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len);
		ga.ga_len += len;
	    }
	    p += len;
	}
    }

    reader->js_used = (int)(p - reader->js_buf);
    if (*p == quote)
    {
	++reader->js_used;
	if (res != NULL)
	{
	    ga_append(&ga, NUL);
	    res->v_type = VAR_STRING;
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
	    if (!enc_utf8)
	    {
		vimconv_T   conv;

		/* Convert the utf-8 string to 'encoding'. */
		conv.vc_type = CONV_NONE;
		convert_setup(&conv, (char_u*)"utf-8", p_enc);
		if (conv.vc_type != CONV_NONE)
		{
		    res->vval.v_string =
				      string_convert(&conv, ga.ga_data, NULL);
		    vim_free(ga.ga_data);
		}
		convert_setup(&conv, NULL, NULL);
	    }
	    else
#endif
		res->vval.v_string = ga.ga_data;
	}
	return OK;
    }
    if (res != NULL)
    {
	res->v_type = VAR_SPECIAL;
	res->vval.v_number = VVAL_NONE;
	ga_clear(&ga);
    }
    return MAYBE;
}

예제 #4

0

파일 보기

파일: digraph.c 프로젝트: AdnoC/neovim

/// Lookup the pair "char1", "char2" in the digraph tables.
///
/// @param char1
/// @param char2
/// @param meta_char
///
/// @return If no match, return "char2". If "meta_char" is TRUE and "char1"
//          is a space, return "char2" | 0x80.
static int getexactdigraph(int char1, int char2, int meta_char)
{
  int retval = 0;

  if (IS_SPECIAL(char1) || IS_SPECIAL(char2)) {
    return char2;
  }

  // Search user digraphs first.
  digr_T *dp = (digr_T *)user_digraphs.ga_data;
  for (int i = 0; i < user_digraphs.ga_len; ++i) {
    if (((int) dp->char1 == char1) && ((int) dp->char2 == char2)) {
      retval = dp->result;
      break;
    }
    ++dp;
  }

  // Search default digraphs.
  if (retval == 0) {
    dp = digraphdefault;

    for (int i = 0; dp->char1 != 0; ++i) {
      if (((int) dp->char1 == char1) && ((int) dp->char2 == char2)) {
        retval = dp->result;
        break;
      }
      ++dp;
    }
  }

  if ((retval != 0) && !enc_utf8) {
    char_u buf[6], *to;
    vimconv_T vc;

    // Convert the Unicode digraph to 'encoding'.
    int i = utf_char2bytes(retval, buf);
    retval = 0;
    vc.vc_type = CONV_NONE;

    if (convert_setup(&vc, (char_u *)"utf-8", p_enc) == OK) {
      vc.vc_fail = true;
      assert(i >= 0);
      size_t len = (size_t)i;
      to = string_convert(&vc, buf, &len);

      if (to != NULL) {
        retval = (*mb_ptr2char)(to);
        xfree(to);
      }
      (void)convert_setup(&vc, NULL, NULL);
    }
  }

  // Ignore multi-byte characters when not in multi-byte mode.
  if (!has_mbyte && (retval > 0xff)) {
    retval = 0;
  }

  if (retval == 0) {
    // digraph deleted or not found
    if ((char1 == ' ') && meta_char) {
      // <space> <char> --> meta-char
      return char2 | 0x80;
    }
    return char2;
  }
  return retval;
}

예제 #5

0

파일 보기

파일: charset.c 프로젝트: ashleyh/neovim

/// Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
/// current locale.
///
/// When "buf" is NULL returns an allocated string (NULL for out-of-memory).
/// Otherwise puts the result in "buf[buflen]".
///
/// @param str
/// @param orglen
/// @param buf
/// @param buflen
///
/// @return converted string.
char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen)
{
  garray_T ga;
  int i;
  int len = orglen;

#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
#define GA_PTR(i) ((char_u *)ga.ga_data + i)
#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)

  // Copy "str" into "buf" or allocated memory, unmodified.
  if (buf == NULL) {
    ga_init(&ga, 1, 10);

    if (ga_grow(&ga, len + 1) == FAIL) {
      return NULL;
    }
    memmove(ga.ga_data, str, (size_t)len);
    ga.ga_len = len;
  } else {
    if (len >= buflen) {
      // Ugly!
      len = buflen - 1;
    }
    memmove(buf, str, (size_t)len);
  }

  if (buf == NULL) {
    GA_CHAR(len) = NUL;
  } else {
    buf[len] = NUL;
  }

  // Make each character lower case.
  i = 0;
  while (STR_CHAR(i) != NUL) {
    if (enc_utf8 || (has_mbyte && (MB_BYTE2LEN(STR_CHAR(i)) > 1))) {
      if (enc_utf8) {
        int c = utf_ptr2char(STR_PTR(i));
        int olen = utf_ptr2len(STR_PTR(i));
        int lc = utf_tolower(c);

        // Only replace the character when it is not an invalid
        // sequence (ASCII character or more than one byte) and
        // utf_tolower() doesn't return the original character.
        if (((c < 0x80) || (olen > 1)) && (c != lc)) {
          int nlen = utf_char2len(lc);

          // If the byte length changes need to shift the following
          // characters forward or backward.
          if (olen != nlen) {
            if (nlen > olen) {
              if ((buf == NULL)
                  ? (ga_grow(&ga, nlen - olen + 1) == FAIL)
                  : (len + nlen - olen >= buflen)) {
                // out of memory, keep old char
                lc = c;
                nlen = olen;
              }
            }

            if (olen != nlen) {
              if (buf == NULL) {
                STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
                ga.ga_len += nlen - olen;
              } else {
                STRMOVE(buf + i + nlen, buf + i + olen);
                len += nlen - olen;
              }
            }
          }
          (void)utf_char2bytes(lc, STR_PTR(i));
        }
      }

      // skip to next multi-byte char
      i += (*mb_ptr2len)(STR_PTR(i));
    } else {
      if (buf == NULL) {
        GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
      } else {
        buf[i] = TOLOWER_LOC(buf[i]);
      }
      ++i;
    }
  }

  if (buf == NULL) {
    return (char_u *)ga.ga_data;
  }
  return buf;
}

예제 #6

0

파일 보기

파일: charset.c 프로젝트: dougfales/macvim

/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]".
 */
    char_u *
str_foldcase(
    char_u	*str,
    int		orglen,
    char_u	*buf,
    int		buflen)
{
    garray_T	ga;
    int		i;
    int		len = orglen;

#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
	ga_init2(&ga, 1, 10);
	if (ga_grow(&ga, len + 1) == FAIL)
	    return NULL;
	mch_memmove(ga.ga_data, str, (size_t)len);
	ga.ga_len = len;
    }
    else
    {
	if (len >= buflen)	    /* Ugly! */
	    len = buflen - 1;
	mch_memmove(buf, str, (size_t)len);
    }
    if (buf == NULL)
	GA_CHAR(len) = NUL;
    else
	buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
	if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
	{
	    if (enc_utf8)
	    {
		int	c = utf_ptr2char(STR_PTR(i));
		int	olen = utf_ptr2len(STR_PTR(i));
		int	lc = utf_tolower(c);

		/* Only replace the character when it is not an invalid
		 * sequence (ASCII character or more than one byte) and
		 * utf_tolower() doesn't return the original character. */
		if ((c < 0x80 || olen > 1) && c != lc)
		{
		    int	    nlen = utf_char2len(lc);

		    /* If the byte length changes need to shift the following
		     * characters forward or backward. */
		    if (olen != nlen)
		    {
			if (nlen > olen)
			{
			    if (buf == NULL
				    ? ga_grow(&ga, nlen - olen + 1) == FAIL
				    : len + nlen - olen >= buflen)
			    {
				/* out of memory, keep old char */
				lc = c;
				nlen = olen;
			    }
			}
			if (olen != nlen)
			{
			    if (buf == NULL)
			    {
				STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
				ga.ga_len += nlen - olen;
			    }
			    else
			    {
				STRMOVE(buf + i + nlen, buf + i + olen);
				len += nlen - olen;
			    }
			}
		    }
		    (void)utf_char2bytes(lc, STR_PTR(i));
		}
	    }
	    /* skip to next multi-byte char */
	    i += (*mb_ptr2len)(STR_PTR(i));
	}
	else
#endif
	{
	    if (buf == NULL)
		GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
	    else
		buf[i] = TOLOWER_LOC(buf[i]);
	    ++i;
	}
    }

    if (buf == NULL)
	return (char_u *)ga.ga_data;
    return buf;
}