char_u *add_char2buf(int c, char_u *s) { char_u temp[MB_MAXBYTES + 1]; const int len = utf_char2bytes(c, temp); for (int i = 0; i < len; ++i) { c = temp[i]; // Need to escape K_SPECIAL and CSI like in the typeahead buffer. if (c == K_SPECIAL) { *s++ = K_SPECIAL; *s++ = KS_SPECIAL; *s++ = KE_FILLER; } else { *s++ = c; } } return s; }
static void write_string(garray_T *gap, char_u *str) { char_u *res = str; char_u numbuf[NUMBUFLEN]; if (res == NULL) ga_concat(gap, (char_u *)"\"\""); else { #if defined(FEAT_MBYTE) && defined(USE_ICONV) vimconv_T conv; char_u *converted = NULL; if (!enc_utf8) { /* Convert the text from 'encoding' to utf-8, the JSON string is * always utf-8. */ conv.vc_type = CONV_NONE; convert_setup(&conv, p_enc, (char_u*)"utf-8"); if (conv.vc_type != CONV_NONE) converted = res = string_convert(&conv, res, NULL); convert_setup(&conv, NULL, NULL); } #endif ga_append(gap, '"'); while (*res != NUL) { int c; #ifdef FEAT_MBYTE /* always use utf-8 encoding, ignore 'encoding' */ c = utf_ptr2char(res); #else c = *res; #endif switch (c) { case 0x08: ga_append(gap, '\\'); ga_append(gap, 'b'); break; case 0x09: ga_append(gap, '\\'); ga_append(gap, 't'); break; case 0x0a: ga_append(gap, '\\'); ga_append(gap, 'n'); break; case 0x0c: ga_append(gap, '\\'); ga_append(gap, 'f'); break; case 0x0d: ga_append(gap, '\\'); ga_append(gap, 'r'); break; case 0x22: /* " */ case 0x5c: /* \ */ ga_append(gap, '\\'); ga_append(gap, c); break; default: if (c >= 0x20) { #ifdef FEAT_MBYTE numbuf[utf_char2bytes(c, numbuf)] = NUL; #else numbuf[0] = c; numbuf[1] = NUL; #endif ga_concat(gap, numbuf); } else { vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx", (long)c); ga_concat(gap, numbuf); } } #ifdef FEAT_MBYTE res += utf_ptr2len(res); #else ++res; #endif } ga_append(gap, '"'); #if defined(FEAT_MBYTE) && defined(USE_ICONV) vim_free(converted); #endif } }
static int json_decode_string(js_read_T *reader, typval_T *res, int quote) { garray_T ga; int len; char_u *p; int c; varnumber_T nr; if (res != NULL) ga_init2(&ga, 1, 200); p = reader->js_buf + reader->js_used + 1; /* skip over " or ' */ while (*p != quote) { /* The JSON is always expected to be utf-8, thus use utf functions * here. The string is converted below if needed. */ if (*p == NUL || p[1] == NUL #ifdef FEAT_MBYTE || utf_ptr2len(p) < utf_byte2len(*p) #endif ) { /* Not enough bytes to make a character or end of the string. Get * more if possible. */ if (reader->js_fill == NULL) break; len = (int)(reader->js_end - p); reader->js_used = (int)(p - reader->js_buf); if (!reader->js_fill(reader)) break; /* didn't get more */ p = reader->js_buf + reader->js_used; reader->js_end = reader->js_buf + STRLEN(reader->js_buf); continue; } if (*p == '\\') { c = -1; switch (p[1]) { case '\\': c = '\\'; break; case '"': c = '"'; break; case 'b': c = BS; break; case 't': c = TAB; break; case 'n': c = NL; break; case 'f': c = FF; break; case 'r': c = CAR; break; case 'u': if (reader->js_fill != NULL && (int)(reader->js_end - p) < NUMBUFLEN) { reader->js_used = (int)(p - reader->js_buf); if (reader->js_fill(reader)) { p = reader->js_buf + reader->js_used; reader->js_end = reader->js_buf + STRLEN(reader->js_buf); } } nr = 0; len = 0; vim_str2nr(p + 2, NULL, &len, STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4); p += len + 2; if (0xd800 <= nr && nr <= 0xdfff && (int)(reader->js_end - p) >= 6 && *p == '\\' && *(p+1) == 'u') { varnumber_T nr2 = 0; /* decode surrogate pair: \ud812\u3456 */ len = 0; vim_str2nr(p + 2, NULL, &len, STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4); if (0xdc00 <= nr2 && nr2 <= 0xdfff) { p += len + 2; nr = (((nr - 0xd800) << 10) | ((nr2 - 0xdc00) & 0x3ff)) + 0x10000; } } if (res != NULL) { #ifdef FEAT_MBYTE char_u buf[NUMBUFLEN]; buf[utf_char2bytes((int)nr, buf)] = NUL; ga_concat(&ga, buf); #else ga_append(&ga, (int)nr); #endif } break; default: /* not a special char, skip over \ */ ++p; continue; } if (c > 0) { p += 2; if (res != NULL) ga_append(&ga, c); } } else { #ifdef FEAT_MBYTE len = utf_ptr2len(p); #else len = 1; #endif if (res != NULL) { if (ga_grow(&ga, len) == FAIL) { ga_clear(&ga); return FAIL; } mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len); ga.ga_len += len; } p += len; } } reader->js_used = (int)(p - reader->js_buf); if (*p == quote) { ++reader->js_used; if (res != NULL) { ga_append(&ga, NUL); res->v_type = VAR_STRING; #if defined(FEAT_MBYTE) && defined(USE_ICONV) if (!enc_utf8) { vimconv_T conv; /* Convert the utf-8 string to 'encoding'. */ conv.vc_type = CONV_NONE; convert_setup(&conv, (char_u*)"utf-8", p_enc); if (conv.vc_type != CONV_NONE) { res->vval.v_string = string_convert(&conv, ga.ga_data, NULL); vim_free(ga.ga_data); } convert_setup(&conv, NULL, NULL); } else #endif res->vval.v_string = ga.ga_data; } return OK; } if (res != NULL) { res->v_type = VAR_SPECIAL; res->vval.v_number = VVAL_NONE; ga_clear(&ga); } return MAYBE; }
/// Lookup the pair "char1", "char2" in the digraph tables. /// /// @param char1 /// @param char2 /// @param meta_char /// /// @return If no match, return "char2". If "meta_char" is TRUE and "char1" // is a space, return "char2" | 0x80. static int getexactdigraph(int char1, int char2, int meta_char) { int retval = 0; if (IS_SPECIAL(char1) || IS_SPECIAL(char2)) { return char2; } // Search user digraphs first. digr_T *dp = (digr_T *)user_digraphs.ga_data; for (int i = 0; i < user_digraphs.ga_len; ++i) { if (((int) dp->char1 == char1) && ((int) dp->char2 == char2)) { retval = dp->result; break; } ++dp; } // Search default digraphs. if (retval == 0) { dp = digraphdefault; for (int i = 0; dp->char1 != 0; ++i) { if (((int) dp->char1 == char1) && ((int) dp->char2 == char2)) { retval = dp->result; break; } ++dp; } } if ((retval != 0) && !enc_utf8) { char_u buf[6], *to; vimconv_T vc; // Convert the Unicode digraph to 'encoding'. int i = utf_char2bytes(retval, buf); retval = 0; vc.vc_type = CONV_NONE; if (convert_setup(&vc, (char_u *)"utf-8", p_enc) == OK) { vc.vc_fail = true; assert(i >= 0); size_t len = (size_t)i; to = string_convert(&vc, buf, &len); if (to != NULL) { retval = (*mb_ptr2char)(to); xfree(to); } (void)convert_setup(&vc, NULL, NULL); } } // Ignore multi-byte characters when not in multi-byte mode. if (!has_mbyte && (retval > 0xff)) { retval = 0; } if (retval == 0) { // digraph deleted or not found if ((char1 == ' ') && meta_char) { // <space> <char> --> meta-char return char2 | 0x80; } return char2; } return retval; }
/// Convert the string "str[orglen]" to do ignore-case comparing. Uses the /// current locale. /// /// When "buf" is NULL returns an allocated string (NULL for out-of-memory). /// Otherwise puts the result in "buf[buflen]". /// /// @param str /// @param orglen /// @param buf /// @param buflen /// /// @return converted string. char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen) { garray_T ga; int i; int len = orglen; #define GA_CHAR(i) ((char_u *)ga.ga_data)[i] #define GA_PTR(i) ((char_u *)ga.ga_data + i) #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i]) #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i) // Copy "str" into "buf" or allocated memory, unmodified. if (buf == NULL) { ga_init(&ga, 1, 10); if (ga_grow(&ga, len + 1) == FAIL) { return NULL; } memmove(ga.ga_data, str, (size_t)len); ga.ga_len = len; } else { if (len >= buflen) { // Ugly! len = buflen - 1; } memmove(buf, str, (size_t)len); } if (buf == NULL) { GA_CHAR(len) = NUL; } else { buf[len] = NUL; } // Make each character lower case. i = 0; while (STR_CHAR(i) != NUL) { if (enc_utf8 || (has_mbyte && (MB_BYTE2LEN(STR_CHAR(i)) > 1))) { if (enc_utf8) { int c = utf_ptr2char(STR_PTR(i)); int olen = utf_ptr2len(STR_PTR(i)); int lc = utf_tolower(c); // Only replace the character when it is not an invalid // sequence (ASCII character or more than one byte) and // utf_tolower() doesn't return the original character. if (((c < 0x80) || (olen > 1)) && (c != lc)) { int nlen = utf_char2len(lc); // If the byte length changes need to shift the following // characters forward or backward. if (olen != nlen) { if (nlen > olen) { if ((buf == NULL) ? (ga_grow(&ga, nlen - olen + 1) == FAIL) : (len + nlen - olen >= buflen)) { // out of memory, keep old char lc = c; nlen = olen; } } if (olen != nlen) { if (buf == NULL) { STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen); ga.ga_len += nlen - olen; } else { STRMOVE(buf + i + nlen, buf + i + olen); len += nlen - olen; } } } (void)utf_char2bytes(lc, STR_PTR(i)); } } // skip to next multi-byte char i += (*mb_ptr2len)(STR_PTR(i)); } else { if (buf == NULL) { GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i)); } else { buf[i] = TOLOWER_LOC(buf[i]); } ++i; } } if (buf == NULL) { return (char_u *)ga.ga_data; } return buf; }
/* * Convert the string "str[orglen]" to do ignore-case comparing. Uses the * current locale. * When "buf" is NULL returns an allocated string (NULL for out-of-memory). * Otherwise puts the result in "buf[buflen]". */ char_u * str_foldcase( char_u *str, int orglen, char_u *buf, int buflen) { garray_T ga; int i; int len = orglen; #define GA_CHAR(i) ((char_u *)ga.ga_data)[i] #define GA_PTR(i) ((char_u *)ga.ga_data + i) #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i]) #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i) /* Copy "str" into "buf" or allocated memory, unmodified. */ if (buf == NULL) { ga_init2(&ga, 1, 10); if (ga_grow(&ga, len + 1) == FAIL) return NULL; mch_memmove(ga.ga_data, str, (size_t)len); ga.ga_len = len; } else { if (len >= buflen) /* Ugly! */ len = buflen - 1; mch_memmove(buf, str, (size_t)len); } if (buf == NULL) GA_CHAR(len) = NUL; else buf[len] = NUL; /* Make each character lower case. */ i = 0; while (STR_CHAR(i) != NUL) { #ifdef FEAT_MBYTE if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1)) { if (enc_utf8) { int c = utf_ptr2char(STR_PTR(i)); int olen = utf_ptr2len(STR_PTR(i)); int lc = utf_tolower(c); /* Only replace the character when it is not an invalid * sequence (ASCII character or more than one byte) and * utf_tolower() doesn't return the original character. */ if ((c < 0x80 || olen > 1) && c != lc) { int nlen = utf_char2len(lc); /* If the byte length changes need to shift the following * characters forward or backward. */ if (olen != nlen) { if (nlen > olen) { if (buf == NULL ? ga_grow(&ga, nlen - olen + 1) == FAIL : len + nlen - olen >= buflen) { /* out of memory, keep old char */ lc = c; nlen = olen; } } if (olen != nlen) { if (buf == NULL) { STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen); ga.ga_len += nlen - olen; } else { STRMOVE(buf + i + nlen, buf + i + olen); len += nlen - olen; } } } (void)utf_char2bytes(lc, STR_PTR(i)); } } /* skip to next multi-byte char */ i += (*mb_ptr2len)(STR_PTR(i)); } else #endif { if (buf == NULL) GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i)); else buf[i] = TOLOWER_LOC(buf[i]); ++i; } } if (buf == NULL) return (char_u *)ga.ga_data; return buf; }