/* utf32 index => original byte offset */ int UTF8Converter::getOriginOffset(int origIdx) { uint offs = 0; while (origIdx > 0 && offs < _str.size()) { // increment if it's not the start of a utf8 sequence (void)( (++offs < _str.size() && isutf(_str[offs])) || (++offs < _str.size() && isutf(_str[offs])) || (++offs < _str.size() && isutf(_str[offs])) || ++offs); origIdx--; } return offs; }
/* conversions without error checking only works for valid UTF-8, i.e. no 5- or 6-byte sequences srcsz = source size in bytes sz = dest size in # of wide characters returns # characters converted if sz == srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space. */ size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz) { uint32_t ch; const char *src_end = src + srcsz; size_t nb; size_t i=0; if (sz == 0 || srcsz == 0) return 0; while (i < sz) { if (!isutf(*src)) { // invalid sequence dest[i++] = 0xFFFD; src++; if (src >= src_end) break; continue; } nb = trailingBytesForUTF8[(unsigned char)*src]; if (src + nb >= src_end) break; ch = 0; switch (nb) { /* these fall through deliberately */ case 5: ch += (unsigned char)*src++; ch <<= 6; case 4: ch += (unsigned char)*src++; ch <<= 6; case 3: ch += (unsigned char)*src++; ch <<= 6; case 2: ch += (unsigned char)*src++; ch <<= 6; case 1: ch += (unsigned char)*src++; ch <<= 6; case 0: ch += (unsigned char)*src++; } ch -= offsetsFromUTF8[nb]; dest[i++] = ch; } return i; }
/*
 * Readline key callback: move the cursor (rl_point) one character to the
 * right.
 *
 * A '\n' in the line buffer is stepped over together with the prompt_length
 * bytes that follow it (presumably the continuation-line padding; verify
 * against the code that inserts it). When locale_is_utf8 is set, the cursor
 * keeps advancing while it rests on a byte for which isutf() is false, so it
 * does not stop inside a multi-byte character.
 *
 * The cursor is never left beyond rl_end. count and key are unused.
 * Always returns 0.
 */
static int right_callback(int count, int key)
{
    reset_indent();
    do {
        rl_point += (rl_line_buffer[rl_point] == '\n') ? prompt_length + 1 : 1;
        /* Test the bound first: once rl_point has reached rl_end there is
           nothing valid left to inspect in the buffer. */
    } while (rl_point < rl_end && locale_is_utf8 &&
             !isutf(rl_line_buffer[rl_point]));

    /* A newline jump (or a step taken at end of line) may overshoot. */
    if (rl_end < rl_point)
        rl_point = rl_end;
    return 0;
}
/*
 * Readline key callback: move the cursor (rl_point) one character to the
 * left.
 *
 * i is the value of line_start(rl_point) (defined elsewhere; presumably the
 * index of the first byte of the line containing the cursor). On the first
 * line (i == 0), or when the cursor is more than prompt_length bytes into
 * the line, the cursor moves back one byte; otherwise it jumps to i-1, i.e.
 * just before the start of the current line. When locale_is_utf8 is set the
 * step is repeated while the cursor rests on a byte for which isutf() is
 * false.
 *
 * The cursor never moves below max(i-1, 0), so malformed input at the very
 * start of the buffer cannot drive rl_point negative. count and key are
 * unused. Always returns 0.
 */
static int left_callback(int count, int key)
{
    reset_indent();
    if (rl_point > 0) {
        int i = line_start(rl_point);
        /* lowest index the cursor may reach */
        const int floor_idx = (i > 0) ? i - 1 : 0;

        do {
            rl_point = (i == 0 || rl_point - i > prompt_length) ? rl_point - 1
                                                                : i - 1;
            /* Bound check comes first so the buffer is never read at a
               negative index. */
        } while (rl_point > floor_idx && locale_is_utf8 &&
                 !isutf(rl_line_buffer[rl_point]));
    }
    return 0;
}
/*
 * Readline key callback: delete the character under the cursor.
 *
 * Starting from rl_point, finds the end of the character to delete: a '\n'
 * is taken together with the prompt_length bytes that follow it (presumably
 * the continuation-line padding; verify against the code that inserts it),
 * and when locale_is_utf8 is set the end keeps advancing while it rests on a
 * byte for which isutf() is false. The range [rl_point, end) is then removed
 * with rl_delete_text(); the end is clamped to rl_end.
 *
 * count and key are unused. Always returns 0.
 */
static int delete_callback(int count, int key)
{
    reset_indent();
    int j = rl_point;
    do {
        j += (rl_line_buffer[j] == '\n') ? prompt_length + 1 : 1;
        /* Test the bound first: once j has reached rl_end there is nothing
           valid left to inspect in the buffer. */
    } while (j < rl_end && locale_is_utf8 && !isutf(rl_line_buffer[j]));

    /* A newline jump (or a step taken at end of line) may overshoot. */
    if (rl_end < j)
        j = rl_end;
    rl_delete_text(rl_point, j);
    return 0;
}
/*
 * Reads the next UTF-8 sequence out of a string and returns its value,
 * advancing *i past the bytes that were consumed.
 *
 * At least one byte is always consumed, even if it is the terminating NUL.
 * Further bytes are consumed until the next byte is NUL or isutf() reports
 * true for it. Each consumed byte is folded into the result six bits at a
 * time, and the entry of offsetsFromUTF8 for the consumed length is then
 * subtracted.
 *
 * NOTE(review): the number of consumed bytes is not capped, so a long run of
 * bytes for which isutf() is false indexes offsetsFromUTF8 with an
 * arbitrarily large value - confirm the table size / input guarantees.
 */
uint32 UTF8Converter::nextchar(const char *s, int *i) {
	uint32 codepoint = 0;
	int consumed = 0;

	for (;;) {
		// Make room for the next byte and fold it in.
		codepoint <<= 6;
		codepoint += (byte)s[*i];
		++(*i);
		++consumed;

		// Stop at the terminator or at the start of the next sequence.
		if (!s[*i] || isutf(s[*i]))
			break;
	}

	codepoint -= offsetsFromUTF8[consumed - 1];
	return codepoint;
}
static int backspace_callback(int count, int key) { reset_indent(); if (!rl_point) return 0; int i = line_start(rl_point), j = rl_point, k; if (!i || rl_point <= i + prompt_length) goto backspace; for (k = i; k < rl_point; k++) if (rl_line_buffer[k] != ' ') goto backspace; //unindent: k = i + prompt_length; do { rl_point--; } while ((rl_point - k) % tab_width); goto finish; backspace: do { rl_point = (i == 0 || rl_point-i > prompt_length) ? rl_point-1 : i-1; } while (locale_is_utf8 && !isutf(rl_line_buffer[rl_point]) && rl_point > i-1); finish: rl_delete_text(rl_point, j); return 0; }
/*
 * Decrements *i to point to the previous unicode glyph in s.
 *
 * *i is moved back one byte at a time. After each of the first three steps
 * the byte now under *i is tested with isutf(); the walk stops at the first
 * byte for which it is true. If none of the three qualifies, *i is moved
 * back a fourth time without any test.
 *
 * No lower bound is checked: the caller must make sure enough bytes precede
 * the incoming *i.
 */
void u8_dec(char *s, int *i)
{
    int step;

    for (step = 0; step < 3; ++step) {
        --(*i);
        if (isutf(s[*i]))
            return;
    }

    /* Three non-start bytes seen: the fourth byte back is taken as the
       start without inspecting it. */
    --(*i);
}