/* Computes where the byte offset `pp` of `str` ends up when the text is laid
 * out with word wrapping inside a column `width` units wide.
 *
 *   width      - available line width, compared against textwidth() results
 *   pp         - byte offset into str whose position is wanted
 *   lineheight - amount added to y for every line break
 *   str/length - text buffer and its length in bytes
 *   outx/outy  - receive the resulting x and y offsets
 *
 * The text is processed in segments that end at a space, a newline or the end
 * of the buffer. NOTE(review): textwidth()/textfit()/utf8_len() are defined
 * elsewhere; they are presumed to return a pixel width, a byte count that fits
 * in the given width, and the byte length of one UTF-8 character - confirm.
 */
static void textxy(int width, uint16_t pp, uint16_t lineheight, char_t *str, uint16_t length, int *outx, int *outy)
{
    int x = 0, y = 0;
    /* a: scan cursor, b: start of the pending segment, p: target position */
    char_t *a = str, *b = str, *end = str + length, *p = str + pp;
    while(1) {
        if(a == end || *a == '\n' || *a == ' ') {
            int count = a - b, w = textwidth(b, a - b);
            /* The pending segment does not fit on the current line. */
            while(x + w > width) {
                if(x == 0) {
                    /* Already at the start of a line: split the segment. */
                    int fit = textfit(b, count, width);
                    if(p >= b && p < b + fit) {
                        /* The target lies in the part that stays on this line. */
                        break;
                    }
                    count -= fit;
                    b += fit;
                    y += lineheight;
                } else {
                    /* Wrap to the next line and drop the first character of the
                     * segment (b was set to the separator that ended the
                     * previous segment). */
                    y += lineheight;
                    int l = utf8_len(b);
                    count -= l;
                    b += l;
                }
                x = 0;
                w = textwidth(b, count);
            }
            if(p >= b && p < b + count) {
                /* Target is inside this segment: measure only up to it and
                 * force the loop to finish. */
                w = textwidth(b, p - b);
                a = end;
            }
            x += w;
            if(a == end) {
                break;
            }
            b = a;
            if(*a == '\n') {
                if(p == a) {
                    /* Target is the newline itself: it stays on this line. */
                    break;
                }
                b += utf8_len(b);
                y += lineheight;
                x = 0;
            }
        }
        a += utf8_len(a);
    }
    *outx = x;
    *outy = y;
}
/* Handles a double click (or triple click when `triclick` is set) on an edit
 * field by selecting the run of text around the character under the mouse.
 * A double click stops at spaces, a triple click stops at newlines.
 *
 * Returns 0 when `edit` is not the active edit field, 1 after the selection
 * in edit_sel has been updated.
 */
_Bool edit_dclick(EDIT *edit, _Bool triclick)
{
    if(edit != active_edit) {
        return 0;
    }

    /* Clamp a stale hover position to the end of the text. */
    if(edit->mouseover_char > edit->length) {
        edit->mouseover_char = edit->length;
    }

    const char_t delim = triclick ? '\n' : ' ';
    STRING_IDX pos;

    /* Walk backwards, one UTF-8 character at a time, to the selection start. */
    for(pos = edit->mouseover_char; pos != 0 && edit->data[pos - 1] != delim; pos -= utf8_unlen(edit->data + pos)) {
    }
    edit_sel.start = edit_sel.p1 = pos;

    /* Walk forwards to the delimiter (or end of text) that closes the selection. */
    for(pos = edit->mouseover_char; pos != edit->length && edit->data[pos] != delim; pos += utf8_len(edit->data + pos)) {
    }
    edit_sel.p2 = pos;
    edit_sel.length = pos - edit_sel.start;

    return 1;
}
/// Concatenates the elements of `a`, inserting `sep` between neighbours.
///
/// @param a    strings to join; an empty array yields an empty String
/// @param sep  separator placed between consecutive elements
/// @return     the joined string, with s_len recomputed via utf8_len()
///
/// A String whose buffer (s_data) is null is treated as empty. Previously
/// only a[0] was guarded, so a null buffer in a later element or in `sep`
/// was handed straight to strcat().
String String::join(const Array<String>& a, const String& sep) const {
    if (!a.len()) {
        return String();
    }

    // Map a missing buffer to "" so strlen/memcpy never see a null pointer.
    auto text_of = [](const String& str) -> const char * {
        return str.s_data != nullptr ? str.s_data : "";
    };

    const size_t count = a.len();
    const char *sep_text = text_of(sep);
    const size_t sep_size = std::strlen(sep_text);

    // First pass: exact byte count of the result (without the terminator).
    size_t total = sep_size * (count - 1);
    for (size_t i = 0; i < count; i++) {
        const String& item = a[i];
        total += std::strlen(text_of(item));
    }

    // Second pass: append through a moving cursor instead of strcat(), which
    // would rescan the destination from the start on every call.
    String s;
    s.grow(total + 1);
    char *out = s.s_data;
    for (size_t i = 0; i < count; i++) {
        if (i != 0) {
            std::memcpy(out, sep_text, sep_size);
            out += sep_size;
        }
        const String& item = a[i];
        const char *piece = text_of(item);
        const size_t piece_size = std::strlen(piece);
        std::memcpy(out, piece, piece_size);
        out += piece_size;
    }
    *out = 0;

    s.s_len = utf8_len(s.s_data);
    return s;
}
/*
 * Converts the NUL-terminated UTF-8 string inStr into the wide-character
 * buffer outStr, which can hold `size` elements.
 *
 * If `result` is non-NULL it receives the value utf8_len() reports for the
 * input (narrowed to u_short), whether or not anything is copied.
 *
 * Returns  0 when outStr is NULL (length query only) or the whole string
 *            plus terminator fitted,
 *         -1 when inStr is NULL or the output had to be truncated; a
 *            truncated output is still terminated when size > 0.
 */
int dm_StrCopyOut2_U8toW (SQLCHAR * inStr, SQLWCHAR * outStr, size_t size, u_short * result)
{
  size_t needed;
  size_t written;

  if (!inStr)
    return -1;

  needed = utf8_len (inStr, SQL_NTS);

  if (result)
    *result = (u_short) needed;

  if (!outStr)
    return 0;

  if (size < needed + 1)
    {
      /* Not enough room: copy what fits and leave a slot for the terminator. */
      if (size > 0)
        {
          written = utf8towcs (inStr, outStr, size - 1);
          outStr[written] = L'\0';
        }
      return -1;
    }

  written = utf8towcs (inStr, outStr, size);
  outStr[written] = L'\0';
  return 0;
}
/* Computes the total height of `str` when it is word-wrapped into a column
 * `right` units wide, adding `lineheight` for every line.
 *
 * Returns the accumulated height (at least one lineheight), or 0 when
 * textfit() reports that nothing fits at the start of a line while there is
 * still content to place.
 *
 * NOTE(review): textwidth()/textfit()/utf8_len() are defined elsewhere; they
 * are presumed to return a pixel width, the byte count that fits in a width,
 * and the byte length of one UTF-8 character - confirm.
 */
int text_height(int right, uint16_t lineheight, char_t *str, uint16_t length)
{
    int x = 0, y = 0;
    /* a: scan cursor, b: start of the pending segment */
    char_t *a = str, *b = a, *end = a + length;
    while(1) {
        /* Segments end at a space, a newline or the end of the buffer. */
        if(a == end || *a == ' ' || *a == '\n') {
            int count = a - b, w = textwidth(b, count);
            while(x + w > right) {
                if(x == 0) {
                    /* Segment is wider than a whole line: split it. */
                    int fit = textfit(b, count, right);
                    count -= fit;
                    if(fit == 0 && (count != 0 || *b == '\n')) {
                        /* No progress is possible; bail out instead of looping. */
                        return 0;
                    }
                    b += fit;
                    y += lineheight;
                } else {
                    /* Wrap, dropping the first character of the segment (the
                     * separator that ended the previous segment). */
                    y += lineheight;
                    int l = utf8_len(b);
                    count -= l;
                    b += l;
                }
                x = 0;
                w = textwidth(b, count);
            }
            x += w;
            b = a;
            if(a == end) {
                break;
            }
            if(*a == '\n') {
                /* Explicit line break. */
                y += lineheight;
                b += utf8_len(b);
                x = 0;
            }
        }
        a += utf8_len(a);
    }
    /* Account for the last (possibly only) line. */
    y += lineheight;
    return y;
}
/// Builds a String from a std::string by copying its bytes together with the
/// terminating NUL into a freshly allocated buffer of exactly that size.
/// s_len is then derived from the copied buffer via utf8_len().
String::String(const std::string& s) {
    // Capacity covers the payload plus the terminator.
    s_cap = s.size() + 1;
    s_data = new char[s_cap];

    const char *source = s.c_str();
    std::memcpy(s_data, source, s_cap);

    s_len = utf8_len(s_data);
}
/// Builds a String holding the single code point `code`.
/// The rune is placed in a zero-terminated two-element array, the buffer is
/// grown to the encoded length plus one, and the array is encoded into it.
String::String(rune code) {
    // Start from an empty, unallocated state so grow() does the allocation.
    s_data = nullptr;
    s_cap = 0;
    s_len = 0;

    rune sequence[2] = { code, 0 };
    grow(utf8_encoded_len(sequence) + 1);
    utf8_encode(sequence, s_data, s_cap);

    s_len = utf8_len(s_data);
}
/*
 * Thread body that shows a text on the VFD.
 *
 * `arg` points to a struct vfd_ioctl_data describing the text. The text is
 * copied into a local struct first. When it has more characters than
 * mode_digit it is scrolled: for every start position the display is cleared,
 * the window returned by utf8_count(..., 8) is shown and the thread sleeps
 * 200 ms. Afterwards the start of the text is shown again, or the display is
 * cleared when the text is empty.
 *
 * thread_stop is 0 while the thread runs and set to 1 on every exit path.
 *
 * Hardening compared to the previous version: the caller-supplied length is
 * clamped to the size of the local data buffer before copying, and the byte
 * count returned by utf8_count() is clamped to the size of `buf`, so neither
 * memcpy can write past its destination.
 */
void draw_thread(void *arg)
{
	struct vfd_ioctl_data *data;
	struct vfd_ioctl_data draw_data;
	unsigned char buf[17];
	int count = 0;
	int pos = 0;
	int countb;

	data = (struct vfd_ioctl_data *)arg;

	draw_data.length = data->length;
	/* Never copy more than the local buffer can hold. */
	if (draw_data.length > sizeof(draw_data.data))
		draw_data.length = sizeof(draw_data.data);

	memset(draw_data.data, 0, sizeof(draw_data.data));
	memcpy(draw_data.data, data->data, draw_data.length);

	thread_stop = 0;

	count = utf8_len(&draw_data.data[0], draw_data.length);

	if (count > mode_digit) {
		/* Text is longer than the display: scroll through it. */
		while (pos < draw_data.length) {
			if (kthread_should_stop()) {
				thread_stop = 1;
				return;
			}

			clear_display();
			memset(buf, 0, sizeof(buf));
			countb = utf8_count(&draw_data.data[pos], draw_data.length - pos, 8);
			/* Keep the last byte of buf as terminator. */
			if (countb > (int)sizeof(buf) - 1)
				countb = (int)sizeof(buf) - 1;
			if (countb < 0)
				countb = 0;
			memcpy(buf, &draw_data.data[pos], countb);
			YWPANEL_VFD_ShowString(buf);
			msleep(200);

			/*
			 * NOTE(review): every byte above 128 is stepped over as a
			 * two-byte sequence; longer UTF-8 sequences are not handled
			 * here - confirm against the character sets the panel supports.
			 */
			if (draw_data.data[pos] > 128)
				pos += 2;
			else
				pos += 1;
		}
	}

	if (count > 0) {
		/* Finish with the beginning of the text. */
		clear_display();
		memset(buf, 0, sizeof(buf));
		countb = utf8_count(&draw_data.data[0], draw_data.length, 8);
		if (countb > (int)sizeof(buf) - 1)
			countb = (int)sizeof(buf) - 1;
		if (countb < 0)
			countb = 0;
		memcpy(buf, draw_data.data, countb);
		YWPANEL_VFD_ShowString(buf);
	} else
		VFD_clr();

	thread_stop = 1;
}
std::wstring utf8string::to_wstring() const { // Currently no check for validity iconv_t converter = iconv_open((wchar_t_encoding + ignore_tag).c_str(), utf8_encoding.c_str()); size_t inSize = m_str.size() + 1; size_t outSize = (utf8_len(m_str.c_str()) + 1) * 2 * sizeof (wchar_t); std::vector<wchar_t> outBuf(outSize); char* outBufPtr = reinterpret_cast<char *>(data(outBuf)); const char* inBufPtr = m_str.c_str(); iconv(converter, &inBufPtr, &inSize, &outBufPtr, &outSize); iconv_close(converter); return std::wstring(outBuf.data()); // here goes additional copy }
//--------UTF-8字节序列合法性判断---------------------------------------- //功能: 判断给定的UTF-8字节序列是否合法 //参数: src 字节序列指针 // len 字节序列的长度 //返回: 0, 非法UTF-8字符 // 合法字符返回第一个UTF-8字符的字节数 //----------------------------------------------------------------- static u8 Utf8IsLegal(const char* src, s32 len) { u8 a; const char *trail; s32 length; if(len <= 0 /*|| src == NULL*/) goto __illegal; // 计算合法UTF-8字符的字节数 length = utf8_len(*src); // 判断长度 if(length > len) goto __illegal; // 指向最后一个字节的后一个字节 trail = (src + length); // 判断编码是否正确 switch(length){ default: goto __illegal; case 6: if ((a = (*--trail)) < 0x80 || a > 0xBF) goto __illegal; case 5: if ((a = (*--trail)) < 0x80 || a > 0xBF) goto __illegal; case 4: if ((a = (*--trail)) < 0x80 || a > 0xBF) goto __illegal; case 3: if ((a = (*--trail)) < 0x80 || a > 0xBF) goto __illegal; case 2: if ((a = (*--trail)) > 0xBF) goto __illegal; // 判断第二个字节(必定在0x80 - 0xBF之间) switch(*src){ case (char)0xE0: if (a < 0xA0) goto __illegal; break; case (char)0xED: if (a > 0x9F) goto __illegal; break; case (char)0xF0: if (a < 0x90) goto __illegal; break; case (char)0xF8: if (a < 0x88) goto __illegal; break; case (char)0xFC: if (a < 0x84) goto __illegal; break; default: if (a < 0x80) goto __illegal; } // 判断多字节UTF-8字符的第一个字节的范围 case 1: if (*src >= 0x80 && *src < 0xC0) goto __illegal; } // 判断第一个字节 if (*src > 0xFD) goto __illegal; return length; __illegal: return 0; }
/// Builds a String from a zero-terminated array of runes.
/// A null pointer produces an empty string backed by a buffer of
/// kSmallestString bytes; otherwise the buffer is grown to the encoded
/// length plus one and the runes are encoded into it.
String::String(const rune *r) {
    if (r != nullptr) {
        // Start unallocated so grow() performs the allocation.
        s_data = nullptr;
        s_cap = 0;
        s_len = 0;

        grow(utf8_encoded_len(r) + 1);
        utf8_encode(r, s_data, s_cap);
        s_len = utf8_len(s_data);
        return;
    }

    // No input: minimal buffer holding just the terminator.
    s_len = 0;
    s_cap = kSmallestString;
    s_data = new char[s_cap];
    s_data[0] = 0;
}
/* Decodes the UTF-8 string `p` into a newly allocated, zero-terminated array
 * of 32-bit code units obtained from tre_new().
 *
 *   p    - input string; utf8_len(p) determines how many items are decoded
 *   len  - receives the number of decoded items (terminator not counted)
 *
 * Returns the array allocated with tre_new().
 */
uint32_t* u8str_to_u32str(const char* p, int* len) {
    const int total = utf8_len(p);
    uint32_t *out = tre_new(uint32_t, total + 1);
    int decoded;
    int idx = 0;

    while (idx < total) {
        p = utf8_decode(p, &decoded);
        out[idx] = (uint32_t)decoded;
        idx++;
    }

    *len = total;
    out[idx] = 0;
    return out;
}
/// Builds a String from a NUL-terminated C string.
/// A null pointer produces an empty string backed by a buffer of
/// kSmallestString bytes; otherwise the text and its terminator are copied
/// into a buffer of exactly strlen(s) + 1 bytes.
String::String(const char *s) {
    if (s != nullptr) {
        s_cap = std::strlen(s) + 1;
        s_data = new char[s_cap];
        std::memcpy(s_data, s, s_cap);
        s_len = utf8_len(s_data);
        return;
    }

    // No input: minimal buffer holding just the terminator.
    s_len = 0;
    s_cap = kSmallestString;
    s_data = new char[s_cap];
    s_data[0] = 0;
}
/*
 * Returns a malloc()ed wide-character copy of the NUL-terminated UTF-8
 * string `str`, or NULL when `str` is NULL or the allocation fails.
 * The buffer is sized from utf8_len() plus one element for the terminator.
 */
static SQLWCHAR * strdup_U8toW (SQLCHAR * str)
{
  SQLWCHAR *wide;
  size_t chars;
  size_t converted;

  if (!str)
    return NULL;

  chars = utf8_len (str, SQL_NTS);

  wide = (SQLWCHAR *) malloc ((chars + 1) * sizeof (SQLWCHAR));
  if (wide == NULL)
    return NULL;

  converted = utf8towcs (str, wide, chars);
  wide[converted] = L'\0';

  return wide;
}
// default argument sep = ' ' String String::join(const Array<String>& a, rune sep) const { if (!a.len()) { return String(); } // first calculate total size size_t rune_size = utf8_encoded_size(sep); size_t total = 0; const char *p; for(size_t i = 0; i < a.len(); i++) { p = a[i].c_str(); if (p == nullptr) { throw ReferenceError(); } total += std::strlen(p) + rune_size; } total -= rune_size; // make the joined string String s; s.grow(total + 1); s.s_data[0] = 0; if (a[0].s_data != nullptr) { std::strcpy(s.s_data, a[0].s_data); } rune r[2] = { sep, 0 }; char rs[5]; utf8_encode(r, rs, sizeof(rs)); for(size_t i = 1; i < a.len(); i++) { std::strcat(s.s_data, rs); if (a[i].s_data) { std::strcat(s.s_data, a[i].s_data); } } s.s_len = utf8_len(s.s_data); return s; }
/* Creates a string object from the UTF-8 C string `str`.
 *
 * The text is decoded into a temporary array of utf8_len(str) + 1 32-bit
 * values obtained from pylt_malloc(), which is then handed to
 * pylt_obj_str_new() together with `is_raw`.
 *
 * Returns NULL (after releasing the temporary array) when utf8_decode()
 * reports failure by returning NULL.
 */
PyLiteStrObject* pylt_obj_str_new_from_cstr(PyLiteInterpreter *I, const char *str, bool is_raw) {
    const int count = utf8_len(str);
    const size_t buf_bytes = (count + 1) * sizeof(uint32_t);
    uint32_t *codepoints = pylt_malloc(I, buf_bytes);

    const char *cursor = (const char *)str;
    uint32_t cp;
    int idx = 0;

    while (idx < count) {
        cursor = utf8_decode(cursor, &cp);
        if (cursor == NULL) {
            pylt_free(I, codepoints, buf_bytes);
            return NULL;
        }
        codepoints[idx] = cp;
        idx++;
    }

    return pylt_obj_str_new(I, codepoints, count, is_raw);
}
/*
 * Returns a newly allocated wide-character copy of the UTF-8 string inStr.
 *
 * size == SQL_NTS : the input is NUL-terminated and strdup_U8toW() is used.
 * otherwise       : `size` is passed to utf8_len()/utf8ntowcs() as the input
 *                   length and the result lives in a zero-filled calloc()
 *                   buffer of utf8_len() + 1 elements.
 *
 * Returns NULL when inStr is NULL or the allocation fails.
 */
SQLWCHAR * dm_SQL_U8toW (SQLCHAR * inStr, SQLSMALLINT size)
{
  SQLWCHAR *wide;
  size_t chars;

  if (inStr == NULL)
    return NULL;

  if (size == SQL_NTS)
    return strdup_U8toW (inStr);

  chars = utf8_len (inStr, size);
  wide = (SQLWCHAR *) calloc (chars + 1, sizeof (SQLWCHAR));
  if (wide == NULL)
    return NULL;

  utf8ntowcs (inStr, wide, size, chars, NULL);
  return wide;
}
/* Handles a double click (or triple click when `triclick` is set) on the
 * message list.
 *
 * Text / action messages: the selection is set to the run of text around
 * m->over inside the hovered message, bounded by spaces (double click) or
 * newlines (triple click).
 * Image messages: when m->over is set and the image is zoomed, the zoom is
 * reset and the message height is recomputed.
 *
 * Returns 1 when the click landed on one of those message types, 0 when no
 * message is hovered or the type is not handled.
 */
_Bool messages_dclick(MESSAGES *m, _Bool triclick)
{
    if(m->iover == MSG_IDX_MAX) {
        return 0;
    }

    MESSAGE *msg = m->data->data[m->iover];

    if(msg->msg_type == MSG_TYPE_TEXT || msg->msg_type == MSG_TYPE_ACTION_TEXT) {
        /* The selection starts and ends in the hovered message. */
        m->data->istart = m->data->iend = m->iover;

        const char_t delim = triclick ? '\n' : ' ';
        STRING_IDX pos;

        /* Backwards to the delimiter (or start of text). */
        for(pos = m->over; pos != 0 && msg->msg[pos - 1] != delim; pos -= utf8_unlen(msg->msg + pos)) {
        }
        m->data->start = pos;

        /* Forwards to the delimiter (or end of text). */
        for(pos = m->over; pos != msg->length && msg->msg[pos] != delim; pos += utf8_len(msg->msg + pos)) {
        }
        m->data->end = pos;

        return 1;
    }

    if(msg->msg_type == MSG_TYPE_IMAGE) {
        MSG_IMG *img = (void*)msg;
        if(m->over && img->zoom) {
            img->zoom = 0;
            message_updateheight(m, msg, m->data);
        }
        return 1;
    }

    return 0;
}
/* Maps a mouse position to a byte offset in `str`, following the same
 * word-wrapping walk the text layout uses.
 *
 *   mx, my     - mouse position relative to the text origin
 *   right      - wrap width
 *   height     - total height of the text area
 *   lineheight - height of one line
 *   str/length - text buffer and its length in bytes
 *   multiline  - when 0, wrapping is not performed
 *
 * Returns 0 when the mouse is above the text and `length` when it is at or
 * below `height`; otherwise a byte offset into str.
 *
 * NOTE(review): textwidth()/textfit()/textfit_near()/utf8_len() are defined
 * elsewhere; their exact contracts are assumed from their names - confirm.
 */
uint16_t hittextmultiline(int mx, int right, int my, int height, uint16_t lineheight, char_t *str, uint16_t length, _Bool multiline)
{
    if(my < 0) {
        return 0;
    }
    if(my >= height) {
        return length;
    }

    int x = 0;
    /* a: scan cursor, b: start of the pending segment */
    char_t *a = str, *b = str, *end = str + length;
    while(1) {
        /* Segments end at a newline, a space or the end of the buffer. */
        if(a == end || *a == '\n' || *a == ' ') {
            int count = a - b, w = textwidth(b, a - b);
            /* Wrap while the segment overflows and the mouse is on a later
             * line; `my` is reduced by one line for every wrap so that
             * 0 <= my < lineheight means "the current line". */
            while(x + w > right && my >= lineheight) {
                if(multiline && x == 0) {
                    /* Segment is wider than a line: split it. */
                    int fit = textfit(b, count, right);
                    count -= fit;
                    b += fit;
                    my -= lineheight;
                    height -= lineheight;
                } else if(!multiline) {
                    break;
                } else {
                    /* Wrap and drop the first character of the segment. */
                    my -= lineheight;
                    height -= lineheight;
                    int l = utf8_len(b);
                    count -= l;
                    b += l;
                }
                if(my >= -lineheight && my < 0) {
                    x = mx;
                    break;
                }
                x = 0;
                w = textwidth(b, count);
            }
            if(a == end) {
                if(my >= lineheight) {
                    /* Mouse is below the last line of text. */
                    return length;
                }
                break;
            }
            /* Stop once the mouse is on this line and inside this segment
             * (or left of the text). */
            if((my >= 0 && my < lineheight) && (mx < 0 || (mx >= x && mx < x + w))) {
                break;
            }
            x += w;
            b = a;
            if(*a == '\n') {
                if(my >= 0 && my < lineheight) {
                    /* Mouse is on this line but past its text: hit the newline. */
                    x = mx;
                    return a - str;
                }
                b += utf8_len(b);
                my -= lineheight;
                height -= lineheight;
                x = 0;
            }
        }
        a += utf8_len(a);
    }

    /* Resolve the offset inside the segment [b, a). */
    int fit;
    if(mx >= right) {
        fit = textfit(b, a - b, right - x);
    } else if(mx - x > 0) {
        int len = a - b;
        /* Include the terminating separator unless the segment ends the text. */
        fit = textfit_near(b, len + (a != end), mx - x);
    } else {
        fit = 0;
    }
    return (b - str) + fit;
}
/*
 * Decodes a JSON string starting at d->p[d->i] into *value.
 *
 * Pass 1 scans up to the closing quote, rejecting control bytes below 0x20,
 * malformed escapes and byte sequences utf8_validate() rejects, while
 * accumulating in num_escapes how many bytes shorter the decoded text is
 * than the raw source. Pass 2 (label `parse`) either returns a sub-binary
 * of the input when no escape was seen, or fills a new binary with the
 * unescaped bytes.
 *
 * Returns 1 on success and 0 on any error, including an unterminated string.
 */
int dec_string(Decoder* d, ERL_NIF_TERM* value)
{
    int has_escape = 0;
    int num_escapes = 0;
    int st;
    int ulen;
    int ui;
    int hi;
    int lo;
    char* chrbuf;
    int chrpos;

    if(d->p[d->i] != '\"') {
        return 0;
    }
    d->i++;

    /* First byte after the opening quote. */
    st = d->i;

    while(d->i < d->len) {
        if(d->u[d->i] < 0x20) {
            return 0;
        } else if(d->p[d->i] == '\"') {
            d->i++;
            goto parse;
        } else if(d->p[d->i] == '\\') {
            if(d->i+1 >= d->len) {
                return 0;
            }
            has_escape = 1;
            /* The backslash itself never reaches the output. */
            num_escapes += 1;
            d->i++;
            switch(d->p[d->i]) {
                case '\"':
                case '\\':
                case '/':
                case 'b':
                case 'f':
                case 'n':
                case 'r':
                case 't':
                    d->i++;
                    break;
                case 'u':
                    hi = 0;
                    lo = 0;
                    d->i++;
                    if(d->i + 4 >= d->len) {
                        return 0;
                    }
                    hi = int_from_hex(&(d->u[d->i]));
                    if(hi < 0) {
                        return 0;
                    }
                    d->i += 4;
                    /* A value in the high-surrogate range must be followed
                     * by a second \uXXXX escape. */
                    if(hi >= 0xD800 && hi < 0xDC00) {
                        if(d->i + 6 >= d->len) {
                            return 0;
                        }
                        if(d->p[d->i++] != '\\') {
                            return 0;
                        } else if(d->p[d->i++] != 'u') {
                            return 0;
                        }
                        lo = int_from_hex(&(d->u[d->i]));
                        if(lo < 0) {
                            return 0;
                        }
                        hi = unicode_from_pair(hi, lo);
                        if(hi < 0) {
                            return 0;
                        }
                    }
                    /* From here on hi holds the UTF-8 length of the code point. */
                    hi = utf8_len(hi);
                    if(hi < 0) {
                        return 0;
                    }
                    /* Together with the +1 above: a single escape shrinks
                     * 6 source bytes to hi bytes, a pair 12 to hi bytes. */
                    if(lo == 0) {
                        num_escapes += 5 - hi;
                    } else {
                        num_escapes += 11 - hi;
                    }
                    break;
                default:
                    return 0;
            }
        } else if(d->u[d->i] < 0x80) {
            d->i++;
        } else {
            ulen = utf8_validate(&(d->u[d->i]), d->len - d->i);
            if(ulen < 0) {
                return 0;
            }
            d->i += ulen;
        }
    }

    /* The goto above leaves the loop on a closing quote, so this point is
     * only reached when the string is not terminated correctly. */
    return 0;

parse:
    if(!has_escape) {
        /* Nothing to unescape: reference the input bytes between the quotes. */
        *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
        return 1;
    }

    hi = 0;
    lo = 0;

    /* Raw length between the quotes minus the bytes saved by unescaping. */
    ulen = (d->i - 1) - st - num_escapes;
    chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
    chrpos = 0;
    ui = st;
    while(ui < d->i - 1) {
        if(d->p[ui] != '\\') {
            chrbuf[chrpos++] = d->p[ui++];
            continue;
        }
        ui++;
        switch(d->p[ui]) {
            case '\"':
            case '\\':
            case '/':
                chrbuf[chrpos++] = d->p[ui];
                ui++;
                break;
            case 'b':
                chrbuf[chrpos++] = '\b';
                ui++;
                break;
            case 'f':
                chrbuf[chrpos++] = '\f';
                ui++;
                break;
            case 'n':
                chrbuf[chrpos++] = '\n';
                ui++;
                break;
            case 'r':
                chrbuf[chrpos++] = '\r';
                ui++;
                break;
            case 't':
                chrbuf[chrpos++] = '\t';
                ui++;
                break;
            case 'u':
                ui++;
                hi = int_from_hex(&(d->u[ui]));
                if(hi < 0) {
                    return 0;
                }
                if(hi >= 0xD800 && hi < 0xDC00) {
                    /* The low surrogate's digits follow "XXXX\u", i.e. 6 bytes on. */
                    lo = int_from_hex(&(d->u[ui+6]));
                    if(lo < 0) {
                        return 0;
                    }
                    hi = unicode_from_pair(hi, lo);
                    ui += 10;
                } else {
                    ui += 4;
                }
                /* hi becomes the number of bytes written to the output. */
                hi = unicode_to_utf8(hi, (unsigned char*) chrbuf+chrpos);
                if(hi < 0) {
                    return 0;
                }
                chrpos += hi;
                break;
            default:
                return 0;
        }
    }

    return 1;
}
/* Draws `data` word-wrapped between x and `right`, starting at y, and returns
 * the y coordinate just below the last line.
 *
 *   x, right     - left edge and wrap limit
 *   y            - top of the first line
 *   top, bottom  - vertical clip range; lines outside are measured, not drawn
 *   lineheight   - height of one line
 *   data/length  - text buffer and its length in bytes
 *   h, hlen      - highlight range, passed through to drawtexth()
 *   multiline    - when 0, only the first line is drawn (cut at the limit)
 *
 * Lines that begin with '>' are drawn in green until the end of that line.
 * The start-of-line test looks at the byte BEFORE the '>'; it used to compare
 * the '>' itself against '\n', which could never match, so only a '>' at the
 * very start of the text was ever coloured.
 */
int drawtextmultiline(int x, int right, int y, int top, int bottom, uint16_t lineheight, char_t *data, uint16_t length, uint16_t h, uint16_t hlen, _Bool multiline)
{
    uint32_t c = 0;
    _Bool greentext = 0, draw = y + lineheight >= top;
    int xc = x;
    /* a: scan cursor, b: start of the pending segment */
    char_t *a = data, *b = a, *end = a + length;
    while(1) {
        if(a != end && *a == '>' && (a == data || *(a - 1) == '\n')) {
            /* Remember the previous colour so it can be restored at line end. */
            c = setcolor(RGB(0, 128, 0));
            greentext = 1;
        }

        /* Segments end at a space, a newline or the end of the buffer. */
        if(a == end || *a == ' ' || *a == '\n') {
            int count = a - b, w = textwidth(b, count);
            while(x + w > right) {
                if(multiline && x == xc) {
                    /* Segment is wider than a whole line: draw what fits. */
                    int fit = textfit(b, count, right - x);
                    if(draw) {
                        drawtexth(x, y, b, fit, b - data, h, hlen, lineheight);
                    }
                    count -= fit;
                    b += fit;
                    y += lineheight;
                    draw = (y + lineheight >= top && y < bottom);
                } else if(!multiline) {
                    /* Single-line mode: draw what fits and stop. */
                    int fit = textfit(b, count, right - x);
                    if(draw) {
                        drawtexth(x, y, b, fit, b - data, h, hlen, lineheight);
                    }
                    return y + lineheight;
                } else {
                    /* Wrap and drop the first character of the segment. */
                    y += lineheight;
                    draw = (y + lineheight >= top && y < bottom);
                    int l = utf8_len(b);
                    count -= l;
                    b += l;
                }
                x = xc;
                w = textwidth(b, count);
            }

            if(draw) {
                drawtexth(x, y, b, count, b - data, h, hlen, lineheight);
            }

            x += w;
            b = a;

            if(a == end) {
                if(greentext) {
                    setcolor(c);
                    greentext = 0;
                }
                break;
            }

            if(*a == '\n') {
                if(greentext) {
                    setcolor(c);
                    greentext = 0;
                }
                y += lineheight;
                draw = (y + lineheight >= top && y < bottom);
                b += utf8_len(b);
                x = xc;
            }
        }
        a += utf8_len(a);
    }

    return y + lineheight;
}
/*
 * Appends `val` to the encoder output as a quoted JSON string.
 *
 * `val` must be a binary or an atom (atoms are read as Latin-1 into a
 * 512-byte buffer); anything else fails. Pass 1 validates the bytes and
 * counts in esc_extra how many additional output bytes escaping needs, so
 * the output can be reserved in one enc_ensure() call. Pass 2 writes the
 * escaped text. When e->uescape is set, non-ASCII characters are written as
 * \u escapes instead of raw UTF-8.
 *
 * Returns 1 on success, 0 on a wrong term type, invalid UTF-8 or a failed
 * buffer reservation.
 */
static inline int enc_string(Encoder* e, ERL_NIF_TERM val)
{
    ErlNifBinary bin;
    char atom[512];

    unsigned char* data;
    size_t size;

    int esc_extra = 0;
    int ulen;
    int uval;
    int i;

    if(enif_is_binary(e->env, val)) {
        if(!enif_inspect_binary(e->env, val, &bin)) {
            return 0;
        }
        data = bin.data;
        size = bin.size;
    } else if(enif_is_atom(e->env, val)) {
        if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
            return 0;
        }
        data = (unsigned char*) atom;
        size = strlen(atom);
    } else {
        return 0;
    }

    /* Pass 1: validate and measure. */
    i = 0;
    while(i < size) {
        switch((char) data[i]) {
            case '\"':
            case '\\':
            case '\b':
            case '\f':
            case '\n':
            case '\r':
            case '\t':
                /* Two-character escape: one extra byte. */
                esc_extra += 1;
                i++;
                continue;
            default:
                if(data[i] < 0x20) {
                    /* Other control bytes need five extra bytes. */
                    esc_extra += 5;
                    i++;
                    continue;
                } else if(data[i] < 0x80) {
                    i++;
                    continue;
                }
                ulen = utf8_validate(&(data[i]), size - i);
                if(ulen < 0) {
                    return 0;
                }
                if(e->uescape) {
                    uval = utf8_to_unicode(&(data[i]), ulen);
                    if(uval < 0) {
                        return 0;
                    }
                    esc_extra += utf8_esc_len(uval);
                    /* NOTE(review): ulen was already checked above, so this
                     * test cannot fire. */
                    if(ulen < 0) {
                        return 0;
                    }
                }
                i += ulen;
        }
    }

    /* Raw bytes + escape overhead + the two surrounding quotes. */
    if(!enc_ensure(e, size + esc_extra + 2)) {
        return 0;
    }

    e->p[e->i++] = '\"';

    /* Pass 2: emit. */
    i = 0;
    while(i < size) {
        switch((char) data[i]) {
            case '\"':
            case '\\':
                e->p[e->i++] = '\\';
                e->u[e->i++] = data[i];
                i++;
                continue;
            case '\b':
                e->p[e->i++] = '\\';
                e->p[e->i++] = 'b';
                i++;
                continue;
            case '\f':
                e->p[e->i++] = '\\';
                e->p[e->i++] = 'f';
                i++;
                continue;
            case '\n':
                e->p[e->i++] = '\\';
                e->p[e->i++] = 'n';
                i++;
                continue;
            case '\r':
                e->p[e->i++] = '\\';
                e->p[e->i++] = 'r';
                i++;
                continue;
            case '\t':
                e->p[e->i++] = '\\';
                e->p[e->i++] = 't';
                i++;
                continue;
            default:
                if(data[i] < 0x20) {
                    ulen = unicode_uescape(data[i], &(e->p[e->i]));
                    if(ulen < 0) {
                        return 0;
                    }
                    e->i += ulen;
                    i++;
                } else if((data[i] & 0x80) && e->uescape) {
                    uval = utf8_to_unicode(&(data[i]), size-i);
                    if(uval < 0) {
                        return 0;
                    }
                    ulen = unicode_uescape(uval, &(e->p[e->i]));
                    if(ulen < 0) {
                        return 0;
                    }
                    e->i += ulen;
                    /* Advance the input by the UTF-8 length of the code point. */
                    ulen = utf8_len(uval);
                    if(ulen < 0) {
                        return 0;
                    }
                    i += ulen;
                } else {
                    e->u[e->i++] = data[i++];
                }
        }
    }

    e->p[e->i++] = '\"';
    e->count++;

    return 1;
}
/* Draws `data` word-wrapped inside a box and returns the y coordinate just
 * below the last line.
 *
 *   x, y         - top left corner of the box
 *   right        - wrap limit
 *   top, bottom  - vertical clip range; lines outside are measured, not drawn
 *   lineheight   - height of one line
 *   data/length  - text and its length in bytes
 *   h, hlen      - highlight range, passed through to drawtexth()
 *   mark, marklen- mark range, passed through to drawtextmark()
 *   multiline    - when 0, only the first line is drawn (cut at the limit)
 *
 * Colouring: a line starting with '>' uses the quote colour, a line ending in
 * '<' uses the red-text colour, and a word starting with "http://" or
 * "https://" uses the URL colour until the end of that word.
 *
 * The "line ends in '<'" test now skips empty lines; it used to read the byte
 * before the line start, which lies outside the buffer when the text begins
 * with a newline.
 */
int utox_draw_text_multiline_within_box(int x, int y, /* x, y of the top left corner of the box */
                                        int right, int top, int bottom, uint16_t lineheight,
                                        const char_t *data, uint16_t length, /* text, and length of the text*/
                                        uint16_t h, uint16_t hlen, uint16_t mark, uint16_t marklen, _Bool multiline )
{
    uint32_t c1, c2;
    _Bool greentext = 0, link = 0, draw = y + lineheight >= top;
    int xc = x;
    /* a: scan cursor, b: start of the pending segment */
    char_t *a = data, *b = a, *end = a + length;
    while(1) {
        if(a != end) {
            /* Quote: '>' as the first character of a line. */
            if(*a == '>' && (a == data || *(a - 1) == '\n')) {
                c1 = setcolor(COLOR_MAIN_QUOTETEXT);
                greentext = 1;
            }

            /* URL: scheme at the start of a word. */
            if((a == data || *(a - 1) == '\n' || *(a - 1) == ' ')
               && ((end - a >= 7 && memcmp(a, "http://", 7) == 0) || (end - a >= 8 && memcmp(a, "https://", 8) == 0))) {
                c2 = setcolor(COLOR_MAIN_URLTEXT);
                link = 1;
            }

            /* Red text: look ahead for a line whose last character is '<'. */
            if(a == data || *(a - 1) == '\n') {
                char_t *r = a;
                while (r != end && *r != '\n') {
                    r++;
                }
                /* r == a means the line is empty; there is no last character. */
                if (r != a && *(r - 1) == '<') {
                    if (greentext) {
                        setcolor(COLOR_MAIN_REDTEXT);
                    } else {
                        greentext = 1;
                        c1 = setcolor(COLOR_MAIN_REDTEXT);
                    }
                }
            }
        }

        /* Segments end at a space, a newline or the end of the buffer. */
        if(a == end || *a == ' ' || *a == '\n') {
            int count = a - b, w = textwidth(b, count);
            while(x + w > right) {
                if(multiline && x == xc) {
                    /* Segment is wider than a whole line: draw what fits. */
                    int fit = textfit(b, count, right - x);
                    if(draw) {
                        drawtexth(x, y, b, fit, b - data, h, hlen, lineheight);
                        drawtextmark(x, y, b, fit, b - data, mark, marklen, lineheight);
                    }
                    count -= fit;
                    b += fit;
                    y += lineheight;
                    draw = (y + lineheight >= top && y < bottom);
                } else if(!multiline) {
                    /* Single-line mode: draw what fits and stop. */
                    int fit = textfit(b, count, right - x);
                    if(draw) {
                        drawtexth(x, y, b, fit, b - data, h, hlen, lineheight);
                        drawtextmark(x, y, b, fit, b - data, mark, marklen, lineheight);
                    }
                    return y + lineheight;
                } else {
                    /* Wrap and drop the first character of the segment. */
                    y += lineheight;
                    draw = (y + lineheight >= top && y < bottom);
                    int l = utf8_len(b);
                    count -= l;
                    b += l;
                }
                x = xc;
                w = textwidth(b, count);
            }

            if(draw) {
                drawtexth(x, y, b, count, b - data, h, hlen, lineheight);
                drawtextmark(x, y, b, count, b - data, mark, marklen, lineheight);
            }

            x += w;
            b = a;

            /* A link ends with the word it started in. */
            if(link) {
                setcolor(c2);
                link = 0;
            }

            if(a == end) {
                if(greentext) {
                    setcolor(c1);
                    greentext = 0;
                }
                break;
            }

            if(*a == '\n') {
                if(greentext) {
                    setcolor(c1);
                    greentext = 0;
                }
                y += lineheight;
                draw = (y + lineheight >= top && y < bottom);
                b += utf8_len(b);
                x = xc;
            }
        }
        a += utf8_len(a);
    }

    return y + lineheight;
}
bool DetectTextEncoding(const char* buffer, size_t len, wxFontEncoding& encoding, unsigned int& BOM_len) { wxASSERT(buffer); if (!buffer || len == 0) return false; const char* buff_ptr = buffer; const char* buff_end = &buffer[len]; wxFontEncoding enc = wxFONTENCODING_DEFAULT; // Check if the buffer starts with a BOM (Byte Order Marker) if (len >= 2) { if (len >= 4 && memcmp(buffer, "\xFF\xFE\x00\x00", 4) == 0) {enc = wxFONTENCODING_UTF32LE; BOM_len = 4;} else if (len >= 4 && memcmp(buffer, "\x00\x00\xFE\xFF", 4) == 0) {enc = wxFONTENCODING_UTF32BE; BOM_len = 4;} else if (memcmp(buffer, "\xFF\xFE", 2) == 0) {enc = wxFONTENCODING_UTF16LE; BOM_len = 2;} else if (memcmp(buffer, "\xFE\xFF", 2) == 0) {enc = wxFONTENCODING_UTF16BE; BOM_len = 2;} else if (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) {enc = wxFONTENCODING_UTF8; BOM_len = 3;} else if (len >= 5 && memcmp(buffer, "\x2B\x2F\x76\x38\x2D", 5) == 0) {enc = wxFONTENCODING_UTF7; BOM_len = 5;} buff_ptr += BOM_len; } // If the file starts with a leading < (less) sign, it is probably an XML file // and we can determine the encoding by how the sign is encoded. 
if (enc == wxFONTENCODING_DEFAULT && len >= 2) { if (len >= 4 && memcmp(buffer, "\x3C\x00\x00\x00", 4) == 0) enc = wxFONTENCODING_UTF32LE; else if (len >= 4 && memcmp(buffer, "\x00\x00\x00\x3C", 4) == 0) enc = wxFONTENCODING_UTF32BE; else if (memcmp(buffer, "\x3C\x00", 2) == 0) enc = wxFONTENCODING_UTF16LE; else if (memcmp(buffer, "\x00\x3C", 2) == 0) enc = wxFONTENCODING_UTF16BE; } // Unicode Detection if (enc == wxFONTENCODING_DEFAULT) { unsigned int null_byte_count = 0; unsigned int utf_bytes = 0; unsigned int good_utf_count = 0; unsigned int bad_utf_count = 0; unsigned int bad_utf32_count = 0; unsigned int bad_utf16_count = 0; unsigned int nl_utf32le_count = 0; unsigned int nl_utf32be_count = 0; unsigned int nl_utf16le_count = 0; unsigned int nl_utf16be_count = 0; while (buff_ptr != buff_end) { if (*buff_ptr == 0) ++null_byte_count; // Detect UTF-8 by scanning for invalid sequences if (utf_bytes == 0) { if ((*buff_ptr & 0xC0) == 0x80 || *buff_ptr == 0) ++bad_utf_count; else { utf_bytes = utf8_len(*buff_ptr) - 1; if (utf_bytes > 3) { ++bad_utf_count; utf_bytes = 0; } } } else if ((*buff_ptr & 0xC0) == 0x80) { --utf_bytes; if (utf_bytes == 0) ++good_utf_count; } else { ++bad_utf_count; utf_bytes = 0; } // Detect UTF-32 by scanning for newlines (and lack of null chars) if ((uintptr_t)buff_ptr % 4 == 0 && buff_ptr+4 <= buff_end) { if (*((wxUint32*)buff_ptr) == 0) ++bad_utf32_count; if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_BE(0x0A)) ++nl_utf32le_count; if (*((wxUint32*)buff_ptr) == wxUINT32_SWAP_ON_LE(0x0A)) ++nl_utf32be_count; } // Detect UTF-16 by scanning for newlines (and lack of null chars) if ((uintptr_t)buff_ptr % 2 == 0 && buff_ptr+4 <= buff_end) { if (*((wxUint16*)buff_ptr) == 0) ++bad_utf16_count; if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_BE(0x0A)) ++nl_utf16le_count; if (*((wxUint16*)buff_ptr) == wxUINT16_SWAP_ON_LE(0x0A)) ++nl_utf16be_count; } ++buff_ptr; } if (bad_utf_count == 0) enc = wxFONTENCODING_UTF8; else if (bad_utf32_count == 0 && 
nl_utf32le_count > len / 400) enc = wxFONTENCODING_UTF32LE; else if (bad_utf32_count == 0 && nl_utf32be_count > len / 400) enc = wxFONTENCODING_UTF32BE; else if (bad_utf16_count == 0 && nl_utf16le_count > len / 200) enc = wxFONTENCODING_UTF16LE; else if (bad_utf16_count == 0 && nl_utf16be_count > len / 200) enc = wxFONTENCODING_UTF16BE; else if (null_byte_count) return false; // Maybe this is a binary file? } // If we can't detect encoding and it does not contain null bytes just set it to the default encoding. if (enc == wxFONTENCODING_DEFAULT) enc = wxFONTENCODING_SYSTEM; encoding = enc; return true; }