/*
 * Convert a NUL-terminated UTF-8 byte string into an array of wide
 * characters.
 *
 * pwcs: destination array; one element is stored per decoded character.
 * s:    UTF-8 input; conversion stops at the first NUL byte.
 * n:    maximum number of input bytes that may be consumed.
 *
 * Bytes for which utf8_mbtowc() returns -1 are skipped one at a time and
 * produce no output.  Returns the number of elements stored in pwcs; no
 * terminator is appended.
 *
 * NOTE(review): the output is written through a __u16 pointer although the
 * parameter is declared ucs4_t * -- confirm that both types have the same
 * width.  The destination capacity is not checked here; the caller must
 * size pwcs for the worst case.
 */
int utf8_mbstowcs(ucs4_t *pwcs, const __u8 *s, int n)
{
	__u16 *op;
	const __u8 *ip;
	int size;

	op = pwcs;
	ip = s;
	while (*ip && n > 0) {
		if (*ip & 0x80) {
			/* Lead byte of a multi-byte sequence. */
			size = utf8_mbtowc(op, ip, n);
			if (size == -1) {
				/* Ignore character and move on */
				ip++;
				n--;
			} else {
				op++;
				ip += size;
				n -= size;
			}
		} else {
			/* Plain ASCII: copy the byte straight through. */
			*op++ = *ip++;
			/*
			 * An ASCII byte uses up input budget as well; without
			 * this the limit n was never enforced for ASCII text.
			 */
			n--;
		}
	}
	return (op - pwcs);
}
void PokemonString::fromUTF8(const char * str) { str = (str == NULL) ? "" : str; strSz = std::strlen(str) + 1; resizeStr(); dataSz = strSz; resizeData(); dataSz = 0; int s = 0; size_t offset = 0; while (offset < strSz - 1) { u32 c; s = utf8_mbtowc(NULL, &c, (const unsigned char*)str + offset, strSz - 1); if (s < 0) break; offset += s; if (c == 0) break; s = pkmgba_wctomb(NULL, _data + dataSz, c, 2, _japanese); if (s < 0) break; dataSz += s; } _data[dataSz++] = 0xff; hasChanged = true; }
/*
 * Converter callback: UTF-8 input to a string of single bytes.
 *
 * Each decoded character whose value fits in 8 bits is stored as that
 * byte.  Characters above 0xff and illegal sequences (one input byte is
 * skipped for those) are stored as BAD_CHAR and counted.  Conversion stops
 * when the input is exhausted or incomplete (RET_TOOFEW) or the output is
 * full.  On return *from, *from_left, *to and *to_left describe the
 * unconsumed input and unused output.
 *
 * Returns the number of characters replaced by BAD_CHAR, or 0 when there
 * is no input pointer.  conv, args and num_args are not used.
 */
static int
utf8tostr(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    const unsigned char *in;
    const unsigned char *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int replaced = 0;

    if (from == NULL || *from == NULL)
        return 0;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    while (in < in_end) {
        ucs4_t wc;
        unsigned char byte;
        int used = utf8_mbtowc(NULL, &wc, in, in_end - in);

        /* Incomplete trailing sequence, or no room left for output. */
        if (used == RET_TOOFEW(0) || out == out_end)
            break;

        if (used == RET_ILSEQ) {
            used = 1;
            byte = BAD_CHAR;
            replaced++;
        } else if ((wc & ~(ucs4_t)0xff) == 0) {
            byte = (unsigned char) wc;
        } else {
            byte = BAD_CHAR;
            replaced++;
        }

        *out++ = byte;
        in += used;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;

    return replaced;
}
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len) { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; wchar_t uni; int unilen, utflen; char *result; /* The utf8->nls conversion won't make the final nls string bigger * than the utf one, but if the string is pure ascii they'll have the * same width and an extra char is needed to save the additional \0 */ int maxlen = in_len + 1; befs_debug(sb, "---> utf2nls()"); if (!nls) { befs_error(sb, "befs_utf2nls called with no NLS table loaded"); return -EINVAL; } *out = result = kmalloc(maxlen, GFP_NOFS); if (!*out) { befs_error(sb, "befs_utf2nls() cannot allocate memory"); *out_len = 0; return -ENOMEM; } for (i = o = 0; i < in_len; i += utflen, o += unilen) { /* convert from UTF-8 to Unicode */ utflen = utf8_mbtowc(&uni, &in[i], in_len - i); if (utflen < 0) { goto conv_err; } /* convert from Unicode to nls */ unilen = nls->uni2char(uni, &result[o], in_len - o); if (unilen < 0) { goto conv_err; } } result[o] = '\0'; *out_len = o; befs_debug(sb, "<--- utf2nls()"); return o; conv_err: befs_error(sb, "Name using character set %s contains a character that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- utf2nls()"); kfree(result); return -EILSEQ; }
/*
 * Return the byte length of the first UTF-8 character of the
 * NUL-terminated string s, as reported by utf8_mbtowc().
 * Returns 0 when the decoder reports zero or a negative value.
 */
int ch_utf8_length(const char* s)
{
    ucs4_t decoded;
    int nbytes = utf8_mbtowc(&decoded, (const unsigned char*)s, strlen(s));

    return (nbytes > 0) ? nbytes : 0;
}
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len) { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; wchar_t uni; int unilen, utflen; char *result; int maxlen = in_len; /* The utf8->nls conversion can't make more chars */ befs_debug(sb, "---> utf2nls()"); if (!nls) { befs_error(sb, "befs_utf2nls called with no NLS table loaded"); return -EINVAL; } *out = result = kmalloc(maxlen, GFP_NOFS); if (!*out) { befs_error(sb, "befs_utf2nls() cannot allocate memory"); *out_len = 0; return -ENOMEM; } for (i = o = 0; i < in_len; i += utflen, o += unilen) { /* convert from UTF-8 to Unicode */ utflen = utf8_mbtowc(&uni, &in[i], in_len - i); if (utflen < 0) { goto conv_err; } /* convert from Unicode to nls */ unilen = nls->uni2char(uni, &result[o], 1); if (unilen < 0) { goto conv_err; } } result[o] = '\0'; *out_len = o; befs_debug(sb, "<--- utf2nls()"); return o; conv_err: befs_error(sb, "Name using charecter set %s contains a charecter that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- utf2nls()"); kfree(result); return -EILSEQ; }
int gr_measure(const char *s) { GRFont* fnt = NULL; int n, l; wchar_t ch; if (!fnt) fnt = gr_font; l = utf8_mbtowc(&ch, s, strlen(s)); //fprintf(stdout, "unicode: %d\n", l); if(l <= 0 ) return 0; n = fnt->cwidth[getCharID(s,NULL)]; return n; }
int gr_textExWH(int x, int y, const char *s, void* pFont, int max_width, int max_height) { GGLContext *gl = gr_context; GRFont *gfont = (GRFont*) pFont; unsigned off, width, height, n; int rect_x, rect_y; wchar_t ch; /* Handle default font */ if (!gfont) gfont = gr_font; y -= gfont->ascent; gl->texEnvi(gl, GGL_TEXTURE_ENV, GGL_TEXTURE_ENV_MODE, GGL_REPLACE); gl->texGeni(gl, GGL_S, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE); gl->texGeni(gl, GGL_T, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE); gl->enable(gl, GGL_TEXTURE_2D); while(*s) { if(*((unsigned char*)(s)) < 0x20) { s++; continue; } off = getCharID(s,pFont); n = utf8_mbtowc(&ch, s, strlen(s)); if(n <= 0) break; s += n; width = gfont->cwidth[off]; height = gfont->cheight[off]; memcpy(&font_ftex, &gfont->texture, sizeof(font_ftex)); font_ftex.width = width; font_ftex.height = height; font_ftex.stride = width; font_ftex.data = gfont->fontdata[off]; gl->bindTexture(gl, &font_ftex); if ((x + (int)width) < max_width) rect_x = x + width; else rect_x = max_width; if (y + height < (unsigned int)(max_height)) rect_y = y + height; else rect_y = max_height; gl->texCoord2i(gl, 0 - x, 0 - y); gl->recti(gl, x, y, rect_x, rect_y); x += width; if (x > max_width) return x; } return x; }
/*
 * Look up the glyph index of the first UTF-8 character of s.
 *
 * pFont selects the font; NULL means the default font gr_font.  The
 * decoded code point is searched linearly in the font's unicodemap.
 *
 * Returns the matching index, or 0 when the character is not in the map
 * or cannot be decoded.
 */
int getCharID(const char* s, void* pFont)
{
    unsigned i;
    /* Initialised so a decoder that stores nothing never leaves garbage. */
    unsigned unicode = 0;
    GRFont *gfont = (GRFont*) pFont;

    if (!gfont)
        gfont = gr_font;

    /*
     * The result used to be ignored, so a failed decode compared an
     * indeterminate value against the map.  Fall back to glyph 0, the same
     * value returned for characters that are not in the map.
     */
    if (utf8_mbtowc(&unicode, s, strlen(s)) < 0)
        return 0;

    for (i = 0; i < gfont->count; i++) {
        if (unicode == gfont->unicodemap[i])
            return i;
    }

    return 0;
}
/**
 * Append the characters decoded from the UTF-8 string s to *d.
 *
 * Decoding stops at the end of the string, at an embedded NUL, or at the
 * first position for which utf8_mbtowc() reports zero or a negative
 * value; characters appended before that point are kept.
 */
void XimServer::strToUstring(uString *d, const char *s)
{
    const int total = static_cast<int>(strlen(s));
    int done = 0;

    while (done < total && *s != 0) {
        uchar ch;
        const int nbyte = utf8_mbtowc(&ch, (const unsigned char *)s, total - done);

        if (nbyte <= 0)
            break;

        d->push_back(ch);
        s += nbyte;
        done += nbyte;
    }
}
/*
 * Converter callback: UTF-8 input to an array of wchar_t.
 *
 * Every decoded character is stored as one wchar_t.  An illegal sequence
 * consumes a single input byte and stores BAD_WCHAR instead.  Conversion
 * stops when the input is exhausted or incomplete (RET_TOOFEW) or the
 * output is full.  On return *from, *from_left, *to and *to_left describe
 * the unconsumed input and unused output.
 *
 * Returns the number of BAD_WCHAR substitutions, or 0 when there is no
 * input pointer.  conv, args and num_args are not used.
 */
static int
utf8towcs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    const unsigned char *in;
    const unsigned char *in_end;
    wchar_t *out;
    wchar_t *out_end;
    int replaced = 0;

    if (from == NULL || *from == NULL)
        return 0;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (wchar_t *) *to;
    out_end = out + *to_left;

    for (; in < in_end && out < out_end; out++) {
        ucs4_t wc;
        int used = utf8_mbtowc(NULL, &wc, in, in_end - in);

        /* Incomplete trailing sequence: leave it for the next call. */
        if (used == RET_TOOFEW(0))
            break;

        if (used == RET_ILSEQ) {
            *out = BAD_WCHAR;
            in++;
            replaced++;
        } else {
            *out = wc;
            in += used;
        }
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;

    return replaced;
}
/*
 * Return the total width of the UTF-8 string s: the sum of the cwidth
 * entries of its characters in the given font.
 *
 * font selects the font; NULL means the default font gr_font.
 * Measuring stops at the first character utf8_mbtowc() cannot decode; the
 * width accumulated up to that point is returned.
 */
int gr_measureEx(const char *s, void* font)
{
    GRFont* fnt = (GRFont*) font;
    int n, l, off;
    int len;
    wchar_t ch;

    if (!fnt)
        fnt = gr_font;

    n = 0;
    off = 0;
    /* Measure the string once instead of calling strlen() per character. */
    len = (int) strlen(s);

    while (off < len) {
        l = utf8_mbtowc(&ch, s + off, len - off);
        /*
         * A result of 0 would loop forever and a negative one would move
         * the offset backwards, so stop at the first undecodable byte.
         */
        if (l <= 0)
            break;

        n += fnt->cwidth[getCharID(s + off, font)];
        off += l;
    }

    return n;
}
/**
 * Decode a UTF-8 String into a WideString.
 *
 * Decoding stops at the end of the string, at an embedded NUL, or at the
 * first position for which utf8_mbtowc() reports zero or a negative
 * value; characters decoded before that point are returned.
 */
WideString
utf8_mbstowcs (const String & str)
{
    WideString result;
    const unsigned char *cursor = (const unsigned char *) str.c_str ();
    unsigned int used = 0;

    while (used < str.length () && *cursor != 0) {
        ucs4_t wc;
        int step = utf8_mbtowc (&wc, cursor, str.length () - used);

        if (step <= 0)
            break;

        result.push_back (wc);
        cursor += step;
        used += step;
    }

    return result;
}
/*
 * Convert UTF-8 text to a zero-terminated array of 16-bit code units.
 *
 * in:  UTF-8 input, len bytes.
 * out: destination buffer.
 * n:   size of out; the code derives its capacity as n / 2 elements, so
 *      this is presumably a byte count -- confirm against callers.
 *
 * Returns the number of units stored (terminator excluded), 0 when the
 * input cannot be decoded or contains a character above 0xffff, and -1
 * when the result including its terminator does not fit into out.
 */
int nge_charset_utf8_to_ucs2(const uint8_t* in, uint16_t* out, int len, int n)
{
	ucs4_t wc = 0;
	int cur = 0, ret;
	uint16_t *pOut = out;
	uint16_t *pLast;

	/* Not even the terminator fits. */
	if (n < 2)
		return -1;

	/* Last valid element; it is reserved for the terminator. */
	pLast = out + n / 2 - 1;

	while (cur < len) {
		ret = utf8_mbtowc(&wc, in + cur, len - cur);
		/* A result of 0 would never advance cur; treat it as a failure. */
		if (ret <= 0 || wc > 0xffff)
			return 0;

		/*
		 * Check the capacity before storing.  The check used to run
		 * after the store and allowed element n / 2, one past the end,
		 * to be written by both the loop and the terminator.
		 */
		if (pOut >= pLast)
			return -1;

		*pOut++ = (uint16_t)wc;
		cur += ret;
	}

	*pOut = 0x0;
	return pOut - (uint16_t*)out;
}
int gr_text(int x, int y, const char *s, int bold) { GGLContext *gl = gr_context; GRFont *gfont = NULL; unsigned off, width, height, n; wchar_t ch; /* Handle default font */ if (!gfont) gfont = gr_font; x += overscan_offset_x; y += overscan_offset_y; y -= gfont->ascent; gl->texEnvi(gl, GGL_TEXTURE_ENV, GGL_TEXTURE_ENV_MODE, GGL_REPLACE); gl->texGeni(gl, GGL_S, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE); gl->texGeni(gl, GGL_T, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE); gl->enable(gl, GGL_TEXTURE_2D); while(*s) { if(*((unsigned char*)(s)) < 0x20) { s++; continue; } off = getCharID(s,NULL); n = utf8_mbtowc(&ch, s, strlen(s)); if(n <= 0) break; s += n; width = gfont->cwidth[off]; height = gfont->cheight[off]; memcpy(&font_ftex, &gfont->texture, sizeof(font_ftex)); font_ftex.width = width; font_ftex.height = height; font_ftex.stride = width; font_ftex.data = gfont->fontdata[off]; gl->bindTexture(gl, &font_ftex); gl->texCoord2i(gl, 0 - x, 0 - y); gl->recti(gl, x, y, x + width, y + height); x += width; } return x; }
/*
 * Smoke test that calls each NLS base entry point once: default table
 * loading, table registration and removal, loading by name, and the four
 * UTF-8 conversion helpers.  Return values are not checked.
 *
 * NOTE(review): nls is passed to register_nls() uninitialised -- confirm
 * whether the registration code reads any of its fields.
 */
static void test_nls_base(void)
{
	/*
	 * The conversion helpers are called with lengths of 2 and 20 and the
	 * string variants presumably scan until a terminating zero, so the
	 * arguments must be zero-terminated arrays of at least that size.
	 * Single scalar objects were read and written out of bounds.
	 */
	wchar_t p[20] = { 0x20 };
	__u8 s[20] = { 0x01 };
	int n = 2;
	struct nls_table nls;
	char charset[20] = "David";

	load_nls_default();
	register_nls(&nls);
	unload_nls(&nls);
	load_nls(charset);
	unregister_nls(&nls);

	utf8_mbtowc(p, s, n);
	utf8_mbstowcs(p, s, n);
	n = 20;
	utf8_wctomb(s, p[0], n);
	utf8_wcstombs(s, p, n);
}
/**
 * Decode at most len bytes of the UTF-8 string str into a WideString.
 *
 * A NULL str yields an empty result.  A negative len means "use
 * strlen (str)".  Decoding stops at the length limit, at a NUL byte, or
 * at the first position for which utf8_mbtowc() reports zero or a
 * negative value; characters decoded before that point are returned.
 */
WideString
utf8_mbstowcs (const char *str, int len)
{
    WideString wstr;

    if (!str)
        return wstr;

    if (len < 0)
        len = strlen (str);

    unsigned int sn = 0;

    while (sn < len && *str != 0) {
        ucs4_t wc;
        int un = utf8_mbtowc (&wc, (const unsigned char *)str, len - sn);

        if (un <= 0)
            break;

        wstr.push_back (wc);
        str += un;
        sn += un;
    }

    return wstr;
}
/*
 * Convert a UTF-8 name into an HFS+ unicode string.
 *
 * ustr: destination; unicode[] and length are stored big-endian.
 * astr: UTF-8 input, len bytes.
 *
 * Bytes that utf8_mbtowc() cannot decode are skipped.  The code point
 * 0x2400 is stored as 0 and ':' is stored as '/'.
 *
 * Returns 0 on success, or -ENAMETOOLONG when input is left over after
 * HFSPLUS_MAX_STRLEN units have been stored; ustr then holds the
 * truncated name.
 */
int hfsplus_asc2uni(struct hfsplus_unistr *ustr, const char *astr, int len)
{
	int tmp;
	wchar_t c;
	u16 outlen = 0;

	/*
	 * Strictly less than the maximum: "<=" let one unit more than
	 * HFSPLUS_MAX_STRLEN be stored, which presumably runs past the end of
	 * ustr->unicode[] and accepted over-long names.
	 */
	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
		if (*astr & 0x80) {
			tmp = utf8_mbtowc(&c, astr, len);
			if (tmp < 0) {
				/* Undecodable byte: drop it and carry on. */
				astr++;
				len--;
				continue;
			} else {
				astr += tmp;
				len -= tmp;
			}
		} else {
			c = *astr++;
			len--;
		}

		switch (c) {
		case 0x2400:
			c = 0;
			break;
		case ':':
			c = '/';
			break;
		}

		ustr->unicode[outlen] = cpu_to_be16(c);
		outlen++;
	}
	ustr->length = cpu_to_be16(outlen);

	if (len > 0)
		return -ENAMETOOLONG;
	return 0;
}
int sf_nlscpy(struct sf_glob_info *sf_g, char *name, size_t name_bound_len, const unsigned char *utf8_name, size_t utf8_len) { if (sf_g->nls) { const char *in; char *out; size_t out_len; size_t out_bound_len; size_t in_bound_len; in = utf8_name; in_bound_len = utf8_len; out = name; out_len = 0; out_bound_len = name_bound_len; while (in_bound_len) { int nb; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31) unicode_t uni; nb = utf8_to_utf32(in, in_bound_len, &uni); #else linux_wchar_t uni; nb = utf8_mbtowc(&uni, in, in_bound_len); #endif if (nb < 0) { LogFunc(("utf8_mbtowc failed(%s) %x:%d\n", (const char *) utf8_name, *in, in_bound_len)); return -EINVAL; } in += nb; in_bound_len -= nb; nb = sf_g->nls->uni2char(uni, out, out_bound_len); if (nb < 0) { LogFunc(("nls->uni2char failed(%s) %x:%d\n", utf8_name, uni, out_bound_len)); return nb; } out += nb; out_bound_len -= nb; out_len += nb; } *out = 0; } else { if (utf8_len + 1 > name_bound_len) return -ENAMETOOLONG; memcpy(name, utf8_name, utf8_len + 1); } return 0; }
static enum mu_filter_result _xml_encoder (void *xd, enum mu_filter_command cmd, struct mu_filter_io *iobuf) { struct xml_encode_state *cp = xd; const unsigned char *iptr; size_t isize; char *optr; size_t osize; switch (cmd) { case mu_filter_init: cp->idx = -1; return mu_filter_ok; case mu_filter_done: return mu_filter_ok; default: break; } iptr = (unsigned char*) iobuf->input; isize = iobuf->isize; optr = iobuf->output; osize = iobuf->osize; while (osize) { if (cp->idx > 0) { *optr++ = cp->buf[--cp->idx]; --osize; } else if (isize == 0) break; else { struct transcode_map *p; unsigned int c = *iptr; p = ch2ent (c); if (p) { store_buf (cp, p->ent, p->len); ++iptr; --isize; } else { int count = utf8_char_width (c); if (count == 0) { store_char_ref (cp, *iptr); ++iptr; --isize; } else if (count > isize) { if (cmd == mu_filter_lastbuf) { store_buf (cp, (char*) iptr, isize); iptr += isize; isize = 0; } else break; } else { unsigned wc; int rc; rc = utf8_mbtowc (iptr, isize, &wc); if (rc == -1) { store_char_ref (cp, *iptr); ++iptr; --isize; } else { /* http://www.w3.org/TR/xml/#dt-charref: Character Range Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] (any Unicode character, excluding the surrogate blocks, FFFE, and FFFF) */ if (wc == 0x9 || wc == 0xa || wc == 0xd || (wc >= 0x20 && wc < 0xd7ff) || (wc >= 0xe000 && wc < 0xfffd) || (wc >= 0x10000 && wc < 0x10FFFF)) { if (osize >= count) { memcpy (optr, iptr, count); optr += count; osize -= count; iptr += count; isize -= count; } else { store_buf (cp, (char*) iptr, count); iptr += count; isize -= count; } } else { store_char_ref (cp, wc); iptr += count; isize -= count; } } } } } } iobuf->isize -= isize; iobuf->osize -= osize; return mu_filter_ok; }
/*
 * Converter callback: convert UTF-8 input to a charset encoding chosen by
 * charset_wctocs() from the preferred charsets held in conv->state.
 *
 * The loop ends with an unconditional break, so at most one character is
 * written per call; input that cannot be converted is skipped and counted
 * until a character succeeds or a stop condition is hit.
 *
 * On success *from, *from_left, *to and *to_left are updated to describe
 * the unconsumed input and unused output and, when num_args >= 1, the
 * charset that was used is stored through args[0].
 *
 * Returns the number of unconvertible characters skipped, 0 when there is
 * no input pointer, or -1 when no charset was selected (the in/out
 * pointers are left untouched in that case).
 */
static int
utf8tocs1(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    Utf8Conv *preferred_charsets;
    XlcCharSet last_charset = NULL;
    unsigned char const *src;
    unsigned char const *srcend;
    unsigned char *dst;
    unsigned char *dstend;
    int unconv_num;

    if (from == NULL || *from == NULL)
	return 0;

    preferred_charsets = (Utf8Conv *) conv->state;
    src = (unsigned char const *) *from;
    srcend = src + *from_left;
    dst = (unsigned char *) *to;
    dstend = dst + *to_left;
    unconv_num = 0;

    while (src < srcend && dst < dstend) {
	Utf8Conv chosen_charset = NULL;
	XlcSide chosen_side = XlcNONE;
	ucs4_t wc;
	int consumed;
	int count;

	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
	/* Incomplete trailing sequence: leave it for the next call. */
	if (consumed == RET_TOOFEW(0))
	    break;
	/* Illegal sequence: drop one byte and try again. */
	if (consumed == RET_ILSEQ) {
	    src++;
	    unconv_num++;
	    continue;
	}

	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
	/* Output buffer too small for this character. */
	if (count == RET_TOOSMALL)
	    break;
	/* No charset can represent this character: skip it. */
	if (count == RET_ILSEQ) {
	    src += consumed;
	    unconv_num++;
	    continue;
	}

	if (last_charset == NULL) {
	    last_charset =
	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
	    /* Charset lookup failed: treat the character as unconvertible. */
	    if (last_charset == NULL) {
	        src += consumed;
	        unconv_num++;
	        continue;
	    }
	} else {
	    /* Stop when the character needs a different charset or side. */
	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
	          && (last_charset->side == XlcGLGR
	              || last_charset->side == chosen_side)))
	        break;
	}
	src += consumed;
	dst += count;
	/* Only a single character is converted per call. */
	break;
    }

    if (last_charset == NULL)
	return -1;

    *from = (XPointer) src;
    *from_left = srcend - src;
    *to = (XPointer) dst;
    *to_left = dstend - dst;

    if (num_args >= 1)
	*((XlcCharSet *)args[0]) = last_charset;

    return unconv_num;
}