/*
 * Split the string argument into an array holding one string per character.
 *
 * strm_get_args() is asked for a single "s" argument, which yields a pointer
 * and a byte length.  The text is walked twice: once to count the characters
 * so the array can be sized, once to fill it.
 *
 * The end of the buffer is computed once (str + slen) and passed to utf8len();
 * the previous code passed `s + slen`, which moved past the real end as `s`
 * advanced.  Both passes are also bounded by that end pointer.  Stopping at a
 * NUL byte is kept from the original.
 *
 * Returns STRM_OK; the array is stored in *ret.
 */
static int
str_chars(strm_stream* strm, int argc, strm_value* args, strm_value* ret)
{
  const char* str;
  const char* s;
  const char* end;
  strm_int slen;
  strm_array ary;
  strm_value* sps;
  strm_int n = 0;
  strm_int i = 0;

  strm_get_args(strm, argc, args, "s", &str, &slen);
  end = str + slen;

  /* pass 1: count characters */
  s = str;
  while (s < end && *s) {
    strm_int step = utf8len(s, end);

    /* never step outside the buffer, and always make progress */
    if (step < 1) step = 1;
    if (step > end - s) step = (strm_int)(end - s);
    s += step;
    n++;
  }

  ary = strm_ary_new(NULL, n);
  sps = strm_ary_ptr(ary);

  /* pass 2: same walk, emitting one string per character */
  s = str;
  while (s < end && *s) {
    const char* prev = s;
    strm_int step = utf8len(s, end);

    if (step < 1) step = 1;
    if (step > end - s) step = (strm_int)(end - s);
    s += step;
    sps[i++] = strm_str_new(prev, s - prev);
  }

  *ret = strm_ary_value(ary);
  return STRM_OK;
}
/*
 * Count the encoded characters in the first `len` bytes of `s`.
 *
 * Each step asks utf8len() for the width implied by the byte at the current
 * position and skips that many bytes.  A negative width trips the assertion.
 */
int64_t utf8strlen(const char * const s, const int64_t len)
{
    int64_t count = 0;
    int64_t pos;

    for (pos = 0; pos < len; count++) {
        assert(utf8len(s[pos]) >= 0);
        pos += utf8len(s[pos]);
    }
    return count;
}
/*
 * Reverse `str` in place, character by character, and return it.
 *
 * Strings of at most one character are returned untouched.  Otherwise the
 * bytes are copied to a scratch buffer and written back from the tail of the
 * string towards its head, one character at a time.
 *
 * The width reported by utf8len() is clamped to the bytes that remain in the
 * scratch buffer.  Without the clamp, a width larger than the remainder
 * (e.g. a truncated trailing sequence) moved the write cursor below the start
 * of the string and memcpy() wrote out of bounds.
 */
static mrb_value
mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
  mrb_int utf8_len = mrb_utf8_strlen(str, -1);

  if (utf8_len > 1) {
    mrb_int len;
    char *buf;
    unsigned char *p, *e, *r;

    mrb_str_modify(mrb, mrb_str_ptr(str));
    len = RSTRING_LEN(str);
    buf = (char *)mrb_malloc(mrb, (size_t)len);
    p = (unsigned char*)buf;
    e = (unsigned char*)buf + len;

    memcpy(buf, RSTRING_PTR(str), len);
    r = (unsigned char*)RSTRING_PTR(str) + len;

    while (p < e) {
      mrb_int clen = utf8len(p);

      /* always make progress and never consume more than what is left */
      if (clen < 1) clen = 1;
      if (clen > e - p) clen = (mrb_int)(e - p);

      r -= clen;
      memcpy(r, p, (size_t)clen);
      p += clen;
    }
    mrb_free(mrb, buf);
  }

  return str;
}
// Seeks from the beginning of the fixture text (SEEK_SET) by one less than
// the value utf8len() reports, and checks where the returned pointer lands.
PERF_TEST_F(GreekSeeking, Begin)
{
	const char* s = m_contents.c_str();
	const off_t offset = (off_t)utf8len(s) - 1;

	const char* n = utf8seek(s, m_contents.length(), s, offset, SEEK_SET);

	PERF_ASSERT(n == s + m_contents.length() - 1);
}
// Seeks forwards relative to the current position (SEEK_CUR), starting at the
// first byte of the fixture text, by one less than the value utf8len() reports.
PERF_TEST_F(GreekSeeking, CurrentForwards)
{
	const char* s = m_contents.c_str();
	const off_t offset = (off_t)utf8len(s) - 1;

	const char* n = utf8seek(s, m_contents.length(), s, offset, SEEK_CUR);

	PERF_ASSERT(n == s + m_contents.length() - 1);
}
/*
 * Decode the NUL-terminated UTF-8 string `s` into a freshly allocated,
 * zero-terminated array of code points.
 *
 * The array holds utf8len(s) + 1 ints and comes from memman_alloc_4k().
 * Returns a null pointer when the allocation fails.
 *
 * Changes over the previous version:
 *  - continuation bytes are never read past the terminating NUL (a truncated
 *    sequence ends the code point early instead of running off the string);
 *  - 4-byte sequences (lead byte 11110xxx) are decoded instead of being
 *    treated as 3-byte ones;
 *  - output is bounded by the allocated capacity;
 *  - the allocation result is checked.
 */
int* utf8str(const char* s)
{
    const unsigned char *utf8 = (const unsigned char*)s;
    memman_t* memman = (memman_t*)MEMMAN_ADDR;
    int capacity = (int)utf8len(s);
    int* unicode = (int*)memman_alloc_4k(memman, (capacity + 1) * sizeof(int));
    int index = 0;
    int out_index = 0;

    if (!unicode) {
        return 0;
    }

    while (utf8[index] != 0 && out_index < capacity) {
        unsigned char c = utf8[index++];
        int codepoint;
        int trail;      /* continuation bytes still expected */

        if ((c & 0x80) == 0) {          /* 0xxxxxxx */
            codepoint = c;
            trail = 0;
        } else if ((c & 0xf8) == 0xf0) { /* 11110xxx */
            codepoint = c & 0x07;
            trail = 3;
        } else if ((c & 0xe0) == 0xe0) { /* 1110xxxx */
            codepoint = c & 0x0f;
            trail = 2;
        } else {                         /* 110xxxxx, or a stray byte */
            codepoint = c & 0x3f;
            trail = 1;
        }

        /* fold in the payload bits, stopping at the terminator */
        while (trail > 0 && utf8[index] != 0) {
            codepoint = (codepoint << 6) | (utf8[index++] & 0x3f);
            trail--;
        }

        unicode[out_index++] = codepoint;
    }

    unicode[out_index] = 0;
    return unicode;
}
/*
 * Check that the byte range `v` is well-formed for the configured encoding.
 *
 * With K_USING_UTF8: walks the range; each lead byte must announce a width of
 * 1..4 (per utf8len), the whole sequence must fit inside the range, and every
 * following byte must satisfy utf8_isTrail().  Returns 1 when the walk ends
 * exactly at the end of the range, 0 otherwise.
 * Without K_USING_UTF8: always returns 1.
 *
 * The "fits inside the range" check is new: previously the trail bytes of a
 * truncated final sequence were read from beyond the end of the buffer.
 */
kbool_t knh_bytes_checkENCODING(kbytes_t v)
{
#ifdef K_USING_UTF8
	const unsigned char *s = v.utext;
	const unsigned char *e = s + v.len;

	while (s < e) {
		size_t ulen = utf8len(s[0]);
		size_t k;

		/* widths 0, 5, 6 and anything else are rejected */
		if (ulen < 1 || ulen > 4) {
			return 0;
		}
		/* sequence is cut off by the end of the buffer */
		if ((size_t)(e - s) < ulen) {
			return 0;
		}
		for (k = 1; k < ulen; k++) {
			if (!utf8_isTrail(s[k])) {
				return 0;
			}
		}
		s += ulen;
	}
	return (s == e);
#else
	return 1;
#endif
}
/*
 * Return a new string made of `len` characters of `str`, starting at
 * character index `beg`.
 *
 * Both the skip and the span stop at the end of the source bytes.  Each step
 * is clamped to the bytes that remain, so a width from utf8len() that is
 * larger than the remainder can no longer push the cursors past the end and
 * make the new string cover memory outside `str`.
 */
static mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  mrb_int i;
  unsigned char *p = (unsigned char*) RSTRING_PTR(str), *t;
  unsigned char *e = p + RSTRING_LEN(str);

  /* skip the first `beg` characters */
  for (i = 0; i < beg && p < e; i++) {
    mrb_int step = utf8len(p);

    if (step > e - p) step = (mrb_int)(e - p);
    p += step;
  }

  /* span the next `len` characters */
  t = p;
  for (i = 0; i < len && t < e; i++) {
    mrb_int step = utf8len(t);

    if (step > e - t) step = (mrb_int)(e - t);
    t += step;
  }

  return mrb_str_new(mrb, (const char*)p, (size_t)(t - p));
}
// Convert a UTF-8 std::string to a std::wstring with MultiByteToWideChar.
//
// The destination size is obtained from MultiByteToWideChar itself (a first
// call with a zero-sized output buffer returns the required number of
// wchar_t).  The previous code sized the buffer by code-point count but told
// the API it could hold str.length() characters, which overflows whenever the
// byte count exceeds the code-point count, and is too small for code points
// that need a surrogate pair.  It also wrote through c_str().
//
// Returns an empty string for empty input or when the conversion fails.
std::wstring Str2WStr(const std::string &str)
{
	std::wstring dest;

	// A zero source length is an error for MultiByteToWideChar.
	if (str.empty()) {
		return dest;
	}

	const int srcLen = (int)str.length();
	const int needed = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), srcLen, NULL, 0);
	if (needed <= 0) {
		return dest;
	}

	dest.resize(needed);
	const int written = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), srcLen, &dest[0], needed);
	dest.resize(written > 0 ? written : 0);

	return dest;
}
/*
 * Read all of standard input into memory, then print its utf8len() and two
 * one-character slices ([2:3] and [3:4]) computed by utf8slice().
 *
 * Fixes over the previous version:
 *  - the buffer is NUL-terminated before being handed to utf8len();
 *  - realloc() failure is detected instead of dereferencing NULL;
 *  - the "%.*s" precision is passed as int, as printf requires;
 *  - the buffer is released before returning.
 */
int main(int argc, char **argv)
{
	char *p = NULL;
	size_t len = 0;

	(void)argc;
	(void)argv;

	while (true) {
		/* always keep 0x10000 spare bytes past the data read so far */
		char *grown = realloc(p, len + 0x10000);
		if (grown == NULL) {
			perror("realloc");
			abort();
		}
		p = grown;

		ssize_t cnt = read(STDIN_FILENO, p + len, 0x10000);
		if (cnt == -1) {
			perror("read");
			abort();
		} else if (cnt == 0) {
			break;
		} else {
			len += cnt;
		}
	}

	/* The last read returned 0, so the spare area starts at p[len]. */
	p[len] = '\0';

	printf("utf8len=%zu\n", utf8len(p));

	ssize_t start = 2, end = 3;
	utf8slice(p, &start, &end);
	printf("utf8slice[2:3]=%.*s\n", (int)(end - start), p + start);

	start = 3;
	end = 4;
	utf8slice(p, &start, &end);
	printf("utf8slice[3:4]=%.*s\n", (int)(end - start), p + start);

	free(p);
	return 0;
}
/*
 * Draw `msg` into the GDI memory DC owned by the font object.
 *
 * video_info  frame dimensions and default message position
 * data        the gdi_raster_t font object; nothing is drawn if it is NULL
 *             or has no gdi context
 * msg         text to draw; empty text is ignored
 * userdata    optional struct font_params (position, scale, alignment);
 *             when NULL the position from video_info, scale 1.0 and left
 *             alignment are used
 *
 * An unrecognised alignment value is now treated as left alignment;
 * previously newX was left uninitialised on that path and then passed to
 * TextOut().
 */
static void gdi_render_msg(
      video_frame_info_t *video_info,
      void *data, const char *msg,
      const void *userdata)
{
   float x, y, scale;
   gdi_raster_t *font = (gdi_raster_t*)data;
   unsigned newX, newY, len;
   unsigned align;
   const struct font_params *params = (const struct font_params*)userdata;
   unsigned width = video_info->width;
   unsigned height = video_info->height;

   if (!font || string_is_empty(msg))
      return;

   if (params)
   {
      x = params->x;
      y = params->y;
      scale = params->scale;
      align = params->text_align;
   }
   else
   {
      x = video_info->font_msg_pos_x;
      y = video_info->font_msg_pos_y;
      scale = 1.0f;
      align = TEXT_ALIGN_LEFT;
   }

   if (!font->gdi)
      return;

   /* NOTE(review): len comes from utf8len() and is also used as the
    * character count for TextOut() below - confirm that is the unit the
    * TextOut variant in use expects. */
   len = utf8len(msg);

   switch (align)
   {
      case TEXT_ALIGN_RIGHT:
         newX = (x * width * scale) - len;
         break;
      case TEXT_ALIGN_CENTER:
         newX = (x * width * scale) - (len / 2);
         break;
      case TEXT_ALIGN_LEFT:
      default:
         newX = x * width * scale;
         break;
   }

   /* y is measured from the bottom of the frame */
   newY = height - (y * height * scale);

   font->gdi->bmp_old = (HBITMAP)SelectObject(font->gdi->memDC, font->gdi->bmp);
   SetBkMode(font->gdi->memDC, TRANSPARENT);
   SetTextColor(font->gdi->memDC, RGB(255,255,255));
   TextOut(font->gdi->memDC, newX, newY, msg, len);
   SelectObject(font->gdi->memDC, font->gdi->bmp_old);
}
// Seeks relative to the end of the fixture text (SEEK_END), passing the end
// pointer as the current position and one less than the value utf8len()
// reports as the offset.
PERF_TEST_F(GreekSeeking, End)
{
	const char* s = m_contents.c_str();
	const char* e = s + m_contents.length();
	const off_t offset = (off_t)utf8len(s) - 1;

	const char* n = utf8seek(e, m_contents.length(), s, offset, SEEK_END);

	PERF_ASSERT(n == s + 1);
}
// Seeks backwards relative to the current position (SEEK_CUR), starting at
// the end of the fixture text, using a negative offset derived from utf8len().
PERF_TEST_F(GreekSeeking, CurrentBackwards)
{
	const char* s = m_contents.c_str();
	const char* e = s + m_contents.length();
	const off_t offset = -(off_t)utf8len(s) + 1;

	const char* n = utf8seek(e, m_contents.length(), s, offset, SEEK_CUR);

	PERF_ASSERT(n == s + 1);
}
/*
 * Returns the number of UTF-8 characters in the NUL-terminated string `str`.
 *
 * The walk is bounded by strlen(str) alone.  The previous condition
 * dereferenced str[i] before checking i against the length, so a step from
 * utf8len() that overshot the terminator caused an out-of-bounds read; for
 * every i below strlen(str) the byte is non-NUL anyway, so that test was
 * redundant.
 */
int utf8strlen(char *str)
{
    int i, j, len;

    len = strlen(str);
    for (i = 0, j = 0; i < len; j++) {
        i = i + utf8len(str + i);
    }
    return j;
}
/*
 * Iterator step for strings: returns the character at the iterator's current
 * position as a new string and advances the position by that character's
 * byte width (as reported by utf8len for its first byte).
 *
 * A one-byte character is created with the ASCII string policy, anything
 * wider with the UTF8 policy.
 */
static void String_setNextResult(KonohaContext *kctx, KonohaStack* sfp)
{
	kIterator *itr = sfp[0].asIterator;
	kString *source = (kString *)itr->source;
	const char *cursor = S_text(source) + itr->current_pos;
	size_t width = utf8len(cursor[0]);
	int policy = StringPolicy_UTF8;

	if (width == 1) {
		policy = StringPolicy_ASCII;
	}
	itr->current_pos += width;
	KReturn(KLIB new_kString(kctx, OnStack, cursor, width, policy));
}
void add_string_pair(char *in, char *out) { int *int_in, *int_out; int i, j; char *token; struct stringpair *newpair; /* Get int array */ int_in = malloc(sizeof(int) * (utf8strlen(in) + 1)); int_out = malloc(sizeof(int) * (utf8strlen(out) + 1)); if (g_input_format == INPUT_FORMAT_L2P) { for (i = 0, j = 0; in[i] != '\0'; i += utf8len(&in[i]), j++) { int_in[j] = get_set_char_num(strndup(&in[i], utf8len(&in[i]))); } int_in[j] = -1; for (i = 0, j = 0; out[i] != '\0'; i += utf8len(&out[i]), j++) { int_out[j] = get_set_char_num(strndup(&out[i], utf8len(&out[i]))); } int_out[j] = -1; } else if (g_input_format == INPUT_FORMAT_NEWS) { token = strtok(in, " "); for (j = 0; token != NULL; j++) { int_in[j] = get_set_char_num(token); token = strtok(NULL, " "); } int_in[j] = -1; token = strtok(out, " "); for (j = 0; token != NULL; j++) { int_out[j] = get_set_char_num(token); token = strtok(NULL, " "); } int_out[j] = -1; } newpair = malloc(sizeof(struct stringpair)); newpair->in = int_in; newpair->out = int_out; newpair->next = NULL; if (g_stringpairs == NULL) { g_stringpairs = newpair; g_stringpairs_tail = newpair; } else { g_stringpairs_tail->next = newpair; g_stringpairs_tail = newpair; } }
// Returns the length of the string: strlen() of the buffer when built with
// SCRATCH_NO_UTF8, otherwise the value utf8len() reports for it.
// The call is made under MUTEX_LOCK(str_mutex).
int String::Length() const
{
	MUTEX_LOCK(str_mutex);

#ifdef SCRATCH_NO_UTF8
	const int count = (int)strlen(this->str_szBuffer);
#else
	const int count = (int)utf8len(this->str_szBuffer);
#endif
	return count;
}
/*
 * Return the sub-range of `v` that starts `moff` characters in and spans
 * `mlen` characters.
 *
 * With K_USING_UTF8 the offsets are counted in characters using utf8len() on
 * each lead byte.  Both walks now stop at the end of the range and each step
 * is clamped to the bytes that remain, so an offset or length larger than the
 * text yields a shortened (possibly empty) range instead of pointers beyond
 * the buffer.
 *
 * Without K_USING_UTF8 the request is forwarded to knh_bytes_subbytes().  The
 * first argument there used to be the undeclared name `m`; it is `v`.
 */
kbytes_t knh_bytes_mofflen(kbytes_t v, size_t moff, size_t mlen)
{
#ifdef K_USING_UTF8
	size_t i;
	const unsigned char *s = v.utext;
	const unsigned char *e = s + v.len;

	/* skip `moff` characters */
	for(i = 0; i < moff && s < e; i++) {
		size_t ulen = utf8len(s[0]);
		if(ulen > (size_t)(e - s)) ulen = (size_t)(e - s);
		s += ulen;
	}
	v.ubuf = (kchar_t*)s;

	/* span `mlen` characters */
	for(i = 0; i < mlen && s < e; i++) {
		size_t ulen = utf8len(s[0]);
		if(ulen > (size_t)(e - s)) ulen = (size_t)(e - s);
		s += ulen;
	}
	KNH_ASSERT(s <= e);
	v.len = (const char*)s - v.text;
	return v;
#else
	return knh_bytes_subbytes(v, moff, mlen); /* if K_ENCODING is not set */
#endif
}
/*
 * Count the characters in `str` by hopping from one character to the next
 * (utf8len gives the width at the cursor) until the end of its bytes.
 */
static mrb_int
mrb_utf8_strlen(mrb_value str)
{
  unsigned char *cursor = (unsigned char*) RSTRING_PTR(str);
  unsigned char *const limit = cursor + RSTRING_LEN(str);
  mrb_int count;

  for (count = 0; cursor < limit; count++) {
    cursor += utf8len(cursor);
  }
  return count;
}
/*
 * Count the characters in the `len` bytes starting at `text`.
 * Each step skips the width that utf8len() reports for the current lead byte.
 */
static size_t utf8_strlen(const char *text, size_t len)
{
	const unsigned char *cursor = (const unsigned char *)text;
	const unsigned char *const limit = cursor + len;
	size_t count;

	for (count = 0; cursor < limit; count++) {
		cursor += utf8len(cursor[0]);
	}
	return count;
}
/** * Creates a new filter. */ static void filter_new(void) { struct filter *f = filter_create(); if (f) { DoMethod(filter_list, MUIM_NList_InsertSingle, (ULONG)f, MUIV_NList_Insert_Bottom); filter_dispose(f); set(filter_list, MUIA_NList_Active, MUIV_NList_Active_Bottom); if (filter_last_selected) set(filter_name_string, MUIA_BetterString_SelectSize, -utf8len(filter_last_selected->name)); set(filter_wnd, MUIA_Window_ActiveObject, filter_name_string); } }
/*
 * Look up `utfstring` in the global symbol table and return its number.
 *
 * Entries 1..g_maxsymbol are compared with strcmp().  When the symbol is not
 * present, g_maxsymbol is incremented, a strdup() copy of the string is
 * stored in the new slot, and that slot's number is returned.
 */
int get_set_char_num(char *utfstring)
{
    int idx = 1;

    debug("Finding symbol %s with len %i... ", utfstring, utf8len(utfstring));

    while (idx <= g_maxsymbol) {
        if (!strcmp(utfstring, g_symboltable[idx])) {
            debug("Found at %i\n", idx);
            return idx;
        }
        idx++;
    }

    /* unknown symbol: claim the next free slot */
    g_maxsymbol++;
    debug("Not found, adding at %i\n", g_maxsymbol);
    g_symboltable[g_maxsymbol] = strdup(utfstring);
    return g_maxsymbol;
}
/*
 * Serialise a load option into `buf`.
 *
 * Layout written, in order: attributes (uint32_t), device-path size
 * (uint16_t), the description converted by utf8_to_ucs2(), the device path
 * bytes, then the optional data.
 *
 * Returns the number of bytes the option occupies.  When `size` is 0 nothing
 * is written and only that size is returned.  On error returns -1 with errno
 * set: EINVAL for a missing description, buffer or device path, a negative
 * device-path size, or optional_data_size without optional_data; ENOSPC when
 * `size` is too small.
 *
 * Changes over the previous version: a NULL `buf`, a NULL `dp` with a
 * non-zero size, and a negative `dp_size` are rejected instead of reaching
 * memcpy(); the two header fields are stored with memcpy() rather than
 * through casted pointers, which keeps the same native-endian bytes without
 * relying on the alignment of `buf`.
 */
efi_loadopt_create(uint8_t *buf, ssize_t size, uint32_t attributes, efidp dp, ssize_t dp_size, unsigned char *description, uint8_t *optional_data, size_t optional_data_size)
{
	if (!description) {
		errno = EINVAL;
		return -1;
	}

	/* two bytes per character plus a two-byte terminator */
	ssize_t desc_len = utf8len((uint8_t *)description, 1024) * 2 + 2;
	ssize_t sz = sizeof (attributes)
		     + sizeof (uint16_t)
		     + desc_len
		     + dp_size
		     + optional_data_size;

	/* size query */
	if (size == 0)
		return sz;
	if (size < sz) {
		errno = ENOSPC;
		return -1;
	}

	if (!buf) {
		errno = EINVAL;
		return -1;
	}

	if (!optional_data && optional_data_size != 0) {
		errno = EINVAL;
		return -1;
	}

	if (!dp || dp_size < 0) {
		errno = EINVAL;
		return -1;
	}

	uint8_t *pos = buf;
	uint16_t dp_len = (uint16_t)dp_size;

	memcpy(pos, &attributes, sizeof (attributes));
	pos += sizeof (attributes);

	memcpy(pos, &dp_len, sizeof (dp_len));
	pos += sizeof (dp_len);

	utf8_to_ucs2((uint16_t *)pos, desc_len, 1, (uint8_t *)description);
	pos += desc_len;

	memcpy(pos, dp, dp_size);
	pos += dp_size;

	if (optional_data && optional_data_size > 0)
		memcpy(pos, optional_data, optional_data_size);

	return sz;
}
/*
 * Build a media/file device-path node for `filepath` in `buf`.
 *
 * The required size is the node structure plus one uint16_t per character of
 * the path (utf8len) and one more for the terminator.  The header is produced
 * by efidp_make_generic(); when a buffer was supplied (size != 0) and that
 * call reports exactly the required size, everything after the first 4 bytes
 * is zeroed and the path is converted into file->name by utf8_to_ucs2().
 *
 * Returns whatever efidp_make_generic() returned.
 */
efidp_make_file(uint8_t *buf, ssize_t size, char *filepath)
{
	efidp_file *file = (efidp_file *)buf;
	unsigned char *lf = (unsigned char *)filepath;
	ssize_t len = utf8len(lf, -1) + 1;
	ssize_t req = sizeof (*file) + len * sizeof (uint16_t);
	ssize_t sz;

	sz = efidp_make_generic(buf, size, EFIDP_MEDIA_TYPE, EFIDP_MEDIA_FILE,
				req);
	if (!size || sz != req)
		return sz;

	/* presumably 4 is the size of the generic node header - confirm */
	memset(buf+4, 0, req-4);
	utf8_to_ucs2(file->name, req-4, 1, lf);
	return sz;
}
/*
 * Length of `v` in characters.
 *
 * With K_USING_UTF8 the range is walked lead byte by lead byte, skipping the
 * width utf8len() reports each time.  Otherwise the byte length is returned.
 */
size_t knh_bytes_mlen(kbytes_t v)
{
#ifdef K_USING_UTF8
	const unsigned char *cursor = v.utext;
	const unsigned char *const limit = cursor + v.len;
	size_t count;

	for (count = 0; cursor < limit; count++) {
		cursor += utf8len(cursor[0]);
	}
	return count;
#else
	return v.len;
#endif
}
// Returns the part of the string that starts at character index iStart
// (zero-based) and is at most iLen characters long.
//
// A negative iStart, a non-positive iLen, or an iStart beyond the end of the
// string gives an empty string.  If iLen reaches past the end, everything
// from iStart onwards is returned.
//
// Fixes over the previous version:
//  - in the UTF-8 build the first code point was always skipped, so
//    iStart == 0 behaved like iStart == 1;
//  - running into the terminator while skipping left the start pointer past
//    the end of the buffer; that case now returns an empty string;
//  - the SCRATCH_NO_UTF8 build checks iStart against the buffer length.
String String::SubString(int iStart, int iLen) const
{
  MUTEX_LOCK(str_mutex);

  // Empty strings
  if (iStart < 0 || iLen <= 0) {
    return "";
  }

  // Get the first offset
#ifdef SCRATCH_NO_UTF8
  if ((size_t)iStart > strlen(this->str_szBuffer)) {
    return "";
  }
  String strRet(this->str_szBuffer + iStart);
#else
  void* subFirst = this->str_szBuffer;
  s_char codepoint;
  while (iStart > 0) {
    subFirst = utf8codepoint(subFirst, &codepoint);
    if (codepoint == '\0') {
      // The terminator was consumed: the start lies beyond the text.
      return "";
    }
    iStart--;
  }
  String strRet((const char*)subFirst);
#endif

  // Requested length reaches past the end: return the whole tail
#ifdef SCRATCH_NO_UTF8
  if ((uint32_t)iLen > strlen(strRet)) {
    return strRet;
  }
#else
  if ((uint32_t)iLen > utf8len(strRet)) {
    return strRet;
  }
#endif

  // Then set the null terminator at the length the user wants
#ifdef SCRATCH_NO_UTF8
  strRet.str_szBuffer[iLen] = '\0';
#else
  // iLen >= 1 here, so this advances exactly iLen code points.
  void* sz = strRet.str_szBuffer;
  do {
    sz = utf8codepoint(sz, &codepoint);
  } while (--iLen > 0 && codepoint != '\0');
  *(char*)sz = '\0';
#endif

  // Return
  return strRet;
}
/*
 * utf8 -> ucs4: decode the single character that starts at `utf8`.
 *
 * With K_USING_UTF8:
 *   - a singleton (ASCII) byte is returned as is;
 *   - otherwise the sequence length comes from utf8len() on the lead byte and
 *     the payload bits of the lead and continuation bytes are combined:
 *       2 bytes: 110xxxxx 10xxxxxx
 *       3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
 *       4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   - any other length yields 0.
 * Without K_USING_UTF8 the first byte is returned.
 *
 * Two-byte and four-byte sequences were previously not decoded (the 2-byte
 * case was a TODO() and the 4-byte case fell into the default branch).
 * The caller must supply as many bytes as the lead byte announces.
 */
kint_t kchar_toucs4(kutext_t *utf8) /* utf8 -> ucs4 */
{
#if defined(K_USING_UTF8)
	kint_t ucs4 = 0;

	if (utf8_isSingleton(utf8[0])) {
		/* ASCII, let it go... */
		ucs4 = utf8[0];
		return ucs4;
	}

	switch (utf8len(utf8[0])) {
	case 2:
		ucs4 = ((kint_t)(utf8[0] & 0x1F) << 6)
		     |  (kint_t)(utf8[1] & 0x3F);
		break;
	case 3:
		ucs4 = ((kint_t)(utf8[0] & 0x0F) << 12)
		     | ((kint_t)(utf8[1] & 0x3F) << 6)
		     |  (kint_t)(utf8[2] & 0x3F);
		break;
	case 4:
		ucs4 = ((kint_t)(utf8[0] & 0x07) << 18)
		     | ((kint_t)(utf8[1] & 0x3F) << 12)
		     | ((kint_t)(utf8[2] & 0x3F) << 6)
		     |  (kint_t)(utf8[3] & 0x3F);
		break;
	default:
		/* not a valid lead byte: leave 0 */
		break;
	}
	return ucs4;
#else
	return (kint_t)utf8[0];
#endif
}
// Draws aLength bytes of aString at (aX, aY) with the current colour and font.
//
// Without aSpacing the whole run is drawn in one call at the transformed
// position.  With aSpacing the text is drawn one character at a time
// (utf8len gives each character's byte length); after each character the
// pen moves right by the next aSpacing entry.  Drawing stops early if
// utf8len reports a non-positive length.
//
// Always returns NS_OK.
NS_IMETHODIMP nsRenderingContextPh::DrawString(const char *aString, PRUint32 aLength, nscoord aX, nscoord aY, const nscoord* aSpacing)
{
  if ( aLength == 0 )
    return NS_OK;

  UpdateGC();

  PgSetTextColorCx( mGC, mCurrentColor );
  PgSetFontCx( mGC, mPhotonFontName );
  PgSetExtendedTextFlagsCx( mGC, Pg_TEXT_SIMPLE_METRICS );

  if( aSpacing ) {
    const char* const end = aString + aLength;
    nscoord penX = aX;

    while( aString < end ) {
      const int charlen = utf8len( aString, aLength );
      if( charlen <= 0 )
        break;

      // transform a copy so the untransformed pen position is kept
      nscoord xx = penX;
      nscoord yy = aY;
      mTranMatrix->TransformCoord( &xx, &yy );
      PhPoint_t pos = { xx, yy };
      PgDrawTextCx( mSurfaceDC, aString, charlen, &pos, Pg_TEXT_LEFT);

      aString += charlen;
      aLength -= charlen;
      penX += *aSpacing++;
    }
  }
  else {
    mTranMatrix->TransformCoord( &aX, &aY );
    PhPoint_t pos = { aX, aY };
    PgDrawTextCharsCx( mSurfaceDC, aString, aLength, &pos, Pg_TEXT_LEFT);
  }

  PgSetExtendedTextFlagsCx( mGC, 0 );

  return NS_OK;
}
/*
 * Convert the argument string `utf8` into `buf` via utf8_to_ucs2().
 *
 * The required size is utf8len(utf8, -1) characters of sizeof(uint16_t) each.
 * With size == 0 only that size is returned.  Otherwise returns the result of
 * utf8_to_ucs2(), or -1 with errno set: EINVAL when `utf8` is NULL or `buf`
 * is NULL while size > 0, ENOSPC when `size` is smaller than required.
 */
efi_loadopt_args_as_ucs2(uint16_t *buf, ssize_t size, uint8_t *utf8)
{
	ssize_t needed;

	if (!utf8) {
		errno = EINVAL;
		return -1;
	}
	if (size > 0 && !buf) {
		errno = EINVAL;
		return -1;
	}

	needed = utf8len(utf8, -1) * sizeof(uint16_t);

	/* size query */
	if (size == 0)
		return needed;

	if (needed > size) {
		errno = ENOSPC;
		return -1;
	}

	return utf8_to_ucs2(buf, size, 0, utf8);
}
/*
 * Split `str` and append the pieces to `resultArray`.
 *
 * With a non-null regex whose pattern is non-empty: the text is matched
 * repeatedly from the current position; for each match with a non-zero end
 * offset the text before the match is appended and the position moves past
 * the match.  Splitting stops once array size + 1 reaches `limit`; when the
 * loop ends because nothing more matches, the remaining text is appended.
 *
 * Otherwise the string is split into single characters: byte by byte for
 * ASCII strings, character by character (utf8len on the lead byte) for the
 * rest.
 *
 * Fix: the per-character loop used to add the character width to the index
 * and then let the for-loop increment it again, skipping one byte after every
 * character.  The index now advances by exactly one character, a width of 0
 * is treated as 1 so the loop always progresses, and the slice is clamped to
 * the end of the string.
 */
static void kArray_split(KonohaContext *kctx, kArray *resultArray, kString *str, kRegExp *regex, size_t limit)
{
	int stringPolicy = kString_is(ASCII, str) ? StringPolicy_ASCII : 0;
	if(IS_NOTNULL(regex) && S_size(regex->pattern) > 0) {
		const char *s = S_text(str);  // necessary
		const char *eos = s + S_size(str);
		kregmatch_t pmatch[2];
		int res = 0;
		while(s < eos && res == 0) {
			res = pcre_regexec(kctx, regex->reg, s, 1, pmatch, regex->eflags);
			if(res != 0) break;
			size_t len = pmatch[0].rm_eo;
			/* NOTE(review): when rm_eo is 0 neither `s` nor the array
			 * changes, so this loop only ends through the limit check
			 * below - confirm empty matches cannot occur here. */
			if(len > 0) {
				KLIB new_kString(kctx, resultArray, s, pmatch[0].rm_so, stringPolicy);
				s += len;
			}
			if(!(kArray_size(resultArray) + 1 < limit)) {
				return;
			}
		}
		if(s < eos) {
			KLIB new_kString(kctx, resultArray, s, eos - s, stringPolicy); // append remaining string to array
		}
	}
	else {
		const unsigned char *s = (const unsigned char *)S_text(str);
		size_t i, n = S_size(str);
		if(kString_is(ASCII, str)) {
			for(i = 0; i < n; i++) {
				KLIB new_kString(kctx, resultArray, (const char *)s + i, 1, StringPolicy_ASCII);
			}
		}
		else {
			i = 0;
			while(i < n) {
				size_t len = utf8len(s[i]);
				int policy;
				if(len < 1) len = 1;          /* always make progress */
				if(len > n - i) len = n - i;  /* stay inside the string */
				/* only a lone 7-bit byte is ASCII */
				policy = (len == 1 && s[i] < 0x80) ? StringPolicy_ASCII : StringPolicy_UTF8;
				KLIB new_kString(kctx, resultArray, (const char *)s + i, len, policy);
				i += len;
			}
		}
	}
}